wandb 0.22.1__py3-none-win_arm64.whl → 0.22.3__py3-none-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wandb/__init__.py +1 -1
- wandb/__init__.pyi +7 -4
- wandb/_pydantic/__init__.py +8 -1
- wandb/_pydantic/base.py +54 -18
- wandb/_pydantic/field_types.py +8 -3
- wandb/_pydantic/pagination.py +46 -0
- wandb/_pydantic/utils.py +2 -2
- wandb/apis/public/api.py +24 -19
- wandb/apis/public/artifacts.py +259 -270
- wandb/apis/public/registries/_utils.py +40 -54
- wandb/apis/public/registries/registries_search.py +70 -85
- wandb/apis/public/registries/registry.py +173 -156
- wandb/apis/public/runs.py +27 -6
- wandb/apis/public/utils.py +43 -20
- wandb/automations/_generated/create_automation.py +2 -2
- wandb/automations/_generated/create_generic_webhook_integration.py +4 -4
- wandb/automations/_generated/delete_automation.py +2 -2
- wandb/automations/_generated/fragments.py +31 -52
- wandb/automations/_generated/generic_webhook_integrations_by_entity.py +3 -3
- wandb/automations/_generated/get_automations.py +3 -3
- wandb/automations/_generated/get_automations_by_entity.py +3 -3
- wandb/automations/_generated/input_types.py +9 -9
- wandb/automations/_generated/integrations_by_entity.py +3 -3
- wandb/automations/_generated/operations.py +6 -6
- wandb/automations/_generated/slack_integrations_by_entity.py +3 -3
- wandb/automations/_generated/update_automation.py +2 -2
- wandb/automations/_utils.py +3 -3
- wandb/automations/actions.py +3 -3
- wandb/automations/automations.py +6 -5
- wandb/bin/gpu_stats.exe +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/beta.py +23 -3
- wandb/cli/beta_leet.py +75 -0
- wandb/cli/beta_sync.py +1 -1
- wandb/cli/cli.py +34 -7
- wandb/errors/term.py +8 -8
- wandb/jupyter.py +0 -51
- wandb/old/settings.py +6 -6
- wandb/proto/v3/wandb_api_pb2.py +86 -0
- wandb/proto/v3/wandb_server_pb2.py +38 -37
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_sync_pb2.py +19 -6
- wandb/proto/v4/wandb_api_pb2.py +37 -0
- wandb/proto/v4/wandb_server_pb2.py +38 -37
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_sync_pb2.py +10 -6
- wandb/proto/v5/wandb_api_pb2.py +38 -0
- wandb/proto/v5/wandb_server_pb2.py +38 -37
- wandb/proto/v5/wandb_settings_pb2.py +2 -2
- wandb/proto/v5/wandb_sync_pb2.py +10 -6
- wandb/proto/v6/wandb_api_pb2.py +48 -0
- wandb/proto/v6/wandb_server_pb2.py +38 -37
- wandb/proto/v6/wandb_settings_pb2.py +2 -2
- wandb/proto/v6/wandb_sync_pb2.py +10 -6
- wandb/proto/wandb_api_pb2.py +18 -0
- wandb/proto/wandb_generate_proto.py +1 -0
- wandb/sdk/artifacts/_generated/__init__.py +96 -40
- wandb/sdk/artifacts/_generated/add_aliases.py +3 -3
- wandb/sdk/artifacts/_generated/add_artifact_collection_tags.py +26 -0
- wandb/sdk/artifacts/_generated/artifact_by_id.py +2 -2
- wandb/sdk/artifacts/_generated/artifact_by_name.py +3 -3
- wandb/sdk/artifacts/_generated/artifact_collection_membership_file_urls.py +27 -8
- wandb/sdk/artifacts/_generated/artifact_collection_membership_files.py +27 -8
- wandb/sdk/artifacts/_generated/artifact_created_by.py +7 -20
- wandb/sdk/artifacts/_generated/artifact_file_urls.py +19 -6
- wandb/sdk/artifacts/_generated/artifact_membership_by_name.py +26 -0
- wandb/sdk/artifacts/_generated/artifact_type.py +5 -5
- wandb/sdk/artifacts/_generated/artifact_used_by.py +8 -17
- wandb/sdk/artifacts/_generated/artifact_version_files.py +19 -8
- wandb/sdk/artifacts/_generated/delete_aliases.py +3 -3
- wandb/sdk/artifacts/_generated/delete_artifact.py +4 -4
- wandb/sdk/artifacts/_generated/delete_artifact_collection_tags.py +23 -0
- wandb/sdk/artifacts/_generated/delete_artifact_portfolio.py +4 -4
- wandb/sdk/artifacts/_generated/delete_artifact_sequence.py +4 -4
- wandb/sdk/artifacts/_generated/delete_registry.py +21 -0
- wandb/sdk/artifacts/_generated/fetch_artifact_manifest.py +8 -20
- wandb/sdk/artifacts/_generated/fetch_linked_artifacts.py +13 -35
- wandb/sdk/artifacts/_generated/fetch_org_info_from_entity.py +28 -0
- wandb/sdk/artifacts/_generated/fetch_registries.py +18 -8
- wandb/sdk/{projects → artifacts}/_generated/fetch_registry.py +4 -4
- wandb/sdk/artifacts/_generated/fragments.py +183 -333
- wandb/sdk/artifacts/_generated/input_types.py +133 -7
- wandb/sdk/artifacts/_generated/link_artifact.py +5 -5
- wandb/sdk/artifacts/_generated/operations.py +1053 -548
- wandb/sdk/artifacts/_generated/project_artifact_collection.py +9 -77
- wandb/sdk/artifacts/_generated/project_artifact_collections.py +21 -9
- wandb/sdk/artifacts/_generated/project_artifact_type.py +3 -3
- wandb/sdk/artifacts/_generated/project_artifact_types.py +19 -6
- wandb/sdk/artifacts/_generated/project_artifacts.py +7 -8
- wandb/sdk/artifacts/_generated/registry_collections.py +21 -9
- wandb/sdk/artifacts/_generated/registry_versions.py +20 -9
- wandb/sdk/artifacts/_generated/rename_registry.py +25 -0
- wandb/sdk/artifacts/_generated/run_input_artifacts.py +5 -9
- wandb/sdk/artifacts/_generated/run_output_artifacts.py +5 -9
- wandb/sdk/artifacts/_generated/type_info.py +2 -2
- wandb/sdk/artifacts/_generated/unlink_artifact.py +3 -5
- wandb/sdk/artifacts/_generated/update_artifact.py +3 -3
- wandb/sdk/artifacts/_generated/update_artifact_collection_type.py +28 -0
- wandb/sdk/artifacts/_generated/update_artifact_portfolio.py +7 -16
- wandb/sdk/artifacts/_generated/update_artifact_sequence.py +7 -16
- wandb/sdk/artifacts/_generated/upsert_registry.py +25 -0
- wandb/sdk/artifacts/_gqlutils.py +170 -6
- wandb/sdk/artifacts/_models/__init__.py +9 -0
- wandb/sdk/artifacts/_models/artifact_collection.py +109 -0
- wandb/sdk/artifacts/_models/manifest.py +26 -0
- wandb/sdk/artifacts/_models/pagination.py +26 -0
- wandb/sdk/artifacts/_models/registry.py +100 -0
- wandb/sdk/artifacts/_validators.py +45 -27
- wandb/sdk/artifacts/artifact.py +249 -244
- wandb/sdk/artifacts/artifact_file_cache.py +1 -1
- wandb/sdk/artifacts/artifact_manifest.py +37 -32
- wandb/sdk/artifacts/artifact_manifest_entry.py +82 -133
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +43 -61
- wandb/sdk/artifacts/storage_handler.py +18 -12
- wandb/sdk/artifacts/storage_handlers/azure_handler.py +11 -6
- wandb/sdk/artifacts/storage_handlers/gcs_handler.py +17 -12
- wandb/sdk/artifacts/storage_handlers/http_handler.py +9 -4
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +10 -6
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +5 -4
- wandb/sdk/artifacts/storage_handlers/s3_handler.py +10 -8
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +6 -4
- wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +24 -21
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +4 -2
- wandb/sdk/artifacts/storage_policies/_multipart.py +187 -0
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +61 -242
- wandb/sdk/artifacts/storage_policy.py +25 -12
- wandb/sdk/data_types/image.py +2 -2
- wandb/sdk/data_types/object_3d.py +67 -2
- wandb/sdk/interface/interface.py +72 -64
- wandb/sdk/interface/interface_queue.py +27 -18
- wandb/sdk/interface/interface_shared.py +61 -23
- wandb/sdk/interface/interface_sock.py +9 -5
- wandb/sdk/internal/_generated/server_features_query.py +4 -4
- wandb/sdk/internal/job_builder.py +27 -10
- wandb/sdk/internal/sender.py +4 -1
- wandb/sdk/launch/create_job.py +2 -1
- wandb/sdk/launch/inputs/schema.py +13 -10
- wandb/sdk/lib/apikey.py +8 -12
- wandb/sdk/lib/asyncio_compat.py +1 -1
- wandb/sdk/lib/asyncio_manager.py +5 -5
- wandb/sdk/lib/console_capture.py +38 -30
- wandb/sdk/lib/progress.py +151 -125
- wandb/sdk/lib/retry.py +3 -2
- wandb/sdk/lib/service/service_connection.py +2 -2
- wandb/sdk/lib/wb_logging.py +2 -1
- wandb/sdk/mailbox/mailbox.py +1 -1
- wandb/sdk/wandb_init.py +11 -14
- wandb/sdk/wandb_run.py +14 -48
- wandb/sdk/wandb_settings.py +114 -30
- {wandb-0.22.1.dist-info → wandb-0.22.3.dist-info}/METADATA +2 -1
- {wandb-0.22.1.dist-info → wandb-0.22.3.dist-info}/RECORD +154 -146
- wandb/sdk/artifacts/_generated/artifact_via_membership_by_name.py +0 -26
- wandb/sdk/artifacts/_generated/create_artifact_collection_tag_assignments.py +0 -36
- wandb/sdk/artifacts/_generated/delete_artifact_collection_tag_assignments.py +0 -25
- wandb/sdk/artifacts/_generated/move_artifact_collection.py +0 -35
- wandb/sdk/projects/_generated/__init__.py +0 -26
- wandb/sdk/projects/_generated/delete_project.py +0 -22
- wandb/sdk/projects/_generated/enums.py +0 -4
- wandb/sdk/projects/_generated/fragments.py +0 -41
- wandb/sdk/projects/_generated/input_types.py +0 -13
- wandb/sdk/projects/_generated/operations.py +0 -88
- wandb/sdk/projects/_generated/rename_project.py +0 -27
- wandb/sdk/projects/_generated/upsert_registry_project.py +0 -27
- {wandb-0.22.1.dist-info → wandb-0.22.3.dist-info}/WHEEL +0 -0
- {wandb-0.22.1.dist-info → wandb-0.22.3.dist-info}/entry_points.txt +0 -0
- {wandb-0.22.1.dist-info → wandb-0.22.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -147,7 +147,7 @@ class ArtifactFileCache:
|
|
|
147
147
|
if temp_size:
|
|
148
148
|
wandb.termwarn(
|
|
149
149
|
f"Cache contains {util.to_human_size(temp_size)} of temporary files. "
|
|
150
|
-
"Run `wandb artifact cleanup --remove-temp` to remove them."
|
|
150
|
+
"Run `wandb artifact cache cleanup --remove-temp` to remove them."
|
|
151
151
|
)
|
|
152
152
|
|
|
153
153
|
entries = []
|
|
@@ -2,75 +2,80 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Dict
|
|
7
|
+
|
|
8
|
+
from pydantic import Field
|
|
9
|
+
from typing_extensions import Annotated
|
|
6
10
|
|
|
7
11
|
from wandb.sdk.internal.internal_api import Api as InternalApi
|
|
8
12
|
from wandb.sdk.lib.hashutil import HexMD5
|
|
9
13
|
|
|
14
|
+
from ._models.base_model import ArtifactsBase
|
|
15
|
+
|
|
10
16
|
if TYPE_CHECKING:
|
|
11
|
-
from
|
|
12
|
-
from
|
|
17
|
+
from .artifact_manifest_entry import ArtifactManifestEntry
|
|
18
|
+
from .storage_policy import StoragePolicy
|
|
13
19
|
|
|
14
20
|
|
|
15
|
-
class ArtifactManifest:
|
|
16
|
-
|
|
21
|
+
class ArtifactManifest(ArtifactsBase, ABC):
|
|
22
|
+
# Note: this can't be named "version" since it conflicts with the prior `version()` classmethod.
|
|
23
|
+
manifest_version: Annotated[Any, Field(repr=False)]
|
|
24
|
+
entries: Dict[str, ArtifactManifestEntry] = Field(default_factory=dict) # noqa: UP006
|
|
25
|
+
|
|
26
|
+
storage_policy: Annotated[StoragePolicy, Field(exclude=True, repr=False)]
|
|
17
27
|
|
|
18
28
|
@classmethod
|
|
29
|
+
def version(cls) -> int:
|
|
30
|
+
return cls.model_fields["manifest_version"].default
|
|
31
|
+
|
|
32
|
+
@classmethod
|
|
33
|
+
@abstractmethod
|
|
19
34
|
def from_manifest_json(
|
|
20
|
-
cls, manifest_json: dict, api: InternalApi | None = None
|
|
35
|
+
cls, manifest_json: dict[str, Any], api: InternalApi | None = None
|
|
21
36
|
) -> ArtifactManifest:
|
|
22
|
-
if "version"
|
|
37
|
+
if (version := manifest_json.get("version")) is None:
|
|
23
38
|
raise ValueError("Invalid manifest format. Must contain version field.")
|
|
24
|
-
|
|
39
|
+
|
|
25
40
|
for sub in cls.__subclasses__():
|
|
26
41
|
if sub.version() == version:
|
|
27
42
|
return sub.from_manifest_json(manifest_json, api=api)
|
|
28
43
|
raise ValueError("Invalid manifest version.")
|
|
29
44
|
|
|
30
|
-
@classmethod
|
|
31
|
-
def version(cls) -> int:
|
|
32
|
-
raise NotImplementedError
|
|
33
|
-
|
|
34
|
-
def __init__(
|
|
35
|
-
self,
|
|
36
|
-
storage_policy: StoragePolicy,
|
|
37
|
-
entries: Mapping[str, ArtifactManifestEntry] | None = None,
|
|
38
|
-
) -> None:
|
|
39
|
-
self.storage_policy = storage_policy
|
|
40
|
-
self.entries = dict(entries) if entries else {}
|
|
41
|
-
|
|
42
45
|
def __len__(self) -> int:
|
|
43
46
|
return len(self.entries)
|
|
44
47
|
|
|
45
|
-
|
|
48
|
+
@abstractmethod
|
|
49
|
+
def to_manifest_json(self) -> dict[str, Any]:
|
|
46
50
|
raise NotImplementedError
|
|
47
51
|
|
|
52
|
+
@abstractmethod
|
|
48
53
|
def digest(self) -> HexMD5:
|
|
49
54
|
raise NotImplementedError
|
|
50
55
|
|
|
56
|
+
@abstractmethod
|
|
57
|
+
def size(self) -> int:
|
|
58
|
+
raise NotImplementedError
|
|
59
|
+
|
|
51
60
|
def add_entry(self, entry: ArtifactManifestEntry, overwrite: bool = False) -> None:
|
|
52
|
-
path = entry.path
|
|
53
61
|
if (
|
|
54
62
|
(not overwrite)
|
|
55
|
-
and (old_entry := self.entries.get(path))
|
|
63
|
+
and (old_entry := self.entries.get(entry.path))
|
|
56
64
|
and (entry.digest != old_entry.digest)
|
|
57
65
|
):
|
|
58
|
-
raise ValueError(f"Cannot add the same path twice: {path!r}")
|
|
59
|
-
self.entries[path] = entry
|
|
66
|
+
raise ValueError(f"Cannot add the same path twice: {entry.path!r}")
|
|
67
|
+
self.entries[entry.path] = entry
|
|
60
68
|
|
|
61
69
|
def remove_entry(self, entry: ArtifactManifestEntry) -> None:
|
|
62
70
|
try:
|
|
63
71
|
del self.entries[entry.path]
|
|
64
72
|
except LookupError:
|
|
65
|
-
raise FileNotFoundError(f"Cannot remove missing entry:
|
|
73
|
+
raise FileNotFoundError(f"Cannot remove missing entry: {entry.path!r}")
|
|
66
74
|
|
|
67
75
|
def get_entry_by_path(self, path: str) -> ArtifactManifestEntry | None:
|
|
68
76
|
return self.entries.get(path)
|
|
69
77
|
|
|
70
78
|
def get_entries_in_directory(self, directory: str) -> list[ArtifactManifestEntry]:
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
# entry keys (paths) use forward slash even for windows
|
|
75
|
-
if key.startswith(f"{directory}/")
|
|
76
|
-
]
|
|
79
|
+
# entry keys (paths) use forward slash even for windows
|
|
80
|
+
dir_prefix = f"{directory}/"
|
|
81
|
+
return [obj for key, obj in self.entries.items() if key.startswith(dir_prefix)]
|
|
@@ -1,17 +1,24 @@
|
|
|
1
1
|
"""Artifact manifest entry."""
|
|
2
2
|
|
|
3
|
+
# Older-style type annotations required for Pydantic v1 / python 3.8 compatibility.
|
|
4
|
+
# ruff: noqa: UP006, UP007, UP045
|
|
5
|
+
|
|
3
6
|
from __future__ import annotations
|
|
4
7
|
|
|
5
8
|
import concurrent.futures
|
|
6
9
|
import hashlib
|
|
7
|
-
import json
|
|
8
10
|
import logging
|
|
9
11
|
import os
|
|
10
12
|
from contextlib import suppress
|
|
11
|
-
from
|
|
12
|
-
from typing import TYPE_CHECKING
|
|
13
|
+
from os.path import getsize
|
|
14
|
+
from typing import TYPE_CHECKING, Any, Dict, Final, Optional, Union
|
|
13
15
|
from urllib.parse import urlparse
|
|
14
16
|
|
|
17
|
+
from pydantic import Field, NonNegativeInt
|
|
18
|
+
from typing_extensions import Annotated, Self
|
|
19
|
+
|
|
20
|
+
from wandb._pydantic import field_validator, model_validator
|
|
21
|
+
from wandb._strutils import nameof
|
|
15
22
|
from wandb.proto.wandb_deprecated import Deprecated
|
|
16
23
|
from wandb.sdk.lib.deprecate import deprecate
|
|
17
24
|
from wandb.sdk.lib.filesystem import copy_or_overwrite_changed
|
|
@@ -22,27 +29,18 @@ from wandb.sdk.lib.hashutil import (
|
|
|
22
29
|
hex_to_b64_id,
|
|
23
30
|
md5_file_b64,
|
|
24
31
|
)
|
|
25
|
-
from wandb.sdk.lib.paths import FilePathStr, LogicalPath,
|
|
32
|
+
from wandb.sdk.lib.paths import FilePathStr, LogicalPath, URIStr
|
|
26
33
|
|
|
27
|
-
|
|
34
|
+
from ._models.base_model import ArtifactsBase
|
|
28
35
|
|
|
29
36
|
if TYPE_CHECKING:
|
|
30
|
-
from
|
|
37
|
+
from .artifact import Artifact
|
|
31
38
|
|
|
32
|
-
from wandb.sdk.artifacts.artifact import Artifact
|
|
33
39
|
|
|
34
|
-
|
|
35
|
-
path: str
|
|
36
|
-
digest: str
|
|
37
|
-
skip_cache: bool
|
|
38
|
-
ref: str
|
|
39
|
-
birthArtifactID: str
|
|
40
|
-
size: int
|
|
41
|
-
extra: dict
|
|
42
|
-
local_path: str
|
|
40
|
+
logger = logging.getLogger(__name__)
|
|
43
41
|
|
|
44
42
|
|
|
45
|
-
_WB_ARTIFACT_SCHEME = "wandb-artifact"
|
|
43
|
+
_WB_ARTIFACT_SCHEME: Final[str] = "wandb-artifact"
|
|
46
44
|
|
|
47
45
|
|
|
48
46
|
def _checksum_cache_path(file_path: str) -> str:
|
|
@@ -87,76 +85,54 @@ def _write_cached_checksum(file_path: str, checksum: str) -> None:
|
|
|
87
85
|
logger.debug(f"Failed to write checksum cache for {file_path!r}")
|
|
88
86
|
|
|
89
87
|
|
|
90
|
-
class ArtifactManifestEntry:
|
|
91
|
-
"""A single entry in an artifact manifest.
|
|
88
|
+
class ArtifactManifestEntry(ArtifactsBase):
|
|
89
|
+
"""A single entry in an artifact manifest.
|
|
90
|
+
|
|
91
|
+
External code should avoid instantiating this class directly.
|
|
92
|
+
"""
|
|
92
93
|
|
|
93
94
|
path: LogicalPath
|
|
94
|
-
digest: B64MD5 | URIStr | FilePathStr | ETag
|
|
95
|
-
skip_cache: bool
|
|
96
|
-
ref: FilePathStr | URIStr | None
|
|
97
|
-
birth_artifact_id: str | None
|
|
98
|
-
size: int | None
|
|
99
|
-
extra: dict
|
|
100
|
-
local_path: str | None
|
|
101
|
-
|
|
102
|
-
_parent_artifact: Artifact | None = None
|
|
103
|
-
_download_url: str | None = None
|
|
104
|
-
|
|
105
|
-
def __init__(
|
|
106
|
-
self,
|
|
107
|
-
path: StrPath,
|
|
108
|
-
digest: B64MD5 | URIStr | FilePathStr | ETag,
|
|
109
|
-
skip_cache: bool | None = False,
|
|
110
|
-
ref: FilePathStr | URIStr | None = None,
|
|
111
|
-
birth_artifact_id: str | None = None,
|
|
112
|
-
size: int | None = None,
|
|
113
|
-
extra: dict | None = None,
|
|
114
|
-
local_path: StrPath | None = None,
|
|
115
|
-
) -> None:
|
|
116
|
-
self.path = LogicalPath(path)
|
|
117
|
-
self.digest = digest
|
|
118
|
-
self.ref = ref
|
|
119
|
-
self.birth_artifact_id = birth_artifact_id
|
|
120
|
-
self.size = size
|
|
121
|
-
self.extra = extra or {}
|
|
122
|
-
self.local_path = str(local_path) if local_path else None
|
|
123
|
-
if self.local_path and self.size is None:
|
|
124
|
-
self.size = Path(self.local_path).stat().st_size
|
|
125
|
-
self.skip_cache = skip_cache or False
|
|
126
95
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
96
|
+
digest: Union[B64MD5, ETag, URIStr, FilePathStr]
|
|
97
|
+
ref: Union[URIStr, FilePathStr, None] = None
|
|
98
|
+
birth_artifact_id: Annotated[Optional[str], Field(alias="birthArtifactID")] = None
|
|
99
|
+
size: Optional[NonNegativeInt] = None
|
|
100
|
+
extra: Dict[str, Any] = Field(default_factory=dict)
|
|
101
|
+
local_path: Optional[str] = None
|
|
102
|
+
|
|
103
|
+
skip_cache: bool = False
|
|
104
|
+
|
|
105
|
+
# Note: Pydantic considers these private attributes, omitting them from validation and comparison logic.
|
|
106
|
+
_parent_artifact: Optional[Artifact] = None
|
|
107
|
+
_download_url: Optional[str] = None
|
|
108
|
+
|
|
109
|
+
@field_validator("path", mode="before")
|
|
110
|
+
def _validate_path(cls, v: Any) -> LogicalPath:
|
|
111
|
+
"""Coerce `path` to a LogicalPath.
|
|
112
|
+
|
|
113
|
+
LogicalPath doesn't implement its own pydantic validator, and implementing one for
|
|
114
|
+
both pydantic V1 _and_ V2 would add too much boilerplate. Until we drop V1 support,
|
|
115
|
+
just coerce to LogicalPath in the field validator here.
|
|
147
116
|
"""
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
117
|
+
return LogicalPath(v)
|
|
118
|
+
|
|
119
|
+
@field_validator("local_path", mode="before")
|
|
120
|
+
def _validate_local_path(cls, v: Any) -> str | None:
|
|
121
|
+
"""Coerce `local_path` to a str. Necessary if the input is a `PosixPath`."""
|
|
122
|
+
return str(v) if v else None
|
|
123
|
+
|
|
124
|
+
@model_validator(mode="after")
|
|
125
|
+
def _infer_size_from_local_path(self) -> Self:
|
|
126
|
+
"""If `size` isn't set, try to infer it from `local_path`."""
|
|
127
|
+
if (self.size is None) and self.local_path:
|
|
128
|
+
self.size = getsize(self.local_path)
|
|
129
|
+
return self
|
|
130
|
+
|
|
131
|
+
def __repr__(self) -> str:
|
|
132
|
+
# For compatibility with prior behavior, don't display `extra` if it's empty
|
|
133
|
+
exclude = None if self.extra else {"extra"}
|
|
134
|
+
repr_dict = self.model_dump(by_alias=False, exclude_none=True, exclude=exclude)
|
|
135
|
+
return f"{nameof(type(self))}({', '.join(f'{k}={v!r}' for k, v in repr_dict.items())})"
|
|
160
136
|
|
|
161
137
|
@property
|
|
162
138
|
def name(self) -> LogicalPath:
|
|
@@ -182,7 +158,6 @@ class ArtifactManifestEntry:
|
|
|
182
158
|
root: str | None = None,
|
|
183
159
|
skip_cache: bool | None = None,
|
|
184
160
|
executor: concurrent.futures.Executor | None = None,
|
|
185
|
-
multipart: bool | None = None,
|
|
186
161
|
) -> FilePathStr:
|
|
187
162
|
"""Download this artifact entry to the specified root path.
|
|
188
163
|
|
|
@@ -193,18 +168,9 @@ class ArtifactManifestEntry:
|
|
|
193
168
|
Returns:
|
|
194
169
|
(str): The path of the downloaded artifact entry.
|
|
195
170
|
"""
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
root = root or self._parent_artifact._default_root()
|
|
200
|
-
self._parent_artifact._add_download_root(root)
|
|
201
|
-
path = str(Path(self.path))
|
|
202
|
-
dest_path = os.path.join(root, path)
|
|
203
|
-
|
|
204
|
-
if skip_cache:
|
|
205
|
-
override_cache_path = dest_path
|
|
206
|
-
else:
|
|
207
|
-
override_cache_path = None
|
|
171
|
+
artifact = self.parent_artifact()
|
|
172
|
+
rootdir = artifact._add_download_root(root)
|
|
173
|
+
dest_path = os.path.join(rootdir, self.path)
|
|
208
174
|
|
|
209
175
|
# Skip checking the cache (and possibly downloading) if the file already exists
|
|
210
176
|
# and has the digest we're expecting.
|
|
@@ -224,30 +190,28 @@ class ArtifactManifestEntry:
|
|
|
224
190
|
if self.digest == md5_hash:
|
|
225
191
|
return FilePathStr(dest_path)
|
|
226
192
|
|
|
193
|
+
# Override the target cache path IF we're skipping the cache.
|
|
194
|
+
# Note that `override_cache_path is None` <=> `skip_cache is False`.
|
|
195
|
+
override_cache_path = FilePathStr(dest_path) if skip_cache else None
|
|
196
|
+
storage_policy = artifact.manifest.storage_policy
|
|
227
197
|
if self.ref is not None:
|
|
228
|
-
cache_path =
|
|
198
|
+
cache_path = storage_policy.load_reference(
|
|
229
199
|
self, local=True, dest_path=override_cache_path
|
|
230
200
|
)
|
|
231
201
|
else:
|
|
232
|
-
cache_path =
|
|
233
|
-
self
|
|
234
|
-
self,
|
|
235
|
-
dest_path=override_cache_path,
|
|
236
|
-
executor=executor,
|
|
237
|
-
multipart=multipart,
|
|
202
|
+
cache_path = storage_policy.load_file(
|
|
203
|
+
artifact, self, dest_path=override_cache_path, executor=executor
|
|
238
204
|
)
|
|
239
205
|
|
|
240
206
|
# Determine the final path
|
|
241
|
-
final_path = (
|
|
242
|
-
dest_path
|
|
243
|
-
if skip_cache
|
|
244
|
-
else copy_or_overwrite_changed(cache_path, dest_path)
|
|
207
|
+
final_path = FilePathStr(
|
|
208
|
+
override_cache_path or copy_or_overwrite_changed(cache_path, dest_path)
|
|
245
209
|
)
|
|
246
210
|
|
|
247
211
|
# Cache the checksum for future downloads
|
|
248
|
-
_write_cached_checksum(
|
|
212
|
+
_write_cached_checksum(final_path, self.digest)
|
|
249
213
|
|
|
250
|
-
return
|
|
214
|
+
return final_path
|
|
251
215
|
|
|
252
216
|
def ref_target(self) -> FilePathStr | URIStr:
|
|
253
217
|
"""Get the reference URL that is targeted by this artifact entry.
|
|
@@ -260,11 +224,9 @@ class ArtifactManifestEntry:
|
|
|
260
224
|
"""
|
|
261
225
|
if self.ref is None:
|
|
262
226
|
raise ValueError("Only reference entries support ref_target().")
|
|
263
|
-
if self._parent_artifact is None:
|
|
227
|
+
if (parent_artifact := self._parent_artifact) is None:
|
|
264
228
|
return self.ref
|
|
265
|
-
return
|
|
266
|
-
self._parent_artifact.manifest.entries[self.path], local=False
|
|
267
|
-
)
|
|
229
|
+
return parent_artifact.manifest.storage_policy.load_reference(self, local=False)
|
|
268
230
|
|
|
269
231
|
def ref_url(self) -> str:
|
|
270
232
|
"""Get a URL to this artifact entry.
|
|
@@ -285,26 +247,13 @@ class ArtifactManifestEntry:
|
|
|
285
247
|
raise ValueError("Parent artifact is not set")
|
|
286
248
|
elif (parent_id := parent_artifact.id) is None:
|
|
287
249
|
raise ValueError("Parent artifact ID is not set")
|
|
288
|
-
return f"{_WB_ARTIFACT_SCHEME}://{b64_to_hex_id(
|
|
289
|
-
|
|
290
|
-
def to_json(self) ->
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
if self.size is not None:
|
|
296
|
-
contents["size"] = self.size
|
|
297
|
-
if self.ref:
|
|
298
|
-
contents["ref"] = self.ref
|
|
299
|
-
if self.birth_artifact_id:
|
|
300
|
-
contents["birthArtifactID"] = self.birth_artifact_id
|
|
301
|
-
if self.local_path:
|
|
302
|
-
contents["local_path"] = self.local_path
|
|
303
|
-
if self.skip_cache:
|
|
304
|
-
contents["skip_cache"] = self.skip_cache
|
|
305
|
-
if self.extra:
|
|
306
|
-
contents["extra"] = self.extra
|
|
307
|
-
return contents
|
|
250
|
+
return f"{_WB_ARTIFACT_SCHEME}://{b64_to_hex_id(parent_id)}/{self.path}"
|
|
251
|
+
|
|
252
|
+
def to_json(self) -> dict[str, Any]:
|
|
253
|
+
# NOTE: The method name `to_json` is a bit misleading, as this returns a
|
|
254
|
+
# python dict, NOT a JSON string. The historical name is kept for continuity,
|
|
255
|
+
# but consider deprecating this in favor of `BaseModel.model_dump()`.
|
|
256
|
+
return self.model_dump(exclude_none=True) # type: ignore[return-value]
|
|
308
257
|
|
|
309
258
|
def _is_artifact_reference(self) -> bool:
|
|
310
259
|
return self.ref is not None and urlparse(self.ref).scheme == _WB_ARTIFACT_SCHEME
|
|
@@ -1,61 +1,48 @@
|
|
|
1
1
|
"""Artifact manifest v1."""
|
|
2
2
|
|
|
3
|
+
# Older-style type annotations required for Pydantic v1 / python 3.8 compatibility.
|
|
4
|
+
# ruff: noqa: UP006
|
|
5
|
+
|
|
3
6
|
from __future__ import annotations
|
|
4
7
|
|
|
5
8
|
from operator import itemgetter
|
|
6
|
-
from typing import Any,
|
|
9
|
+
from typing import Any, ClassVar, Dict, Literal, final
|
|
10
|
+
|
|
11
|
+
from pydantic import Field
|
|
12
|
+
from typing_extensions import Annotated
|
|
7
13
|
|
|
8
|
-
from wandb.sdk.artifacts.artifact_manifest import ArtifactManifest
|
|
9
|
-
from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
|
|
10
|
-
from wandb.sdk.artifacts.storage_policy import StoragePolicy
|
|
11
14
|
from wandb.sdk.internal.internal_api import Api as InternalApi
|
|
12
15
|
from wandb.sdk.lib.hashutil import HexMD5, _md5
|
|
13
16
|
|
|
17
|
+
from .._factories import make_storage_policy
|
|
18
|
+
from .._models.manifest import ArtifactManifestV1Data
|
|
19
|
+
from ..artifact_manifest import ArtifactManifest
|
|
20
|
+
from ..artifact_manifest_entry import ArtifactManifestEntry
|
|
21
|
+
from ..storage_policy import StoragePolicy
|
|
22
|
+
|
|
14
23
|
|
|
24
|
+
@final
|
|
15
25
|
class ArtifactManifestV1(ArtifactManifest):
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
26
|
+
manifest_version: Annotated[Literal[1], Field(repr=False)] = 1
|
|
27
|
+
entries: Dict[str, ArtifactManifestEntry] = Field(default_factory=dict)
|
|
28
|
+
|
|
29
|
+
storage_policy: StoragePolicy = Field(
|
|
30
|
+
default_factory=make_storage_policy, exclude=True, repr=False
|
|
31
|
+
)
|
|
19
32
|
|
|
20
33
|
@classmethod
|
|
21
34
|
def from_manifest_json(
|
|
22
|
-
cls, manifest_json: dict, api: InternalApi | None = None
|
|
35
|
+
cls, manifest_json: dict[str, Any], api: InternalApi | None = None
|
|
23
36
|
) -> ArtifactManifestV1:
|
|
24
|
-
|
|
25
|
-
raise ValueError(
|
|
26
|
-
"Expected manifest version 1, got {}".format(manifest_json["version"])
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
storage_policy_name = manifest_json["storagePolicy"]
|
|
30
|
-
storage_policy_config = manifest_json.get("storagePolicyConfig", {})
|
|
31
|
-
storage_policy_cls = StoragePolicy.lookup_by_name(storage_policy_name)
|
|
32
|
-
|
|
33
|
-
entries: Mapping[str, ArtifactManifestEntry]
|
|
34
|
-
entries = {
|
|
35
|
-
name: ArtifactManifestEntry(
|
|
36
|
-
path=name,
|
|
37
|
-
digest=val["digest"],
|
|
38
|
-
birth_artifact_id=val.get("birthArtifactID"),
|
|
39
|
-
ref=val.get("ref"),
|
|
40
|
-
size=val.get("size"),
|
|
41
|
-
extra=val.get("extra"),
|
|
42
|
-
local_path=val.get("local_path"),
|
|
43
|
-
skip_cache=val.get("skip_cache"),
|
|
44
|
-
)
|
|
45
|
-
for name, val in manifest_json["contents"].items()
|
|
46
|
-
}
|
|
37
|
+
data = ArtifactManifestV1Data(**manifest_json)
|
|
47
38
|
|
|
39
|
+
policy_name = data.storage_policy
|
|
40
|
+
policy_cfg = data.storage_policy_config
|
|
41
|
+
policy = StoragePolicy.lookup_by_name(policy_name).from_config(policy_cfg, api)
|
|
48
42
|
return cls(
|
|
49
|
-
|
|
43
|
+
manifest_version=data.version, entries=data.contents, storage_policy=policy
|
|
50
44
|
)
|
|
51
45
|
|
|
52
|
-
def __init__(
|
|
53
|
-
self,
|
|
54
|
-
storage_policy: StoragePolicy,
|
|
55
|
-
entries: Mapping[str, ArtifactManifestEntry] | None = None,
|
|
56
|
-
) -> None:
|
|
57
|
-
super().__init__(storage_policy, entries=entries)
|
|
58
|
-
|
|
59
46
|
def to_manifest_json(self) -> dict:
|
|
60
47
|
"""This is the JSON that's stored in wandb_manifest.json.
|
|
61
48
|
|
|
@@ -64,31 +51,26 @@ class ArtifactManifestV1(ArtifactManifest):
|
|
|
64
51
|
system. We don't need to include the local paths in the artifact manifest
|
|
65
52
|
contents.
|
|
66
53
|
"""
|
|
67
|
-
|
|
68
|
-
for name, entry in sorted(self.entries.items(), key=itemgetter(0)):
|
|
69
|
-
json_entry: dict[str, Any] = {
|
|
70
|
-
"digest": entry.digest,
|
|
71
|
-
}
|
|
72
|
-
if entry.birth_artifact_id:
|
|
73
|
-
json_entry["birthArtifactID"] = entry.birth_artifact_id
|
|
74
|
-
if entry.ref:
|
|
75
|
-
json_entry["ref"] = entry.ref
|
|
76
|
-
if entry.extra:
|
|
77
|
-
json_entry["extra"] = entry.extra
|
|
78
|
-
if entry.size is not None:
|
|
79
|
-
json_entry["size"] = entry.size
|
|
80
|
-
contents[name] = json_entry
|
|
54
|
+
omit_entry_fields = {"path", "local_path", "skip_cache"}
|
|
81
55
|
return {
|
|
82
|
-
"version": self.
|
|
56
|
+
"version": self.manifest_version,
|
|
83
57
|
"storagePolicy": self.storage_policy.name(),
|
|
84
|
-
"storagePolicyConfig": self.storage_policy.config()
|
|
85
|
-
"contents":
|
|
58
|
+
"storagePolicyConfig": self.storage_policy.config(),
|
|
59
|
+
"contents": {
|
|
60
|
+
path: entry.model_dump(exclude=omit_entry_fields, exclude_defaults=True)
|
|
61
|
+
for path, entry in self.entries.items()
|
|
62
|
+
},
|
|
86
63
|
}
|
|
87
64
|
|
|
65
|
+
_DIGEST_HEADER: ClassVar[bytes] = b"wandb-artifact-manifest-v1\n"
|
|
66
|
+
"""Encoded prefix/header for the ArtifactManifest digest."""
|
|
67
|
+
|
|
88
68
|
def digest(self) -> HexMD5:
|
|
89
|
-
hasher = _md5()
|
|
90
|
-
hasher.update(b"wandb-artifact-manifest-v1\n")
|
|
69
|
+
hasher = _md5(self._DIGEST_HEADER)
|
|
91
70
|
# sort by key (path)
|
|
92
|
-
for
|
|
93
|
-
hasher.update(f"{
|
|
94
|
-
return
|
|
71
|
+
for path, entry in sorted(self.entries.items(), key=itemgetter(0)):
|
|
72
|
+
hasher.update(f"{path}:{entry.digest}\n".encode())
|
|
73
|
+
return hasher.hexdigest()
|
|
74
|
+
|
|
75
|
+
def size(self) -> int:
|
|
76
|
+
return sum(entry.size for entry in self.entries.values() if entry.size)
|
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import TYPE_CHECKING, Final
|
|
6
7
|
|
|
7
8
|
from wandb.sdk.lib.paths import FilePathStr, URIStr
|
|
8
9
|
|
|
@@ -12,18 +13,11 @@ if TYPE_CHECKING:
|
|
|
12
13
|
from wandb.sdk.artifacts.artifact import Artifact
|
|
13
14
|
from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
|
|
14
15
|
|
|
15
|
-
DEFAULT_MAX_OBJECTS = 10**7
|
|
16
|
+
DEFAULT_MAX_OBJECTS: Final[int] = 10_000_000 # 10**7
|
|
16
17
|
|
|
17
18
|
|
|
18
|
-
class
|
|
19
|
-
|
|
20
|
-
"""Checks whether this handler can handle the given url.
|
|
21
|
-
|
|
22
|
-
Returns:
|
|
23
|
-
Whether this handler can handle the given url.
|
|
24
|
-
"""
|
|
25
|
-
raise NotImplementedError
|
|
26
|
-
|
|
19
|
+
class _BaseStorageHandler(ABC):
|
|
20
|
+
@abstractmethod
|
|
27
21
|
def load_path(
|
|
28
22
|
self,
|
|
29
23
|
manifest_entry: ArtifactManifestEntry,
|
|
@@ -40,6 +34,7 @@ class StorageHandler:
|
|
|
40
34
|
"""
|
|
41
35
|
raise NotImplementedError
|
|
42
36
|
|
|
37
|
+
@abstractmethod
|
|
43
38
|
def store_path(
|
|
44
39
|
self,
|
|
45
40
|
artifact: Artifact,
|
|
@@ -47,7 +42,7 @@ class StorageHandler:
|
|
|
47
42
|
name: str | None = None,
|
|
48
43
|
checksum: bool = True,
|
|
49
44
|
max_objects: int | None = None,
|
|
50
|
-
) ->
|
|
45
|
+
) -> list[ArtifactManifestEntry]:
|
|
51
46
|
"""Store the file or directory at the given path to the specified artifact.
|
|
52
47
|
|
|
53
48
|
Args:
|
|
@@ -60,3 +55,14 @@ class StorageHandler:
|
|
|
60
55
|
A list of manifest entries to store within the artifact
|
|
61
56
|
"""
|
|
62
57
|
raise NotImplementedError
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class StorageHandler(_BaseStorageHandler, ABC): # Handles a single storage protocol
|
|
61
|
+
@abstractmethod
|
|
62
|
+
def can_handle(self, parsed_url: ParseResult) -> bool:
|
|
63
|
+
"""Checks whether this handler can handle the given url.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
Whether this handler can handle the given url.
|
|
67
|
+
"""
|
|
68
|
+
raise NotImplementedError
|
|
@@ -4,7 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
from pathlib import PurePosixPath
|
|
6
6
|
from types import ModuleType
|
|
7
|
-
from typing import TYPE_CHECKING
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
8
|
from urllib.parse import ParseResult, parse_qsl, urlparse
|
|
9
9
|
|
|
10
10
|
import wandb
|
|
@@ -20,17 +20,22 @@ if TYPE_CHECKING:
|
|
|
20
20
|
import azure.storage.blob # type: ignore
|
|
21
21
|
|
|
22
22
|
from wandb.sdk.artifacts.artifact import Artifact
|
|
23
|
+
from wandb.sdk.artifacts.artifact_file_cache import ArtifactFileCache
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
class AzureHandler(StorageHandler):
|
|
27
|
+
_scheme: str
|
|
28
|
+
_cache: ArtifactFileCache
|
|
29
|
+
|
|
30
|
+
def __init__(self, scheme: str = "https") -> None:
|
|
31
|
+
self._scheme = scheme
|
|
32
|
+
self._cache = get_artifact_file_cache()
|
|
33
|
+
|
|
26
34
|
def can_handle(self, parsed_url: ParseResult) -> bool:
|
|
27
|
-
return parsed_url.scheme ==
|
|
35
|
+
return parsed_url.scheme == self._scheme and parsed_url.netloc.endswith(
|
|
28
36
|
".blob.core.windows.net"
|
|
29
37
|
)
|
|
30
38
|
|
|
31
|
-
def __init__(self, scheme: str | None = None) -> None:
|
|
32
|
-
self._cache = get_artifact_file_cache()
|
|
33
|
-
|
|
34
39
|
def load_path(
|
|
35
40
|
self,
|
|
36
41
|
manifest_entry: ArtifactManifestEntry,
|
|
@@ -101,7 +106,7 @@ class AzureHandler(StorageHandler):
|
|
|
101
106
|
name: StrPath | None = None,
|
|
102
107
|
checksum: bool = True,
|
|
103
108
|
max_objects: int | None = None,
|
|
104
|
-
) ->
|
|
109
|
+
) -> list[ArtifactManifestEntry]:
|
|
105
110
|
account_url, container_name, blob_name, query = self._parse_uri(path)
|
|
106
111
|
path = URIStr(f"{account_url}/{container_name}/{blob_name}")
|
|
107
112
|
|