wandb 0.15.3__py3-none-any.whl → 0.15.5__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- wandb/__init__.py +1 -1
- wandb/analytics/sentry.py +1 -0
- wandb/apis/importers/base.py +20 -5
- wandb/apis/importers/mlflow.py +7 -1
- wandb/apis/internal.py +12 -0
- wandb/apis/public.py +247 -1387
- wandb/apis/reports/_panels.py +58 -35
- wandb/beta/workflows.py +6 -7
- wandb/cli/cli.py +130 -60
- wandb/data_types.py +3 -1
- wandb/filesync/dir_watcher.py +21 -27
- wandb/filesync/step_checksum.py +8 -8
- wandb/filesync/step_prepare.py +23 -10
- wandb/filesync/step_upload.py +13 -13
- wandb/filesync/upload_job.py +4 -8
- wandb/integration/cohere/__init__.py +3 -0
- wandb/integration/cohere/cohere.py +21 -0
- wandb/integration/cohere/resolver.py +347 -0
- wandb/integration/gym/__init__.py +4 -6
- wandb/integration/huggingface/__init__.py +3 -0
- wandb/integration/huggingface/huggingface.py +18 -0
- wandb/integration/huggingface/resolver.py +213 -0
- wandb/integration/langchain/wandb_tracer.py +16 -179
- wandb/integration/openai/__init__.py +1 -3
- wandb/integration/openai/openai.py +11 -143
- wandb/integration/openai/resolver.py +111 -38
- wandb/integration/sagemaker/config.py +2 -2
- wandb/integration/tensorboard/log.py +4 -4
- wandb/old/settings.py +24 -7
- wandb/proto/v3/wandb_telemetry_pb2.py +12 -12
- wandb/proto/v4/wandb_telemetry_pb2.py +12 -12
- wandb/proto/wandb_deprecated.py +3 -1
- wandb/sdk/__init__.py +1 -1
- wandb/sdk/artifacts/__init__.py +0 -0
- wandb/sdk/artifacts/artifact.py +2101 -0
- wandb/sdk/artifacts/artifact_download_logger.py +42 -0
- wandb/sdk/artifacts/artifact_manifest.py +67 -0
- wandb/sdk/artifacts/artifact_manifest_entry.py +159 -0
- wandb/sdk/artifacts/artifact_manifests/__init__.py +0 -0
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +91 -0
- wandb/sdk/{internal → artifacts}/artifact_saver.py +6 -5
- wandb/sdk/artifacts/artifact_state.py +10 -0
- wandb/sdk/{interface/artifacts/artifact_cache.py → artifacts/artifacts_cache.py} +22 -12
- wandb/sdk/artifacts/exceptions.py +55 -0
- wandb/sdk/artifacts/storage_handler.py +59 -0
- wandb/sdk/artifacts/storage_handlers/__init__.py +0 -0
- wandb/sdk/artifacts/storage_handlers/azure_handler.py +192 -0
- wandb/sdk/artifacts/storage_handlers/gcs_handler.py +224 -0
- wandb/sdk/artifacts/storage_handlers/http_handler.py +112 -0
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +134 -0
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +53 -0
- wandb/sdk/artifacts/storage_handlers/s3_handler.py +301 -0
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +67 -0
- wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +132 -0
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +72 -0
- wandb/sdk/artifacts/storage_layout.py +6 -0
- wandb/sdk/artifacts/storage_policies/__init__.py +0 -0
- wandb/sdk/artifacts/storage_policies/s3_bucket_policy.py +61 -0
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +386 -0
- wandb/sdk/{interface/artifacts/artifact_storage.py → artifacts/storage_policy.py} +5 -57
- wandb/sdk/data_types/_dtypes.py +7 -12
- wandb/sdk/data_types/base_types/json_metadata.py +3 -2
- wandb/sdk/data_types/base_types/media.py +8 -8
- wandb/sdk/data_types/base_types/wb_value.py +12 -13
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +5 -6
- wandb/sdk/data_types/helper_types/classes.py +6 -8
- wandb/sdk/data_types/helper_types/image_mask.py +5 -6
- wandb/sdk/data_types/histogram.py +4 -3
- wandb/sdk/data_types/html.py +3 -4
- wandb/sdk/data_types/image.py +11 -9
- wandb/sdk/data_types/molecule.py +5 -3
- wandb/sdk/data_types/object_3d.py +7 -5
- wandb/sdk/data_types/plotly.py +3 -2
- wandb/sdk/data_types/saved_model.py +11 -11
- wandb/sdk/data_types/trace_tree.py +5 -4
- wandb/sdk/data_types/utils.py +3 -5
- wandb/sdk/data_types/video.py +5 -4
- wandb/sdk/integration_utils/auto_logging.py +215 -0
- wandb/sdk/interface/interface.py +15 -15
- wandb/sdk/internal/file_pusher.py +8 -16
- wandb/sdk/internal/file_stream.py +5 -11
- wandb/sdk/internal/handler.py +13 -1
- wandb/sdk/internal/internal_api.py +287 -13
- wandb/sdk/internal/job_builder.py +119 -30
- wandb/sdk/internal/sender.py +6 -26
- wandb/sdk/internal/settings_static.py +2 -0
- wandb/sdk/internal/system/assets/__init__.py +2 -0
- wandb/sdk/internal/system/assets/gpu.py +42 -0
- wandb/sdk/internal/system/assets/gpu_amd.py +216 -0
- wandb/sdk/internal/system/env_probe_helpers.py +13 -0
- wandb/sdk/internal/system/system_info.py +3 -3
- wandb/sdk/internal/tb_watcher.py +32 -22
- wandb/sdk/internal/thread_local_settings.py +18 -0
- wandb/sdk/launch/_project_spec.py +57 -11
- wandb/sdk/launch/agent/agent.py +147 -65
- wandb/sdk/launch/agent/job_status_tracker.py +34 -0
- wandb/sdk/launch/agent/run_queue_item_file_saver.py +45 -0
- wandb/sdk/launch/builder/abstract.py +5 -1
- wandb/sdk/launch/builder/build.py +21 -18
- wandb/sdk/launch/builder/docker_builder.py +10 -4
- wandb/sdk/launch/builder/kaniko_builder.py +113 -23
- wandb/sdk/launch/builder/noop.py +6 -3
- wandb/sdk/launch/builder/templates/_wandb_bootstrap.py +46 -14
- wandb/sdk/launch/environment/aws_environment.py +3 -2
- wandb/sdk/launch/environment/azure_environment.py +124 -0
- wandb/sdk/launch/environment/gcp_environment.py +2 -4
- wandb/sdk/launch/environment/local_environment.py +1 -1
- wandb/sdk/launch/errors.py +19 -0
- wandb/sdk/launch/github_reference.py +32 -19
- wandb/sdk/launch/launch.py +3 -8
- wandb/sdk/launch/launch_add.py +6 -2
- wandb/sdk/launch/loader.py +21 -2
- wandb/sdk/launch/registry/azure_container_registry.py +132 -0
- wandb/sdk/launch/registry/elastic_container_registry.py +39 -5
- wandb/sdk/launch/registry/google_artifact_registry.py +68 -26
- wandb/sdk/launch/registry/local_registry.py +2 -1
- wandb/sdk/launch/runner/abstract.py +24 -3
- wandb/sdk/launch/runner/kubernetes_runner.py +479 -26
- wandb/sdk/launch/runner/local_container.py +103 -51
- wandb/sdk/launch/runner/local_process.py +1 -1
- wandb/sdk/launch/runner/sagemaker_runner.py +60 -10
- wandb/sdk/launch/runner/vertex_runner.py +10 -5
- wandb/sdk/launch/sweeps/__init__.py +7 -9
- wandb/sdk/launch/sweeps/scheduler.py +307 -77
- wandb/sdk/launch/sweeps/scheduler_sweep.py +2 -1
- wandb/sdk/launch/sweeps/utils.py +82 -35
- wandb/sdk/launch/utils.py +89 -75
- wandb/sdk/lib/_settings_toposort_generated.py +7 -0
- wandb/sdk/lib/capped_dict.py +26 -0
- wandb/sdk/lib/{git.py → gitlib.py} +76 -59
- wandb/sdk/lib/hashutil.py +12 -4
- wandb/sdk/lib/paths.py +96 -8
- wandb/sdk/lib/sock_client.py +2 -2
- wandb/sdk/lib/timer.py +1 -0
- wandb/sdk/service/server.py +22 -9
- wandb/sdk/service/server_sock.py +1 -1
- wandb/sdk/service/service.py +27 -8
- wandb/sdk/verify/verify.py +4 -7
- wandb/sdk/wandb_config.py +2 -6
- wandb/sdk/wandb_init.py +57 -53
- wandb/sdk/wandb_require.py +7 -0
- wandb/sdk/wandb_run.py +61 -223
- wandb/sdk/wandb_settings.py +28 -4
- wandb/testing/relay.py +15 -2
- wandb/util.py +74 -36
- {wandb-0.15.3.dist-info → wandb-0.15.5.dist-info}/METADATA +15 -9
- {wandb-0.15.3.dist-info → wandb-0.15.5.dist-info}/RECORD +151 -116
- {wandb-0.15.3.dist-info → wandb-0.15.5.dist-info}/entry_points.txt +1 -0
- wandb/integration/langchain/util.py +0 -191
- wandb/sdk/interface/artifacts/__init__.py +0 -33
- wandb/sdk/interface/artifacts/artifact.py +0 -615
- wandb/sdk/interface/artifacts/artifact_manifest.py +0 -131
- wandb/sdk/wandb_artifacts.py +0 -2226
- {wandb-0.15.3.dist-info → wandb-0.15.5.dist-info}/LICENSE +0 -0
- {wandb-0.15.3.dist-info → wandb-0.15.5.dist-info}/WHEEL +0 -0
- {wandb-0.15.3.dist-info → wandb-0.15.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,42 @@
|
|
1
|
+
"""Artifact download logger."""
|
2
|
+
import multiprocessing.dummy
|
3
|
+
import time
|
4
|
+
from typing import Callable
|
5
|
+
|
6
|
+
from wandb.errors.term import termlog
|
7
|
+
|
8
|
+
|
9
|
+
class ArtifactDownloadLogger:
    """Print progress messages while the files of an artifact are downloaded.

    Every call to `notify_downloaded` counts one more finished file. When the
    count reaches `nfiles` a final summary line is printed; before that, a
    spinner line is printed only if more than 0.1 clock units have elapsed
    since the previous message. A lock serializes the counter update and the
    logging.
    """

    def __init__(
        self,
        nfiles: int,
        clock_for_testing: Callable[[], float] = time.monotonic,
        termlog_for_testing: Callable[..., None] = termlog,
    ) -> None:
        """Set up the counters and remember the injected clock and logger.

        Args:
            nfiles: Number of files whose download is being tracked.
            clock_for_testing: Zero-argument callable returning the current
                time; defaults to `time.monotonic`.
            termlog_for_testing: Callable used to emit messages; called with
                the message and a `newline` keyword.
        """
        self._termlog = termlog_for_testing
        self._clock = clock_for_testing
        self._nfiles = nfiles

        self._spinner_index = 0
        self._n_files_downloaded = 0
        self._last_log_time = self._clock()
        self._lock = multiprocessing.dummy.Lock()

    def notify_downloaded(self) -> None:
        """Record one finished file and log progress if a message is due."""
        with self._lock:
            self._n_files_downloaded += 1

            if self._n_files_downloaded == self._nfiles:
                # Last file: always print the final summary.
                self._termlog(
                    f" {self._nfiles} of {self._nfiles} files downloaded. ",
                    # ^ trailing spaces to wipe out ellipsis from previous logs
                    newline=True,
                )
            elif self._clock() - self._last_log_time > 0.1:
                # Rate-limited spinner line; "\r" keeps it on a single row.
                self._spinner_index += 1
                frame = r"-\|/"[self._spinner_index % 4]
                self._termlog(
                    f"{frame} {self._n_files_downloaded} of {self._nfiles} files downloaded...\r",
                    newline=False,
                )
            else:
                # Nothing was printed, so the last-log timestamp is untouched.
                return

            self._last_log_time = self._clock()
|
@@ -0,0 +1,67 @@
|
|
1
|
+
"""Artifact manifest."""
|
2
|
+
from typing import TYPE_CHECKING, Dict, List, Mapping, Optional
|
3
|
+
|
4
|
+
from wandb.sdk.lib.hashutil import HexMD5
|
5
|
+
|
6
|
+
if TYPE_CHECKING:
|
7
|
+
from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
|
8
|
+
from wandb.sdk.artifacts.storage_policies.wandb_storage_policy import (
|
9
|
+
WandbStoragePolicy,
|
10
|
+
)
|
11
|
+
|
12
|
+
|
13
|
+
class ArtifactManifest:
    """Base class for artifact manifests: a storage policy plus path-keyed entries.

    Direct subclasses supply `version`, `to_manifest_json` and `digest`.
    """

    entries: Dict[str, "ArtifactManifestEntry"]

    @classmethod
    def from_manifest_json(cls, manifest_json: Dict) -> "ArtifactManifest":
        """Build a manifest from JSON using the direct subclass with a matching version.

        Raises:
            ValueError: If `manifest_json` has no "version" key, or no direct
                subclass reports that version.
        """
        if "version" not in manifest_json:
            raise ValueError("Invalid manifest format. Must contain version field.")

        wanted = manifest_json["version"]
        # Lazily scan the direct subclasses and stop at the first match.
        matching = next(
            (sub for sub in cls.__subclasses__() if sub.version() == wanted),
            None,
        )
        if matching is None:
            raise ValueError("Invalid manifest version.")
        return matching.from_manifest_json(manifest_json)

    @classmethod
    def version(cls) -> int:
        """Return the manifest format version; must be overridden."""
        raise NotImplementedError

    def __init__(
        self,
        storage_policy: "WandbStoragePolicy",
        entries: Optional[Mapping[str, "ArtifactManifestEntry"]] = None,
    ) -> None:
        """Store the policy and a private dict copy of `entries` (empty if falsy)."""
        self.storage_policy = storage_policy
        self.entries = {} if not entries else dict(entries)

    def to_manifest_json(self) -> Dict:
        """Serialize the manifest to a JSON-compatible dict; must be overridden."""
        raise NotImplementedError

    def digest(self) -> HexMD5:
        """Return the manifest digest; must be overridden."""
        raise NotImplementedError

    def add_entry(self, entry: "ArtifactManifestEntry") -> None:
        """Insert `entry` under its path.

        Re-adding a path is allowed only when the digest is unchanged.

        Raises:
            ValueError: If the path is already present with a different digest.
        """
        if entry.path in self.entries:
            if entry.digest != self.entries[entry.path].digest:
                raise ValueError("Cannot add the same path twice: %s" % entry.path)
        self.entries[entry.path] = entry

    def remove_entry(self, entry: "ArtifactManifestEntry") -> None:
        """Delete the entry stored under `entry.path`.

        Raises:
            FileNotFoundError: If no entry exists for that path.
        """
        if entry.path in self.entries:
            del self.entries[entry.path]
            return
        raise FileNotFoundError(f"Cannot remove missing entry: '{entry.path}'")

    def get_entry_by_path(self, path: str) -> Optional["ArtifactManifestEntry"]:
        """Return the entry stored under `path`, or None when absent."""
        return self.entries.get(path)

    def get_entries_in_directory(self, directory: str) -> List["ArtifactManifestEntry"]:
        """Return every entry whose key starts with `directory` followed by "/"."""
        # entries use forward slash even for windows
        prefix = directory + "/"
        return [
            entry for key, entry in self.entries.items() if key.startswith(prefix)
        ]
|
@@ -0,0 +1,159 @@
|
|
1
|
+
"""Artifact manifest entry."""
|
2
|
+
import os
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import TYPE_CHECKING, Dict, Optional, Union
|
5
|
+
from urllib.parse import urlparse
|
6
|
+
|
7
|
+
import wandb
|
8
|
+
from wandb import util
|
9
|
+
from wandb.errors.term import termwarn
|
10
|
+
from wandb.sdk.lib import filesystem
|
11
|
+
from wandb.sdk.lib.hashutil import (
|
12
|
+
B64MD5,
|
13
|
+
ETag,
|
14
|
+
b64_to_hex_id,
|
15
|
+
hex_to_b64_id,
|
16
|
+
md5_file_b64,
|
17
|
+
)
|
18
|
+
from wandb.sdk.lib.paths import FilePathStr, LogicalPath, StrPath, URIStr
|
19
|
+
|
20
|
+
if TYPE_CHECKING:
|
21
|
+
from wandb.apis.public import RetryingClient
|
22
|
+
from wandb.sdk.artifacts.artifact import Artifact
|
23
|
+
|
24
|
+
|
25
|
+
class ArtifactManifestEntry:
    """One file or reference recorded in an artifact manifest."""

    path: LogicalPath
    digest: Union[B64MD5, URIStr, FilePathStr, ETag]
    ref: Optional[Union[FilePathStr, URIStr]]
    birth_artifact_id: Optional[str]
    size: Optional[int]
    extra: Dict
    local_path: Optional[str]

    _parent_artifact: Optional["Artifact"] = None
    _download_url: Optional[str] = None

    def __init__(
        self,
        path: StrPath,
        digest: Union[B64MD5, URIStr, FilePathStr, ETag],
        ref: Optional[Union[FilePathStr, URIStr]] = None,
        birth_artifact_id: Optional[str] = None,
        size: Optional[int] = None,
        extra: Optional[Dict] = None,
        local_path: Optional[StrPath] = None,
    ) -> None:
        """Populate the entry fields.

        `path` is wrapped in `LogicalPath`, a falsy `extra` becomes an empty
        dict, and `local_path` is stored as a string (or None). When a local
        path is given without a size, the size is read from the file's stat.
        """
        self.path = LogicalPath(path)
        self.digest = digest
        self.ref = ref
        self.birth_artifact_id = birth_artifact_id
        self.extra = extra if extra else {}
        self.size = size
        self.local_path = None if not local_path else str(local_path)

        if self.size is None and self.local_path:
            self.size = Path(self.local_path).stat().st_size

    @property
    def name(self) -> LogicalPath:
        """Deprecated alias of `path`; emits a terminal warning on every access."""
        # TODO(hugh): add telemetry to see if anyone is still using this.
        termwarn("ArtifactManifestEntry.name is deprecated, use .path instead")
        return self.path

    def parent_artifact(self) -> "Artifact":
        """Get the artifact to which this artifact entry belongs.

        Returns:
            The parent artifact.

        Raises:
            NotImplementedError: If no parent artifact is set on this entry.
        """
        owner = self._parent_artifact
        if owner is None:
            raise NotImplementedError
        return owner

    def download(self, root: Optional[str] = None) -> FilePathStr:
        """Download this artifact entry to the specified root path.

        Arguments:
            root: (str, optional) The root path in which to download this
                artifact entry. Defaults to the artifact's root.

        Returns:
            (str): The path of the downloaded artifact entry.

        Raises:
            NotImplementedError: If no parent artifact is set on this entry.
        """
        owner = self._parent_artifact
        if owner is None:
            raise NotImplementedError

        root = root or owner._default_root()
        owner._add_download_root(root)
        dest_path = os.path.join(root, self.path)

        # A destination file that already carries the expected digest is
        # returned as-is, without consulting the cache or downloading.
        already_present = os.path.exists(dest_path) and self.digest == md5_file_b64(
            dest_path
        )
        if already_present:
            return FilePathStr(dest_path)

        policy = owner.manifest.storage_policy
        if self.ref is None:
            cache_path = policy.load_file(owner, self)
        else:
            cache_path = policy.load_reference(self, local=True)

        copied = filesystem.copy_or_overwrite_changed(cache_path, dest_path)
        return FilePathStr(str(copied))

    def ref_target(self) -> Union[FilePathStr, URIStr]:
        """Get the reference URL that is targeted by this artifact entry.

        Returns:
            (str): The reference URL of this artifact entry. Without a parent
            artifact this is the stored `ref` itself.

        Raises:
            ValueError: If this artifact entry was not a reference.
        """
        if self.ref is None:
            raise ValueError("Only reference entries support ref_target().")

        owner = self._parent_artifact
        if owner is None:
            return self.ref

        # Resolve through the parent's manifest, using the entry the manifest
        # holds under this path.
        load_reference = owner.manifest.storage_policy.load_reference
        manifest_entry = owner.manifest.entries[self.path]
        return load_reference(manifest_entry, local=False)

    def ref_url(self) -> str:
        """Get a URL to this artifact entry.

        These URLs can be referenced by another artifact.

        Returns:
            (str): A `wandb-artifact://` URL representing this artifact entry.

        Raises:
            NotImplementedError: If no parent artifact is set on this entry.

        Examples:
            Basic usage
            ```
            ref_url = source_artifact.get_path('file.txt').ref_url()
            derived_artifact.add_reference(ref_url)
            ```
        """
        if self._parent_artifact is None:
            raise NotImplementedError
        assert self._parent_artifact.id is not None

        artifact_hex_id = b64_to_hex_id(B64MD5(self._parent_artifact.id))
        return "wandb-artifact://" + artifact_hex_id + "/" + self.path

    def _is_artifact_reference(self) -> bool:
        """Tell whether `ref` is set and uses the "wandb-artifact" URL scheme."""
        if self.ref is None:
            return False
        return urlparse(self.ref).scheme == "wandb-artifact"

    def _get_referenced_artifact(self, client: "RetryingClient") -> "Artifact":
        """Load the artifact whose hex id is the host part of `ref`."""
        referenced_id = hex_to_b64_id(util.host_from_path(self.ref))
        artifact: "Artifact" = wandb.Artifact._from_id(referenced_id, client)
        assert artifact is not None
        return artifact
|
File without changes
|
@@ -0,0 +1,91 @@
|
|
1
|
+
"""Artifact manifest v1."""
|
2
|
+
from typing import Any, Dict, Mapping, Optional
|
3
|
+
|
4
|
+
from wandb.sdk.artifacts.artifact_manifest import ArtifactManifest
|
5
|
+
from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
|
6
|
+
from wandb.sdk.artifacts.storage_policies.wandb_storage_policy import WandbStoragePolicy
|
7
|
+
from wandb.sdk.artifacts.storage_policy import StoragePolicy
|
8
|
+
from wandb.sdk.lib.hashutil import HexMD5, _md5
|
9
|
+
|
10
|
+
|
11
|
+
class ArtifactManifestV1(ArtifactManifest):
    """Version 1 of the artifact manifest format."""

    @classmethod
    def version(cls) -> int:
        """Return the format version handled by this class (1)."""
        return 1

    @classmethod
    def from_manifest_json(cls, manifest_json: Dict) -> "ArtifactManifestV1":
        """Build a V1 manifest from its JSON form.

        Raises:
            ValueError: If the version is not 1, the named storage policy is
                unknown, or it is not a `WandbStoragePolicy` subclass.
        """
        if manifest_json["version"] != cls.version():
            raise ValueError(
                "Expected manifest version 1, got %s" % manifest_json["version"]
            )

        policy_name = manifest_json["storagePolicy"]
        policy_config = manifest_json.get("storagePolicyConfig", {})
        policy_cls = StoragePolicy.lookup_by_name(policy_name)
        if policy_cls is None:
            raise ValueError('Failed to find storage policy "%s"' % policy_name)
        if not issubclass(policy_cls, WandbStoragePolicy):
            raise ValueError(
                "No handler found for storage handler of type '%s'" % policy_name
            )

        # One entry per item of "contents", keyed by its name.
        entries: Dict[str, ArtifactManifestEntry] = {}
        for name, val in manifest_json["contents"].items():
            entries[name] = ArtifactManifestEntry(
                path=name,
                digest=val["digest"],
                birth_artifact_id=val.get("birthArtifactID"),
                ref=val.get("ref"),
                size=val.get("size"),
                extra=val.get("extra"),
                local_path=val.get("local_path"),
            )

        policy = policy_cls.from_config(policy_config)
        return cls(policy, entries)

    def __init__(
        self,
        storage_policy: "WandbStoragePolicy",
        entries: Optional[Mapping[str, ArtifactManifestEntry]] = None,
    ) -> None:
        """Forward the policy and entries to the base manifest."""
        super().__init__(storage_policy, entries=entries)

    @staticmethod
    def _entry_to_json(entry: ArtifactManifestEntry) -> Dict[str, Any]:
        """Serialize one entry; optional fields are emitted only when set."""
        json_entry: Dict[str, Any] = {"digest": entry.digest}
        if entry.birth_artifact_id:
            json_entry["birthArtifactID"] = entry.birth_artifact_id
        if entry.ref:
            json_entry["ref"] = entry.ref
        if entry.extra:
            json_entry["extra"] = entry.extra
        if entry.size is not None:
            json_entry["size"] = entry.size
        return json_entry

    def to_manifest_json(self) -> Dict:
        """This is the JSON that's stored in wandb_manifest.json.

        Entries are written in order of their path. Local paths are not
        included in the output.
        """
        ordered = sorted(self.entries.values(), key=lambda item: item.path)
        contents = {entry.path: self._entry_to_json(entry) for entry in ordered}
        return {
            "version": self.__class__.version(),
            "storagePolicy": self.storage_policy.name(),
            "storagePolicyConfig": self.storage_policy.config() or {},
            "contents": contents,
        }

    def digest(self) -> HexMD5:
        """Return the hex MD5 over a fixed header and each `name:digest` line.

        Entries are hashed in sorted order of their manifest key.
        """
        hasher = _md5()
        hasher.update(b"wandb-artifact-manifest-v1\n")
        for name in sorted(self.entries):
            line = f"{name}:{self.entries[name].digest}\n"
            hasher.update(line.encode())
        return HexMD5(hasher.hexdigest())
|
@@ -1,3 +1,4 @@
|
|
1
|
+
"""Artifact saver."""
|
1
2
|
import concurrent.futures
|
2
3
|
import json
|
3
4
|
import os
|
@@ -8,17 +9,17 @@ from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Sequence
|
|
8
9
|
import wandb
|
9
10
|
import wandb.filesync.step_prepare
|
10
11
|
from wandb import env, util
|
11
|
-
from wandb.sdk.
|
12
|
+
from wandb.sdk.artifacts.artifact_manifest import ArtifactManifest
|
12
13
|
from wandb.sdk.lib.filesystem import mkdir_exists_ok
|
13
14
|
from wandb.sdk.lib.hashutil import B64MD5, b64_to_hex_id, md5_file_b64
|
14
15
|
from wandb.sdk.lib.paths import FilePathStr, URIStr
|
15
16
|
|
16
17
|
if TYPE_CHECKING:
|
18
|
+
from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
|
19
|
+
from wandb.sdk.internal.file_pusher import FilePusher
|
17
20
|
from wandb.sdk.internal.internal_api import Api as InternalApi
|
18
21
|
from wandb.sdk.internal.progress import ProgressFn
|
19
22
|
|
20
|
-
from .file_pusher import FilePusher
|
21
|
-
|
22
23
|
if sys.version_info >= (3, 8):
|
23
24
|
from typing import Protocol
|
24
25
|
else:
|
@@ -26,13 +27,13 @@ if TYPE_CHECKING:
|
|
26
27
|
|
27
28
|
class SaveFn(Protocol):
|
28
29
|
def __call__(
|
29
|
-
self, entry: ArtifactManifestEntry, progress_callback: "ProgressFn"
|
30
|
+
self, entry: "ArtifactManifestEntry", progress_callback: "ProgressFn"
|
30
31
|
) -> bool:
|
31
32
|
pass
|
32
33
|
|
33
34
|
class SaveFnAsync(Protocol):
|
34
35
|
def __call__(
|
35
|
-
self, entry: ArtifactManifestEntry, progress_callback: "ProgressFn"
|
36
|
+
self, entry: "ArtifactManifestEntry", progress_callback: "ProgressFn"
|
36
37
|
) -> Awaitable[bool]:
|
37
38
|
pass
|
38
39
|
|
@@ -1,11 +1,13 @@
|
|
1
|
+
"""Artifact cache."""
|
1
2
|
import contextlib
|
2
3
|
import hashlib
|
3
4
|
import os
|
4
5
|
import secrets
|
5
6
|
from typing import IO, TYPE_CHECKING, ContextManager, Dict, Generator, Optional, Tuple
|
6
7
|
|
7
|
-
from wandb import env, util
|
8
|
-
from wandb.sdk.
|
8
|
+
from wandb import env, termwarn, util
|
9
|
+
from wandb.sdk.artifacts.exceptions import ArtifactNotLoggedError
|
10
|
+
from wandb.sdk.lib.capped_dict import CappedDict
|
9
11
|
from wandb.sdk.lib.filesystem import mkdir_exists_ok
|
10
12
|
from wandb.sdk.lib.hashutil import B64MD5, ETag, b64_to_hex_id
|
11
13
|
from wandb.sdk.lib.paths import FilePathStr, StrPath, URIStr
|
@@ -13,7 +15,7 @@ from wandb.sdk.lib.paths import FilePathStr, StrPath, URIStr
|
|
13
15
|
if TYPE_CHECKING:
|
14
16
|
import sys
|
15
17
|
|
16
|
-
from wandb.sdk import
|
18
|
+
from wandb.sdk.artifacts.artifact import Artifact
|
17
19
|
|
18
20
|
if sys.version_info >= (3, 8):
|
19
21
|
from typing import Protocol
|
@@ -33,8 +35,8 @@ class ArtifactsCache:
|
|
33
35
|
mkdir_exists_ok(self._cache_dir)
|
34
36
|
self._md5_obj_dir = os.path.join(self._cache_dir, "obj", "md5")
|
35
37
|
self._etag_obj_dir = os.path.join(self._cache_dir, "obj", "etag")
|
36
|
-
self._artifacts_by_id: Dict[str, Artifact] =
|
37
|
-
self._artifacts_by_client_id: Dict[str, "
|
38
|
+
self._artifacts_by_id: Dict[str, "Artifact"] = CappedDict()
|
39
|
+
self._artifacts_by_client_id: Dict[str, "Artifact"] = CappedDict()
|
38
40
|
|
39
41
|
def check_md5_obj_path(
|
40
42
|
self, b64_md5: B64MD5, size: int
|
@@ -74,18 +76,17 @@ class ArtifactsCache:
|
|
74
76
|
raise ArtifactNotLoggedError(artifact, "store_artifact")
|
75
77
|
self._artifacts_by_id[artifact.id] = artifact
|
76
78
|
|
77
|
-
def get_client_artifact(
|
78
|
-
self, client_id: str
|
79
|
-
) -> Optional["wandb_artifacts.Artifact"]:
|
79
|
+
def get_client_artifact(self, client_id: str) -> Optional["Artifact"]:
|
80
80
|
return self._artifacts_by_client_id.get(client_id)
|
81
81
|
|
82
|
-
def store_client_artifact(self, artifact: "
|
82
|
+
def store_client_artifact(self, artifact: "Artifact") -> None:
|
83
83
|
self._artifacts_by_client_id[artifact._client_id] = artifact
|
84
84
|
|
85
|
-
def cleanup(self, target_size: int) -> int:
|
85
|
+
def cleanup(self, target_size: int, remove_temp: bool = False) -> int:
|
86
86
|
bytes_reclaimed = 0
|
87
87
|
paths = {}
|
88
88
|
total_size = 0
|
89
|
+
temp_size = 0
|
89
90
|
for root, _, files in os.walk(self._cache_dir):
|
90
91
|
for file in files:
|
91
92
|
try:
|
@@ -93,14 +94,23 @@ class ArtifactsCache:
|
|
93
94
|
stat = os.stat(path)
|
94
95
|
|
95
96
|
if file.startswith(ArtifactsCache._TMP_PREFIX):
|
96
|
-
|
97
|
-
|
97
|
+
if remove_temp:
|
98
|
+
os.remove(path)
|
99
|
+
bytes_reclaimed += stat.st_size
|
100
|
+
else:
|
101
|
+
temp_size += stat.st_size
|
98
102
|
continue
|
99
103
|
except OSError:
|
100
104
|
continue
|
101
105
|
paths[path] = stat
|
102
106
|
total_size += stat.st_size
|
103
107
|
|
108
|
+
if temp_size:
|
109
|
+
termwarn(
|
110
|
+
f"Cache contains {util.to_human_size(temp_size)} of temporary files. "
|
111
|
+
"Run `wandb artifact cleanup --remove-temp` to remove them."
|
112
|
+
)
|
113
|
+
|
104
114
|
sorted_paths = sorted(paths.items(), key=lambda x: x[1].st_atime)
|
105
115
|
for path, stat in sorted_paths:
|
106
116
|
if total_size < target_size:
|
@@ -0,0 +1,55 @@
|
|
1
|
+
"""Artifact exceptions."""
|
2
|
+
from typing import TYPE_CHECKING, Optional
|
3
|
+
|
4
|
+
from wandb import errors
|
5
|
+
|
6
|
+
if TYPE_CHECKING:
|
7
|
+
from wandb.sdk.artifacts.artifact import Artifact
|
8
|
+
|
9
|
+
|
10
|
+
class ArtifactStatusError(AttributeError):
    """Raised when an artifact is in an invalid state for the requested operation."""

    def __init__(
        self,
        artifact: Optional["Artifact"] = None,
        attr: Optional[str] = None,
        msg: str = "Artifact is in an invalid state for the requested operation.",
    ):
        """Format `msg` and initialize the exception.

        `msg` is a `str.format` template that may use the fields `artifact`,
        `attr` and `method_id`. `method_id` is the artifact's class name
        ("Artifact" when the artifact is falsy), followed by ".<attr>" when
        `attr` is given.
        """
        object_name = "Artifact"
        if artifact:
            object_name = artifact.__class__.__name__

        method_id = object_name if not attr else f"{object_name}.{attr}"
        message = msg.format(artifact=artifact, attr=attr, method_id=method_id)
        super().__init__(message)

        # Follow the same pattern as AttributeError.
        self.obj = artifact
        self.name = attr if attr else ""
|
25
|
+
|
26
|
+
|
27
|
+
class ArtifactNotLoggedError(ArtifactStatusError):
    """Raised for Artifact methods or attributes only available after logging."""

    def __init__(
        self, artifact: Optional["Artifact"] = None, attr: Optional[str] = None
    ):
        """Initialize with the fixed "used prior to logging" message template."""
        template = (
            "'{method_id}' used prior to logging artifact or while in offline mode. "
            "Call wait() before accessing logged artifact properties."
        )
        super().__init__(artifact=artifact, attr=attr, msg=template)
|
39
|
+
|
40
|
+
|
41
|
+
class ArtifactFinalizedError(ArtifactStatusError):
    """Raised for Artifact methods or attributes that can't be changed after logging."""

    def __init__(
        self, artifact: Optional["Artifact"] = None, attr: Optional[str] = None
    ):
        """Initialize with the fixed "used on logged artifact" message template."""
        template = (
            "'{method_id}' used on logged artifact. Can't modify finalized artifact."
        )
        super().__init__(artifact=artifact, attr=attr, msg=template)
|
52
|
+
|
53
|
+
|
54
|
+
class WaitTimeoutError(errors.Error):
    """Raised when a wait() call times out before the process has finished."""
|
@@ -0,0 +1,59 @@
|
|
1
|
+
"""Storage handler."""
|
2
|
+
from typing import TYPE_CHECKING, Optional, Sequence, Union
|
3
|
+
|
4
|
+
from wandb.sdk.lib.paths import FilePathStr, URIStr
|
5
|
+
|
6
|
+
if TYPE_CHECKING:
|
7
|
+
from urllib.parse import ParseResult
|
8
|
+
|
9
|
+
from wandb.sdk.artifacts.artifact import Artifact
|
10
|
+
from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
|
11
|
+
|
12
|
+
# Default object-count limit. NOTE(review): presumably the fallback used when a
# handler's `max_objects` argument is None — confirm against the handlers.
DEFAULT_MAX_OBJECTS = 10000
|
13
|
+
|
14
|
+
|
15
|
+
class StorageHandler:
    """Interface for storage handlers; every method here must be overridden."""

    def can_handle(self, parsed_url: "ParseResult") -> bool:
        """Report whether this handler supports the given URL.

        Args:
            parsed_url: The URL, already parsed into a `ParseResult`.

        Returns:
            True if this handler can handle the URL, otherwise False.
        """
        raise NotImplementedError

    def load_path(
        self,
        manifest_entry: "ArtifactManifestEntry",
        local: bool = False,
    ) -> Union[URIStr, FilePathStr]:
        """Load the file or directory described by a manifest entry.

        Args:
            manifest_entry: The manifest entry to load.
            local: Whether to load the file locally or not.

        Returns:
            A path to the file represented by `manifest_entry`.
        """
        raise NotImplementedError

    def store_path(
        self,
        artifact: "Artifact",
        path: Union[URIStr, FilePathStr],
        name: Optional[str] = None,
        checksum: bool = True,
        max_objects: Optional[int] = None,
    ) -> Sequence["ArtifactManifestEntry"]:
        """Store the file or directory at `path` into the given artifact.

        Args:
            artifact: The artifact that the path is stored to.
            path: The path to store.
            name: If specified, the logical name that should map to `path`.
            checksum: Whether to compute the checksum of the file.
            max_objects: The maximum number of objects to store.

        Returns:
            The manifest entries to store within the artifact.
        """
        raise NotImplementedError
|
File without changes
|