wandb 0.15.4__py3-none-any.whl → 0.15.5__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- wandb/__init__.py +1 -1
- wandb/analytics/sentry.py +1 -0
- wandb/apis/internal.py +3 -0
- wandb/apis/public.py +18 -20
- wandb/beta/workflows.py +5 -6
- wandb/cli/cli.py +27 -27
- wandb/data_types.py +2 -0
- wandb/integration/langchain/wandb_tracer.py +16 -179
- wandb/integration/sagemaker/config.py +2 -2
- wandb/integration/tensorboard/log.py +4 -4
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/proto/wandb_deprecated.py +3 -1
- wandb/sdk/__init__.py +1 -4
- wandb/sdk/artifacts/__init__.py +0 -14
- wandb/sdk/artifacts/artifact.py +1757 -277
- wandb/sdk/artifacts/artifact_manifest_entry.py +26 -6
- wandb/sdk/artifacts/artifact_state.py +10 -0
- wandb/sdk/artifacts/artifacts_cache.py +7 -8
- wandb/sdk/artifacts/exceptions.py +4 -4
- wandb/sdk/artifacts/storage_handler.py +2 -2
- wandb/sdk/artifacts/storage_handlers/azure_handler.py +16 -6
- wandb/sdk/artifacts/storage_handlers/gcs_handler.py +2 -2
- wandb/sdk/artifacts/storage_handlers/http_handler.py +2 -2
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +2 -2
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +2 -2
- wandb/sdk/artifacts/storage_handlers/s3_handler.py +35 -32
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +2 -2
- wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +5 -9
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +2 -2
- wandb/sdk/artifacts/storage_policies/s3_bucket_policy.py +2 -2
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +24 -16
- wandb/sdk/artifacts/storage_policy.py +3 -3
- wandb/sdk/data_types/_dtypes.py +7 -12
- wandb/sdk/data_types/base_types/json_metadata.py +2 -2
- wandb/sdk/data_types/base_types/media.py +5 -6
- wandb/sdk/data_types/base_types/wb_value.py +12 -13
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +4 -5
- wandb/sdk/data_types/helper_types/classes.py +5 -8
- wandb/sdk/data_types/helper_types/image_mask.py +4 -5
- wandb/sdk/data_types/histogram.py +3 -3
- wandb/sdk/data_types/html.py +3 -4
- wandb/sdk/data_types/image.py +4 -5
- wandb/sdk/data_types/molecule.py +2 -2
- wandb/sdk/data_types/object_3d.py +3 -3
- wandb/sdk/data_types/plotly.py +2 -2
- wandb/sdk/data_types/saved_model.py +7 -8
- wandb/sdk/data_types/trace_tree.py +4 -4
- wandb/sdk/data_types/video.py +4 -4
- wandb/sdk/interface/interface.py +8 -10
- wandb/sdk/internal/file_stream.py +2 -3
- wandb/sdk/internal/internal_api.py +99 -4
- wandb/sdk/internal/job_builder.py +15 -7
- wandb/sdk/internal/sender.py +4 -0
- wandb/sdk/internal/settings_static.py +1 -0
- wandb/sdk/launch/_project_spec.py +9 -7
- wandb/sdk/launch/agent/agent.py +115 -58
- wandb/sdk/launch/agent/job_status_tracker.py +34 -0
- wandb/sdk/launch/agent/run_queue_item_file_saver.py +45 -0
- wandb/sdk/launch/builder/abstract.py +5 -1
- wandb/sdk/launch/builder/build.py +16 -10
- wandb/sdk/launch/builder/docker_builder.py +9 -2
- wandb/sdk/launch/builder/kaniko_builder.py +108 -22
- wandb/sdk/launch/builder/noop.py +3 -1
- wandb/sdk/launch/environment/aws_environment.py +2 -1
- wandb/sdk/launch/environment/azure_environment.py +124 -0
- wandb/sdk/launch/github_reference.py +30 -18
- wandb/sdk/launch/launch.py +1 -1
- wandb/sdk/launch/loader.py +15 -0
- wandb/sdk/launch/registry/azure_container_registry.py +132 -0
- wandb/sdk/launch/registry/elastic_container_registry.py +38 -4
- wandb/sdk/launch/registry/google_artifact_registry.py +46 -7
- wandb/sdk/launch/runner/abstract.py +19 -3
- wandb/sdk/launch/runner/kubernetes_runner.py +111 -47
- wandb/sdk/launch/runner/local_container.py +101 -48
- wandb/sdk/launch/runner/sagemaker_runner.py +59 -9
- wandb/sdk/launch/runner/vertex_runner.py +8 -4
- wandb/sdk/launch/sweeps/scheduler.py +102 -27
- wandb/sdk/launch/sweeps/utils.py +21 -0
- wandb/sdk/launch/utils.py +19 -7
- wandb/sdk/lib/_settings_toposort_generated.py +3 -0
- wandb/sdk/service/server.py +22 -9
- wandb/sdk/service/service.py +27 -8
- wandb/sdk/verify/verify.py +6 -9
- wandb/sdk/wandb_config.py +2 -4
- wandb/sdk/wandb_init.py +2 -0
- wandb/sdk/wandb_require.py +7 -0
- wandb/sdk/wandb_run.py +32 -35
- wandb/sdk/wandb_settings.py +10 -3
- wandb/testing/relay.py +15 -2
- wandb/util.py +55 -23
- {wandb-0.15.4.dist-info → wandb-0.15.5.dist-info}/METADATA +11 -8
- {wandb-0.15.4.dist-info → wandb-0.15.5.dist-info}/RECORD +97 -97
- wandb/integration/langchain/util.py +0 -191
- wandb/sdk/artifacts/invalid_artifact.py +0 -23
- wandb/sdk/artifacts/lazy_artifact.py +0 -162
- wandb/sdk/artifacts/local_artifact.py +0 -719
- wandb/sdk/artifacts/public_artifact.py +0 -1188
- {wandb-0.15.4.dist-info → wandb-0.15.5.dist-info}/LICENSE +0 -0
- {wandb-0.15.4.dist-info → wandb-0.15.5.dist-info}/WHEEL +0 -0
- {wandb-0.15.4.dist-info → wandb-0.15.5.dist-info}/entry_points.txt +0 -0
- {wandb-0.15.4.dist-info → wandb-0.15.5.dist-info}/top_level.txt +0 -0
@@ -1,719 +0,0 @@
|
|
1
|
-
"""Local (draft) artifact."""
|
2
|
-
import contextlib
|
3
|
-
import json
|
4
|
-
import os
|
5
|
-
import re
|
6
|
-
import shutil
|
7
|
-
import tempfile
|
8
|
-
import time
|
9
|
-
from pathlib import PurePosixPath
|
10
|
-
from typing import (
|
11
|
-
IO,
|
12
|
-
TYPE_CHECKING,
|
13
|
-
Any,
|
14
|
-
Dict,
|
15
|
-
Generator,
|
16
|
-
List,
|
17
|
-
Optional,
|
18
|
-
Sequence,
|
19
|
-
Tuple,
|
20
|
-
Union,
|
21
|
-
cast,
|
22
|
-
)
|
23
|
-
from urllib.parse import urlparse
|
24
|
-
|
25
|
-
import wandb
|
26
|
-
import wandb.data_types as data_types
|
27
|
-
from wandb import env, util
|
28
|
-
from wandb.errors.term import termlog
|
29
|
-
from wandb.sdk import lib as wandb_lib
|
30
|
-
from wandb.sdk.artifacts.artifact import Artifact as ArtifactInterface
|
31
|
-
from wandb.sdk.artifacts.artifact_manifest import ArtifactManifest
|
32
|
-
from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
|
33
|
-
from wandb.sdk.artifacts.artifact_manifests.artifact_manifest_v1 import (
|
34
|
-
ArtifactManifestV1,
|
35
|
-
)
|
36
|
-
from wandb.sdk.artifacts.artifact_saver import get_staging_dir
|
37
|
-
from wandb.sdk.artifacts.artifacts_cache import get_artifacts_cache
|
38
|
-
from wandb.sdk.artifacts.exceptions import (
|
39
|
-
ArtifactFinalizedError,
|
40
|
-
ArtifactNotLoggedError,
|
41
|
-
)
|
42
|
-
from wandb.sdk.artifacts.storage_layout import StorageLayout
|
43
|
-
from wandb.sdk.artifacts.storage_policies.wandb_storage_policy import WandbStoragePolicy
|
44
|
-
from wandb.sdk.lib import filesystem, runid
|
45
|
-
from wandb.sdk.lib.hashutil import B64MD5, b64_to_hex_id, md5_file_b64
|
46
|
-
from wandb.sdk.lib.paths import FilePathStr, LogicalPath, StrPath, URIStr
|
47
|
-
|
48
|
-
if TYPE_CHECKING:
|
49
|
-
import wandb.apis.public
|
50
|
-
|
51
|
-
# Scratch directory created once at import time; `Artifact.add` writes the
# JSON for objects named under "media/tables" beneath it before staging them.
# NOTE: the first positional argument of `TemporaryDirectory` is `suffix`, so
# the directory name *ends* with "wandb-artifacts" rather than starting with it.
ARTIFACT_TMP = tempfile.TemporaryDirectory("wandb-artifacts")
|
52
|
-
|
53
|
-
|
54
|
-
class _AddedObj:
    """Record pairing a manifest entry with the media object it was created from.

    `Artifact.add` keeps one of these per added object so that adding the same
    object again can return the existing entry.
    """

    def __init__(self, entry: ArtifactManifestEntry, obj: data_types.WBValue):
        # Plain attribute storage; no validation or copying is performed.
        self.entry, self.obj = entry, obj
|
58
|
-
|
59
|
-
|
60
|
-
def _normalize_metadata(metadata: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
61
|
-
if metadata is None:
|
62
|
-
return {}
|
63
|
-
if not isinstance(metadata, dict):
|
64
|
-
raise TypeError(f"metadata must be dict, not {type(metadata)}")
|
65
|
-
return cast(
|
66
|
-
Dict[str, Any], json.loads(json.dumps(util.json_friendly_val(metadata)))
|
67
|
-
)
|
68
|
-
|
69
|
-
|
70
|
-
class Artifact(ArtifactInterface):
    """Flexible and lightweight building block for dataset and model versioning.

    Constructs an empty artifact whose contents can be populated using its
    `add` family of functions. Once the artifact has all the desired files,
    you can call `wandb.log_artifact()` to log it.

    Until the artifact has been logged (``_logged_artifact`` is set), most
    read accessors either return the locally stored draft value or raise
    `ArtifactNotLoggedError`; once logged, they delegate to the logged artifact.

    Arguments:
        name: (str) A human-readable name for this artifact, which is how you
            can identify this artifact in the UI or reference it in `use_artifact`
            calls. Names can contain letters, numbers, underscores, hyphens, and
            dots. The name must be unique across a project.
        type: (str) The type of the artifact, which is used to organize and differentiate
            artifacts. Common types include `dataset` or `model`, but you can use any string
            containing letters, numbers, underscores, hyphens, and dots.
        description: (str, optional) Free text that offers a description of the artifact. The
            description is markdown rendered in the UI, so this is a good place to place tables,
            links, etc.
        metadata: (dict, optional) Structured data associated with the artifact,
            for example class distribution of a dataset. This will eventually be queryable
            and plottable in the UI. There is a hard limit of 100 total keys.
        incremental: (bool, optional) Experimental flag; a warning is printed
            when it is truthy.
        use_as: (str, optional) Stored as-is and exposed through `use_as`.

    Examples:
        Basic usage
        ```
        wandb.init()

        artifact = wandb.Artifact('mnist', type='dataset')
        artifact.add_dir('mnist/')
        wandb.log_artifact(artifact)
        ```

    Returns:
        An `Artifact` object.
    """

    _added_objs: Dict[int, _AddedObj]
    _added_local_paths: Dict[str, ArtifactManifestEntry]
    _distributed_id: Optional[str]
    _metadata: dict
    _logged_artifact: Optional[ArtifactInterface]
    _incremental: bool
    _client_id: str

    def __init__(
        self,
        name: str,
        type: str,
        description: Optional[str] = None,
        metadata: Optional[dict] = None,
        incremental: Optional[bool] = None,
        use_as: Optional[str] = None,
    ) -> None:
        """Validate the name/type and set up an empty draft artifact.

        Raises:
            ValueError: If `name` contains characters other than letters,
                digits, `_`, `-` or `.`, or if `type` is `job` or starts
                with `wandb-`.
            TypeError: If `metadata` is neither `None` nor a dict.
        """
        if not re.match(r"^[a-zA-Z0-9_\-.]+$", name):
            raise ValueError(
                "Artifact name may only contain alphanumeric characters, dashes, underscores, and dots. "
                'Invalid name: "%s"' % name
            )
        if type == "job" or type.startswith("wandb-"):
            raise ValueError(
                "Artifact types 'job' and 'wandb-*' are reserved for internal use. "
                "Please use a different type."
            )

        metadata = _normalize_metadata(metadata)
        # TODO: this shouldn't be a property of the artifact. It's a more like an
        # argument to log_artifact.
        storage_layout = StorageLayout.V2
        if env.get_use_v1_artifacts():
            storage_layout = StorageLayout.V1

        self._storage_policy = WandbStoragePolicy(
            config={
                "storageLayout": storage_layout,
                # TODO: storage region
            }
        )
        self._final = False
        self._digest = ""
        self._file_entries = None
        self._manifest = ArtifactManifestV1(self._storage_policy)
        self._cache = get_artifacts_cache()
        self._added_objs = {}
        self._added_local_paths = {}
        # You can write into this directory when creating artifact files
        self._artifact_dir = tempfile.TemporaryDirectory()
        self._type = type
        self._name = name
        self._description = description
        self._metadata = metadata
        self._distributed_id = None
        self._logged_artifact = None
        self._incremental = False
        self._client_id = runid.generate_id(128)
        self._sequence_client_id = runid.generate_id(128)
        self._cache.store_client_artifact(self)
        self._use_as = use_as

        if incremental:
            self._incremental = incremental
            wandb.termwarn("Using experimental arg `incremental`")

    @property
    def id(self) -> Optional[str]:
        """The logged artifact's ID, or `None` if not logged yet."""
        if self._logged_artifact:
            return self._logged_artifact.id

        # The artifact hasn't been saved so an ID doesn't exist yet.
        return None

    @property
    def entity(self) -> str:
        """The logged artifact's entity; raises `ArtifactNotLoggedError` before logging."""
        if self._logged_artifact:
            return self._logged_artifact.entity
        raise ArtifactNotLoggedError(self, "entity")

    @property
    def project(self) -> str:
        """The logged artifact's project; raises `ArtifactNotLoggedError` before logging."""
        if self._logged_artifact:
            return self._logged_artifact.project
        raise ArtifactNotLoggedError(self, "project")

    @property
    def name(self) -> str:
        """The logged artifact's name, or the name given at construction."""
        if self._logged_artifact:
            return self._logged_artifact.name
        return self._name

    @property
    def version(self) -> str:
        """The logged artifact's version; raises `ArtifactNotLoggedError` before logging."""
        if self._logged_artifact:
            return self._logged_artifact.version
        raise ArtifactNotLoggedError(self, "version")

    @property
    def source_entity(self) -> str:
        """Alias for `entity`."""
        return self.entity

    @property
    def source_project(self) -> str:
        """Alias for `project`."""
        return self.project

    @property
    def source_name(self) -> str:
        """Alias for `name`."""
        return self.name

    @property
    def source_version(self) -> str:
        """Alias for `version`."""
        return self.version

    @property
    def manifest(self) -> ArtifactManifest:
        """The artifact's manifest.

        Note: for a draft artifact, reading this property finalizes it.
        """
        if self._logged_artifact:
            return self._logged_artifact.manifest

        self.finalize()
        return self._manifest

    @property
    def digest(self) -> str:
        """The artifact's digest.

        Note: for a draft artifact, reading this property finalizes it.
        """
        if self._logged_artifact:
            return self._logged_artifact.digest

        # finalize() computes the digest from the local manifest.
        self.finalize()
        return self._digest

    @property
    def type(self) -> str:
        """The logged artifact's type, or the type given at construction."""
        if self._logged_artifact:
            return self._logged_artifact.type

        return self._type

    @property
    def state(self) -> str:
        """The logged artifact's state, or `"PENDING"` before logging."""
        if self._logged_artifact:
            return self._logged_artifact.state

        return "PENDING"

    @property
    def size(self) -> int:
        """Total size of the artifact.

        Before logging, this is the sum of the sizes of all local manifest
        entries whose size is known (entries with `size is None` are skipped).
        """
        if self._logged_artifact:
            return self._logged_artifact.size
        return sum(
            entry.size
            for entry in self._manifest.entries.values()
            if entry.size is not None
        )

    @property
    def commit_hash(self) -> str:
        """The logged artifact's commit hash; raises `ArtifactNotLoggedError` before logging."""
        if self._logged_artifact:
            return self._logged_artifact.commit_hash

        raise ArtifactNotLoggedError(self, "commit_hash")

    @property
    def description(self) -> Optional[str]:
        """The logged artifact's description, or the locally stored one."""
        if self._logged_artifact:
            return self._logged_artifact.description

        return self._description

    @description.setter
    def description(self, desc: Optional[str]) -> None:
        """Set the description on the logged artifact if present, else locally."""
        if self._logged_artifact:
            self._logged_artifact.description = desc
            return

        self._description = desc

    @property
    def metadata(self) -> dict:
        """The logged artifact's metadata, or the locally stored metadata."""
        if self._logged_artifact:
            return self._logged_artifact.metadata

        return self._metadata

    @metadata.setter
    def metadata(self, metadata: dict) -> None:
        """Normalize `metadata`, then store it on the logged artifact or locally.

        Raises:
            TypeError: If `metadata` is neither `None` nor a dict.
        """
        metadata = _normalize_metadata(metadata)
        if self._logged_artifact:
            self._logged_artifact.metadata = metadata
            return

        self._metadata = metadata

    @property
    def aliases(self) -> List[str]:
        """The logged artifact's aliases; raises `ArtifactNotLoggedError` before logging."""
        if self._logged_artifact:
            return self._logged_artifact.aliases

        raise ArtifactNotLoggedError(self, "aliases")

    @aliases.setter
    def aliases(self, aliases: List[str]) -> None:
        """Set artifact aliases.

        Arguments:
            aliases: (list) The list of aliases associated with this artifact.

        Raises:
            ArtifactNotLoggedError: If the artifact has not been logged yet.
        """
        if self._logged_artifact:
            self._logged_artifact.aliases = aliases
            return

        raise ArtifactNotLoggedError(self, "aliases")

    @property
    def use_as(self) -> Optional[str]:
        """The `use_as` value given at construction."""
        return self._use_as

    @property
    def distributed_id(self) -> Optional[str]:
        """The distributed ID currently stored on this draft (initially `None`)."""
        return self._distributed_id

    @distributed_id.setter
    def distributed_id(self, distributed_id: Optional[str]) -> None:
        """Store `distributed_id` on this draft."""
        self._distributed_id = distributed_id

    @property
    def incremental(self) -> bool:
        """Whether the artifact was created with a truthy `incremental` argument."""
        return self._incremental

    def used_by(self) -> List["wandb.apis.public.Run"]:
        """Delegate to the logged artifact; raises `ArtifactNotLoggedError` before logging."""
        if self._logged_artifact:
            return self._logged_artifact.used_by()

        raise ArtifactNotLoggedError(self, "used_by")

    def logged_by(self) -> Optional["wandb.apis.public.Run"]:
        """Delegate to the logged artifact; raises `ArtifactNotLoggedError` before logging."""
        if self._logged_artifact:
            return self._logged_artifact.logged_by()

        raise ArtifactNotLoggedError(self, "logged_by")

    @contextlib.contextmanager
    def new_file(
        self, name: str, mode: str = "w", encoding: Optional[str] = None
    ) -> Generator[IO, None, None]:
        """Open a new file inside the artifact's private directory and add it on exit.

        Arguments:
            name: Artifact-relative name of the file; leading `/` is stripped
                when building the on-disk path.
            mode: Mode passed to `util.fsync_open`.
            encoding: Encoding passed to `util.fsync_open`.

        Yields:
            The open file object. After the `with` body completes without
            error, the file is added to the artifact under `name`.

        Raises:
            ArtifactFinalizedError: If the artifact is already finalized.
            ValueError: If a file already exists at the target path.
        """
        self._ensure_can_add()
        path = os.path.join(self._artifact_dir.name, name.lstrip("/"))
        if os.path.exists(path):
            raise ValueError(f"File with name {name!r} already exists at {path!r}")

        filesystem.mkdir_exists_ok(os.path.dirname(path))
        try:
            with util.fsync_open(path, mode, encoding) as f:
                yield f
        except UnicodeEncodeError as e:
            wandb.termerror(
                f"Failed to open the provided file (UnicodeEncodeError: {e}). Please provide the proper encoding."
            )
            # Bare raise keeps the original traceback intact.
            raise
        self.add_file(path, name=name)

    def add_file(
        self,
        local_path: str,
        name: Optional[str] = None,
        is_tmp: Optional[bool] = False,
    ) -> ArtifactManifestEntry:
        """Add a single local file to the artifact.

        Arguments:
            local_path: Path of an existing regular file.
            name: Artifact-relative name; defaults to the basename of
                `local_path`.
            is_tmp: When truthy, the part of the file name before the first
                `.` is replaced by the first 20 hex characters derived from
                the file's MD5 digest.

        Returns:
            The manifest entry created for the file.

        Raises:
            ArtifactFinalizedError: If the artifact is already finalized.
            ValueError: If `local_path` is not a file.
        """
        self._ensure_can_add()
        if not os.path.isfile(local_path):
            raise ValueError("Path is not a file: %s" % local_path)

        name = LogicalPath(name or os.path.basename(local_path))
        digest = md5_file_b64(local_path)

        if is_tmp:
            file_path, file_name = os.path.split(name)
            file_name_parts = file_name.split(".")
            file_name_parts[0] = b64_to_hex_id(digest)[:20]
            name = os.path.join(file_path, ".".join(file_name_parts))

        return self._add_local_file(name, local_path, digest=digest)

    def add_dir(self, local_path: str, name: Optional[str] = None) -> None:
        """Add every file under a local directory (following symlinks).

        Arguments:
            local_path: Path of an existing directory.
            name: Optional artifact-relative prefix joined in front of each
                file's path relative to `local_path`.

        Raises:
            ArtifactFinalizedError: If the artifact is already finalized.
            ValueError: If `local_path` is not a directory.
        """
        self._ensure_can_add()
        if not os.path.isdir(local_path):
            raise ValueError("Path is not a directory: %s" % local_path)

        termlog(
            "Adding directory to artifact (%s)... "
            % os.path.join(".", os.path.normpath(local_path)),
            newline=False,
        )
        start_time = time.time()

        paths = []
        for dirpath, _, filenames in os.walk(local_path, followlinks=True):
            for fname in filenames:
                physical_path = os.path.join(dirpath, fname)
                logical_path = os.path.relpath(physical_path, start=local_path)
                if name is not None:
                    logical_path = os.path.join(name, logical_path)
                paths.append((logical_path, physical_path))

        def add_manifest_file(log_phy_path: Tuple[str, str]) -> None:
            logical_path, physical_path = log_phy_path
            self._add_local_file(logical_path, physical_path)

        import multiprocessing.dummy  # this uses threads

        num_threads = 8
        pool = multiprocessing.dummy.Pool(num_threads)
        try:
            pool.map(add_manifest_file, paths)
        finally:
            # Always release the worker threads, even if a file failed to add.
            pool.close()
            pool.join()

        termlog("Done. %.1fs" % (time.time() - start_time), prefix=False)

    def add_reference(
        self,
        uri: Union[ArtifactManifestEntry, str],
        name: Optional[StrPath] = None,
        checksum: bool = True,
        max_objects: Optional[int] = None,
    ) -> Sequence[ArtifactManifestEntry]:
        """Add a reference (by URI) to the artifact.

        Arguments:
            uri: A URI string, or an `ArtifactManifestEntry` whose `ref_url()`
                supplies the URI.
            name: Optional artifact-relative name for the reference.
            checksum: Forwarded to the storage policy's `store_reference`.
            max_objects: Forwarded to the storage policy's `store_reference`.

        Returns:
            The manifest entries produced by the storage policy, each of which
            has been added to the local manifest.

        Raises:
            ArtifactFinalizedError: If the artifact is already finalized.
            TypeError: If `uri` is neither a `str` nor an
                `ArtifactManifestEntry`.
            ValueError: If the URI has no scheme.
        """
        self._ensure_can_add()
        if name is not None:
            name = LogicalPath(name)

        # This is a bit of a hack, we want to check if the uri is a of the type
        # ArtifactManifestEntry which is a private class returned by Artifact.get_path in
        # wandb/apis/public.py. If so, then recover the reference URL.
        if isinstance(uri, ArtifactManifestEntry):
            uri_str = uri.ref_url()
        elif isinstance(uri, str):
            uri_str = uri
        else:
            # Previously this fell through and failed with UnboundLocalError.
            raise TypeError(
                f"uri must be a str or ArtifactManifestEntry, not {type(uri)}"
            )
        url = urlparse(str(uri_str))
        if not url.scheme:
            raise ValueError(
                "References must be URIs. To reference a local file, use file://"
            )

        manifest_entries = self._storage_policy.store_reference(
            self,
            URIStr(uri_str),
            name=name,
            checksum=checksum,
            max_objects=max_objects,
        )
        for entry in manifest_entries:
            self._manifest.add_entry(entry)

        return manifest_entries

    def add(self, obj: data_types.WBValue, name: StrPath) -> ArtifactManifestEntry:
        """Add a wandb media object to the artifact as a JSON file.

        Arguments:
            obj: An instance of one of the supported `wandb.data_types` classes.
            name: Artifact-relative name; the object's `with_suffix` decides
                the final file name.

        Returns:
            The manifest entry for the object. If this exact object (by
            identity) was added before, or an entry already exists at the
            resulting path, the existing entry is returned.

        Raises:
            ArtifactFinalizedError: If the artifact is already finalized.
            ValueError: If `obj` is not one of the supported types.
        """
        self._ensure_can_add()
        name = LogicalPath(name)

        # This is a "hack" to automatically rename tables added to
        # the wandb /media/tables directory to their sha-based name.
        # TODO: figure out a more appropriate convention.
        is_tmp_name = name.startswith("media/tables")

        # Validate that the object is one of the correct wandb.Media types
        # TODO: move this to checking subclass of wandb.Media once all are
        # generally supported
        allowed_types = [
            data_types.Bokeh,
            data_types.JoinedTable,
            data_types.PartitionedTable,
            data_types.Table,
            data_types.Classes,
            data_types.ImageMask,
            data_types.BoundingBoxes2D,
            data_types.Audio,
            data_types.Image,
            data_types.Video,
            data_types.Html,
            data_types.Object3D,
            data_types.Molecule,
            data_types._SavedModel,
        ]

        if not isinstance(obj, tuple(allowed_types)):
            raise ValueError(
                "Found object of type {}, expected one of {}.".format(
                    obj.__class__, allowed_types
                )
            )

        # Objects are de-duplicated by identity, not by value.
        obj_id = id(obj)
        if obj_id in self._added_objs:
            return self._added_objs[obj_id].entry

        # If the object is coming from another artifact, save it as a reference
        ref_path = obj._get_artifact_entry_ref_url()
        if ref_path is not None:
            return self.add_reference(ref_path, type(obj).with_suffix(name))[0]

        val = obj.to_json(self)
        name = obj.with_suffix(name)
        entry = self._manifest.get_entry_by_path(name)
        if entry is not None:
            return entry

        def do_write(f: IO) -> None:
            # TODO: Do we need to open with utf-8 codec?
            f.write(json.dumps(val, sort_keys=True))

        if is_tmp_name:
            file_path = os.path.join(ARTIFACT_TMP.name, str(id(self)), name)
            # exist_ok avoids the check-then-create race between threads.
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            with open(file_path, "w") as tmp_f:
                do_write(tmp_f)
        else:
            with self.new_file(name) as f:
                file_path = f.name
                do_write(f)

        try:
            # Note, we add the file from our temp directory.
            # It will be added again later on finalize, but succeed since
            # the checksum should match
            entry = self.add_file(file_path, name, is_tmp_name)
            self._added_objs[obj_id] = _AddedObj(entry, obj)
            if obj._artifact_target is None:
                obj._set_artifact_target(self, entry.path)
        finally:
            # The scratch copy is no longer needed once staged; remove it even
            # when staging failed so it does not linger in ARTIFACT_TMP.
            if is_tmp_name and os.path.exists(file_path):
                os.remove(file_path)

        return entry

    def remove(self, item: Union[StrPath, "ArtifactManifestEntry"]) -> None:
        """Remove an entry, or every entry in a directory, from the draft manifest.

        Arguments:
            item: A manifest entry, or an artifact-relative path naming either
                a single entry or a directory of entries.

        Raises:
            ArtifactFinalizedError: If the artifact has already been logged.
            FileNotFoundError: If `item` is a path that matches neither an
                entry nor a directory containing entries.
        """
        if self._logged_artifact:
            raise ArtifactFinalizedError(self, "remove")

        if isinstance(item, ArtifactManifestEntry):
            self._manifest.remove_entry(item)
            return

        path = str(PurePosixPath(item))
        entry = self._manifest.get_entry_by_path(path)
        if entry:
            self._manifest.remove_entry(entry)
            return

        entries = self._manifest.get_entries_in_directory(path)
        if not entries:
            raise FileNotFoundError(f"No such file or directory: {path}")
        for entry in entries:
            self._manifest.remove_entry(entry)

    def get_path(self, name: StrPath) -> ArtifactManifestEntry:
        """Delegate to the logged artifact; raises `ArtifactNotLoggedError` before logging."""
        if self._logged_artifact:
            return self._logged_artifact.get_path(name)

        raise ArtifactNotLoggedError(self, "get_path")

    def get(self, name: str) -> Optional[data_types.WBValue]:
        """Delegate to the logged artifact; raises `ArtifactNotLoggedError` before logging."""
        if self._logged_artifact:
            return self._logged_artifact.get(name)

        raise ArtifactNotLoggedError(self, "get")

    def download(
        self, root: Optional[str] = None, recursive: bool = False
    ) -> FilePathStr:
        """Delegate to the logged artifact; raises `ArtifactNotLoggedError` before logging."""
        if self._logged_artifact:
            return self._logged_artifact.download(root=root, recursive=recursive)

        raise ArtifactNotLoggedError(self, "download")

    def checkout(self, root: Optional[str] = None) -> str:
        """Delegate to the logged artifact; raises `ArtifactNotLoggedError` before logging."""
        if self._logged_artifact:
            return self._logged_artifact.checkout(root=root)

        raise ArtifactNotLoggedError(self, "checkout")

    def verify(self, root: Optional[str] = None) -> None:
        """Delegate to the logged artifact; raises `ArtifactNotLoggedError` before logging."""
        if self._logged_artifact:
            return self._logged_artifact.verify(root=root)

        raise ArtifactNotLoggedError(self, "verify")

    def save(
        self,
        project: Optional[str] = None,
        settings: Optional["wandb.wandb_sdk.wandb_settings.Settings"] = None,
    ) -> None:
        """Persist any changes made to the artifact.

        If currently in a run, that run will log this artifact. If not currently in a
        run, a run of type "auto" will be created to track this artifact.

        Arguments:
            project: (str, optional) A project to use for the artifact in the case that
                a run is not already in context.
            settings: (wandb.Settings, optional) A settings object to use when
                initializing an automatic run. Most commonly used in testing harness.

        Returns:
            None
        """
        if self._incremental:
            with wandb_lib.telemetry.context() as tel:
                tel.feature.artifact_incremental = True

        if self._logged_artifact:
            return self._logged_artifact.save()

        if wandb.run is not None:
            wandb.run.log_artifact(self)
            return

        if settings is None:
            settings = wandb.Settings(silent="true")
        with wandb.init(project=project, job_type="auto", settings=settings) as run:
            # redoing this here because in this branch we know we didn't
            # have the run at the beginning of the method
            if self._incremental:
                with wandb_lib.telemetry.context(run=run) as tel:
                    tel.feature.artifact_incremental = True
            run.log_artifact(self)

    def delete(self, delete_aliases: bool = False) -> None:
        """Delegate to the logged artifact; raises `ArtifactNotLoggedError` before logging."""
        if self._logged_artifact:
            return self._logged_artifact.delete(delete_aliases=delete_aliases)

        raise ArtifactNotLoggedError(self, "delete")

    def wait(self, timeout: Optional[int] = None) -> ArtifactInterface:
        """Wait for an artifact to finish logging.

        Arguments:
            timeout: (int, optional) Wait up to this long.

        Raises:
            ArtifactNotLoggedError: If the artifact has not been logged yet.
        """
        if self._logged_artifact:
            return self._logged_artifact.wait(timeout)  # type: ignore [call-arg]

        raise ArtifactNotLoggedError(self, "wait")

    def get_added_local_path_name(self, local_path: str) -> Optional[str]:
        """Get the artifact relative name of a file added by a local filesystem path.

        Arguments:
            local_path: (str) The local path to resolve into an artifact relative name.

        Returns:
            str: The artifact relative name, or `None` if `local_path` was
            never added.

        Examples:
            Basic usage
            ```
            artifact = wandb.Artifact('my_dataset', type='dataset')
            artifact.add_file('path/to/file.txt', name='artifact/path/file.txt')

            # Returns `artifact/path/file.txt`:
            name = artifact.get_added_local_path_name('path/to/file.txt')
            ```
        """
        entry = self._added_local_paths.get(local_path, None)
        if entry is None:
            return None
        return entry.path

    def finalize(self) -> None:
        """Mark this artifact as final, disallowing further modifications.

        This happens automatically when calling `log_artifact`. Calling it
        again on an already-final artifact is a no-op.

        Returns:
            None
        """
        if self._final:
            # Previously returned `self._file_entries`, contradicting `-> None`.
            return

        # mark final after all files are added
        self._final = True
        self._digest = self._manifest.digest()

    def json_encode(self) -> Dict[str, Any]:
        """Return `util.artifact_to_json(self)`; raises `ArtifactNotLoggedError` before logging."""
        if not self._logged_artifact:
            raise ArtifactNotLoggedError(self, "json_encode")
        return util.artifact_to_json(self)

    def _ensure_can_add(self) -> None:
        """Raise `ArtifactFinalizedError` if the artifact has been finalized."""
        if self._final:
            raise ArtifactFinalizedError(artifact=self)

    def _add_local_file(
        self, name: StrPath, path: StrPath, digest: Optional[B64MD5] = None
    ) -> ArtifactManifestEntry:
        """Copy `path` into the staging directory and register it in the manifest.

        Arguments:
            name: Artifact-relative name for the entry.
            path: Local file to copy.
            digest: Precomputed MD5 digest; computed from the staged copy when
                omitted.

        Returns:
            The new manifest entry, whose `local_path` is the staged copy.
        """
        with tempfile.NamedTemporaryFile(dir=get_staging_dir(), delete=False) as f:
            staging_path = f.name
            shutil.copyfile(path, staging_path)
            # Make the staged copy read-only for the owner.
            os.chmod(staging_path, 0o400)

        entry = ArtifactManifestEntry(
            path=name,
            digest=digest or md5_file_b64(staging_path),
            local_path=staging_path,
        )

        self._manifest.add_entry(entry)
        self._added_local_paths[os.fspath(path)] = entry
        return entry
|