wandb 0.15.4__py3-none-any.whl → 0.15.5__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102) hide show
  1. wandb/__init__.py +1 -1
  2. wandb/analytics/sentry.py +1 -0
  3. wandb/apis/internal.py +3 -0
  4. wandb/apis/public.py +18 -20
  5. wandb/beta/workflows.py +5 -6
  6. wandb/cli/cli.py +27 -27
  7. wandb/data_types.py +2 -0
  8. wandb/integration/langchain/wandb_tracer.py +16 -179
  9. wandb/integration/sagemaker/config.py +2 -2
  10. wandb/integration/tensorboard/log.py +4 -4
  11. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  12. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  13. wandb/proto/wandb_deprecated.py +3 -1
  14. wandb/sdk/__init__.py +1 -4
  15. wandb/sdk/artifacts/__init__.py +0 -14
  16. wandb/sdk/artifacts/artifact.py +1757 -277
  17. wandb/sdk/artifacts/artifact_manifest_entry.py +26 -6
  18. wandb/sdk/artifacts/artifact_state.py +10 -0
  19. wandb/sdk/artifacts/artifacts_cache.py +7 -8
  20. wandb/sdk/artifacts/exceptions.py +4 -4
  21. wandb/sdk/artifacts/storage_handler.py +2 -2
  22. wandb/sdk/artifacts/storage_handlers/azure_handler.py +16 -6
  23. wandb/sdk/artifacts/storage_handlers/gcs_handler.py +2 -2
  24. wandb/sdk/artifacts/storage_handlers/http_handler.py +2 -2
  25. wandb/sdk/artifacts/storage_handlers/local_file_handler.py +2 -2
  26. wandb/sdk/artifacts/storage_handlers/multi_handler.py +2 -2
  27. wandb/sdk/artifacts/storage_handlers/s3_handler.py +35 -32
  28. wandb/sdk/artifacts/storage_handlers/tracking_handler.py +2 -2
  29. wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +5 -9
  30. wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +2 -2
  31. wandb/sdk/artifacts/storage_policies/s3_bucket_policy.py +2 -2
  32. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +24 -16
  33. wandb/sdk/artifacts/storage_policy.py +3 -3
  34. wandb/sdk/data_types/_dtypes.py +7 -12
  35. wandb/sdk/data_types/base_types/json_metadata.py +2 -2
  36. wandb/sdk/data_types/base_types/media.py +5 -6
  37. wandb/sdk/data_types/base_types/wb_value.py +12 -13
  38. wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +4 -5
  39. wandb/sdk/data_types/helper_types/classes.py +5 -8
  40. wandb/sdk/data_types/helper_types/image_mask.py +4 -5
  41. wandb/sdk/data_types/histogram.py +3 -3
  42. wandb/sdk/data_types/html.py +3 -4
  43. wandb/sdk/data_types/image.py +4 -5
  44. wandb/sdk/data_types/molecule.py +2 -2
  45. wandb/sdk/data_types/object_3d.py +3 -3
  46. wandb/sdk/data_types/plotly.py +2 -2
  47. wandb/sdk/data_types/saved_model.py +7 -8
  48. wandb/sdk/data_types/trace_tree.py +4 -4
  49. wandb/sdk/data_types/video.py +4 -4
  50. wandb/sdk/interface/interface.py +8 -10
  51. wandb/sdk/internal/file_stream.py +2 -3
  52. wandb/sdk/internal/internal_api.py +99 -4
  53. wandb/sdk/internal/job_builder.py +15 -7
  54. wandb/sdk/internal/sender.py +4 -0
  55. wandb/sdk/internal/settings_static.py +1 -0
  56. wandb/sdk/launch/_project_spec.py +9 -7
  57. wandb/sdk/launch/agent/agent.py +115 -58
  58. wandb/sdk/launch/agent/job_status_tracker.py +34 -0
  59. wandb/sdk/launch/agent/run_queue_item_file_saver.py +45 -0
  60. wandb/sdk/launch/builder/abstract.py +5 -1
  61. wandb/sdk/launch/builder/build.py +16 -10
  62. wandb/sdk/launch/builder/docker_builder.py +9 -2
  63. wandb/sdk/launch/builder/kaniko_builder.py +108 -22
  64. wandb/sdk/launch/builder/noop.py +3 -1
  65. wandb/sdk/launch/environment/aws_environment.py +2 -1
  66. wandb/sdk/launch/environment/azure_environment.py +124 -0
  67. wandb/sdk/launch/github_reference.py +30 -18
  68. wandb/sdk/launch/launch.py +1 -1
  69. wandb/sdk/launch/loader.py +15 -0
  70. wandb/sdk/launch/registry/azure_container_registry.py +132 -0
  71. wandb/sdk/launch/registry/elastic_container_registry.py +38 -4
  72. wandb/sdk/launch/registry/google_artifact_registry.py +46 -7
  73. wandb/sdk/launch/runner/abstract.py +19 -3
  74. wandb/sdk/launch/runner/kubernetes_runner.py +111 -47
  75. wandb/sdk/launch/runner/local_container.py +101 -48
  76. wandb/sdk/launch/runner/sagemaker_runner.py +59 -9
  77. wandb/sdk/launch/runner/vertex_runner.py +8 -4
  78. wandb/sdk/launch/sweeps/scheduler.py +102 -27
  79. wandb/sdk/launch/sweeps/utils.py +21 -0
  80. wandb/sdk/launch/utils.py +19 -7
  81. wandb/sdk/lib/_settings_toposort_generated.py +3 -0
  82. wandb/sdk/service/server.py +22 -9
  83. wandb/sdk/service/service.py +27 -8
  84. wandb/sdk/verify/verify.py +6 -9
  85. wandb/sdk/wandb_config.py +2 -4
  86. wandb/sdk/wandb_init.py +2 -0
  87. wandb/sdk/wandb_require.py +7 -0
  88. wandb/sdk/wandb_run.py +32 -35
  89. wandb/sdk/wandb_settings.py +10 -3
  90. wandb/testing/relay.py +15 -2
  91. wandb/util.py +55 -23
  92. {wandb-0.15.4.dist-info → wandb-0.15.5.dist-info}/METADATA +11 -8
  93. {wandb-0.15.4.dist-info → wandb-0.15.5.dist-info}/RECORD +97 -97
  94. wandb/integration/langchain/util.py +0 -191
  95. wandb/sdk/artifacts/invalid_artifact.py +0 -23
  96. wandb/sdk/artifacts/lazy_artifact.py +0 -162
  97. wandb/sdk/artifacts/local_artifact.py +0 -719
  98. wandb/sdk/artifacts/public_artifact.py +0 -1188
  99. {wandb-0.15.4.dist-info → wandb-0.15.5.dist-info}/LICENSE +0 -0
  100. {wandb-0.15.4.dist-info → wandb-0.15.5.dist-info}/WHEEL +0 -0
  101. {wandb-0.15.4.dist-info → wandb-0.15.5.dist-info}/entry_points.txt +0 -0
  102. {wandb-0.15.4.dist-info → wandb-0.15.5.dist-info}/top_level.txt +0 -0
@@ -1,719 +0,0 @@
1
- """Local (draft) artifact."""
2
- import contextlib
3
- import json
4
- import os
5
- import re
6
- import shutil
7
- import tempfile
8
- import time
9
- from pathlib import PurePosixPath
10
- from typing import (
11
- IO,
12
- TYPE_CHECKING,
13
- Any,
14
- Dict,
15
- Generator,
16
- List,
17
- Optional,
18
- Sequence,
19
- Tuple,
20
- Union,
21
- cast,
22
- )
23
- from urllib.parse import urlparse
24
-
25
- import wandb
26
- import wandb.data_types as data_types
27
- from wandb import env, util
28
- from wandb.errors.term import termlog
29
- from wandb.sdk import lib as wandb_lib
30
- from wandb.sdk.artifacts.artifact import Artifact as ArtifactInterface
31
- from wandb.sdk.artifacts.artifact_manifest import ArtifactManifest
32
- from wandb.sdk.artifacts.artifact_manifest_entry import ArtifactManifestEntry
33
- from wandb.sdk.artifacts.artifact_manifests.artifact_manifest_v1 import (
34
- ArtifactManifestV1,
35
- )
36
- from wandb.sdk.artifacts.artifact_saver import get_staging_dir
37
- from wandb.sdk.artifacts.artifacts_cache import get_artifacts_cache
38
- from wandb.sdk.artifacts.exceptions import (
39
- ArtifactFinalizedError,
40
- ArtifactNotLoggedError,
41
- )
42
- from wandb.sdk.artifacts.storage_layout import StorageLayout
43
- from wandb.sdk.artifacts.storage_policies.wandb_storage_policy import WandbStoragePolicy
44
- from wandb.sdk.lib import filesystem, runid
45
- from wandb.sdk.lib.hashutil import B64MD5, b64_to_hex_id, md5_file_b64
46
- from wandb.sdk.lib.paths import FilePathStr, LogicalPath, StrPath, URIStr
47
-
48
- if TYPE_CHECKING:
49
- import wandb.apis.public
50
-
51
- ARTIFACT_TMP = tempfile.TemporaryDirectory("wandb-artifacts")
52
-
53
-
54
- class _AddedObj:
55
- def __init__(self, entry: ArtifactManifestEntry, obj: data_types.WBValue):
56
- self.entry = entry
57
- self.obj = obj
58
-
59
-
60
- def _normalize_metadata(metadata: Optional[Dict[str, Any]]) -> Dict[str, Any]:
61
- if metadata is None:
62
- return {}
63
- if not isinstance(metadata, dict):
64
- raise TypeError(f"metadata must be dict, not {type(metadata)}")
65
- return cast(
66
- Dict[str, Any], json.loads(json.dumps(util.json_friendly_val(metadata)))
67
- )
68
-
69
-
70
- class Artifact(ArtifactInterface):
71
- """Flexible and lightweight building block for dataset and model versioning.
72
-
73
- Constructs an empty artifact whose contents can be populated using its
74
- `add` family of functions. Once the artifact has all the desired files,
75
- you can call `wandb.log_artifact()` to log it.
76
-
77
- Arguments:
78
- name: (str) A human-readable name for this artifact, which is how you
79
- can identify this artifact in the UI or reference it in `use_artifact`
80
- calls. Names can contain letters, numbers, underscores, hyphens, and
81
- dots. The name must be unique across a project.
82
- type: (str) The type of the artifact, which is used to organize and differentiate
83
- artifacts. Common types include `dataset` or `model`, but you can use any string
84
- containing letters, numbers, underscores, hyphens, and dots.
85
- description: (str, optional) Free text that offers a description of the artifact. The
86
- description is markdown rendered in the UI, so this is a good place to place tables,
87
- links, etc.
88
- metadata: (dict, optional) Structured data associated with the artifact,
89
- for example class distribution of a dataset. This will eventually be queryable
90
- and plottable in the UI. There is a hard limit of 100 total keys.
91
-
92
- Examples:
93
- Basic usage
94
- ```
95
- wandb.init()
96
-
97
- artifact = wandb.Artifact('mnist', type='dataset')
98
- artifact.add_dir('mnist/')
99
- wandb.log_artifact(artifact)
100
- ```
101
-
102
- Returns:
103
- An `Artifact` object.
104
- """
105
-
106
- _added_objs: Dict[int, _AddedObj]
107
- _added_local_paths: Dict[str, ArtifactManifestEntry]
108
- _distributed_id: Optional[str]
109
- _metadata: dict
110
- _logged_artifact: Optional[ArtifactInterface]
111
- _incremental: bool
112
- _client_id: str
113
-
114
- def __init__(
115
- self,
116
- name: str,
117
- type: str,
118
- description: Optional[str] = None,
119
- metadata: Optional[dict] = None,
120
- incremental: Optional[bool] = None,
121
- use_as: Optional[str] = None,
122
- ) -> None:
123
- if not re.match(r"^[a-zA-Z0-9_\-.]+$", name):
124
- raise ValueError(
125
- "Artifact name may only contain alphanumeric characters, dashes, underscores, and dots. "
126
- 'Invalid name: "%s"' % name
127
- )
128
- if type == "job" or type.startswith("wandb-"):
129
- raise ValueError(
130
- "Artifact types 'job' and 'wandb-*' are reserved for internal use. "
131
- "Please use a different type."
132
- )
133
-
134
- metadata = _normalize_metadata(metadata)
135
- # TODO: this shouldn't be a property of the artifact. It's a more like an
136
- # argument to log_artifact.
137
- storage_layout = StorageLayout.V2
138
- if env.get_use_v1_artifacts():
139
- storage_layout = StorageLayout.V1
140
-
141
- self._storage_policy = WandbStoragePolicy(
142
- config={
143
- "storageLayout": storage_layout,
144
- # TODO: storage region
145
- }
146
- )
147
- self._final = False
148
- self._digest = ""
149
- self._file_entries = None
150
- self._manifest = ArtifactManifestV1(self._storage_policy)
151
- self._cache = get_artifacts_cache()
152
- self._added_objs = {}
153
- self._added_local_paths = {}
154
- # You can write into this directory when creating artifact files
155
- self._artifact_dir = tempfile.TemporaryDirectory()
156
- self._type = type
157
- self._name = name
158
- self._description = description
159
- self._metadata = metadata
160
- self._distributed_id = None
161
- self._logged_artifact = None
162
- self._incremental = False
163
- self._client_id = runid.generate_id(128)
164
- self._sequence_client_id = runid.generate_id(128)
165
- self._cache.store_client_artifact(self)
166
- self._use_as = use_as
167
-
168
- if incremental:
169
- self._incremental = incremental
170
- wandb.termwarn("Using experimental arg `incremental`")
171
-
172
- @property
173
- def id(self) -> Optional[str]:
174
- if self._logged_artifact:
175
- return self._logged_artifact.id
176
-
177
- # The artifact hasn't been saved so an ID doesn't exist yet.
178
- return None
179
-
180
- @property
181
- def entity(self) -> str:
182
- if self._logged_artifact:
183
- return self._logged_artifact.entity
184
- raise ArtifactNotLoggedError(self, "entity")
185
-
186
- @property
187
- def project(self) -> str:
188
- if self._logged_artifact:
189
- return self._logged_artifact.project
190
- raise ArtifactNotLoggedError(self, "project")
191
-
192
- @property
193
- def name(self) -> str:
194
- if self._logged_artifact:
195
- return self._logged_artifact.name
196
- return self._name
197
-
198
- @property
199
- def version(self) -> str:
200
- if self._logged_artifact:
201
- return self._logged_artifact.version
202
- raise ArtifactNotLoggedError(self, "version")
203
-
204
- @property
205
- def source_entity(self) -> str:
206
- return self.entity
207
-
208
- @property
209
- def source_project(self) -> str:
210
- return self.project
211
-
212
- @property
213
- def source_name(self) -> str:
214
- return self.name
215
-
216
- @property
217
- def source_version(self) -> str:
218
- return self.version
219
-
220
- @property
221
- def manifest(self) -> ArtifactManifest:
222
- if self._logged_artifact:
223
- return self._logged_artifact.manifest
224
-
225
- self.finalize()
226
- return self._manifest
227
-
228
- @property
229
- def digest(self) -> str:
230
- if self._logged_artifact:
231
- return self._logged_artifact.digest
232
-
233
- self.finalize()
234
- # Digest will be none if the artifact hasn't been saved yet.
235
- return self._digest
236
-
237
- @property
238
- def type(self) -> str:
239
- if self._logged_artifact:
240
- return self._logged_artifact.type
241
-
242
- return self._type
243
-
244
- @property
245
- def state(self) -> str:
246
- if self._logged_artifact:
247
- return self._logged_artifact.state
248
-
249
- return "PENDING"
250
-
251
- @property
252
- def size(self) -> int:
253
- if self._logged_artifact:
254
- return self._logged_artifact.size
255
- sizes: List[int]
256
- sizes = []
257
- for entry in self._manifest.entries:
258
- e_size = self._manifest.entries[entry].size
259
- if e_size is not None:
260
- sizes.append(e_size)
261
- return sum(sizes)
262
-
263
- @property
264
- def commit_hash(self) -> str:
265
- if self._logged_artifact:
266
- return self._logged_artifact.commit_hash
267
-
268
- raise ArtifactNotLoggedError(self, "commit_hash")
269
-
270
- @property
271
- def description(self) -> Optional[str]:
272
- if self._logged_artifact:
273
- return self._logged_artifact.description
274
-
275
- return self._description
276
-
277
- @description.setter
278
- def description(self, desc: Optional[str]) -> None:
279
- if self._logged_artifact:
280
- self._logged_artifact.description = desc
281
- return
282
-
283
- self._description = desc
284
-
285
- @property
286
- def metadata(self) -> dict:
287
- if self._logged_artifact:
288
- return self._logged_artifact.metadata
289
-
290
- return self._metadata
291
-
292
- @metadata.setter
293
- def metadata(self, metadata: dict) -> None:
294
- metadata = _normalize_metadata(metadata)
295
- if self._logged_artifact:
296
- self._logged_artifact.metadata = metadata
297
- return
298
-
299
- self._metadata = metadata
300
-
301
- @property
302
- def aliases(self) -> List[str]:
303
- if self._logged_artifact:
304
- return self._logged_artifact.aliases
305
-
306
- raise ArtifactNotLoggedError(self, "aliases")
307
-
308
- @aliases.setter
309
- def aliases(self, aliases: List[str]) -> None:
310
- """Set artifact aliases.
311
-
312
- Arguments:
313
- aliases: (list) The list of aliases associated with this artifact.
314
- """
315
- if self._logged_artifact:
316
- self._logged_artifact.aliases = aliases
317
- return
318
-
319
- raise ArtifactNotLoggedError(self, "aliases")
320
-
321
- @property
322
- def use_as(self) -> Optional[str]:
323
- return self._use_as
324
-
325
- @property
326
- def distributed_id(self) -> Optional[str]:
327
- return self._distributed_id
328
-
329
- @distributed_id.setter
330
- def distributed_id(self, distributed_id: Optional[str]) -> None:
331
- self._distributed_id = distributed_id
332
-
333
- @property
334
- def incremental(self) -> bool:
335
- return self._incremental
336
-
337
- def used_by(self) -> List["wandb.apis.public.Run"]:
338
- if self._logged_artifact:
339
- return self._logged_artifact.used_by()
340
-
341
- raise ArtifactNotLoggedError(self, "used_by")
342
-
343
- def logged_by(self) -> Optional["wandb.apis.public.Run"]:
344
- if self._logged_artifact:
345
- return self._logged_artifact.logged_by()
346
-
347
- raise ArtifactNotLoggedError(self, "logged_by")
348
-
349
- @contextlib.contextmanager
350
- def new_file(
351
- self, name: str, mode: str = "w", encoding: Optional[str] = None
352
- ) -> Generator[IO, None, None]:
353
- self._ensure_can_add()
354
- path = os.path.join(self._artifact_dir.name, name.lstrip("/"))
355
- if os.path.exists(path):
356
- raise ValueError(f"File with name {name!r} already exists at {path!r}")
357
-
358
- filesystem.mkdir_exists_ok(os.path.dirname(path))
359
- try:
360
- with util.fsync_open(path, mode, encoding) as f:
361
- yield f
362
- except UnicodeEncodeError as e:
363
- wandb.termerror(
364
- f"Failed to open the provided file (UnicodeEncodeError: {e}). Please provide the proper encoding."
365
- )
366
- raise e
367
- self.add_file(path, name=name)
368
-
369
- def add_file(
370
- self,
371
- local_path: str,
372
- name: Optional[str] = None,
373
- is_tmp: Optional[bool] = False,
374
- ) -> ArtifactManifestEntry:
375
- self._ensure_can_add()
376
- if not os.path.isfile(local_path):
377
- raise ValueError("Path is not a file: %s" % local_path)
378
-
379
- name = LogicalPath(name or os.path.basename(local_path))
380
- digest = md5_file_b64(local_path)
381
-
382
- if is_tmp:
383
- file_path, file_name = os.path.split(name)
384
- file_name_parts = file_name.split(".")
385
- file_name_parts[0] = b64_to_hex_id(digest)[:20]
386
- name = os.path.join(file_path, ".".join(file_name_parts))
387
-
388
- return self._add_local_file(name, local_path, digest=digest)
389
-
390
- def add_dir(self, local_path: str, name: Optional[str] = None) -> None:
391
- self._ensure_can_add()
392
- if not os.path.isdir(local_path):
393
- raise ValueError("Path is not a directory: %s" % local_path)
394
-
395
- termlog(
396
- "Adding directory to artifact (%s)... "
397
- % os.path.join(".", os.path.normpath(local_path)),
398
- newline=False,
399
- )
400
- start_time = time.time()
401
-
402
- paths = []
403
- for dirpath, _, filenames in os.walk(local_path, followlinks=True):
404
- for fname in filenames:
405
- physical_path = os.path.join(dirpath, fname)
406
- logical_path = os.path.relpath(physical_path, start=local_path)
407
- if name is not None:
408
- logical_path = os.path.join(name, logical_path)
409
- paths.append((logical_path, physical_path))
410
-
411
- def add_manifest_file(log_phy_path: Tuple[str, str]) -> None:
412
- logical_path, physical_path = log_phy_path
413
- self._add_local_file(logical_path, physical_path)
414
-
415
- import multiprocessing.dummy # this uses threads
416
-
417
- num_threads = 8
418
- pool = multiprocessing.dummy.Pool(num_threads)
419
- pool.map(add_manifest_file, paths)
420
- pool.close()
421
- pool.join()
422
-
423
- termlog("Done. %.1fs" % (time.time() - start_time), prefix=False)
424
-
425
- def add_reference(
426
- self,
427
- uri: Union[ArtifactManifestEntry, str],
428
- name: Optional[StrPath] = None,
429
- checksum: bool = True,
430
- max_objects: Optional[int] = None,
431
- ) -> Sequence[ArtifactManifestEntry]:
432
- self._ensure_can_add()
433
- if name is not None:
434
- name = LogicalPath(name)
435
-
436
- # This is a bit of a hack, we want to check if the uri is a of the type
437
- # ArtifactManifestEntry which is a private class returned by Artifact.get_path in
438
- # wandb/apis/public.py. If so, then recover the reference URL.
439
- if isinstance(uri, ArtifactManifestEntry):
440
- ref_url_fn = uri.ref_url
441
- uri_str = ref_url_fn()
442
- elif isinstance(uri, str):
443
- uri_str = uri
444
- url = urlparse(str(uri_str))
445
- if not url.scheme:
446
- raise ValueError(
447
- "References must be URIs. To reference a local file, use file://"
448
- )
449
-
450
- manifest_entries = self._storage_policy.store_reference(
451
- self,
452
- URIStr(uri_str),
453
- name=name,
454
- checksum=checksum,
455
- max_objects=max_objects,
456
- )
457
- for entry in manifest_entries:
458
- self._manifest.add_entry(entry)
459
-
460
- return manifest_entries
461
-
462
- def add(self, obj: data_types.WBValue, name: StrPath) -> ArtifactManifestEntry:
463
- self._ensure_can_add()
464
- name = LogicalPath(name)
465
-
466
- # This is a "hack" to automatically rename tables added to
467
- # the wandb /media/tables directory to their sha-based name.
468
- # TODO: figure out a more appropriate convention.
469
- is_tmp_name = name.startswith("media/tables")
470
-
471
- # Validate that the object is one of the correct wandb.Media types
472
- # TODO: move this to checking subclass of wandb.Media once all are
473
- # generally supported
474
- allowed_types = [
475
- data_types.Bokeh,
476
- data_types.JoinedTable,
477
- data_types.PartitionedTable,
478
- data_types.Table,
479
- data_types.Classes,
480
- data_types.ImageMask,
481
- data_types.BoundingBoxes2D,
482
- data_types.Audio,
483
- data_types.Image,
484
- data_types.Video,
485
- data_types.Html,
486
- data_types.Object3D,
487
- data_types.Molecule,
488
- data_types._SavedModel,
489
- ]
490
-
491
- if not any(isinstance(obj, t) for t in allowed_types):
492
- raise ValueError(
493
- "Found object of type {}, expected one of {}.".format(
494
- obj.__class__, allowed_types
495
- )
496
- )
497
-
498
- obj_id = id(obj)
499
- if obj_id in self._added_objs:
500
- return self._added_objs[obj_id].entry
501
-
502
- # If the object is coming from another artifact, save it as a reference
503
- ref_path = obj._get_artifact_entry_ref_url()
504
- if ref_path is not None:
505
- return self.add_reference(ref_path, type(obj).with_suffix(name))[0]
506
-
507
- val = obj.to_json(self)
508
- name = obj.with_suffix(name)
509
- entry = self._manifest.get_entry_by_path(name)
510
- if entry is not None:
511
- return entry
512
-
513
- def do_write(f: IO) -> None:
514
- import json
515
-
516
- # TODO: Do we need to open with utf-8 codec?
517
- f.write(json.dumps(val, sort_keys=True))
518
-
519
- if is_tmp_name:
520
- file_path = os.path.join(ARTIFACT_TMP.name, str(id(self)), name)
521
- folder_path, _ = os.path.split(file_path)
522
- if not os.path.exists(folder_path):
523
- os.makedirs(folder_path)
524
- with open(file_path, "w") as tmp_f:
525
- do_write(tmp_f)
526
- else:
527
- with self.new_file(name) as f:
528
- file_path = f.name
529
- do_write(f)
530
-
531
- # Note, we add the file from our temp directory.
532
- # It will be added again later on finalize, but succeed since
533
- # the checksum should match
534
- entry = self.add_file(file_path, name, is_tmp_name)
535
- self._added_objs[obj_id] = _AddedObj(entry, obj)
536
- if obj._artifact_target is None:
537
- obj._set_artifact_target(self, entry.path)
538
-
539
- if is_tmp_name:
540
- if os.path.exists(file_path):
541
- os.remove(file_path)
542
-
543
- return entry
544
-
545
- def remove(self, item: Union[StrPath, "ArtifactManifestEntry"]) -> None:
546
- if self._logged_artifact:
547
- raise ArtifactFinalizedError(self, "remove")
548
-
549
- if isinstance(item, ArtifactManifestEntry):
550
- self._manifest.remove_entry(item)
551
- return
552
-
553
- path = str(PurePosixPath(item))
554
- entry = self._manifest.get_entry_by_path(path)
555
- if entry:
556
- self._manifest.remove_entry(entry)
557
- return
558
-
559
- entries = self._manifest.get_entries_in_directory(path)
560
- if not entries:
561
- raise FileNotFoundError(f"No such file or directory: {path}")
562
- for entry in entries:
563
- self._manifest.remove_entry(entry)
564
-
565
- def get_path(self, name: StrPath) -> ArtifactManifestEntry:
566
- if self._logged_artifact:
567
- return self._logged_artifact.get_path(name)
568
-
569
- raise ArtifactNotLoggedError(self, "get_path")
570
-
571
- def get(self, name: str) -> Optional[data_types.WBValue]:
572
- if self._logged_artifact:
573
- return self._logged_artifact.get(name)
574
-
575
- raise ArtifactNotLoggedError(self, "get")
576
-
577
- def download(
578
- self, root: Optional[str] = None, recursive: bool = False
579
- ) -> FilePathStr:
580
- if self._logged_artifact:
581
- return self._logged_artifact.download(root=root, recursive=recursive)
582
-
583
- raise ArtifactNotLoggedError(self, "download")
584
-
585
- def checkout(self, root: Optional[str] = None) -> str:
586
- if self._logged_artifact:
587
- return self._logged_artifact.checkout(root=root)
588
-
589
- raise ArtifactNotLoggedError(self, "checkout")
590
-
591
- def verify(self, root: Optional[str] = None) -> None:
592
- if self._logged_artifact:
593
- return self._logged_artifact.verify(root=root)
594
-
595
- raise ArtifactNotLoggedError(self, "verify")
596
-
597
- def save(
598
- self,
599
- project: Optional[str] = None,
600
- settings: Optional["wandb.wandb_sdk.wandb_settings.Settings"] = None,
601
- ) -> None:
602
- """Persist any changes made to the artifact.
603
-
604
- If currently in a run, that run will log this artifact. If not currently in a
605
- run, a run of type "auto" will be created to track this artifact.
606
-
607
- Arguments:
608
- project: (str, optional) A project to use for the artifact in the case that
609
- a run is not already in context settings: (wandb.Settings, optional) A
610
- settings object to use when initializing an automatic run. Most commonly
611
- used in testing harness.
612
-
613
- Returns:
614
- None
615
- """
616
- if self._incremental:
617
- with wandb_lib.telemetry.context() as tel:
618
- tel.feature.artifact_incremental = True
619
-
620
- if self._logged_artifact:
621
- return self._logged_artifact.save()
622
- else:
623
- if wandb.run is None:
624
- if settings is None:
625
- settings = wandb.Settings(silent="true")
626
- with wandb.init(
627
- project=project, job_type="auto", settings=settings
628
- ) as run:
629
- # redoing this here because in this branch we know we didn't
630
- # have the run at the beginning of the method
631
- if self._incremental:
632
- with wandb_lib.telemetry.context(run=run) as tel:
633
- tel.feature.artifact_incremental = True
634
- run.log_artifact(self)
635
- else:
636
- wandb.run.log_artifact(self)
637
-
638
- def delete(self, delete_aliases: bool = False) -> None:
639
- if self._logged_artifact:
640
- return self._logged_artifact.delete(delete_aliases=delete_aliases)
641
-
642
- raise ArtifactNotLoggedError(self, "delete")
643
-
644
- def wait(self, timeout: Optional[int] = None) -> ArtifactInterface:
645
- """Wait for an artifact to finish logging.
646
-
647
- Arguments:
648
- timeout: (int, optional) Wait up to this long.
649
- """
650
- if self._logged_artifact:
651
- return self._logged_artifact.wait(timeout) # type: ignore [call-arg]
652
-
653
- raise ArtifactNotLoggedError(self, "wait")
654
-
655
- def get_added_local_path_name(self, local_path: str) -> Optional[str]:
656
- """Get the artifact relative name of a file added by a local filesystem path.
657
-
658
- Arguments:
659
- local_path: (str) The local path to resolve into an artifact relative name.
660
-
661
- Returns:
662
- str: The artifact relative name.
663
-
664
- Examples:
665
- Basic usage
666
- ```
667
- artifact = wandb.Artifact('my_dataset', type='dataset')
668
- artifact.add_file('path/to/file.txt', name='artifact/path/file.txt')
669
-
670
- # Returns `artifact/path/file.txt`:
671
- name = artifact.get_added_local_path_name('path/to/file.txt')
672
- ```
673
- """
674
- entry = self._added_local_paths.get(local_path, None)
675
- if entry is None:
676
- return None
677
- return entry.path
678
-
679
- def finalize(self) -> None:
680
- """Mark this artifact as final, disallowing further modifications.
681
-
682
- This happens automatically when calling `log_artifact`.
683
-
684
- Returns:
685
- None
686
- """
687
- if self._final:
688
- return self._file_entries
689
-
690
- # mark final after all files are added
691
- self._final = True
692
- self._digest = self._manifest.digest()
693
-
694
- def json_encode(self) -> Dict[str, Any]:
695
- if not self._logged_artifact:
696
- raise ArtifactNotLoggedError(self, "json_encode")
697
- return util.artifact_to_json(self)
698
-
699
- def _ensure_can_add(self) -> None:
700
- if self._final:
701
- raise ArtifactFinalizedError(artifact=self)
702
-
703
- def _add_local_file(
704
- self, name: StrPath, path: StrPath, digest: Optional[B64MD5] = None
705
- ) -> ArtifactManifestEntry:
706
- with tempfile.NamedTemporaryFile(dir=get_staging_dir(), delete=False) as f:
707
- staging_path = f.name
708
- shutil.copyfile(path, staging_path)
709
- os.chmod(staging_path, 0o400)
710
-
711
- entry = ArtifactManifestEntry(
712
- path=name,
713
- digest=digest or md5_file_b64(staging_path),
714
- local_path=staging_path,
715
- )
716
-
717
- self._manifest.add_entry(entry)
718
- self._added_local_paths[os.fspath(path)] = entry
719
- return entry