wandb 0.13.10__py3-none-any.whl → 0.14.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- wandb/__init__.py +2 -3
- wandb/apis/__init__.py +1 -3
- wandb/apis/importers/__init__.py +4 -0
- wandb/apis/importers/base.py +312 -0
- wandb/apis/importers/mlflow.py +113 -0
- wandb/apis/internal.py +29 -2
- wandb/apis/normalize.py +6 -5
- wandb/apis/public.py +163 -180
- wandb/apis/reports/_templates.py +6 -12
- wandb/apis/reports/report.py +1 -1
- wandb/apis/reports/runset.py +1 -3
- wandb/apis/reports/util.py +12 -10
- wandb/beta/workflows.py +57 -34
- wandb/catboost/__init__.py +1 -2
- wandb/cli/cli.py +215 -133
- wandb/data_types.py +63 -56
- wandb/docker/__init__.py +78 -16
- wandb/docker/auth.py +21 -22
- wandb/env.py +0 -1
- wandb/errors/__init__.py +8 -116
- wandb/errors/term.py +1 -1
- wandb/fastai/__init__.py +1 -2
- wandb/filesync/dir_watcher.py +8 -5
- wandb/filesync/step_prepare.py +76 -75
- wandb/filesync/step_upload.py +1 -2
- wandb/integration/catboost/__init__.py +1 -3
- wandb/integration/catboost/catboost.py +8 -14
- wandb/integration/fastai/__init__.py +7 -13
- wandb/integration/gym/__init__.py +35 -4
- wandb/integration/keras/__init__.py +3 -3
- wandb/integration/keras/callbacks/metrics_logger.py +9 -8
- wandb/integration/keras/callbacks/model_checkpoint.py +9 -9
- wandb/integration/keras/callbacks/tables_builder.py +31 -19
- wandb/integration/kfp/kfp_patch.py +20 -17
- wandb/integration/kfp/wandb_logging.py +1 -2
- wandb/integration/lightgbm/__init__.py +21 -19
- wandb/integration/prodigy/prodigy.py +6 -7
- wandb/integration/sacred/__init__.py +9 -12
- wandb/integration/sagemaker/__init__.py +1 -3
- wandb/integration/sagemaker/auth.py +0 -1
- wandb/integration/sagemaker/config.py +1 -1
- wandb/integration/sagemaker/resources.py +1 -1
- wandb/integration/sb3/sb3.py +8 -4
- wandb/integration/tensorboard/__init__.py +1 -3
- wandb/integration/tensorboard/log.py +8 -8
- wandb/integration/tensorboard/monkeypatch.py +11 -9
- wandb/integration/tensorflow/__init__.py +1 -3
- wandb/integration/xgboost/__init__.py +4 -6
- wandb/integration/yolov8/__init__.py +7 -0
- wandb/integration/yolov8/yolov8.py +250 -0
- wandb/jupyter.py +31 -35
- wandb/lightgbm/__init__.py +1 -2
- wandb/old/settings.py +2 -2
- wandb/plot/bar.py +1 -2
- wandb/plot/confusion_matrix.py +1 -3
- wandb/plot/histogram.py +1 -2
- wandb/plot/line.py +1 -2
- wandb/plot/line_series.py +4 -4
- wandb/plot/pr_curve.py +17 -20
- wandb/plot/roc_curve.py +1 -3
- wandb/plot/scatter.py +1 -2
- wandb/proto/v3/wandb_server_pb2.py +85 -39
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_server_pb2.py +51 -39
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/sdk/__init__.py +1 -3
- wandb/sdk/backend/backend.py +1 -1
- wandb/sdk/data_types/_dtypes.py +38 -30
- wandb/sdk/data_types/base_types/json_metadata.py +1 -3
- wandb/sdk/data_types/base_types/media.py +17 -17
- wandb/sdk/data_types/base_types/wb_value.py +33 -26
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +91 -125
- wandb/sdk/data_types/helper_types/classes.py +1 -1
- wandb/sdk/data_types/helper_types/image_mask.py +12 -12
- wandb/sdk/data_types/histogram.py +5 -4
- wandb/sdk/data_types/html.py +1 -2
- wandb/sdk/data_types/image.py +11 -11
- wandb/sdk/data_types/molecule.py +3 -6
- wandb/sdk/data_types/object_3d.py +1 -2
- wandb/sdk/data_types/plotly.py +1 -2
- wandb/sdk/data_types/saved_model.py +10 -8
- wandb/sdk/data_types/video.py +1 -1
- wandb/sdk/integration_utils/data_logging.py +5 -5
- wandb/sdk/interface/artifacts.py +288 -266
- wandb/sdk/interface/interface.py +2 -3
- wandb/sdk/interface/interface_grpc.py +1 -1
- wandb/sdk/interface/interface_queue.py +1 -1
- wandb/sdk/interface/interface_relay.py +1 -1
- wandb/sdk/interface/interface_shared.py +1 -2
- wandb/sdk/interface/interface_sock.py +1 -1
- wandb/sdk/interface/message_future.py +1 -1
- wandb/sdk/interface/message_future_poll.py +1 -1
- wandb/sdk/interface/router.py +1 -1
- wandb/sdk/interface/router_queue.py +1 -1
- wandb/sdk/interface/router_relay.py +1 -1
- wandb/sdk/interface/router_sock.py +1 -1
- wandb/sdk/interface/summary_record.py +1 -1
- wandb/sdk/internal/artifacts.py +1 -1
- wandb/sdk/internal/datastore.py +2 -3
- wandb/sdk/internal/file_pusher.py +5 -3
- wandb/sdk/internal/file_stream.py +22 -19
- wandb/sdk/internal/handler.py +5 -4
- wandb/sdk/internal/internal.py +1 -1
- wandb/sdk/internal/internal_api.py +115 -55
- wandb/sdk/internal/job_builder.py +1 -3
- wandb/sdk/internal/profiler.py +1 -1
- wandb/sdk/internal/progress.py +4 -6
- wandb/sdk/internal/sample.py +1 -3
- wandb/sdk/internal/sender.py +28 -16
- wandb/sdk/internal/settings_static.py +5 -5
- wandb/sdk/internal/system/assets/__init__.py +1 -0
- wandb/sdk/internal/system/assets/cpu.py +3 -9
- wandb/sdk/internal/system/assets/disk.py +2 -4
- wandb/sdk/internal/system/assets/gpu.py +6 -18
- wandb/sdk/internal/system/assets/gpu_apple.py +2 -4
- wandb/sdk/internal/system/assets/interfaces.py +50 -22
- wandb/sdk/internal/system/assets/ipu.py +1 -3
- wandb/sdk/internal/system/assets/memory.py +7 -13
- wandb/sdk/internal/system/assets/network.py +4 -8
- wandb/sdk/internal/system/assets/open_metrics.py +283 -0
- wandb/sdk/internal/system/assets/tpu.py +1 -4
- wandb/sdk/internal/system/assets/trainium.py +26 -14
- wandb/sdk/internal/system/system_info.py +2 -3
- wandb/sdk/internal/system/system_monitor.py +52 -20
- wandb/sdk/internal/tb_watcher.py +12 -13
- wandb/sdk/launch/_project_spec.py +54 -65
- wandb/sdk/launch/agent/agent.py +374 -90
- wandb/sdk/launch/builder/abstract.py +61 -7
- wandb/sdk/launch/builder/build.py +81 -110
- wandb/sdk/launch/builder/docker_builder.py +181 -0
- wandb/sdk/launch/builder/kaniko_builder.py +419 -0
- wandb/sdk/launch/builder/noop.py +31 -12
- wandb/sdk/launch/builder/templates/_wandb_bootstrap.py +70 -20
- wandb/sdk/launch/environment/abstract.py +28 -0
- wandb/sdk/launch/environment/aws_environment.py +276 -0
- wandb/sdk/launch/environment/gcp_environment.py +271 -0
- wandb/sdk/launch/environment/local_environment.py +65 -0
- wandb/sdk/launch/github_reference.py +3 -8
- wandb/sdk/launch/launch.py +38 -29
- wandb/sdk/launch/launch_add.py +6 -8
- wandb/sdk/launch/loader.py +230 -0
- wandb/sdk/launch/registry/abstract.py +54 -0
- wandb/sdk/launch/registry/elastic_container_registry.py +163 -0
- wandb/sdk/launch/registry/google_artifact_registry.py +203 -0
- wandb/sdk/launch/registry/local_registry.py +62 -0
- wandb/sdk/launch/runner/abstract.py +1 -16
- wandb/sdk/launch/runner/{kubernetes.py → kubernetes_runner.py} +83 -95
- wandb/sdk/launch/runner/local_container.py +46 -22
- wandb/sdk/launch/runner/local_process.py +1 -4
- wandb/sdk/launch/runner/{aws.py → sagemaker_runner.py} +53 -212
- wandb/sdk/launch/runner/{gcp_vertex.py → vertex_runner.py} +38 -55
- wandb/sdk/launch/sweeps/__init__.py +3 -2
- wandb/sdk/launch/sweeps/scheduler.py +132 -39
- wandb/sdk/launch/sweeps/scheduler_sweep.py +80 -89
- wandb/sdk/launch/utils.py +101 -30
- wandb/sdk/launch/wandb_reference.py +2 -7
- wandb/sdk/lib/_settings_toposort_generate.py +166 -0
- wandb/sdk/lib/_settings_toposort_generated.py +201 -0
- wandb/sdk/lib/apikey.py +2 -4
- wandb/sdk/lib/config_util.py +4 -1
- wandb/sdk/lib/console.py +1 -3
- wandb/sdk/lib/deprecate.py +3 -3
- wandb/sdk/lib/file_stream_utils.py +7 -5
- wandb/sdk/lib/filenames.py +1 -1
- wandb/sdk/lib/filesystem.py +61 -5
- wandb/sdk/lib/git.py +1 -3
- wandb/sdk/lib/import_hooks.py +4 -7
- wandb/sdk/lib/ipython.py +8 -5
- wandb/sdk/lib/lazyloader.py +1 -3
- wandb/sdk/lib/mailbox.py +14 -4
- wandb/sdk/lib/proto_util.py +10 -5
- wandb/sdk/lib/redirect.py +15 -22
- wandb/sdk/lib/reporting.py +1 -3
- wandb/sdk/lib/retry.py +4 -5
- wandb/sdk/lib/runid.py +1 -3
- wandb/sdk/lib/server.py +15 -9
- wandb/sdk/lib/sock_client.py +1 -1
- wandb/sdk/lib/sparkline.py +1 -1
- wandb/sdk/lib/wburls.py +1 -1
- wandb/sdk/service/port_file.py +1 -2
- wandb/sdk/service/service.py +36 -13
- wandb/sdk/service/service_base.py +12 -1
- wandb/sdk/verify/verify.py +5 -7
- wandb/sdk/wandb_artifacts.py +142 -177
- wandb/sdk/wandb_config.py +5 -8
- wandb/sdk/wandb_helper.py +1 -1
- wandb/sdk/wandb_init.py +24 -13
- wandb/sdk/wandb_login.py +9 -9
- wandb/sdk/wandb_manager.py +39 -4
- wandb/sdk/wandb_metric.py +2 -6
- wandb/sdk/wandb_require.py +4 -15
- wandb/sdk/wandb_require_helpers.py +1 -9
- wandb/sdk/wandb_run.py +95 -141
- wandb/sdk/wandb_save.py +1 -3
- wandb/sdk/wandb_settings.py +149 -54
- wandb/sdk/wandb_setup.py +66 -46
- wandb/sdk/wandb_summary.py +13 -10
- wandb/sdk/wandb_sweep.py +6 -7
- wandb/sdk/wandb_watch.py +1 -1
- wandb/sklearn/calculate/confusion_matrix.py +1 -1
- wandb/sklearn/calculate/learning_curve.py +1 -1
- wandb/sklearn/calculate/summary_metrics.py +1 -3
- wandb/sklearn/plot/__init__.py +1 -1
- wandb/sklearn/plot/classifier.py +27 -18
- wandb/sklearn/plot/clusterer.py +4 -5
- wandb/sklearn/plot/regressor.py +4 -4
- wandb/sklearn/plot/shared.py +2 -2
- wandb/sync/__init__.py +1 -3
- wandb/sync/sync.py +4 -5
- wandb/testing/relay.py +11 -10
- wandb/trigger.py +1 -1
- wandb/util.py +106 -81
- wandb/viz.py +4 -4
- wandb/wandb_agent.py +50 -50
- wandb/wandb_controller.py +2 -3
- wandb/wandb_run.py +1 -2
- wandb/wandb_torch.py +1 -1
- wandb/xgboost/__init__.py +1 -2
- {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/METADATA +6 -2
- {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/RECORD +224 -209
- {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/WHEEL +1 -1
- wandb/sdk/launch/builder/docker.py +0 -80
- wandb/sdk/launch/builder/kaniko.py +0 -393
- wandb/sdk/launch/builder/loader.py +0 -32
- wandb/sdk/launch/runner/loader.py +0 -50
- {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/LICENSE +0 -0
- {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/entry_points.txt +0 -0
- {wandb-0.13.10.dist-info → wandb-0.14.0.dist-info}/top_level.txt +0 -0
wandb/sdk/wandb_artifacts.py
CHANGED
@@ -35,14 +35,16 @@ from wandb.apis.public import Artifact as PublicArtifact
|
|
35
35
|
from wandb.errors import CommError
|
36
36
|
from wandb.errors.term import termlog, termwarn
|
37
37
|
from wandb.sdk.internal import progress
|
38
|
-
from wandb.util import LogicalFilePathStr
|
38
|
+
from wandb.util import FilePathStr, LogicalFilePathStr, URIStr
|
39
39
|
|
40
40
|
from . import lib as wandb_lib
|
41
41
|
from .data_types._dtypes import Type, TypeRegistry
|
42
42
|
from .interface.artifacts import Artifact as ArtifactInterface
|
43
|
-
from .interface.artifacts import (
|
43
|
+
from .interface.artifacts import (
|
44
|
+
ArtifactFinalizedError,
|
44
45
|
ArtifactManifest,
|
45
46
|
ArtifactManifestEntry,
|
47
|
+
ArtifactNotLoggedError,
|
46
48
|
ArtifactsCache,
|
47
49
|
StorageHandler,
|
48
50
|
StorageLayout,
|
@@ -62,7 +64,6 @@ from .lib.hashutil import (
|
|
62
64
|
)
|
63
65
|
|
64
66
|
if TYPE_CHECKING:
|
65
|
-
|
66
67
|
# We could probably use https://pypi.org/project/boto3-stubs/ or something
|
67
68
|
# instead of `type:ignore`ing these boto imports, but it's nontrivial:
|
68
69
|
# for some reason, despite being actively maintained as of 2022-09-30,
|
@@ -75,7 +76,6 @@ if TYPE_CHECKING:
|
|
75
76
|
|
76
77
|
import wandb.apis.public
|
77
78
|
from wandb.filesync.step_prepare import StepPrepare
|
78
|
-
from wandb.sdk.internal import internal_api
|
79
79
|
|
80
80
|
# This makes the first sleep 1s, and then doubles it up to total times,
|
81
81
|
# which makes for ~18 hours.
|
@@ -109,8 +109,7 @@ def _normalize_metadata(metadata: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
|
109
109
|
|
110
110
|
|
111
111
|
class Artifact(ArtifactInterface):
|
112
|
-
"""
|
113
|
-
Flexible and lightweight building block for dataset and model versioning.
|
112
|
+
"""Flexible and lightweight building block for dataset and model versioning.
|
114
113
|
|
115
114
|
Constructs an empty artifact whose contents can be populated using its
|
116
115
|
`add` family of functions. Once the artifact has all the desired files,
|
@@ -141,9 +140,6 @@ class Artifact(ArtifactInterface):
|
|
141
140
|
wandb.log_artifact(artifact)
|
142
141
|
```
|
143
142
|
|
144
|
-
Raises:
|
145
|
-
Exception: if problem.
|
146
|
-
|
147
143
|
Returns:
|
148
144
|
An `Artifact` object.
|
149
145
|
"""
|
@@ -187,7 +183,7 @@ class Artifact(ArtifactInterface):
|
|
187
183
|
self._final = False
|
188
184
|
self._digest = ""
|
189
185
|
self._file_entries = None
|
190
|
-
self._manifest = ArtifactManifestV1(self
|
186
|
+
self._manifest = ArtifactManifestV1(self._storage_policy)
|
191
187
|
self._cache = get_artifacts_cache()
|
192
188
|
self._added_objs = {}
|
193
189
|
self._added_local_paths = {}
|
@@ -229,9 +225,7 @@ class Artifact(ArtifactInterface):
|
|
229
225
|
if self._logged_artifact:
|
230
226
|
return self._logged_artifact.version
|
231
227
|
|
232
|
-
raise
|
233
|
-
"Cannot call version on an artifact before it has been logged or in offline mode"
|
234
|
-
)
|
228
|
+
raise ArtifactNotLoggedError(self, "version")
|
235
229
|
|
236
230
|
@property
|
237
231
|
def entity(self) -> str:
|
@@ -301,9 +295,7 @@ class Artifact(ArtifactInterface):
|
|
301
295
|
if self._logged_artifact:
|
302
296
|
return self._logged_artifact.commit_hash
|
303
297
|
|
304
|
-
raise
|
305
|
-
"Cannot access commit_hash on an artifact before it has been logged or in offline mode"
|
306
|
-
)
|
298
|
+
raise ArtifactNotLoggedError(self, "commit_hash")
|
307
299
|
|
308
300
|
@property
|
309
301
|
def description(self) -> Optional[str]:
|
@@ -341,13 +333,12 @@ class Artifact(ArtifactInterface):
|
|
341
333
|
if self._logged_artifact:
|
342
334
|
return self._logged_artifact.aliases
|
343
335
|
|
344
|
-
raise
|
345
|
-
"Cannot call aliases on an artifact before it has been logged or in offline mode"
|
346
|
-
)
|
336
|
+
raise ArtifactNotLoggedError(self, "aliases")
|
347
337
|
|
348
338
|
@aliases.setter
|
349
339
|
def aliases(self, aliases: List[str]) -> None:
|
350
|
-
"""
|
340
|
+
"""Set artifact aliases.
|
341
|
+
|
351
342
|
Arguments:
|
352
343
|
aliases: (list) The list of aliases associated with this artifact.
|
353
344
|
"""
|
@@ -355,9 +346,7 @@ class Artifact(ArtifactInterface):
|
|
355
346
|
self._logged_artifact.aliases = aliases
|
356
347
|
return
|
357
348
|
|
358
|
-
raise
|
359
|
-
"Cannot set aliases on an artifact before it has been logged or in offline mode"
|
360
|
-
)
|
349
|
+
raise ArtifactNotLoggedError(self, "aliases")
|
361
350
|
|
362
351
|
@property
|
363
352
|
def use_as(self) -> Optional[str]:
|
@@ -379,17 +368,13 @@ class Artifact(ArtifactInterface):
|
|
379
368
|
if self._logged_artifact:
|
380
369
|
return self._logged_artifact.used_by()
|
381
370
|
|
382
|
-
raise
|
383
|
-
"Cannot call used_by on an artifact before it has been logged or in offline mode"
|
384
|
-
)
|
371
|
+
raise ArtifactNotLoggedError(self, "used_by")
|
385
372
|
|
386
373
|
def logged_by(self) -> "wandb.apis.public.Run":
|
387
374
|
if self._logged_artifact:
|
388
375
|
return self._logged_artifact.logged_by()
|
389
376
|
|
390
|
-
raise
|
391
|
-
"Cannot call logged_by on an artifact before it has been logged or in offline mode"
|
392
|
-
)
|
377
|
+
raise ArtifactNotLoggedError(self, "logged_by")
|
393
378
|
|
394
379
|
@contextlib.contextmanager
|
395
380
|
def new_file(
|
@@ -481,7 +466,6 @@ class Artifact(ArtifactInterface):
|
|
481
466
|
# This is a bit of a hack, we want to check if the uri is of the type
|
482
467
|
# ArtifactManifestEntry which is a private class returned by Artifact.get_path in
|
483
468
|
# wandb/apis/public.py. If so, then recover the reference URL.
|
484
|
-
uri_str: str
|
485
469
|
if isinstance(uri, ArtifactManifestEntry) and uri.parent_artifact() != self:
|
486
470
|
ref_url_fn = uri.ref_url
|
487
471
|
uri_str = ref_url_fn()
|
@@ -494,7 +478,11 @@ class Artifact(ArtifactInterface):
|
|
494
478
|
)
|
495
479
|
|
496
480
|
manifest_entries = self._storage_policy.store_reference(
|
497
|
-
self,
|
481
|
+
self,
|
482
|
+
URIStr(uri_str),
|
483
|
+
name=name,
|
484
|
+
checksum=checksum,
|
485
|
+
max_objects=max_objects,
|
498
486
|
)
|
499
487
|
for entry in manifest_entries:
|
500
488
|
self._manifest.add_entry(entry)
|
@@ -588,63 +576,53 @@ class Artifact(ArtifactInterface):
|
|
588
576
|
if self._logged_artifact:
|
589
577
|
return self._logged_artifact.get_path(name)
|
590
578
|
|
591
|
-
raise
|
592
|
-
"Cannot load paths from an artifact before it has been logged or in offline mode"
|
593
|
-
)
|
579
|
+
raise ArtifactNotLoggedError(self, "get_path")
|
594
580
|
|
595
581
|
def get(self, name: str) -> data_types.WBValue:
|
596
582
|
if self._logged_artifact:
|
597
583
|
return self._logged_artifact.get(name)
|
598
584
|
|
599
|
-
raise
|
600
|
-
"Cannot call get on an artifact before it has been logged or in offline mode"
|
601
|
-
)
|
585
|
+
raise ArtifactNotLoggedError(self, "get")
|
602
586
|
|
603
587
|
def download(
|
604
588
|
self, root: Optional[str] = None, recursive: bool = False
|
605
|
-
) ->
|
589
|
+
) -> FilePathStr:
|
606
590
|
if self._logged_artifact:
|
607
591
|
return self._logged_artifact.download(root=root, recursive=recursive)
|
608
592
|
|
609
|
-
raise
|
610
|
-
"Cannot call download on an artifact before it has been logged or in offline mode"
|
611
|
-
)
|
593
|
+
raise ArtifactNotLoggedError(self, "download")
|
612
594
|
|
613
595
|
def checkout(self, root: Optional[str] = None) -> str:
|
614
596
|
if self._logged_artifact:
|
615
597
|
return self._logged_artifact.checkout(root=root)
|
616
598
|
|
617
|
-
raise
|
618
|
-
"Cannot call checkout on an artifact before it has been logged or in offline mode"
|
619
|
-
)
|
599
|
+
raise ArtifactNotLoggedError(self, "checkout")
|
620
600
|
|
621
601
|
def verify(self, root: Optional[str] = None) -> bool:
|
622
602
|
if self._logged_artifact:
|
623
603
|
return self._logged_artifact.verify(root=root)
|
624
604
|
|
625
|
-
raise
|
626
|
-
"Cannot call verify on an artifact before it has been logged or in offline mode"
|
627
|
-
)
|
605
|
+
raise ArtifactNotLoggedError(self, "verify")
|
628
606
|
|
629
607
|
def save(
|
630
608
|
self,
|
631
609
|
project: Optional[str] = None,
|
632
610
|
settings: Optional["wandb.wandb_sdk.wandb_settings.Settings"] = None,
|
633
611
|
) -> None:
|
634
|
-
"""
|
635
|
-
|
636
|
-
|
637
|
-
to track this artifact.
|
612
|
+
"""Persist any changes made to the artifact.
|
613
|
+
|
614
|
+
If currently in a run, that run will log this artifact. If not currently in a
|
615
|
+
run, a run of type "auto" will be created to track this artifact.
|
638
616
|
|
639
617
|
Arguments:
|
640
|
-
project: (str, optional) A project to use for the artifact in the case that
|
641
|
-
settings: (wandb.Settings, optional) A
|
642
|
-
automatic run. Most commonly
|
618
|
+
project: (str, optional) A project to use for the artifact in the case that
|
619
|
+
a run is not already in context. settings: (wandb.Settings, optional) A
|
620
|
+
settings object to use when initializing an automatic run. Most commonly
|
621
|
+
used in testing harness.
|
643
622
|
|
644
623
|
Returns:
|
645
624
|
None
|
646
625
|
"""
|
647
|
-
|
648
626
|
if self._incremental:
|
649
627
|
with wandb_lib.telemetry.context() as tel:
|
650
628
|
tel.feature.artifact_incremental = True
|
@@ -671,25 +649,21 @@ class Artifact(ArtifactInterface):
|
|
671
649
|
if self._logged_artifact:
|
672
650
|
return self._logged_artifact.delete()
|
673
651
|
|
674
|
-
raise
|
675
|
-
"Cannot call delete on an artifact before it has been logged or in offline mode"
|
676
|
-
)
|
652
|
+
raise ArtifactNotLoggedError(self, "delete")
|
677
653
|
|
678
654
|
def wait(self, timeout: Optional[int] = None) -> ArtifactInterface:
|
679
|
-
"""
|
655
|
+
"""Wait for an artifact to finish logging.
|
656
|
+
|
680
657
|
Arguments:
|
681
|
-
timeout: (int, optional)
|
658
|
+
timeout: (int, optional) Wait up to this long.
|
682
659
|
"""
|
683
660
|
if self._logged_artifact:
|
684
661
|
return self._logged_artifact.wait(timeout) # type: ignore [call-arg]
|
685
662
|
|
686
|
-
raise
|
687
|
-
"Cannot call wait on an artifact before it has been logged or in offline mode"
|
688
|
-
)
|
663
|
+
raise ArtifactNotLoggedError(self, "wait")
|
689
664
|
|
690
665
|
def get_added_local_path_name(self, local_path: str) -> Optional[str]:
|
691
|
-
"""
|
692
|
-
Get the artifact relative name of a file added by a local filesystem path.
|
666
|
+
"""Get the artifact relative name of a file added by a local filesystem path.
|
693
667
|
|
694
668
|
Arguments:
|
695
669
|
local_path: (str) The local path to resolve into an artifact relative name.
|
@@ -713,10 +687,9 @@ class Artifact(ArtifactInterface):
|
|
713
687
|
return entry.path
|
714
688
|
|
715
689
|
def finalize(self) -> None:
|
716
|
-
"""
|
717
|
-
Marks this artifact as final, which disallows further additions to the artifact.
|
718
|
-
This happens automatically when calling `log_artifact`.
|
690
|
+
"""Mark this artifact as final, disallowing further modifications.
|
719
691
|
|
692
|
+
This happens automatically when calling `log_artifact`.
|
720
693
|
|
721
694
|
Returns:
|
722
695
|
None
|
@@ -730,14 +703,12 @@ class Artifact(ArtifactInterface):
|
|
730
703
|
|
731
704
|
def json_encode(self) -> Dict[str, Any]:
|
732
705
|
if not self._logged_artifact:
|
733
|
-
raise
|
734
|
-
"Cannot json encode artifact before it has been logged or in offline mode."
|
735
|
-
)
|
706
|
+
raise ArtifactNotLoggedError(self, "json_encode")
|
736
707
|
return util.artifact_to_json(self)
|
737
708
|
|
738
709
|
def _ensure_can_add(self) -> None:
|
739
710
|
if self._final:
|
740
|
-
raise
|
711
|
+
raise ArtifactFinalizedError(artifact=self)
|
741
712
|
|
742
713
|
def _add_local_file(
|
743
714
|
self, name: str, path: str, digest: Optional[B64MD5] = None
|
@@ -774,9 +745,7 @@ class ArtifactManifestV1(ArtifactManifest):
|
|
774
745
|
return 1
|
775
746
|
|
776
747
|
@classmethod
|
777
|
-
def from_manifest_json(
|
778
|
-
cls, artifact: ArtifactInterface, manifest_json: Dict
|
779
|
-
) -> "ArtifactManifestV1":
|
748
|
+
def from_manifest_json(cls, manifest_json: Dict) -> "ArtifactManifestV1":
|
780
749
|
if manifest_json["version"] != cls.version():
|
781
750
|
raise ValueError(
|
782
751
|
"Expected manifest version 1, got %s" % manifest_json["version"]
|
@@ -787,6 +756,11 @@ class ArtifactManifestV1(ArtifactManifest):
|
|
787
756
|
storage_policy_cls = StoragePolicy.lookup_by_name(storage_policy_name)
|
788
757
|
if storage_policy_cls is None:
|
789
758
|
raise ValueError('Failed to find storage policy "%s"' % storage_policy_name)
|
759
|
+
if not issubclass(storage_policy_cls, WandbStoragePolicy):
|
760
|
+
raise ValueError(
|
761
|
+
"No handler found for storage handler of type '%s'"
|
762
|
+
% storage_policy_name
|
763
|
+
)
|
790
764
|
|
791
765
|
entries: Mapping[str, ArtifactManifestEntry]
|
792
766
|
entries = {
|
@@ -802,20 +776,17 @@ class ArtifactManifestV1(ArtifactManifest):
|
|
802
776
|
for name, val in manifest_json["contents"].items()
|
803
777
|
}
|
804
778
|
|
805
|
-
return cls(
|
806
|
-
artifact, storage_policy_cls.from_config(storage_policy_config), entries
|
807
|
-
)
|
779
|
+
return cls(storage_policy_cls.from_config(storage_policy_config), entries)
|
808
780
|
|
809
781
|
def __init__(
|
810
782
|
self,
|
811
|
-
artifact: ArtifactInterface,
|
812
783
|
storage_policy: "WandbStoragePolicy",
|
813
784
|
entries: Optional[Mapping[str, ArtifactManifestEntry]] = None,
|
814
785
|
) -> None:
|
815
|
-
super().__init__(
|
786
|
+
super().__init__(storage_policy, entries=entries)
|
816
787
|
|
817
788
|
def to_manifest_json(self) -> Dict:
|
818
|
-
"""This is the JSON that's stored in wandb_manifest.json
|
789
|
+
"""This is the JSON that's stored in wandb_manifest.json.
|
819
790
|
|
820
791
|
If include_local is True we also include the local paths to files. This is
|
821
792
|
used to represent an artifact that's waiting to be saved on the current
|
@@ -846,7 +817,7 @@ class ArtifactManifestV1(ArtifactManifest):
|
|
846
817
|
def digest(self) -> HexMD5:
|
847
818
|
hasher = hashlib.md5()
|
848
819
|
hasher.update(b"wandb-artifact-manifest-v1\n")
|
849
|
-
for
|
820
|
+
for name, entry in sorted(self.entries.items(), key=lambda kv: kv[0]):
|
850
821
|
hasher.update(f"{name}:{entry.digest}\n".encode())
|
851
822
|
return HexMD5(hasher.hexdigest())
|
852
823
|
|
@@ -860,8 +831,13 @@ class WandbStoragePolicy(StoragePolicy):
|
|
860
831
|
def from_config(cls, config: Dict) -> "WandbStoragePolicy":
|
861
832
|
return cls(config=config)
|
862
833
|
|
863
|
-
def __init__(
|
864
|
-
self
|
834
|
+
def __init__(
|
835
|
+
self,
|
836
|
+
config: Optional[Dict] = None,
|
837
|
+
cache: Optional[ArtifactsCache] = None,
|
838
|
+
api: Optional[InternalApi] = None,
|
839
|
+
) -> None:
|
840
|
+
self._cache = cache or get_artifacts_cache()
|
865
841
|
self._config = config or {}
|
866
842
|
self._session = requests.Session()
|
867
843
|
adapter = requests.adapters.HTTPAdapter(
|
@@ -880,7 +856,7 @@ class WandbStoragePolicy(StoragePolicy):
|
|
880
856
|
local_artifact = WBLocalArtifactHandler()
|
881
857
|
file_handler = LocalFileHandler()
|
882
858
|
|
883
|
-
self._api = InternalApi()
|
859
|
+
self._api = api or InternalApi()
|
884
860
|
self._handler = MultiHandler(
|
885
861
|
handlers=[
|
886
862
|
s3,
|
@@ -924,7 +900,7 @@ class WandbStoragePolicy(StoragePolicy):
|
|
924
900
|
def store_reference(
|
925
901
|
self,
|
926
902
|
artifact: ArtifactInterface,
|
927
|
-
path:
|
903
|
+
path: Union[URIStr, FilePathStr],
|
928
904
|
name: Optional[str] = None,
|
929
905
|
checksum: bool = True,
|
930
906
|
max_objects: Optional[int] = None,
|
@@ -980,16 +956,14 @@ class WandbStoragePolicy(StoragePolicy):
|
|
980
956
|
True if the file was a duplicate (did not need to be uploaded),
|
981
957
|
False if it needed to be uploaded or was a reference (nothing to dedupe).
|
982
958
|
"""
|
983
|
-
|
984
|
-
|
985
|
-
return {
|
959
|
+
resp = preparer.prepare(
|
960
|
+
{
|
986
961
|
"artifactID": artifact_id,
|
987
962
|
"artifactManifestID": artifact_manifest_id,
|
988
963
|
"name": entry.path,
|
989
964
|
"md5": entry.digest,
|
990
965
|
}
|
991
|
-
|
992
|
-
resp = preparer.prepare(_prepare_fn)
|
966
|
+
)
|
993
967
|
|
994
968
|
entry.birth_artifact_id = resp.birth_artifact_id
|
995
969
|
if resp.upload_url is None:
|
@@ -1022,7 +996,7 @@ class WandbStoragePolicy(StoragePolicy):
|
|
1022
996
|
|
1023
997
|
|
1024
998
|
# Don't use this yet!
|
1025
|
-
class __S3BucketPolicy(StoragePolicy):
|
999
|
+
class __S3BucketPolicy(StoragePolicy): # noqa: N801
|
1026
1000
|
@classmethod
|
1027
1001
|
def name(cls) -> str:
|
1028
1002
|
return "wandb-s3-bucket-policy-v1"
|
@@ -1053,13 +1027,13 @@ class __S3BucketPolicy(StoragePolicy):
|
|
1053
1027
|
self,
|
1054
1028
|
manifest_entry: ArtifactManifestEntry,
|
1055
1029
|
local: bool = False,
|
1056
|
-
) -> Union[
|
1030
|
+
) -> Union[URIStr, FilePathStr]:
|
1057
1031
|
return self._handler.load_path(manifest_entry, local=local)
|
1058
1032
|
|
1059
1033
|
def store_path(
|
1060
1034
|
self,
|
1061
|
-
artifact:
|
1062
|
-
path: Union[
|
1035
|
+
artifact: ArtifactInterface,
|
1036
|
+
path: Union[URIStr, FilePathStr],
|
1063
1037
|
name: Optional[str] = None,
|
1064
1038
|
checksum: bool = True,
|
1065
1039
|
max_objects: Optional[int] = None,
|
@@ -1092,7 +1066,7 @@ class MultiHandler(StorageHandler):
|
|
1092
1066
|
self,
|
1093
1067
|
manifest_entry: ArtifactManifestEntry,
|
1094
1068
|
local: bool = False,
|
1095
|
-
) -> Union[
|
1069
|
+
) -> Union[URIStr, FilePathStr]:
|
1096
1070
|
url = urlparse(manifest_entry.ref)
|
1097
1071
|
if url.scheme not in self._handlers:
|
1098
1072
|
if self._default_handler is not None:
|
@@ -1105,7 +1079,7 @@ class MultiHandler(StorageHandler):
|
|
1105
1079
|
def store_path(
|
1106
1080
|
self,
|
1107
1081
|
artifact: ArtifactInterface,
|
1108
|
-
path:
|
1082
|
+
path: Union[URIStr, FilePathStr],
|
1109
1083
|
name: Optional[str] = None,
|
1110
1084
|
checksum: bool = True,
|
1111
1085
|
max_objects: Optional[int] = None,
|
@@ -1132,9 +1106,9 @@ class MultiHandler(StorageHandler):
|
|
1132
1106
|
|
1133
1107
|
class TrackingHandler(StorageHandler):
|
1134
1108
|
def __init__(self, scheme: Optional[str] = None) -> None:
|
1135
|
-
"""
|
1136
|
-
|
1137
|
-
when paths being tracked are on file systems mounted at a standardized
|
1109
|
+
"""Track paths with no modification or special processing.
|
1110
|
+
|
1111
|
+
Useful when paths being tracked are on file systems mounted at a standardized
|
1138
1112
|
location.
|
1139
1113
|
|
1140
1114
|
For example, if the data to track is located on an NFS share mounted on
|
@@ -1150,7 +1124,7 @@ class TrackingHandler(StorageHandler):
|
|
1150
1124
|
self,
|
1151
1125
|
manifest_entry: ArtifactManifestEntry,
|
1152
1126
|
local: bool = False,
|
1153
|
-
) -> Union[
|
1127
|
+
) -> Union[URIStr, FilePathStr]:
|
1154
1128
|
if local:
|
1155
1129
|
# Likely a user error. The tracking handler is
|
1156
1130
|
# oblivious to the underlying paths, so it has
|
@@ -1162,12 +1136,12 @@ class TrackingHandler(StorageHandler):
|
|
1162
1136
|
)
|
1163
1137
|
# TODO(spencerpearson): should this go through util.to_native_slash_path
|
1164
1138
|
# instead of just getting typecast?
|
1165
|
-
return
|
1139
|
+
return FilePathStr(manifest_entry.path)
|
1166
1140
|
|
1167
1141
|
def store_path(
|
1168
1142
|
self,
|
1169
|
-
artifact:
|
1170
|
-
path: Union[
|
1143
|
+
artifact: ArtifactInterface,
|
1144
|
+
path: Union[URIStr, FilePathStr],
|
1171
1145
|
name: Optional[str] = None,
|
1172
1146
|
checksum: bool = True,
|
1173
1147
|
max_objects: Optional[int] = None,
|
@@ -1190,12 +1164,12 @@ DEFAULT_MAX_OBJECTS = 10000
|
|
1190
1164
|
|
1191
1165
|
|
1192
1166
|
class LocalFileHandler(StorageHandler):
|
1193
|
-
"""Handles file:// references"""
|
1167
|
+
"""Handles file:// references."""
|
1194
1168
|
|
1195
1169
|
def __init__(self, scheme: Optional[str] = None) -> None:
|
1196
|
-
"""
|
1197
|
-
|
1198
|
-
|
1170
|
+
"""Track files or directories on a local filesystem.
|
1171
|
+
|
1172
|
+
Expand directories to create an entry for each file contained.
|
1199
1173
|
"""
|
1200
1174
|
self._scheme = scheme or "file"
|
1201
1175
|
self._cache = get_artifacts_cache()
|
@@ -1208,7 +1182,7 @@ class LocalFileHandler(StorageHandler):
|
|
1208
1182
|
self,
|
1209
1183
|
manifest_entry: ArtifactManifestEntry,
|
1210
1184
|
local: bool = False,
|
1211
|
-
) -> Union[
|
1185
|
+
) -> Union[URIStr, FilePathStr]:
|
1212
1186
|
if manifest_entry.ref is None:
|
1213
1187
|
raise ValueError(f"Cannot add path with no ref: {manifest_entry.path}")
|
1214
1188
|
local_path = util.local_file_uri_to_path(str(manifest_entry.ref))
|
@@ -1239,8 +1213,8 @@ class LocalFileHandler(StorageHandler):
|
|
1239
1213
|
|
1240
1214
|
def store_path(
|
1241
1215
|
self,
|
1242
|
-
artifact:
|
1243
|
-
path: Union[
|
1216
|
+
artifact: ArtifactInterface,
|
1217
|
+
path: Union[URIStr, FilePathStr],
|
1244
1218
|
name: Optional[str] = None,
|
1245
1219
|
checksum: bool = True,
|
1246
1220
|
max_objects: Optional[int] = None,
|
@@ -1270,7 +1244,7 @@ class LocalFileHandler(StorageHandler):
|
|
1270
1244
|
for root, _, files in os.walk(local_path):
|
1271
1245
|
for sub_path in files:
|
1272
1246
|
i += 1
|
1273
|
-
if i
|
1247
|
+
if i > max_objects:
|
1274
1248
|
raise ValueError(
|
1275
1249
|
"Exceeded %i objects tracked, pass max_objects to add_reference"
|
1276
1250
|
% max_objects
|
@@ -1285,7 +1259,7 @@ class LocalFileHandler(StorageHandler):
|
|
1285
1259
|
|
1286
1260
|
entry = ArtifactManifestEntry(
|
1287
1261
|
path=LogicalFilePathStr(logical_path),
|
1288
|
-
ref=
|
1262
|
+
ref=FilePathStr(os.path.join(path, logical_path)),
|
1289
1263
|
size=os.path.getsize(physical_path),
|
1290
1264
|
digest=md5(physical_path),
|
1291
1265
|
)
|
@@ -1328,6 +1302,7 @@ class S3Handler(StorageHandler):
|
|
1328
1302
|
boto: "boto3" = util.get_module(
|
1329
1303
|
"boto3",
|
1330
1304
|
required="s3:// references requires the boto3 library, run pip install wandb[aws]",
|
1305
|
+
lazy=False,
|
1331
1306
|
)
|
1332
1307
|
self._s3 = boto.session.Session().resource(
|
1333
1308
|
"s3",
|
@@ -1360,7 +1335,7 @@ class S3Handler(StorageHandler):
|
|
1360
1335
|
self,
|
1361
1336
|
manifest_entry: ArtifactManifestEntry,
|
1362
1337
|
local: bool = False,
|
1363
|
-
) -> Union[
|
1338
|
+
) -> Union[URIStr, FilePathStr]:
|
1364
1339
|
if not local:
|
1365
1340
|
assert manifest_entry.ref is not None
|
1366
1341
|
return manifest_entry.ref
|
@@ -1368,7 +1343,7 @@ class S3Handler(StorageHandler):
|
|
1368
1343
|
assert manifest_entry.ref is not None
|
1369
1344
|
|
1370
1345
|
path, hit, cache_open = self._cache.check_etag_obj_path(
|
1371
|
-
|
1346
|
+
URIStr(manifest_entry.ref),
|
1372
1347
|
ETag(manifest_entry.digest), # TODO(spencerpearson): unsafe cast
|
1373
1348
|
manifest_entry.size if manifest_entry.size is not None else 0,
|
1374
1349
|
)
|
@@ -1421,8 +1396,8 @@ class S3Handler(StorageHandler):
|
|
1421
1396
|
|
1422
1397
|
def store_path(
|
1423
1398
|
self,
|
1424
|
-
artifact:
|
1425
|
-
path: Union[
|
1399
|
+
artifact: ArtifactInterface,
|
1400
|
+
path: Union[URIStr, FilePathStr],
|
1426
1401
|
name: Optional[str] = None,
|
1427
1402
|
checksum: bool = True,
|
1428
1403
|
max_objects: Optional[int] = None,
|
@@ -1435,7 +1410,7 @@ class S3Handler(StorageHandler):
|
|
1435
1410
|
# parsing. Once we have that, we can store the rest of the
|
1436
1411
|
# metadata in the artifact entry itself.
|
1437
1412
|
bucket, key, version = self._parse_uri(path)
|
1438
|
-
path =
|
1413
|
+
path = URIStr(f"{self.scheme}://{bucket}/{key}")
|
1439
1414
|
if not self.versioning_enabled(bucket) and version:
|
1440
1415
|
raise ValueError(
|
1441
1416
|
f"Specifying a versionId is not valid for s3://{bucket} as it does not have versioning enabled."
|
@@ -1489,7 +1464,7 @@ class S3Handler(StorageHandler):
|
|
1489
1464
|
]
|
1490
1465
|
if start_time is not None:
|
1491
1466
|
termlog("Done. %.1fs" % (time.time() - start_time), prefix=False)
|
1492
|
-
if len(entries)
|
1467
|
+
if len(entries) > max_objects:
|
1493
1468
|
raise ValueError(
|
1494
1469
|
"Exceeded %i objects tracked, pass max_objects to add_reference"
|
1495
1470
|
% max_objects
|
@@ -1513,13 +1488,14 @@ class S3Handler(StorageHandler):
|
|
1513
1488
|
prefix: str = "",
|
1514
1489
|
multi: bool = False,
|
1515
1490
|
) -> ArtifactManifestEntry:
|
1516
|
-
"""
|
1491
|
+
"""Create an ArtifactManifestEntry from an S3 object.
|
1492
|
+
|
1517
1493
|
Arguments:
|
1518
1494
|
obj: The S3 object
|
1519
1495
|
path: The S3-style path (e.g.: "s3://bucket/file.txt")
|
1520
1496
|
name: The user assigned name, or None if not specified
|
1521
1497
|
prefix: The prefix to add (will be the same as `path` for directories)
|
1522
|
-
multi: Whether or not this is a multi-object add
|
1498
|
+
multi: Whether or not this is a multi-object add.
|
1523
1499
|
"""
|
1524
1500
|
bucket, key, _ = self._parse_uri(path)
|
1525
1501
|
|
@@ -1547,7 +1523,7 @@ class S3Handler(StorageHandler):
|
|
1547
1523
|
posix_ref = posix_path / relpath
|
1548
1524
|
return ArtifactManifestEntry(
|
1549
1525
|
path=LogicalFilePathStr(str(posix_name)),
|
1550
|
-
ref=
|
1526
|
+
ref=URIStr(f"{self.scheme}://{str(posix_ref)}"),
|
1551
1527
|
digest=ETag(self._etag_from_obj(obj)),
|
1552
1528
|
size=self._size_from_obj(obj),
|
1553
1529
|
extra=self._extra_from_obj(obj),
|
@@ -1570,11 +1546,11 @@ class S3Handler(StorageHandler):
|
|
1570
1546
|
return extra
|
1571
1547
|
|
1572
1548
|
@staticmethod
|
1573
|
-
def _content_addressed_path(md5: str) ->
|
1549
|
+
def _content_addressed_path(md5: str) -> FilePathStr:
|
1574
1550
|
# TODO: is this the structure we want? not at all human
|
1575
1551
|
# readable, but that's probably OK. don't want people
|
1576
1552
|
# poking around in the bucket
|
1577
|
-
return
|
1553
|
+
return FilePathStr(
|
1578
1554
|
"wandb/%s" % base64.b64encode(md5.encode("ascii")).decode("ascii")
|
1579
1555
|
)
|
1580
1556
|
|
@@ -1624,7 +1600,7 @@ class GCSHandler(StorageHandler):
|
|
1624
1600
|
self,
|
1625
1601
|
manifest_entry: ArtifactManifestEntry,
|
1626
1602
|
local: bool = False,
|
1627
|
-
) -> Union[
|
1603
|
+
) -> Union[URIStr, FilePathStr]:
|
1628
1604
|
if not local:
|
1629
1605
|
assert manifest_entry.ref is not None
|
1630
1606
|
return manifest_entry.ref
|
@@ -1669,8 +1645,8 @@ class GCSHandler(StorageHandler):
|
|
1669
1645
|
|
1670
1646
|
def store_path(
|
1671
1647
|
self,
|
1672
|
-
artifact:
|
1673
|
-
path: Union[
|
1648
|
+
artifact: ArtifactInterface,
|
1649
|
+
path: Union[URIStr, FilePathStr],
|
1674
1650
|
name: Optional[str] = None,
|
1675
1651
|
checksum: bool = True,
|
1676
1652
|
max_objects: Optional[int] = None,
|
@@ -1682,7 +1658,7 @@ class GCSHandler(StorageHandler):
|
|
1682
1658
|
# such as version identifiers, pare down the path to just the bucket
|
1683
1659
|
# and key.
|
1684
1660
|
bucket, key, version = self._parse_uri(path)
|
1685
|
-
path =
|
1661
|
+
path = URIStr(f"{self.scheme}://{bucket}/{key}")
|
1686
1662
|
max_objects = max_objects or DEFAULT_MAX_OBJECTS
|
1687
1663
|
if not self.versioning_enabled(bucket) and version:
|
1688
1664
|
raise ValueError(
|
@@ -1718,7 +1694,7 @@ class GCSHandler(StorageHandler):
|
|
1718
1694
|
]
|
1719
1695
|
if start_time is not None:
|
1720
1696
|
termlog("Done. %.1fs" % (time.time() - start_time), prefix=False)
|
1721
|
-
if len(entries)
|
1697
|
+
if len(entries) > max_objects:
|
1722
1698
|
raise ValueError(
|
1723
1699
|
"Exceeded %i objects tracked, pass max_objects to add_reference"
|
1724
1700
|
% max_objects
|
@@ -1733,13 +1709,14 @@ class GCSHandler(StorageHandler):
|
|
1733
1709
|
prefix: str = "",
|
1734
1710
|
multi: bool = False,
|
1735
1711
|
) -> ArtifactManifestEntry:
|
1736
|
-
"""
|
1712
|
+
"""Create an ArtifactManifestEntry from a GCS object.
|
1713
|
+
|
1737
1714
|
Arguments:
|
1738
1715
|
obj: The GCS object
|
1739
1716
|
path: The GCS-style path (e.g.: "gs://bucket/file.txt")
|
1740
1717
|
name: The user assigned name, or None if not specified
|
1741
1718
|
prefix: The prefix to add (will be the same as `path` for directories)
|
1742
|
-
multi: Whether or not this is a multi-object add
|
1719
|
+
multi: Whether or not this is a multi-object add.
|
1743
1720
|
"""
|
1744
1721
|
bucket, key, _ = self._parse_uri(path)
|
1745
1722
|
|
@@ -1767,7 +1744,7 @@ class GCSHandler(StorageHandler):
|
|
1767
1744
|
posix_ref = posix_path / relpath
|
1768
1745
|
return ArtifactManifestEntry(
|
1769
1746
|
path=LogicalFilePathStr(str(posix_name)),
|
1770
|
-
ref=
|
1747
|
+
ref=URIStr(f"{self.scheme}://{str(posix_ref)}"),
|
1771
1748
|
digest=obj.md5_hash,
|
1772
1749
|
size=obj.size,
|
1773
1750
|
extra=self._extra_from_obj(obj),
|
@@ -1781,11 +1758,11 @@ class GCSHandler(StorageHandler):
|
|
1781
1758
|
}
|
1782
1759
|
|
1783
1760
|
@staticmethod
|
1784
|
-
def _content_addressed_path(md5: str) ->
|
1761
|
+
def _content_addressed_path(md5: str) -> FilePathStr:
|
1785
1762
|
# TODO: is this the structure we want? not at all human
|
1786
1763
|
# readable, but that's probably OK. don't want people
|
1787
1764
|
# poking around in the bucket
|
1788
|
-
return
|
1765
|
+
return FilePathStr(
|
1789
1766
|
"wandb/%s" % base64.b64encode(md5.encode("ascii")).decode("ascii")
|
1790
1767
|
)
|
1791
1768
|
|
@@ -1804,7 +1781,7 @@ class HTTPHandler(StorageHandler):
|
|
1804
1781
|
self,
|
1805
1782
|
manifest_entry: ArtifactManifestEntry,
|
1806
1783
|
local: bool = False,
|
1807
|
-
) -> Union[
|
1784
|
+
) -> Union[URIStr, FilePathStr]:
|
1808
1785
|
if not local:
|
1809
1786
|
assert manifest_entry.ref is not None
|
1810
1787
|
return manifest_entry.ref
|
@@ -1812,7 +1789,7 @@ class HTTPHandler(StorageHandler):
|
|
1812
1789
|
assert manifest_entry.ref is not None
|
1813
1790
|
|
1814
1791
|
path, hit, cache_open = self._cache.check_etag_obj_path(
|
1815
|
-
|
1792
|
+
URIStr(manifest_entry.ref),
|
1816
1793
|
ETag(manifest_entry.digest), # TODO(spencerpearson): unsafe cast
|
1817
1794
|
manifest_entry.size if manifest_entry.size is not None else 0,
|
1818
1795
|
)
|
@@ -1822,7 +1799,7 @@ class HTTPHandler(StorageHandler):
|
|
1822
1799
|
response = self._session.get(manifest_entry.ref, stream=True)
|
1823
1800
|
response.raise_for_status()
|
1824
1801
|
|
1825
|
-
digest: Optional[Union[ETag,
|
1802
|
+
digest: Optional[Union[ETag, FilePathStr, URIStr]]
|
1826
1803
|
digest, size, extra = self._entry_from_headers(response.headers)
|
1827
1804
|
digest = digest or manifest_entry.ref
|
1828
1805
|
if manifest_entry.digest != digest:
|
@@ -1838,8 +1815,8 @@ class HTTPHandler(StorageHandler):
|
|
1838
1815
|
|
1839
1816
|
def store_path(
|
1840
1817
|
self,
|
1841
|
-
artifact:
|
1842
|
-
path: Union[
|
1818
|
+
artifact: ArtifactInterface,
|
1819
|
+
path: Union[URIStr, FilePathStr],
|
1843
1820
|
name: Optional[str] = None,
|
1844
1821
|
checksum: bool = True,
|
1845
1822
|
max_objects: Optional[int] = None,
|
@@ -1850,7 +1827,7 @@ class HTTPHandler(StorageHandler):
|
|
1850
1827
|
|
1851
1828
|
with self._session.get(path, stream=True) as response:
|
1852
1829
|
response.raise_for_status()
|
1853
|
-
digest: Optional[Union[ETag,
|
1830
|
+
digest: Optional[Union[ETag, FilePathStr, URIStr]]
|
1854
1831
|
digest, size, extra = self._entry_from_headers(response.headers)
|
1855
1832
|
digest = digest or path
|
1856
1833
|
return [
|
@@ -1877,7 +1854,7 @@ class HTTPHandler(StorageHandler):
|
|
1877
1854
|
|
1878
1855
|
|
1879
1856
|
class WBArtifactHandler(StorageHandler):
|
1880
|
-
"""Handles loading and storing Artifact reference-type files"""
|
1857
|
+
"""Handles loading and storing Artifact reference-type files."""
|
1881
1858
|
|
1882
1859
|
_client: Optional[PublicApi]
|
1883
1860
|
|
@@ -1888,11 +1865,7 @@ class WBArtifactHandler(StorageHandler):
|
|
1888
1865
|
|
1889
1866
|
@property
|
1890
1867
|
def scheme(self) -> str:
|
1891
|
-
"""
|
1892
|
-
|
1893
|
-
Returns:
|
1894
|
-
(str): The scheme to which this handler applies.
|
1895
|
-
"""
|
1868
|
+
"""Scheme this handler applies to."""
|
1896
1869
|
return self._scheme
|
1897
1870
|
|
1898
1871
|
@property
|
@@ -1905,11 +1878,10 @@ class WBArtifactHandler(StorageHandler):
|
|
1905
1878
|
self,
|
1906
1879
|
manifest_entry: ArtifactManifestEntry,
|
1907
1880
|
local: bool = False,
|
1908
|
-
) -> Union[
|
1909
|
-
"""
|
1910
|
-
|
1911
|
-
|
1912
|
-
and a new symlink is created and returned to the caller.
|
1881
|
+
) -> Union[URIStr, FilePathStr]:
|
1882
|
+
"""Load the file in the specified artifact given its corresponding entry.
|
1883
|
+
|
1884
|
+
Download the referenced artifact; create and return a new symlink to the caller.
|
1913
1885
|
|
1914
1886
|
Arguments:
|
1915
1887
|
manifest_entry (ArtifactManifestEntry): The index entry to load
|
@@ -1928,7 +1900,7 @@ class WBArtifactHandler(StorageHandler):
|
|
1928
1900
|
artifact_file_path = util.uri_from_path(manifest_entry.ref)
|
1929
1901
|
|
1930
1902
|
dep_artifact = PublicArtifact.from_id(hex_to_b64_id(artifact_id), self.client)
|
1931
|
-
link_target_path:
|
1903
|
+
link_target_path: FilePathStr
|
1932
1904
|
if local:
|
1933
1905
|
link_target_path = dep_artifact.get_path(artifact_file_path).download()
|
1934
1906
|
else:
|
@@ -1938,28 +1910,26 @@ class WBArtifactHandler(StorageHandler):
|
|
1938
1910
|
|
1939
1911
|
def store_path(
|
1940
1912
|
self,
|
1941
|
-
artifact:
|
1942
|
-
path: Union[
|
1913
|
+
artifact: ArtifactInterface,
|
1914
|
+
path: Union[URIStr, FilePathStr],
|
1943
1915
|
name: Optional[str] = None,
|
1944
1916
|
checksum: bool = True,
|
1945
1917
|
max_objects: Optional[int] = None,
|
1946
1918
|
) -> Sequence[ArtifactManifestEntry]:
|
1947
|
-
"""
|
1948
|
-
|
1949
|
-
|
1950
|
-
we don't have multiple hops. TODO-This resolution could be done in the server for
|
1951
|
-
performance improvements.
|
1919
|
+
"""Store the file or directory at the given path into the specified artifact.
|
1920
|
+
|
1921
|
+
Recursively resolves the reference until the result is a concrete asset.
|
1952
1922
|
|
1953
1923
|
Arguments:
|
1954
|
-
artifact: The artifact doing the storing
|
1955
|
-
|
1956
|
-
name (str): If specified, the logical name that should map to `path`
|
1924
|
+
artifact: The artifact doing the storing path (str): The path to store name
|
1925
|
+
(str): If specified, the logical name that should map to `path`
|
1957
1926
|
|
1958
1927
|
Returns:
|
1959
|
-
(list[ArtifactManifestEntry]): A list of manifest entries to store within
|
1928
|
+
(list[ArtifactManifestEntry]): A list of manifest entries to store within
|
1929
|
+
the artifact
|
1960
1930
|
"""
|
1961
|
-
|
1962
1931
|
# Recursively resolve the reference until a concrete asset is found
|
1932
|
+
# TODO: Consider resolving server-side for performance improvements.
|
1963
1933
|
while path is not None and urlparse(path).scheme == self._scheme:
|
1964
1934
|
artifact_id = util.host_from_path(path)
|
1965
1935
|
artifact_file_path = util.uri_from_path(path)
|
@@ -1975,7 +1945,7 @@ class WBArtifactHandler(StorageHandler):
|
|
1975
1945
|
path = entry.ref
|
1976
1946
|
|
1977
1947
|
# Create the path reference
|
1978
|
-
path =
|
1948
|
+
path = URIStr(
|
1979
1949
|
"{}://{}/{}".format(
|
1980
1950
|
self._scheme,
|
1981
1951
|
b64_to_hex_id(target_artifact.id),
|
@@ -1995,7 +1965,7 @@ class WBArtifactHandler(StorageHandler):
|
|
1995
1965
|
|
1996
1966
|
|
1997
1967
|
class WBLocalArtifactHandler(StorageHandler):
|
1998
|
-
"""Handles loading and storing Artifact reference-type files"""
|
1968
|
+
"""Handles loading and storing Artifact reference-type files."""
|
1999
1969
|
|
2000
1970
|
_client: Optional[PublicApi]
|
2001
1971
|
|
@@ -2005,32 +1975,27 @@ class WBLocalArtifactHandler(StorageHandler):
|
|
2005
1975
|
|
2006
1976
|
@property
|
2007
1977
|
def scheme(self) -> str:
|
2008
|
-
"""
|
2009
|
-
|
2010
|
-
Returns:
|
2011
|
-
(str): The scheme to which this handler applies.
|
2012
|
-
"""
|
1978
|
+
"""Scheme this handler applies to."""
|
2013
1979
|
return self._scheme
|
2014
1980
|
|
2015
1981
|
def load_path(
|
2016
1982
|
self,
|
2017
1983
|
manifest_entry: ArtifactManifestEntry,
|
2018
1984
|
local: bool = False,
|
2019
|
-
) -> Union[
|
1985
|
+
) -> Union[URIStr, FilePathStr]:
|
2020
1986
|
raise NotImplementedError(
|
2021
1987
|
"Should not be loading a path for an artifact entry with unresolved client id."
|
2022
1988
|
)
|
2023
1989
|
|
2024
1990
|
def store_path(
|
2025
1991
|
self,
|
2026
|
-
artifact:
|
2027
|
-
path: Union[
|
1992
|
+
artifact: ArtifactInterface,
|
1993
|
+
path: Union[URIStr, FilePathStr],
|
2028
1994
|
name: Optional[str] = None,
|
2029
1995
|
checksum: bool = True,
|
2030
1996
|
max_objects: Optional[int] = None,
|
2031
1997
|
) -> Sequence[ArtifactManifestEntry]:
|
2032
|
-
"""
|
2033
|
-
Stores the file or directory at the given path within the specified artifact.
|
1998
|
+
"""Store the file or directory at the given path within the specified artifact.
|
2034
1999
|
|
2035
2000
|
Arguments:
|
2036
2001
|
artifact: The artifact doing the storing
|
@@ -2043,7 +2008,7 @@ class WBLocalArtifactHandler(StorageHandler):
|
|
2043
2008
|
client_id = util.host_from_path(path)
|
2044
2009
|
target_path = util.uri_from_path(path)
|
2045
2010
|
target_artifact = self._cache.get_client_artifact(client_id)
|
2046
|
-
if target_artifact
|
2011
|
+
if not isinstance(target_artifact, Artifact):
|
2047
2012
|
raise RuntimeError("Local Artifact not found - invalid reference")
|
2048
2013
|
target_entry = target_artifact._manifest.entries[target_path]
|
2049
2014
|
if target_entry is None:
|