wandb 0.15.9__py3-none-any.whl → 0.15.11__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- wandb/__init__.py +5 -1
- wandb/apis/public.py +137 -17
- wandb/apis/reports/_panels.py +1 -1
- wandb/apis/reports/blocks.py +1 -0
- wandb/apis/reports/report.py +27 -5
- wandb/cli/cli.py +52 -41
- wandb/docker/__init__.py +17 -0
- wandb/docker/auth.py +1 -1
- wandb/env.py +24 -4
- wandb/filesync/step_checksum.py +3 -3
- wandb/integration/openai/openai.py +3 -0
- wandb/integration/ultralytics/__init__.py +9 -0
- wandb/integration/ultralytics/bbox_utils.py +196 -0
- wandb/integration/ultralytics/callback.py +458 -0
- wandb/integration/ultralytics/classification_utils.py +66 -0
- wandb/integration/ultralytics/mask_utils.py +141 -0
- wandb/integration/ultralytics/pose_utils.py +92 -0
- wandb/integration/xgboost/xgboost.py +3 -3
- wandb/integration/yolov8/__init__.py +0 -7
- wandb/integration/yolov8/yolov8.py +22 -3
- wandb/old/settings.py +7 -0
- wandb/plot/line_series.py +0 -1
- wandb/proto/v3/wandb_internal_pb2.py +353 -300
- wandb/proto/v3/wandb_server_pb2.py +37 -41
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_telemetry_pb2.py +16 -16
- wandb/proto/v4/wandb_internal_pb2.py +272 -260
- wandb/proto/v4/wandb_server_pb2.py +37 -40
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_telemetry_pb2.py +16 -16
- wandb/proto/wandb_internal_codegen.py +7 -31
- wandb/sdk/artifacts/artifact.py +321 -189
- wandb/sdk/artifacts/artifact_cache.py +14 -0
- wandb/sdk/artifacts/artifact_manifest.py +5 -4
- wandb/sdk/artifacts/artifact_manifest_entry.py +37 -9
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -9
- wandb/sdk/artifacts/artifact_saver.py +13 -50
- wandb/sdk/artifacts/artifact_ttl.py +6 -0
- wandb/sdk/artifacts/artifacts_cache.py +119 -93
- wandb/sdk/artifacts/staging.py +25 -0
- wandb/sdk/artifacts/storage_handlers/s3_handler.py +12 -7
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +2 -3
- wandb/sdk/artifacts/storage_policies/__init__.py +4 -0
- wandb/sdk/artifacts/storage_policies/register.py +1 -0
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +4 -3
- wandb/sdk/artifacts/storage_policy.py +4 -2
- wandb/sdk/backend/backend.py +0 -16
- wandb/sdk/data_types/image.py +3 -1
- wandb/sdk/integration_utils/auto_logging.py +38 -13
- wandb/sdk/interface/interface.py +16 -135
- wandb/sdk/interface/interface_shared.py +9 -147
- wandb/sdk/interface/interface_sock.py +0 -26
- wandb/sdk/internal/file_pusher.py +20 -3
- wandb/sdk/internal/file_stream.py +3 -1
- wandb/sdk/internal/handler.py +53 -70
- wandb/sdk/internal/internal_api.py +220 -130
- wandb/sdk/internal/job_builder.py +41 -37
- wandb/sdk/internal/sender.py +7 -25
- wandb/sdk/internal/system/assets/disk.py +144 -11
- wandb/sdk/internal/system/system_info.py +6 -2
- wandb/sdk/launch/__init__.py +5 -0
- wandb/sdk/launch/{launch.py → _launch.py} +53 -54
- wandb/sdk/launch/{launch_add.py → _launch_add.py} +34 -31
- wandb/sdk/launch/_project_spec.py +13 -2
- wandb/sdk/launch/agent/agent.py +103 -59
- wandb/sdk/launch/agent/run_queue_item_file_saver.py +6 -4
- wandb/sdk/launch/builder/build.py +19 -1
- wandb/sdk/launch/builder/docker_builder.py +5 -1
- wandb/sdk/launch/builder/kaniko_builder.py +5 -1
- wandb/sdk/launch/create_job.py +20 -5
- wandb/sdk/launch/loader.py +14 -5
- wandb/sdk/launch/runner/abstract.py +0 -2
- wandb/sdk/launch/runner/kubernetes_monitor.py +329 -0
- wandb/sdk/launch/runner/kubernetes_runner.py +66 -209
- wandb/sdk/launch/runner/local_container.py +5 -2
- wandb/sdk/launch/runner/local_process.py +4 -1
- wandb/sdk/launch/sweeps/scheduler.py +43 -25
- wandb/sdk/launch/sweeps/utils.py +5 -3
- wandb/sdk/launch/utils.py +3 -1
- wandb/sdk/lib/_settings_toposort_generate.py +3 -9
- wandb/sdk/lib/_settings_toposort_generated.py +27 -3
- wandb/sdk/lib/_wburls_generated.py +1 -0
- wandb/sdk/lib/filenames.py +27 -6
- wandb/sdk/lib/filesystem.py +181 -7
- wandb/sdk/lib/fsm.py +5 -3
- wandb/sdk/lib/gql_request.py +3 -0
- wandb/sdk/lib/ipython.py +7 -0
- wandb/sdk/lib/wburls.py +1 -0
- wandb/sdk/service/port_file.py +2 -15
- wandb/sdk/service/server.py +7 -55
- wandb/sdk/service/service.py +56 -26
- wandb/sdk/service/service_base.py +1 -1
- wandb/sdk/service/streams.py +11 -5
- wandb/sdk/verify/verify.py +2 -2
- wandb/sdk/wandb_init.py +8 -2
- wandb/sdk/wandb_manager.py +4 -14
- wandb/sdk/wandb_run.py +143 -53
- wandb/sdk/wandb_settings.py +148 -35
- wandb/testing/relay.py +85 -38
- wandb/util.py +87 -4
- wandb/wandb_torch.py +24 -38
- {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/METADATA +48 -23
- {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/RECORD +107 -103
- {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/WHEEL +1 -1
- wandb/proto/v3/wandb_server_pb2_grpc.py +0 -1422
- wandb/proto/v4/wandb_server_pb2_grpc.py +0 -1422
- wandb/proto/wandb_server_pb2_grpc.py +0 -8
- wandb/sdk/artifacts/storage_policies/s3_bucket_policy.py +0 -61
- wandb/sdk/interface/interface_grpc.py +0 -460
- wandb/sdk/service/server_grpc.py +0 -444
- wandb/sdk/service/service_grpc.py +0 -73
- {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/LICENSE +0 -0
- {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/entry_points.txt +0 -0
- {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
WANDB_STORAGE_POLICY = "wandb-storage-policy-v1"
|
@@ -23,6 +23,7 @@ from wandb.sdk.artifacts.storage_handlers.wb_local_artifact_handler import (
|
|
23
23
|
WBLocalArtifactHandler,
|
24
24
|
)
|
25
25
|
from wandb.sdk.artifacts.storage_layout import StorageLayout
|
26
|
+
from wandb.sdk.artifacts.storage_policies.register import WANDB_STORAGE_POLICY
|
26
27
|
from wandb.sdk.artifacts.storage_policy import StoragePolicy
|
27
28
|
from wandb.sdk.internal.thread_local_settings import _thread_local_api_settings
|
28
29
|
from wandb.sdk.lib.hashutil import B64MD5, b64_to_hex_id, hex_to_b64_id
|
@@ -53,7 +54,7 @@ S3_MAX_MULTI_UPLOAD_SIZE = 5 * 1024**4
|
|
53
54
|
class WandbStoragePolicy(StoragePolicy):
|
54
55
|
@classmethod
|
55
56
|
def name(cls) -> str:
|
56
|
-
return
|
57
|
+
return WANDB_STORAGE_POLICY
|
57
58
|
|
58
59
|
@classmethod
|
59
60
|
def from_config(cls, config: Dict) -> "WandbStoragePolicy":
|
@@ -380,7 +381,7 @@ class WandbStoragePolicy(StoragePolicy):
|
|
380
381
|
)
|
381
382
|
if not hit:
|
382
383
|
try:
|
383
|
-
with cache_open() as f:
|
384
|
-
shutil.
|
384
|
+
with cache_open("wb") as f, open(entry.local_path, "rb") as src:
|
385
|
+
shutil.copyfileobj(src, f)
|
385
386
|
except OSError as e:
|
386
387
|
termwarn(f"Failed to cache {entry.local_path}, ignoring {e}")
|
@@ -12,11 +12,13 @@ if TYPE_CHECKING:
|
|
12
12
|
|
13
13
|
class StoragePolicy:
|
14
14
|
@classmethod
|
15
|
-
def lookup_by_name(cls, name: str) ->
|
15
|
+
def lookup_by_name(cls, name: str) -> Type["StoragePolicy"]:
|
16
|
+
import wandb.sdk.artifacts.storage_policies # noqa: F401
|
17
|
+
|
16
18
|
for sub in cls.__subclasses__():
|
17
19
|
if sub.name() == name:
|
18
20
|
return sub
|
19
|
-
|
21
|
+
raise NotImplementedError(f"Failed to find storage policy '{name}'")
|
20
22
|
|
21
23
|
@classmethod
|
22
24
|
def name(cls) -> str:
|
wandb/sdk/backend/backend.py
CHANGED
@@ -25,7 +25,6 @@ from wandb.sdk.wandb_settings import Settings
|
|
25
25
|
if TYPE_CHECKING:
|
26
26
|
from wandb.proto.wandb_internal_pb2 import Record, Result
|
27
27
|
|
28
|
-
from ..service.service_grpc import ServiceGrpcInterface
|
29
28
|
from ..service.service_sock import ServiceSockInterface
|
30
29
|
from ..wandb_run import Run
|
31
30
|
|
@@ -141,13 +140,6 @@ class Backend:
|
|
141
140
|
main_module.__file__ = self._save_mod_path
|
142
141
|
|
143
142
|
def _ensure_launched_manager(self) -> None:
|
144
|
-
# grpc_port: Optional[int] = None
|
145
|
-
# attach_id = self._settings._attach_id if self._settings else None
|
146
|
-
# if attach_id:
|
147
|
-
# # TODO(attach): implement
|
148
|
-
# # already have a server, assume it is already up
|
149
|
-
# grpc_port = int(attach_id)
|
150
|
-
|
151
143
|
assert self._manager
|
152
144
|
svc = self._manager._get_service()
|
153
145
|
assert svc
|
@@ -161,14 +153,6 @@ class Backend:
|
|
161
153
|
sock_client = svc_iface_sock._get_sock_client()
|
162
154
|
sock_interface = InterfaceSock(sock_client, mailbox=self._mailbox)
|
163
155
|
self.interface = sock_interface
|
164
|
-
elif svc_transport == "grpc":
|
165
|
-
from ..interface.interface_grpc import InterfaceGrpc
|
166
|
-
|
167
|
-
svc_iface_grpc = cast("ServiceGrpcInterface", svc_iface)
|
168
|
-
stub = svc_iface_grpc._get_stub()
|
169
|
-
grpc_interface = InterfaceGrpc(mailbox=self._mailbox)
|
170
|
-
grpc_interface._connect(stub=stub)
|
171
|
-
self.interface = grpc_interface
|
172
156
|
else:
|
173
157
|
raise AssertionError(f"Unsupported service transport: {svc_transport}")
|
174
158
|
|
wandb/sdk/data_types/image.py
CHANGED
@@ -69,7 +69,7 @@ class Image(BatchableMedia):
|
|
69
69
|
image data, or a PIL image. The class attempts to infer
|
70
70
|
the data format and converts it.
|
71
71
|
mode: (string) The PIL mode for an image. Most common are "L", "RGB",
|
72
|
-
"RGBA". Full explanation at https://pillow.readthedocs.io/en/
|
72
|
+
"RGBA". Full explanation at https://pillow.readthedocs.io/en/stable/handbook/concepts.html#modes.
|
73
73
|
caption: (string) Label for display of image.
|
74
74
|
|
75
75
|
Note : When logging a `torch.Tensor` as a `wandb.Image`, images are normalized. If you do not want to normalize your images, please convert your tensors to a PIL Image.
|
@@ -287,6 +287,8 @@ class Image(BatchableMedia):
|
|
287
287
|
)
|
288
288
|
if hasattr(data, "requires_grad") and data.requires_grad:
|
289
289
|
data = data.detach() # type: ignore
|
290
|
+
if hasattr(data, "dtype") and str(data.dtype) == "torch.uint8":
|
291
|
+
data = data.to(float)
|
290
292
|
data = vis_util.make_grid(data, normalize=True)
|
291
293
|
self._image = pil_image.fromarray(
|
292
294
|
data.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy()
|
@@ -1,4 +1,6 @@
|
|
1
|
+
import asyncio
|
1
2
|
import functools
|
3
|
+
import inspect
|
2
4
|
import logging
|
3
5
|
import sys
|
4
6
|
from typing import Any, Dict, Optional, Sequence, TypeVar
|
@@ -86,21 +88,44 @@ class PatchAPI:
|
|
86
88
|
original = functools.reduce(getattr, symbol_parts, self.set_api)
|
87
89
|
|
88
90
|
def method_factory(original_method: Any):
|
89
|
-
|
90
|
-
|
91
|
+
async def async_method(*args, **kwargs):
|
92
|
+
future = asyncio.Future()
|
93
|
+
|
94
|
+
async def callback(coro):
|
95
|
+
try:
|
96
|
+
result = await coro
|
97
|
+
loggable_dict = self.resolver(
|
98
|
+
args, kwargs, result, timer.start_time, timer.elapsed
|
99
|
+
)
|
100
|
+
if loggable_dict is not None:
|
101
|
+
run.log(loggable_dict)
|
102
|
+
future.set_result(result)
|
103
|
+
except Exception as e:
|
104
|
+
logger.warning(e)
|
105
|
+
|
106
|
+
with Timer() as timer:
|
107
|
+
coro = original_method(*args, **kwargs)
|
108
|
+
asyncio.ensure_future(callback(coro))
|
109
|
+
|
110
|
+
return await future
|
111
|
+
|
112
|
+
def sync_method(*args, **kwargs):
|
91
113
|
with Timer() as timer:
|
92
114
|
result = original_method(*args, **kwargs)
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
115
|
+
try:
|
116
|
+
loggable_dict = self.resolver(
|
117
|
+
args, kwargs, result, timer.start_time, timer.elapsed
|
118
|
+
)
|
119
|
+
if loggable_dict is not None:
|
120
|
+
run.log(loggable_dict)
|
121
|
+
except Exception as e:
|
122
|
+
logger.warning(e)
|
123
|
+
return result
|
124
|
+
|
125
|
+
if inspect.iscoroutinefunction(original_method):
|
126
|
+
return functools.wraps(original_method)(async_method)
|
127
|
+
else:
|
128
|
+
return functools.wraps(original_method)(sync_method)
|
104
129
|
|
105
130
|
# save original method
|
106
131
|
self.original_methods[symbol] = original
|
wandb/sdk/interface/interface.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
"""Interface base class - Used to send messages to the internal process.
|
2
2
|
|
3
3
|
InterfaceBase: The abstract class
|
4
|
-
InterfaceGrpc: Use gRPC to send and receive messages
|
5
4
|
InterfaceShared: Common routines for socket and queue based implementations
|
6
5
|
InterfaceQueue: Use multiprocessing queues to send and receive messages
|
7
6
|
InterfaceSock: Use socket to send and receive messages
|
@@ -16,10 +15,11 @@ import time
|
|
16
15
|
from abc import abstractmethod
|
17
16
|
from typing import TYPE_CHECKING, Any, Dict, Iterable, NewType, Optional, Tuple, Union
|
18
17
|
|
19
|
-
import wandb.sdk.lib.json_util as json
|
20
18
|
from wandb.proto import wandb_internal_pb2 as pb
|
21
19
|
from wandb.proto import wandb_telemetry_pb2 as tpb
|
20
|
+
from wandb.sdk.artifacts.artifact import Artifact
|
22
21
|
from wandb.sdk.artifacts.artifact_manifest import ArtifactManifest
|
22
|
+
from wandb.sdk.lib import json_util as json
|
23
23
|
from wandb.util import (
|
24
24
|
WandBJSONEncoderOld,
|
25
25
|
get_h5_typename,
|
@@ -38,8 +38,6 @@ from .message_future import MessageFuture
|
|
38
38
|
GlobStr = NewType("GlobStr", str)
|
39
39
|
|
40
40
|
if TYPE_CHECKING:
|
41
|
-
from wandb.sdk.artifacts.artifact import Artifact
|
42
|
-
|
43
41
|
from ..wandb_run import Run
|
44
42
|
|
45
43
|
if sys.version_info >= (3, 8):
|
@@ -97,21 +95,6 @@ class InterfaceBase:
|
|
97
95
|
def _publish_header(self, header: pb.HeaderRecord) -> None:
|
98
96
|
raise NotImplementedError
|
99
97
|
|
100
|
-
def communicate_check_version(
|
101
|
-
self, current_version: Optional[str] = None
|
102
|
-
) -> Optional[pb.CheckVersionResponse]:
|
103
|
-
check_version = pb.CheckVersionRequest()
|
104
|
-
if current_version:
|
105
|
-
check_version.current_version = current_version
|
106
|
-
ret = self._communicate_check_version(check_version)
|
107
|
-
return ret
|
108
|
-
|
109
|
-
@abstractmethod
|
110
|
-
def _communicate_check_version(
|
111
|
-
self, current_version: pb.CheckVersionRequest
|
112
|
-
) -> Optional[pb.CheckVersionResponse]:
|
113
|
-
raise NotImplementedError
|
114
|
-
|
115
98
|
def communicate_status(self) -> Optional[pb.StatusResponse]:
|
116
99
|
status = pb.StatusRequest()
|
117
100
|
resp = self._communicate_status(status)
|
@@ -123,28 +106,6 @@ class InterfaceBase:
|
|
123
106
|
) -> Optional[pb.StatusResponse]:
|
124
107
|
raise NotImplementedError
|
125
108
|
|
126
|
-
def communicate_stop_status(self) -> Optional[pb.StopStatusResponse]:
|
127
|
-
status = pb.StopStatusRequest()
|
128
|
-
resp = self._communicate_stop_status(status)
|
129
|
-
return resp
|
130
|
-
|
131
|
-
@abstractmethod
|
132
|
-
def _communicate_stop_status(
|
133
|
-
self, status: pb.StopStatusRequest
|
134
|
-
) -> Optional[pb.StopStatusResponse]:
|
135
|
-
raise NotImplementedError
|
136
|
-
|
137
|
-
def communicate_network_status(self) -> Optional[pb.NetworkStatusResponse]:
|
138
|
-
status = pb.NetworkStatusRequest()
|
139
|
-
resp = self._communicate_network_status(status)
|
140
|
-
return resp
|
141
|
-
|
142
|
-
@abstractmethod
|
143
|
-
def _communicate_network_status(
|
144
|
-
self, status: pb.NetworkStatusRequest
|
145
|
-
) -> Optional[pb.NetworkStatusResponse]:
|
146
|
-
raise NotImplementedError
|
147
|
-
|
148
109
|
def _make_config(
|
149
110
|
self,
|
150
111
|
data: Optional[dict] = None,
|
@@ -214,41 +175,6 @@ class InterfaceBase:
|
|
214
175
|
def _publish_metric(self, metric: pb.MetricRecord) -> None:
|
215
176
|
raise NotImplementedError
|
216
177
|
|
217
|
-
def communicate_attach(self, attach_id: str) -> Optional[pb.AttachResponse]:
|
218
|
-
attach = pb.AttachRequest(attach_id=attach_id)
|
219
|
-
resp = self._communicate_attach(attach)
|
220
|
-
return resp
|
221
|
-
|
222
|
-
@abstractmethod
|
223
|
-
def _communicate_attach(
|
224
|
-
self, attach: pb.AttachRequest
|
225
|
-
) -> Optional[pb.AttachResponse]:
|
226
|
-
raise NotImplementedError
|
227
|
-
|
228
|
-
def communicate_run(
|
229
|
-
self, run_obj: "Run", timeout: Optional[int] = None
|
230
|
-
) -> Optional[pb.RunUpdateResult]:
|
231
|
-
run = self._make_run(run_obj)
|
232
|
-
return self._communicate_run(run, timeout=timeout)
|
233
|
-
|
234
|
-
@abstractmethod
|
235
|
-
def _communicate_run(
|
236
|
-
self, run: pb.RunRecord, timeout: Optional[int] = None
|
237
|
-
) -> Optional[pb.RunUpdateResult]:
|
238
|
-
raise NotImplementedError
|
239
|
-
|
240
|
-
def communicate_run_start(self, run_pb: pb.RunRecord) -> bool:
|
241
|
-
run_start = pb.RunStartRequest()
|
242
|
-
run_start.run.CopyFrom(run_pb)
|
243
|
-
result = self._communicate_run_start(run_start)
|
244
|
-
return result is not None
|
245
|
-
|
246
|
-
@abstractmethod
|
247
|
-
def _communicate_run_start(
|
248
|
-
self, run_start: pb.RunStartRequest
|
249
|
-
) -> Optional[pb.RunStartResponse]:
|
250
|
-
raise NotImplementedError
|
251
|
-
|
252
178
|
def _make_summary_from_dict(self, summary_dict: dict) -> pb.SummaryRecord:
|
253
179
|
summary = pb.SummaryRecord()
|
254
180
|
for k, v in summary_dict.items():
|
@@ -336,27 +262,6 @@ class InterfaceBase:
|
|
336
262
|
def _publish_summary(self, summary: pb.SummaryRecord) -> None:
|
337
263
|
raise NotImplementedError
|
338
264
|
|
339
|
-
def communicate_get_summary(self) -> Optional[pb.GetSummaryResponse]:
|
340
|
-
get_summary = pb.GetSummaryRequest()
|
341
|
-
return self._communicate_get_summary(get_summary)
|
342
|
-
|
343
|
-
@abstractmethod
|
344
|
-
def _communicate_get_summary(
|
345
|
-
self, get_summary: pb.GetSummaryRequest
|
346
|
-
) -> Optional[pb.GetSummaryResponse]:
|
347
|
-
raise NotImplementedError
|
348
|
-
|
349
|
-
def communicate_sampled_history(self) -> Optional[pb.SampledHistoryResponse]:
|
350
|
-
sampled_history = pb.SampledHistoryRequest()
|
351
|
-
resp = self._communicate_sampled_history(sampled_history)
|
352
|
-
return resp
|
353
|
-
|
354
|
-
@abstractmethod
|
355
|
-
def _communicate_sampled_history(
|
356
|
-
self, sampled_history: pb.SampledHistoryRequest
|
357
|
-
) -> Optional[pb.SampledHistoryResponse]:
|
358
|
-
raise NotImplementedError
|
359
|
-
|
360
265
|
def _make_files(self, files_dict: "FilesDict") -> pb.FilesRecord:
|
361
266
|
files = pb.FilesRecord()
|
362
267
|
for path, policy in files_dict["files"]:
|
@@ -388,6 +293,10 @@ class InterfaceBase:
|
|
388
293
|
proto_artifact.metadata = json.dumps(json_friendly_val(artifact.metadata))
|
389
294
|
if artifact._base_id:
|
390
295
|
proto_artifact.base_id = artifact._base_id
|
296
|
+
|
297
|
+
ttl_duration_input = artifact._ttl_duration_seconds_to_gql()
|
298
|
+
if ttl_duration_input:
|
299
|
+
proto_artifact.ttl_duration_seconds = ttl_duration_input
|
391
300
|
proto_artifact.incremental_beta1 = artifact.incremental
|
392
301
|
self._make_artifact_manifest(artifact.manifest, obj=proto_artifact.manifest)
|
393
302
|
return proto_artifact
|
@@ -565,22 +474,6 @@ class InterfaceBase:
|
|
565
474
|
) -> MessageFuture:
|
566
475
|
raise NotImplementedError
|
567
476
|
|
568
|
-
@abstractmethod
|
569
|
-
def _communicate_artifact_send(
|
570
|
-
self, artifact_send: pb.ArtifactSendRequest
|
571
|
-
) -> Optional[pb.ArtifactSendResponse]:
|
572
|
-
raise NotImplementedError
|
573
|
-
|
574
|
-
@abstractmethod
|
575
|
-
def _communicate_artifact_poll(
|
576
|
-
self, art_poll: pb.ArtifactPollRequest
|
577
|
-
) -> Optional[pb.ArtifactPollResponse]:
|
578
|
-
raise NotImplementedError
|
579
|
-
|
580
|
-
@abstractmethod
|
581
|
-
def _publish_artifact_done(self, artifact_done: pb.ArtifactDoneRequest) -> None:
|
582
|
-
raise NotImplementedError
|
583
|
-
|
584
477
|
def publish_artifact(
|
585
478
|
self,
|
586
479
|
run: "Run",
|
@@ -772,17 +665,6 @@ class InterfaceBase:
|
|
772
665
|
def _publish_exit(self, exit_data: pb.RunExitRecord) -> None:
|
773
666
|
raise NotImplementedError
|
774
667
|
|
775
|
-
def communicate_poll_exit(self) -> Optional[pb.PollExitResponse]:
|
776
|
-
poll_exit = pb.PollExitRequest()
|
777
|
-
resp = self._communicate_poll_exit(poll_exit)
|
778
|
-
return resp
|
779
|
-
|
780
|
-
@abstractmethod
|
781
|
-
def _communicate_poll_exit(
|
782
|
-
self, poll_exit: pb.PollExitRequest
|
783
|
-
) -> Optional[pb.PollExitResponse]:
|
784
|
-
raise NotImplementedError
|
785
|
-
|
786
668
|
def publish_keepalive(self) -> None:
|
787
669
|
keepalive = pb.KeepaliveRequest()
|
788
670
|
self._publish_keepalive(keepalive)
|
@@ -791,17 +673,6 @@ class InterfaceBase:
|
|
791
673
|
def _publish_keepalive(self, keepalive: pb.KeepaliveRequest) -> None:
|
792
674
|
raise NotImplementedError
|
793
675
|
|
794
|
-
def communicate_server_info(self) -> Optional[pb.ServerInfoResponse]:
|
795
|
-
server_info = pb.ServerInfoRequest()
|
796
|
-
resp = self._communicate_server_info(server_info)
|
797
|
-
return resp
|
798
|
-
|
799
|
-
@abstractmethod
|
800
|
-
def _communicate_server_info(
|
801
|
-
self, server_info: pb.ServerInfoRequest
|
802
|
-
) -> Optional[pb.ServerInfoResponse]:
|
803
|
-
raise NotImplementedError
|
804
|
-
|
805
676
|
def join(self) -> None:
|
806
677
|
# Drop indicates that the internal process has already been shutdown
|
807
678
|
if self._drop:
|
@@ -867,6 +738,16 @@ class InterfaceBase:
|
|
867
738
|
def _deliver_network_status(self, status: pb.NetworkStatusRequest) -> MailboxHandle:
|
868
739
|
raise NotImplementedError
|
869
740
|
|
741
|
+
def deliver_internal_messages(self) -> MailboxHandle:
|
742
|
+
internal_message = pb.InternalMessagesRequest()
|
743
|
+
return self._deliver_internal_messages(internal_message)
|
744
|
+
|
745
|
+
@abstractmethod
|
746
|
+
def _deliver_internal_messages(
|
747
|
+
self, internal_message: pb.InternalMessagesRequest
|
748
|
+
) -> MailboxHandle:
|
749
|
+
raise NotImplementedError
|
750
|
+
|
870
751
|
def deliver_get_summary(self) -> MailboxHandle:
|
871
752
|
get_summary = pb.GetSummaryRequest()
|
872
753
|
return self._deliver_get_summary(get_summary)
|
@@ -124,6 +124,7 @@ class InterfaceShared(InterfaceBase):
|
|
124
124
|
resume: Optional[pb.ResumeRequest] = None,
|
125
125
|
status: Optional[pb.StatusRequest] = None,
|
126
126
|
stop_status: Optional[pb.StopStatusRequest] = None,
|
127
|
+
internal_messages: Optional[pb.InternalMessagesRequest] = None,
|
127
128
|
network_status: Optional[pb.NetworkStatusRequest] = None,
|
128
129
|
poll_exit: Optional[pb.PollExitRequest] = None,
|
129
130
|
partial_history: Optional[pb.PartialHistoryRequest] = None,
|
@@ -133,9 +134,6 @@ class InterfaceShared(InterfaceBase):
|
|
133
134
|
log_artifact: Optional[pb.LogArtifactRequest] = None,
|
134
135
|
defer: Optional[pb.DeferRequest] = None,
|
135
136
|
attach: Optional[pb.AttachRequest] = None,
|
136
|
-
artifact_send: Optional[pb.ArtifactSendRequest] = None,
|
137
|
-
artifact_poll: Optional[pb.ArtifactPollRequest] = None,
|
138
|
-
artifact_done: Optional[pb.ArtifactDoneRequest] = None,
|
139
137
|
server_info: Optional[pb.ServerInfoRequest] = None,
|
140
138
|
keepalive: Optional[pb.KeepaliveRequest] = None,
|
141
139
|
run_status: Optional[pb.RunStatusRequest] = None,
|
@@ -160,6 +158,8 @@ class InterfaceShared(InterfaceBase):
|
|
160
158
|
request.status.CopyFrom(status)
|
161
159
|
elif stop_status:
|
162
160
|
request.stop_status.CopyFrom(stop_status)
|
161
|
+
elif internal_messages:
|
162
|
+
request.internal_messages.CopyFrom(internal_messages)
|
163
163
|
elif network_status:
|
164
164
|
request.network_status.CopyFrom(network_status)
|
165
165
|
elif poll_exit:
|
@@ -178,12 +178,6 @@ class InterfaceShared(InterfaceBase):
|
|
178
178
|
request.defer.CopyFrom(defer)
|
179
179
|
elif attach:
|
180
180
|
request.attach.CopyFrom(attach)
|
181
|
-
elif artifact_send:
|
182
|
-
request.artifact_send.CopyFrom(artifact_send)
|
183
|
-
elif artifact_poll:
|
184
|
-
request.artifact_poll.CopyFrom(artifact_poll)
|
185
|
-
elif artifact_done:
|
186
|
-
request.artifact_done.CopyFrom(artifact_done)
|
187
181
|
elif server_info:
|
188
182
|
request.server_info.CopyFrom(server_info)
|
189
183
|
elif keepalive:
|
@@ -368,36 +362,6 @@ class InterfaceShared(InterfaceBase):
|
|
368
362
|
rec = self._make_record(metric=metric)
|
369
363
|
self._publish(rec)
|
370
364
|
|
371
|
-
def _communicate_attach(
|
372
|
-
self, attach: pb.AttachRequest
|
373
|
-
) -> Optional[pb.AttachResponse]:
|
374
|
-
req = self._make_request(attach=attach)
|
375
|
-
resp = self._communicate(req)
|
376
|
-
if resp is None:
|
377
|
-
return None
|
378
|
-
return resp.response.attach_response
|
379
|
-
|
380
|
-
def _communicate_run(
|
381
|
-
self, run: pb.RunRecord, timeout: Optional[int] = None
|
382
|
-
) -> Optional[pb.RunUpdateResult]:
|
383
|
-
"""Send synchronous run object waiting for a response.
|
384
|
-
|
385
|
-
Arguments:
|
386
|
-
run: RunRecord object
|
387
|
-
timeout: number of seconds to wait
|
388
|
-
|
389
|
-
Returns:
|
390
|
-
RunRecord object
|
391
|
-
"""
|
392
|
-
req = self._make_record(run=run)
|
393
|
-
resp = self._communicate(req, timeout=timeout)
|
394
|
-
if resp is None:
|
395
|
-
logger.info("couldn't get run from backend")
|
396
|
-
# Note: timeouts handled by callers: wandb_init.py
|
397
|
-
return None
|
398
|
-
assert resp.HasField("run_result")
|
399
|
-
return resp.run_result
|
400
|
-
|
401
365
|
def publish_stats(self, stats_dict: dict) -> None:
|
402
366
|
stats = self._make_stats(stats_dict)
|
403
367
|
rec = self._make_record(stats=stats)
|
@@ -419,30 +383,6 @@ class InterfaceShared(InterfaceBase):
|
|
419
383
|
rec = self._make_request(log_artifact=log_artifact)
|
420
384
|
return self._communicate_async(rec)
|
421
385
|
|
422
|
-
def _communicate_artifact_send(
|
423
|
-
self, artifact_send: pb.ArtifactSendRequest
|
424
|
-
) -> Optional[pb.ArtifactSendResponse]:
|
425
|
-
rec = self._make_request(artifact_send=artifact_send)
|
426
|
-
result = self._communicate(rec)
|
427
|
-
if result is None:
|
428
|
-
return None
|
429
|
-
artifact_send_resp = result.response.artifact_send_response
|
430
|
-
return artifact_send_resp
|
431
|
-
|
432
|
-
def _communicate_artifact_poll(
|
433
|
-
self, artifact_poll: pb.ArtifactPollRequest
|
434
|
-
) -> Optional[pb.ArtifactPollResponse]:
|
435
|
-
rec = self._make_request(artifact_poll=artifact_poll)
|
436
|
-
result = self._communicate(rec)
|
437
|
-
if result is None:
|
438
|
-
return None
|
439
|
-
artifact_poll_resp = result.response.artifact_poll_response
|
440
|
-
return artifact_poll_resp
|
441
|
-
|
442
|
-
def _publish_artifact_done(self, artifact_done: pb.ArtifactDoneRequest) -> None:
|
443
|
-
rec = self._make_request(artifact_done=artifact_done)
|
444
|
-
self._publish(rec)
|
445
|
-
|
446
386
|
def _publish_artifact(self, proto_artifact: pb.ArtifactRecord) -> None:
|
447
387
|
rec = self._make_record(artifact=proto_artifact)
|
448
388
|
self._publish(rec)
|
@@ -461,98 +401,14 @@ class InterfaceShared(InterfaceBase):
|
|
461
401
|
assert resp.response.status_response
|
462
402
|
return resp.response.status_response
|
463
403
|
|
464
|
-
def _communicate_stop_status(
|
465
|
-
self, status: pb.StopStatusRequest
|
466
|
-
) -> Optional[pb.StopStatusResponse]:
|
467
|
-
req = self._make_request(stop_status=status)
|
468
|
-
resp = self._communicate(req, local=True)
|
469
|
-
if resp is None:
|
470
|
-
return None
|
471
|
-
assert resp.response.stop_status_response
|
472
|
-
return resp.response.stop_status_response
|
473
|
-
|
474
|
-
def _communicate_network_status(
|
475
|
-
self, status: pb.NetworkStatusRequest
|
476
|
-
) -> Optional[pb.NetworkStatusResponse]:
|
477
|
-
req = self._make_request(network_status=status)
|
478
|
-
resp = self._communicate(req, local=True)
|
479
|
-
if resp is None:
|
480
|
-
return None
|
481
|
-
assert resp.response.network_status_response
|
482
|
-
return resp.response.network_status_response
|
483
|
-
|
484
404
|
def _publish_exit(self, exit_data: pb.RunExitRecord) -> None:
|
485
405
|
rec = self._make_record(exit=exit_data)
|
486
406
|
self._publish(rec)
|
487
407
|
|
488
|
-
def _communicate_poll_exit(
|
489
|
-
self, poll_exit: pb.PollExitRequest
|
490
|
-
) -> Optional[pb.PollExitResponse]:
|
491
|
-
rec = self._make_request(poll_exit=poll_exit)
|
492
|
-
result = self._communicate(rec)
|
493
|
-
if result is None:
|
494
|
-
return None
|
495
|
-
poll_exit_response = result.response.poll_exit_response
|
496
|
-
assert poll_exit_response
|
497
|
-
return poll_exit_response
|
498
|
-
|
499
408
|
def _publish_keepalive(self, keepalive: pb.KeepaliveRequest) -> None:
|
500
409
|
record = self._make_request(keepalive=keepalive)
|
501
410
|
self._publish(record)
|
502
411
|
|
503
|
-
def _communicate_server_info(
|
504
|
-
self, server_info: pb.ServerInfoRequest
|
505
|
-
) -> Optional[pb.ServerInfoResponse]:
|
506
|
-
rec = self._make_request(server_info=server_info)
|
507
|
-
result = self._communicate(rec)
|
508
|
-
if result is None:
|
509
|
-
return None
|
510
|
-
server_info_response = result.response.server_info_response
|
511
|
-
assert server_info_response
|
512
|
-
return server_info_response
|
513
|
-
|
514
|
-
def _communicate_check_version(
|
515
|
-
self, check_version: pb.CheckVersionRequest
|
516
|
-
) -> Optional[pb.CheckVersionResponse]:
|
517
|
-
rec = self._make_request(check_version=check_version)
|
518
|
-
result = self._communicate(rec)
|
519
|
-
if result is None:
|
520
|
-
# Note: timeouts handled by callers: wandb_init.py
|
521
|
-
return None
|
522
|
-
return result.response.check_version_response
|
523
|
-
|
524
|
-
def _communicate_run_start(
|
525
|
-
self, run_start: pb.RunStartRequest
|
526
|
-
) -> Optional[pb.RunStartResponse]:
|
527
|
-
rec = self._make_request(run_start=run_start)
|
528
|
-
result = self._communicate(rec)
|
529
|
-
if result is None:
|
530
|
-
return None
|
531
|
-
run_start_response = result.response.run_start_response
|
532
|
-
return run_start_response
|
533
|
-
|
534
|
-
def _communicate_get_summary(
|
535
|
-
self, get_summary: pb.GetSummaryRequest
|
536
|
-
) -> Optional[pb.GetSummaryResponse]:
|
537
|
-
record = self._make_request(get_summary=get_summary)
|
538
|
-
result = self._communicate(record, timeout=10)
|
539
|
-
if result is None:
|
540
|
-
return None
|
541
|
-
get_summary_response = result.response.get_summary_response
|
542
|
-
assert get_summary_response
|
543
|
-
return get_summary_response
|
544
|
-
|
545
|
-
def _communicate_sampled_history(
|
546
|
-
self, sampled_history: pb.SampledHistoryRequest
|
547
|
-
) -> Optional[pb.SampledHistoryResponse]:
|
548
|
-
record = self._make_request(sampled_history=sampled_history)
|
549
|
-
result = self._communicate(record)
|
550
|
-
if result is None:
|
551
|
-
return None
|
552
|
-
sampled_history_response = result.response.sampled_history_response
|
553
|
-
assert sampled_history_response
|
554
|
-
return sampled_history_response
|
555
|
-
|
556
412
|
def _communicate_shutdown(self) -> None:
|
557
413
|
# shutdown
|
558
414
|
request = pb.Request(shutdown=pb.ShutdownRequest())
|
@@ -609,6 +465,12 @@ class InterfaceShared(InterfaceBase):
|
|
609
465
|
record = self._make_request(network_status=network_status)
|
610
466
|
return self._deliver_record(record)
|
611
467
|
|
468
|
+
def _deliver_internal_messages(
|
469
|
+
self, internal_message: pb.InternalMessagesRequest
|
470
|
+
) -> MailboxHandle:
|
471
|
+
record = self._make_request(internal_messages=internal_message)
|
472
|
+
return self._deliver_record(record)
|
473
|
+
|
612
474
|
def _deliver_request_server_info(
|
613
475
|
self, server_info: pb.ServerInfoRequest
|
614
476
|
) -> MailboxHandle:
|
@@ -59,29 +59,3 @@ class InterfaceSock(InterfaceShared):
|
|
59
59
|
raise Exception("The wandb backend process has shutdown")
|
60
60
|
future = self._router.send_and_receive(rec, local=local)
|
61
61
|
return future
|
62
|
-
|
63
|
-
def _communicate_stop_status(
|
64
|
-
self, status: "pb.StopStatusRequest"
|
65
|
-
) -> Optional["pb.StopStatusResponse"]:
|
66
|
-
# Message stop_status is called from a daemon thread started by wandb_run
|
67
|
-
# The underlying socket might go away while the thread is still running.
|
68
|
-
# Handle this like a timed-out message as the daemon thread will eventually
|
69
|
-
# be killed.
|
70
|
-
try:
|
71
|
-
data = super()._communicate_stop_status(status)
|
72
|
-
except BrokenPipeError:
|
73
|
-
data = None
|
74
|
-
return data
|
75
|
-
|
76
|
-
def _communicate_network_status(
|
77
|
-
self, status: "pb.NetworkStatusRequest"
|
78
|
-
) -> Optional["pb.NetworkStatusResponse"]:
|
79
|
-
# Message network_status is called from a daemon thread started by wandb_run
|
80
|
-
# The underlying socket might go away while the thread is still running.
|
81
|
-
# Handle this like a timed-out message as the daemon thread will eventually
|
82
|
-
# be killed.
|
83
|
-
try:
|
84
|
-
data = super()._communicate_network_status(status)
|
85
|
-
except BrokenPipeError:
|
86
|
-
data = None
|
87
|
-
return data
|