wandb 0.19.6__py3-none-musllinux_1_2_aarch64.whl → 0.19.7__py3-none-musllinux_1_2_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wandb/__init__.py +1 -1
- wandb/__init__.pyi +25 -5
- wandb/apis/public/_generated/__init__.py +21 -0
- wandb/apis/public/_generated/base.py +128 -0
- wandb/apis/public/_generated/enums.py +4 -0
- wandb/apis/public/_generated/input_types.py +4 -0
- wandb/apis/public/_generated/operations.py +15 -0
- wandb/apis/public/_generated/server_features_query.py +27 -0
- wandb/apis/public/_generated/typing_compat.py +14 -0
- wandb/apis/public/api.py +192 -6
- wandb/apis/public/artifacts.py +13 -45
- wandb/apis/public/registries.py +573 -0
- wandb/apis/public/utils.py +36 -0
- wandb/bin/gpu_stats +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/cli.py +11 -20
- wandb/env.py +10 -0
- wandb/proto/v3/wandb_internal_pb2.py +243 -222
- wandb/proto/v3/wandb_server_pb2.py +4 -4
- wandb/proto/v3/wandb_settings_pb2.py +1 -1
- wandb/proto/v4/wandb_internal_pb2.py +226 -222
- wandb/proto/v4/wandb_server_pb2.py +4 -4
- wandb/proto/v4/wandb_settings_pb2.py +1 -1
- wandb/proto/v5/wandb_internal_pb2.py +226 -222
- wandb/proto/v5/wandb_server_pb2.py +4 -4
- wandb/proto/v5/wandb_settings_pb2.py +1 -1
- wandb/sdk/artifacts/_graphql_fragments.py +126 -0
- wandb/sdk/artifacts/artifact.py +43 -88
- wandb/sdk/backend/backend.py +1 -1
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +14 -6
- wandb/sdk/data_types/helper_types/image_mask.py +12 -6
- wandb/sdk/data_types/saved_model.py +35 -46
- wandb/sdk/data_types/video.py +7 -16
- wandb/sdk/interface/interface.py +26 -10
- wandb/sdk/interface/interface_queue.py +5 -8
- wandb/sdk/interface/interface_relay.py +1 -6
- wandb/sdk/interface/interface_shared.py +21 -99
- wandb/sdk/interface/interface_sock.py +2 -13
- wandb/sdk/interface/router.py +21 -15
- wandb/sdk/interface/router_queue.py +2 -1
- wandb/sdk/interface/router_relay.py +2 -1
- wandb/sdk/interface/router_sock.py +5 -4
- wandb/sdk/internal/handler.py +4 -3
- wandb/sdk/internal/internal_api.py +12 -1
- wandb/sdk/internal/sender.py +0 -18
- wandb/sdk/lib/apikey.py +87 -26
- wandb/sdk/lib/asyncio_compat.py +210 -0
- wandb/sdk/lib/progress.py +78 -16
- wandb/sdk/lib/service_connection.py +1 -1
- wandb/sdk/lib/sock_client.py +7 -7
- wandb/sdk/mailbox/__init__.py +23 -0
- wandb/sdk/mailbox/handles.py +199 -0
- wandb/sdk/mailbox/mailbox.py +121 -0
- wandb/sdk/mailbox/wait_with_progress.py +134 -0
- wandb/sdk/service/server_sock.py +5 -1
- wandb/sdk/service/streams.py +66 -74
- wandb/sdk/verify/verify.py +54 -2
- wandb/sdk/wandb_init.py +61 -61
- wandb/sdk/wandb_login.py +7 -4
- wandb/sdk/wandb_metadata.py +65 -34
- wandb/sdk/wandb_require.py +14 -8
- wandb/sdk/wandb_run.py +82 -87
- wandb/sdk/wandb_settings.py +3 -3
- wandb/sdk/wandb_setup.py +19 -8
- wandb/sdk/wandb_sync.py +2 -4
- wandb/util.py +3 -1
- {wandb-0.19.6.dist-info → wandb-0.19.7.dist-info}/METADATA +2 -2
- {wandb-0.19.6.dist-info → wandb-0.19.7.dist-info}/RECORD +71 -58
- wandb/sdk/lib/mailbox.py +0 -442
- {wandb-0.19.6.dist-info → wandb-0.19.7.dist-info}/WHEEL +0 -0
- {wandb-0.19.6.dist-info → wandb-0.19.7.dist-info}/entry_points.txt +0 -0
- {wandb-0.19.6.dist-info → wandb-0.19.7.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/wandb_metadata.py
CHANGED
@@ -236,39 +236,6 @@ class Metadata(BaseModel, validate_assignment=True):
|
|
236
236
|
|
237
237
|
NOTE: Definitions must be kept in sync with wandb_internal.proto::MetadataRequest.
|
238
238
|
|
239
|
-
Attributes:
|
240
|
-
os: Operating system.
|
241
|
-
python: Python version.
|
242
|
-
heartbeat_at: Timestamp of last heartbeat.
|
243
|
-
started_at: Timestamp of run start.
|
244
|
-
docker: Docker image.
|
245
|
-
cuda: CUDA version.
|
246
|
-
args: Command-line arguments.
|
247
|
-
state: Run state.
|
248
|
-
program: Program name.
|
249
|
-
code_path: Path to code.
|
250
|
-
git: Git repository information.
|
251
|
-
email: Email address.
|
252
|
-
root: Root directory.
|
253
|
-
host: Host name.
|
254
|
-
username: Username.
|
255
|
-
executable: Python executable path.
|
256
|
-
code_path_local: Local code path.
|
257
|
-
colab: Colab URL.
|
258
|
-
cpu_count: CPU count.
|
259
|
-
cpu_count_logical: Logical CPU count.
|
260
|
-
gpu_type: GPU type.
|
261
|
-
disk: Disk information.
|
262
|
-
memory: Memory information.
|
263
|
-
cpu: CPU information.
|
264
|
-
apple: Apple silicon information.
|
265
|
-
gpu_nvidia: NVIDIA GPU information.
|
266
|
-
gpu_amd: AMD GPU information.
|
267
|
-
slurm: Slurm environment information.
|
268
|
-
cuda_version: CUDA version.
|
269
|
-
trainium: Trainium information.
|
270
|
-
tpu: TPU information.
|
271
|
-
|
272
239
|
Examples:
|
273
240
|
Update Run metadata:
|
274
241
|
|
@@ -296,44 +263,108 @@ class Metadata(BaseModel, validate_assignment=True):
|
|
296
263
|
```
|
297
264
|
"""
|
298
265
|
|
299
|
-
# TODO: Pydantic configuration.
|
300
266
|
model_config = ConfigDict(
|
301
267
|
extra="ignore", # ignore extra fields
|
302
268
|
validate_default=True, # validate default values
|
269
|
+
use_attribute_docstrings=True, # for field descriptions
|
270
|
+
revalidate_instances="always",
|
303
271
|
)
|
304
272
|
|
305
273
|
os: str | None = None
|
274
|
+
"""Operating system."""
|
275
|
+
|
306
276
|
python: str | None = None
|
277
|
+
"""Python version."""
|
278
|
+
|
307
279
|
heartbeat_at: datetime | None = Field(default=None, alias="heartbeatAt")
|
280
|
+
"""Timestamp of last heartbeat."""
|
281
|
+
|
308
282
|
started_at: datetime | None = Field(default=None, alias="startedAt")
|
283
|
+
"""Timestamp of run start."""
|
284
|
+
|
309
285
|
docker: str | None = None
|
286
|
+
"""Docker image."""
|
287
|
+
|
310
288
|
cuda: str | None = None
|
289
|
+
"""CUDA version."""
|
290
|
+
|
311
291
|
args: list[str] = Field(default_factory=list)
|
292
|
+
"""Command-line arguments."""
|
293
|
+
|
312
294
|
state: str | None = None
|
295
|
+
"""Run state."""
|
296
|
+
|
313
297
|
program: str | None = None
|
298
|
+
"""Program name."""
|
299
|
+
|
314
300
|
code_path: str | None = Field(default=None, alias="codePath")
|
301
|
+
"""Path to code."""
|
302
|
+
|
315
303
|
git: GitRepoRecord | None = None
|
304
|
+
"""Git repository information."""
|
305
|
+
|
316
306
|
email: str | None = None
|
307
|
+
"""Email address."""
|
308
|
+
|
317
309
|
root: str | None = None
|
310
|
+
"""Root directory."""
|
311
|
+
|
318
312
|
host: str | None = None
|
313
|
+
"""Host name."""
|
314
|
+
|
319
315
|
username: str | None = None
|
316
|
+
"""Username."""
|
317
|
+
|
320
318
|
executable: str | None = None
|
319
|
+
"""Python executable path."""
|
320
|
+
|
321
321
|
code_path_local: str | None = Field(default=None, alias="codePathLocal")
|
322
|
+
"""Local code path."""
|
323
|
+
|
322
324
|
colab: str | None = None
|
325
|
+
"""Colab URL."""
|
326
|
+
|
323
327
|
cpu_count: int | None = Field(default=None, alias="cpuCount")
|
328
|
+
"""CPU count."""
|
329
|
+
|
324
330
|
cpu_count_logical: int | None = Field(default=None, alias="cpuCountLogical")
|
331
|
+
"""Logical CPU count."""
|
332
|
+
|
325
333
|
gpu_type: str | None = Field(default=None, alias="gpuType")
|
334
|
+
"""GPU type."""
|
335
|
+
|
326
336
|
gpu_count: int | None = Field(default=None, alias="gpuCount")
|
337
|
+
"""GPU count."""
|
338
|
+
|
327
339
|
disk: dict[str, DiskInfo] = Field(default_factory=dict)
|
340
|
+
"""Disk information."""
|
341
|
+
|
328
342
|
memory: MemoryInfo | None = None
|
343
|
+
"""Memory information."""
|
344
|
+
|
329
345
|
cpu: CpuInfo | None = None
|
346
|
+
"""CPU information."""
|
347
|
+
|
330
348
|
apple: AppleInfo | None = None
|
349
|
+
"""Apple silicon information."""
|
350
|
+
|
331
351
|
gpu_nvidia: list[GpuNvidiaInfo] = Field(default_factory=list, alias="gpuNvidia")
|
352
|
+
"""NVIDIA GPU information."""
|
353
|
+
|
332
354
|
gpu_amd: list[GpuAmdInfo] = Field(default_factory=list, alias="gpuAmd")
|
355
|
+
"""AMD GPU information."""
|
356
|
+
|
333
357
|
slurm: dict[str, str] = Field(default_factory=dict)
|
358
|
+
"""Slurm environment information."""
|
359
|
+
|
334
360
|
cuda_version: str | None = Field(default=None, alias="cudaVersion")
|
361
|
+
"""CUDA version."""
|
362
|
+
|
335
363
|
trainium: TrainiumInfo | None = None
|
364
|
+
"""Trainium information."""
|
365
|
+
|
336
366
|
tpu: TPUInfo | None = None
|
367
|
+
"""TPU information."""
|
337
368
|
|
338
369
|
def __init__(self, **data):
|
339
370
|
super().__init__(**data)
|
wandb/sdk/wandb_require.py
CHANGED
@@ -9,8 +9,10 @@ Example:
|
|
9
9
|
wandb.require("incremental-artifacts@beta")
|
10
10
|
"""
|
11
11
|
|
12
|
+
from __future__ import annotations
|
13
|
+
|
12
14
|
import os
|
13
|
-
from typing import
|
15
|
+
from typing import Iterable
|
14
16
|
|
15
17
|
import wandb
|
16
18
|
from wandb.env import _REQUIRE_LEGACY_SERVICE
|
@@ -21,9 +23,9 @@ from wandb.sdk import wandb_run
|
|
21
23
|
class _Requires:
|
22
24
|
"""Internal feature class."""
|
23
25
|
|
24
|
-
_features:
|
26
|
+
_features: tuple[str, ...]
|
25
27
|
|
26
|
-
def __init__(self, features:
|
28
|
+
def __init__(self, features: str | Iterable[str]) -> None:
|
27
29
|
self._features = (
|
28
30
|
tuple([features]) if isinstance(features, str) else tuple(features)
|
29
31
|
)
|
@@ -67,17 +69,21 @@ class _Requires:
|
|
67
69
|
|
68
70
|
|
69
71
|
def require(
|
70
|
-
requirement:
|
71
|
-
experiment:
|
72
|
+
requirement: str | Iterable[str] | None = None,
|
73
|
+
experiment: str | Iterable[str] | None = None,
|
72
74
|
) -> None:
|
73
75
|
"""Indicate which experimental features are used by the script.
|
74
76
|
|
77
|
+
This should be called before any other `wandb` functions, ideally right
|
78
|
+
after importing `wandb`.
|
79
|
+
|
75
80
|
Args:
|
76
|
-
requirement:
|
77
|
-
|
81
|
+
requirement: The name of a feature to require or an iterable of
|
82
|
+
feature names.
|
83
|
+
experiment: An alias for `requirement`.
|
78
84
|
|
79
85
|
Raises:
|
80
|
-
wandb.errors.UnsupportedError:
|
86
|
+
wandb.errors.UnsupportedError: If a feature name is unknown.
|
81
87
|
"""
|
82
88
|
features = requirement or experiment
|
83
89
|
if not features:
|
wandb/sdk/wandb_run.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import asyncio
|
3
4
|
import atexit
|
4
5
|
import functools
|
5
6
|
import glob
|
@@ -43,6 +44,7 @@ from wandb.proto.wandb_internal_pb2 import (
|
|
43
44
|
)
|
44
45
|
from wandb.sdk.artifacts.artifact import Artifact
|
45
46
|
from wandb.sdk.internal import job_builder
|
47
|
+
from wandb.sdk.lib import asyncio_compat
|
46
48
|
from wandb.sdk.lib.import_hooks import (
|
47
49
|
register_post_import_hook,
|
48
50
|
unregister_post_import_hook,
|
@@ -82,7 +84,12 @@ from .lib import (
|
|
82
84
|
telemetry,
|
83
85
|
)
|
84
86
|
from .lib.exit_hooks import ExitHooks
|
85
|
-
from .
|
87
|
+
from .mailbox import (
|
88
|
+
HandleAbandonedError,
|
89
|
+
MailboxClosedError,
|
90
|
+
MailboxHandle,
|
91
|
+
wait_with_progress,
|
92
|
+
)
|
86
93
|
from .wandb_alerts import AlertLevel
|
87
94
|
from .wandb_metadata import Metadata
|
88
95
|
from .wandb_settings import Settings
|
@@ -222,30 +229,31 @@ class RunStatusChecker:
|
|
222
229
|
while not join_requested:
|
223
230
|
time_probe = time.monotonic()
|
224
231
|
if not local_handle:
|
225
|
-
|
232
|
+
try:
|
233
|
+
local_handle = request()
|
234
|
+
except MailboxClosedError:
|
235
|
+
# This can happen if the service process dies.
|
236
|
+
break
|
226
237
|
assert local_handle
|
227
238
|
|
228
239
|
with lock:
|
229
240
|
if self._join_event.is_set():
|
230
241
|
break
|
231
242
|
set_handle(local_handle)
|
243
|
+
|
232
244
|
try:
|
233
|
-
result = local_handle.
|
234
|
-
except
|
235
|
-
#
|
236
|
-
# from the internal process but the internal process could
|
237
|
-
# be shutdown at any time. In this case assume that the
|
238
|
-
# thread should exit silently. This is possible
|
239
|
-
# because we do not have an atexit handler for the user
|
240
|
-
# process which quiesces active threads.
|
245
|
+
result = local_handle.wait_or(timeout=timeout)
|
246
|
+
except HandleAbandonedError:
|
247
|
+
# This can happen if the service process dies.
|
241
248
|
break
|
249
|
+
except TimeoutError:
|
250
|
+
result = None
|
251
|
+
|
242
252
|
with lock:
|
243
253
|
set_handle(None)
|
244
254
|
|
245
255
|
if result:
|
246
256
|
process(result)
|
247
|
-
# if request finished, clear the handle to send on the next interval
|
248
|
-
local_handle.abandon()
|
249
257
|
local_handle = None
|
250
258
|
|
251
259
|
time_elapsed = time.monotonic() - time_probe
|
@@ -1300,12 +1308,6 @@ class Run:
|
|
1300
1308
|
if self._backend and self._backend.interface:
|
1301
1309
|
self._backend.interface.publish_summary(self, summary_record)
|
1302
1310
|
|
1303
|
-
def _on_progress_get_summary(self, handle: MailboxProgress) -> None:
|
1304
|
-
pass
|
1305
|
-
# TODO(jhr): enable printing for get_summary in later mailbox dev phase
|
1306
|
-
# line = "Waiting for run.summary data..."
|
1307
|
-
# self._printer.display(line)
|
1308
|
-
|
1309
1311
|
def _summary_get_current_summary_callback(self) -> dict[str, Any]:
|
1310
1312
|
if self._is_finished:
|
1311
1313
|
# TODO: WB-18420: fetch summary from backend and stage it before run is finished
|
@@ -1314,12 +1316,12 @@ class Run:
|
|
1314
1316
|
if not self._backend or not self._backend.interface:
|
1315
1317
|
return {}
|
1316
1318
|
handle = self._backend.interface.deliver_get_summary()
|
1317
|
-
|
1318
|
-
|
1319
|
-
|
1320
|
-
|
1321
|
-
if not result:
|
1319
|
+
|
1320
|
+
try:
|
1321
|
+
result = handle.wait_or(timeout=self._settings.summary_timeout)
|
1322
|
+
except TimeoutError:
|
1322
1323
|
return {}
|
1324
|
+
|
1323
1325
|
get_summary_response = result.response.get_summary_response
|
1324
1326
|
return proto_util.dict_from_proto_list(get_summary_response.item)
|
1325
1327
|
|
@@ -1503,7 +1505,7 @@ class Run:
|
|
1503
1505
|
if run_obj.notes:
|
1504
1506
|
self._settings.run_notes = run_obj.notes
|
1505
1507
|
if run_obj.tags:
|
1506
|
-
self._settings.run_tags = run_obj.tags
|
1508
|
+
self._settings.run_tags = tuple(run_obj.tags)
|
1507
1509
|
if run_obj.sweep_id:
|
1508
1510
|
self._settings.sweep_id = run_obj.sweep_id
|
1509
1511
|
if run_obj.host:
|
@@ -2111,15 +2113,6 @@ class Run:
|
|
2111
2113
|
with telemetry.context(run=self) as tel:
|
2112
2114
|
tel.feature.finish = True
|
2113
2115
|
|
2114
|
-
# Pop this run (hopefully) from the run stack, to support the "reinit"
|
2115
|
-
# functionality of wandb.init().
|
2116
|
-
#
|
2117
|
-
# TODO: It's not clear how _global_run_stack could have length other
|
2118
|
-
# than 1 at this point in the code. If you're reading this, consider
|
2119
|
-
# refactoring this thing.
|
2120
|
-
if self._wl and len(self._wl._global_run_stack) > 0:
|
2121
|
-
self._wl._global_run_stack.pop()
|
2122
|
-
|
2123
2116
|
# Run hooks that need to happen before the last messages to the
|
2124
2117
|
# internal service, like Jupyter hooks.
|
2125
2118
|
for hook in self._teardown_hooks:
|
@@ -2175,8 +2168,7 @@ class Run:
|
|
2175
2168
|
return RunStatus()
|
2176
2169
|
|
2177
2170
|
handle_run_status = self._backend.interface.deliver_request_run_status()
|
2178
|
-
result = handle_run_status.
|
2179
|
-
assert result
|
2171
|
+
result = handle_run_status.wait_or(timeout=None)
|
2180
2172
|
sync_data = result.response.run_status_response
|
2181
2173
|
|
2182
2174
|
sync_time = None
|
@@ -2563,31 +2555,38 @@ class Run:
|
|
2563
2555
|
else:
|
2564
2556
|
return artifact
|
2565
2557
|
|
2566
|
-
def
|
2567
|
-
handle = probe_handle.get_mailbox_handle()
|
2568
|
-
if handle:
|
2569
|
-
result = handle.wait(timeout=0, release=False)
|
2570
|
-
if not result:
|
2571
|
-
return
|
2572
|
-
probe_handle.set_probe_result(result)
|
2573
|
-
assert self._backend and self._backend.interface
|
2574
|
-
handle = self._backend.interface.deliver_poll_exit()
|
2575
|
-
probe_handle.set_mailbox_handle(handle)
|
2576
|
-
|
2577
|
-
def _on_progress_exit(
|
2558
|
+
async def _display_finish_stats(
|
2578
2559
|
self,
|
2579
2560
|
progress_printer: progress.ProgressPrinter,
|
2580
|
-
progress_handle: MailboxProgress,
|
2581
2561
|
) -> None:
|
2582
|
-
|
2583
|
-
if not probe_handles or len(probe_handles) != 1:
|
2584
|
-
return
|
2562
|
+
last_result: Result | None = None
|
2585
2563
|
|
2586
|
-
|
2587
|
-
|
2588
|
-
|
2564
|
+
async def loop_update_printer() -> None:
|
2565
|
+
while True:
|
2566
|
+
if last_result:
|
2567
|
+
progress_printer.update(
|
2568
|
+
[last_result.response.poll_exit_response],
|
2569
|
+
)
|
2570
|
+
await asyncio.sleep(0.1)
|
2571
|
+
|
2572
|
+
async def loop_poll_exit() -> None:
|
2573
|
+
nonlocal last_result
|
2574
|
+
assert self._backend and self._backend.interface
|
2575
|
+
|
2576
|
+
while True:
|
2577
|
+
handle = self._backend.interface.deliver_poll_exit()
|
2578
|
+
|
2579
|
+
time_start = time.monotonic()
|
2580
|
+
last_result = await handle.wait_async(timeout=None)
|
2589
2581
|
|
2590
|
-
|
2582
|
+
# Update at most once a second.
|
2583
|
+
time_elapsed = time.monotonic() - time_start
|
2584
|
+
if time_elapsed < 1:
|
2585
|
+
await asyncio.sleep(1 - time_elapsed)
|
2586
|
+
|
2587
|
+
async with asyncio_compat.open_task_group() as task_group:
|
2588
|
+
task_group.start_soon(loop_update_printer())
|
2589
|
+
task_group.start_soon(loop_poll_exit())
|
2591
2590
|
|
2592
2591
|
def _on_finish(self) -> None:
|
2593
2592
|
trigger.call("on_finished")
|
@@ -2604,25 +2603,24 @@ class Run:
|
|
2604
2603
|
else:
|
2605
2604
|
exit_handle = self._backend.interface.deliver_finish_without_exit()
|
2606
2605
|
|
2607
|
-
exit_handle.add_probe(on_probe=self._on_probe_exit)
|
2608
|
-
|
2609
2606
|
with progress.progress_printer(
|
2610
2607
|
self._printer,
|
2611
|
-
|
2608
|
+
default_text="Finishing up...",
|
2612
2609
|
) as progress_printer:
|
2613
2610
|
# Wait for the run to complete.
|
2614
|
-
|
2615
|
-
|
2616
|
-
|
2617
|
-
|
2611
|
+
wait_with_progress(
|
2612
|
+
exit_handle,
|
2613
|
+
timeout=None,
|
2614
|
+
progress_after=1,
|
2615
|
+
display_progress=functools.partial(
|
2616
|
+
self._display_finish_stats,
|
2618
2617
|
progress_printer,
|
2619
2618
|
),
|
2620
2619
|
)
|
2621
2620
|
|
2622
2621
|
# Print some final statistics.
|
2623
2622
|
poll_exit_handle = self._backend.interface.deliver_poll_exit()
|
2624
|
-
result = poll_exit_handle.
|
2625
|
-
assert result
|
2623
|
+
result = poll_exit_handle.wait_or(timeout=None)
|
2626
2624
|
progress.print_sync_dedupe_stats(
|
2627
2625
|
self._printer,
|
2628
2626
|
result.response.poll_exit_response,
|
@@ -2630,8 +2628,7 @@ class Run:
|
|
2630
2628
|
|
2631
2629
|
self._poll_exit_response = result.response.poll_exit_response
|
2632
2630
|
internal_messages_handle = self._backend.interface.deliver_internal_messages()
|
2633
|
-
result = internal_messages_handle.
|
2634
|
-
assert result
|
2631
|
+
result = internal_messages_handle.wait_or(timeout=None)
|
2635
2632
|
self._internal_messages_response = result.response.internal_messages_response
|
2636
2633
|
|
2637
2634
|
# dispatch all our final requests
|
@@ -2641,12 +2638,10 @@ class Run:
|
|
2641
2638
|
self._backend.interface.deliver_request_sampled_history()
|
2642
2639
|
)
|
2643
2640
|
|
2644
|
-
result = sampled_history_handle.
|
2645
|
-
assert result
|
2641
|
+
result = sampled_history_handle.wait_or(timeout=None)
|
2646
2642
|
self._sampled_history = result.response.sampled_history_response
|
2647
2643
|
|
2648
|
-
result = final_summary_handle.
|
2649
|
-
assert result
|
2644
|
+
result = final_summary_handle.wait_or(timeout=None)
|
2650
2645
|
self._final_summary = result.response.get_summary_response
|
2651
2646
|
|
2652
2647
|
if self._backend:
|
@@ -2952,13 +2947,10 @@ class Run:
|
|
2952
2947
|
wandb.termwarn(
|
2953
2948
|
"Artifact TTL will be disabled for source artifacts that are linked to portfolios."
|
2954
2949
|
)
|
2955
|
-
result = handle.
|
2956
|
-
|
2957
|
-
|
2958
|
-
|
2959
|
-
response = result.response.link_artifact_response
|
2960
|
-
if response.error_message:
|
2961
|
-
wandb.termerror(response.error_message)
|
2950
|
+
result = handle.wait_or(timeout=None)
|
2951
|
+
response = result.response.link_artifact_response
|
2952
|
+
if response.error_message:
|
2953
|
+
wandb.termerror(response.error_message)
|
2962
2954
|
|
2963
2955
|
@_run_decorator._noop_on_finish()
|
2964
2956
|
@_run_decorator._attach
|
@@ -3258,7 +3250,7 @@ class Run:
|
|
3258
3250
|
self._assert_can_log_artifact(artifact)
|
3259
3251
|
if self._backend and self._backend.interface:
|
3260
3252
|
if not self._settings._offline:
|
3261
|
-
|
3253
|
+
handle = self._backend.interface.deliver_artifact(
|
3262
3254
|
self,
|
3263
3255
|
artifact,
|
3264
3256
|
aliases,
|
@@ -3268,7 +3260,7 @@ class Run:
|
|
3268
3260
|
is_user_created=is_user_created,
|
3269
3261
|
use_after_commit=use_after_commit,
|
3270
3262
|
)
|
3271
|
-
artifact.
|
3263
|
+
artifact._set_save_handle(handle, self._public_api().client)
|
3272
3264
|
else:
|
3273
3265
|
self._backend.interface.publish_artifact(
|
3274
3266
|
self,
|
@@ -3670,16 +3662,18 @@ class Run:
|
|
3670
3662
|
return {}
|
3671
3663
|
|
3672
3664
|
handle = self._backend.interface.deliver_get_system_metrics()
|
3673
|
-
result = handle.wait(timeout=1)
|
3674
3665
|
|
3675
|
-
|
3666
|
+
try:
|
3667
|
+
result = handle.wait_or(timeout=1)
|
3668
|
+
except TimeoutError:
|
3669
|
+
return {}
|
3670
|
+
else:
|
3676
3671
|
try:
|
3677
3672
|
response = result.response.get_system_metrics_response
|
3678
|
-
if response
|
3679
|
-
return pb_to_dict(response)
|
3673
|
+
return pb_to_dict(response) if response else {}
|
3680
3674
|
except Exception as e:
|
3681
3675
|
logger.error("Error getting system metrics: %s", e)
|
3682
|
-
|
3676
|
+
return {}
|
3683
3677
|
|
3684
3678
|
@property
|
3685
3679
|
@_run_decorator._attach
|
@@ -3698,10 +3692,11 @@ class Run:
|
|
3698
3692
|
self.__metadata._set_callback(self._metadata_callback)
|
3699
3693
|
|
3700
3694
|
handle = self._backend.interface.deliver_get_system_metadata()
|
3701
|
-
result = handle.wait(timeout=1)
|
3702
3695
|
|
3703
|
-
|
3704
|
-
|
3696
|
+
try:
|
3697
|
+
result = handle.wait_or(timeout=1)
|
3698
|
+
except TimeoutError:
|
3699
|
+
logger.error("Error getting run metadata: timeout")
|
3705
3700
|
return None
|
3706
3701
|
|
3707
3702
|
try:
|
wandb/sdk/wandb_settings.py
CHANGED
@@ -165,6 +165,9 @@ class Settings(BaseModel, validate_assignment=True):
|
|
165
165
|
entity: str | None = None
|
166
166
|
"""The W&B entity, such as a user or a team."""
|
167
167
|
|
168
|
+
organization: str | None = None
|
169
|
+
"""The W&B organization."""
|
170
|
+
|
168
171
|
force: bool = False
|
169
172
|
"""Whether to pass the `force` flag to `wandb.login()`."""
|
170
173
|
|
@@ -564,9 +567,6 @@ class Settings(BaseModel, validate_assignment=True):
|
|
564
567
|
x_service_wait: float = 30.0
|
565
568
|
"""Time in seconds to wait for the wandb-core internal service to start."""
|
566
569
|
|
567
|
-
x_show_operation_stats: bool = True
|
568
|
-
"""Whether to show statistics about internal operations such as data uploads."""
|
569
|
-
|
570
570
|
x_start_time: float | None = None
|
571
571
|
"""The start time of the run in seconds since the Unix epoch."""
|
572
572
|
|
wandb/sdk/wandb_setup.py
CHANGED
@@ -28,7 +28,6 @@ from .lib import config_util, server
|
|
28
28
|
|
29
29
|
if TYPE_CHECKING:
|
30
30
|
from wandb.sdk.lib.service_connection import ServiceConnection
|
31
|
-
from wandb.sdk.wandb_run import Run
|
32
31
|
from wandb.sdk.wandb_settings import Settings
|
33
32
|
|
34
33
|
|
@@ -90,9 +89,6 @@ class _WandbSetup:
|
|
90
89
|
self._server: server.Server | None = None
|
91
90
|
self._pid = pid
|
92
91
|
|
93
|
-
# keep track of multiple runs, so we can unwind with join()s
|
94
|
-
self._global_run_stack: list[Run] = []
|
95
|
-
|
96
92
|
# TODO(jhr): defer strict checks until settings are fully initialized
|
97
93
|
# and logging is ready
|
98
94
|
self._logger: Logger = _EarlyLogger()
|
@@ -298,18 +294,33 @@ def singleton() -> _WandbSetup | None:
|
|
298
294
|
return None
|
299
295
|
|
300
296
|
|
301
|
-
def _setup(
|
302
|
-
|
297
|
+
def _setup(
|
298
|
+
settings: Settings | None = None,
|
299
|
+
start_service: bool = True,
|
300
|
+
) -> _WandbSetup:
|
301
|
+
"""Set up library context.
|
302
|
+
|
303
|
+
Args:
|
304
|
+
settings: Global settings to set, or updates to the global settings
|
305
|
+
if the singleton has already been initialized.
|
306
|
+
start_service: Whether to start up the service process.
|
307
|
+
NOTE: A service process will only be started if allowed by the
|
308
|
+
global settings (after the given updates). The service will not
|
309
|
+
start up if the mode resolves to "disabled".
|
310
|
+
"""
|
303
311
|
global _singleton
|
304
312
|
|
305
313
|
pid = os.getpid()
|
306
314
|
|
307
315
|
if _singleton and _singleton._pid == pid:
|
308
316
|
_singleton._update(settings=settings)
|
309
|
-
return _singleton
|
310
317
|
else:
|
311
318
|
_singleton = _WandbSetup(settings=settings, pid=pid)
|
312
|
-
|
319
|
+
|
320
|
+
if start_service and not _singleton.settings._noop:
|
321
|
+
_singleton.ensure_service()
|
322
|
+
|
323
|
+
return _singleton
|
313
324
|
|
314
325
|
|
315
326
|
def setup(settings: Settings | None = None) -> _WandbSetup:
|
wandb/sdk/wandb_sync.py
CHANGED
@@ -6,8 +6,8 @@ from wandb.errors.term import termerror, termlog
|
|
6
6
|
|
7
7
|
from . import wandb_setup
|
8
8
|
from .backend.backend import Backend
|
9
|
-
from .lib.mailbox import Mailbox
|
10
9
|
from .lib.runid import generate_id
|
10
|
+
from .mailbox import Mailbox
|
11
11
|
|
12
12
|
if TYPE_CHECKING:
|
13
13
|
from wandb.proto import wandb_internal_pb2
|
@@ -60,10 +60,8 @@ def _sync(
|
|
60
60
|
assert backend.interface
|
61
61
|
backend.interface._stream_id = stream_id # type: ignore
|
62
62
|
|
63
|
-
mailbox.enable_keepalive()
|
64
63
|
handle = backend.interface.deliver_finish_sync()
|
65
|
-
result = handle.
|
66
|
-
assert result and result.response
|
64
|
+
result = handle.wait_or(timeout=None)
|
67
65
|
response = result.response.sync_response
|
68
66
|
if response.url:
|
69
67
|
termlog(f"Synced {p} to {util.app_url(response.url)}")
|
wandb/util.py
CHANGED
@@ -788,6 +788,8 @@ class JSONEncoderUncompressed(json.JSONEncoder):
|
|
788
788
|
def default(self, obj: Any) -> Any:
|
789
789
|
if is_numpy_array(obj):
|
790
790
|
return obj.tolist()
|
791
|
+
elif np and isinstance(obj, np.number):
|
792
|
+
return obj.item()
|
791
793
|
elif np and isinstance(obj, np.generic):
|
792
794
|
obj = obj.item()
|
793
795
|
return json.JSONEncoder.default(self, obj)
|
@@ -872,7 +874,7 @@ def parse_backend_error_messages(response: requests.Response) -> List[str]:
|
|
872
874
|
# Backend error values are returned in one of two ways:
|
873
875
|
# - A string containing the error message
|
874
876
|
# - A JSON object with a "message" field that is a string
|
875
|
-
def get_message(
|
877
|
+
def get_message(error: Any) -> Optional[str]:
|
876
878
|
if isinstance(error, str):
|
877
879
|
return error
|
878
880
|
elif (
|
{wandb-0.19.6.dist-info → wandb-0.19.7.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: wandb
|
3
|
-
Version: 0.19.6
|
3
|
+
Version: 0.19.7
|
4
4
|
Summary: A CLI and library for interacting with the Weights & Biases API.
|
5
5
|
Project-URL: Source, https://github.com/wandb/wandb
|
6
6
|
Project-URL: Bug Reports, https://github.com/wandb/wandb/issues
|
@@ -110,7 +110,7 @@ Requires-Dist: typing-extensions; extra == 'launch'
|
|
110
110
|
Provides-Extra: media
|
111
111
|
Requires-Dist: bokeh; extra == 'media'
|
112
112
|
Requires-Dist: imageio; extra == 'media'
|
113
|
-
Requires-Dist: moviepy; extra == 'media'
|
113
|
+
Requires-Dist: moviepy>=1.0.0; extra == 'media'
|
114
114
|
Requires-Dist: numpy; extra == 'media'
|
115
115
|
Requires-Dist: pillow; extra == 'media'
|
116
116
|
Requires-Dist: plotly>=5.18.0; extra == 'media'
|