wandb 0.19.6rc4__py3-none-musllinux_1_2_aarch64.whl → 0.19.8__py3-none-musllinux_1_2_aarch64.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- wandb/__init__.py +1 -1
- wandb/__init__.pyi +56 -6
- wandb/apis/public/_generated/__init__.py +21 -0
- wandb/apis/public/_generated/base.py +128 -0
- wandb/apis/public/_generated/enums.py +4 -0
- wandb/apis/public/_generated/input_types.py +4 -0
- wandb/apis/public/_generated/operations.py +15 -0
- wandb/apis/public/_generated/server_features_query.py +27 -0
- wandb/apis/public/_generated/typing_compat.py +14 -0
- wandb/apis/public/api.py +192 -6
- wandb/apis/public/artifacts.py +13 -45
- wandb/apis/public/registries.py +573 -0
- wandb/apis/public/utils.py +36 -0
- wandb/bin/gpu_stats +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/cli.py +11 -20
- wandb/data_types.py +1 -1
- wandb/env.py +10 -0
- wandb/filesync/dir_watcher.py +2 -1
- wandb/proto/v3/wandb_internal_pb2.py +243 -222
- wandb/proto/v3/wandb_server_pb2.py +4 -4
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_internal_pb2.py +226 -222
- wandb/proto/v4/wandb_server_pb2.py +4 -4
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v5/wandb_internal_pb2.py +226 -222
- wandb/proto/v5/wandb_server_pb2.py +4 -4
- wandb/proto/v5/wandb_settings_pb2.py +2 -2
- wandb/proto/v5/wandb_telemetry_pb2.py +10 -10
- wandb/sdk/artifacts/_graphql_fragments.py +126 -0
- wandb/sdk/artifacts/artifact.py +51 -95
- wandb/sdk/backend/backend.py +17 -6
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +14 -6
- wandb/sdk/data_types/helper_types/image_mask.py +12 -6
- wandb/sdk/data_types/saved_model.py +35 -46
- wandb/sdk/data_types/video.py +7 -16
- wandb/sdk/interface/interface.py +87 -49
- wandb/sdk/interface/interface_queue.py +5 -15
- wandb/sdk/interface/interface_relay.py +7 -22
- wandb/sdk/interface/interface_shared.py +65 -136
- wandb/sdk/interface/interface_sock.py +3 -21
- wandb/sdk/interface/router.py +42 -68
- wandb/sdk/interface/router_queue.py +13 -11
- wandb/sdk/interface/router_relay.py +26 -13
- wandb/sdk/interface/router_sock.py +12 -16
- wandb/sdk/internal/handler.py +4 -3
- wandb/sdk/internal/internal_api.py +12 -1
- wandb/sdk/internal/sender.py +3 -19
- wandb/sdk/lib/apikey.py +87 -26
- wandb/sdk/lib/asyncio_compat.py +210 -0
- wandb/sdk/lib/console_capture.py +172 -0
- wandb/sdk/lib/progress.py +78 -16
- wandb/sdk/lib/redirect.py +102 -76
- wandb/sdk/lib/service_connection.py +37 -17
- wandb/sdk/lib/sock_client.py +6 -56
- wandb/sdk/mailbox/__init__.py +23 -0
- wandb/sdk/mailbox/mailbox.py +135 -0
- wandb/sdk/mailbox/mailbox_handle.py +127 -0
- wandb/sdk/mailbox/response_handle.py +167 -0
- wandb/sdk/mailbox/wait_with_progress.py +135 -0
- wandb/sdk/service/server_sock.py +9 -3
- wandb/sdk/service/streams.py +75 -78
- wandb/sdk/verify/verify.py +54 -2
- wandb/sdk/wandb_init.py +72 -75
- wandb/sdk/wandb_login.py +7 -4
- wandb/sdk/wandb_metadata.py +65 -34
- wandb/sdk/wandb_require.py +14 -8
- wandb/sdk/wandb_run.py +90 -97
- wandb/sdk/wandb_settings.py +10 -4
- wandb/sdk/wandb_setup.py +19 -8
- wandb/sdk/wandb_sync.py +2 -10
- wandb/util.py +3 -1
- {wandb-0.19.6rc4.dist-info → wandb-0.19.8.dist-info}/METADATA +2 -2
- {wandb-0.19.6rc4.dist-info → wandb-0.19.8.dist-info}/RECORD +79 -66
- wandb/sdk/interface/message_future.py +0 -27
- wandb/sdk/interface/message_future_poll.py +0 -50
- wandb/sdk/lib/mailbox.py +0 -442
- {wandb-0.19.6rc4.dist-info → wandb-0.19.8.dist-info}/WHEEL +0 -0
- {wandb-0.19.6rc4.dist-info → wandb-0.19.8.dist-info}/entry_points.txt +0 -0
- {wandb-0.19.6rc4.dist-info → wandb-0.19.8.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/wandb_metadata.py
CHANGED
@@ -236,39 +236,6 @@ class Metadata(BaseModel, validate_assignment=True):
|
|
236
236
|
|
237
237
|
NOTE: Definitions must be kept in sync with wandb_internal.proto::MetadataRequest.
|
238
238
|
|
239
|
-
Attributes:
|
240
|
-
os: Operating system.
|
241
|
-
python: Python version.
|
242
|
-
heartbeat_at: Timestamp of last heartbeat.
|
243
|
-
started_at: Timestamp of run start.
|
244
|
-
docker: Docker image.
|
245
|
-
cuda: CUDA version.
|
246
|
-
args: Command-line arguments.
|
247
|
-
state: Run state.
|
248
|
-
program: Program name.
|
249
|
-
code_path: Path to code.
|
250
|
-
git: Git repository information.
|
251
|
-
email: Email address.
|
252
|
-
root: Root directory.
|
253
|
-
host: Host name.
|
254
|
-
username: Username.
|
255
|
-
executable: Python executable path.
|
256
|
-
code_path_local: Local code path.
|
257
|
-
colab: Colab URL.
|
258
|
-
cpu_count: CPU count.
|
259
|
-
cpu_count_logical: Logical CPU count.
|
260
|
-
gpu_type: GPU type.
|
261
|
-
disk: Disk information.
|
262
|
-
memory: Memory information.
|
263
|
-
cpu: CPU information.
|
264
|
-
apple: Apple silicon information.
|
265
|
-
gpu_nvidia: NVIDIA GPU information.
|
266
|
-
gpu_amd: AMD GPU information.
|
267
|
-
slurm: Slurm environment information.
|
268
|
-
cuda_version: CUDA version.
|
269
|
-
trainium: Trainium information.
|
270
|
-
tpu: TPU information.
|
271
|
-
|
272
239
|
Examples:
|
273
240
|
Update Run metadata:
|
274
241
|
|
@@ -296,44 +263,108 @@ class Metadata(BaseModel, validate_assignment=True):
|
|
296
263
|
```
|
297
264
|
"""
|
298
265
|
|
299
|
-
# TODO: Pydantic configuration.
|
300
266
|
model_config = ConfigDict(
|
301
267
|
extra="ignore", # ignore extra fields
|
302
268
|
validate_default=True, # validate default values
|
269
|
+
use_attribute_docstrings=True, # for field descriptions
|
270
|
+
revalidate_instances="always",
|
303
271
|
)
|
304
272
|
|
305
273
|
os: str | None = None
|
274
|
+
"""Operating system."""
|
275
|
+
|
306
276
|
python: str | None = None
|
277
|
+
"""Python version."""
|
278
|
+
|
307
279
|
heartbeat_at: datetime | None = Field(default=None, alias="heartbeatAt")
|
280
|
+
"""Timestamp of last heartbeat."""
|
281
|
+
|
308
282
|
started_at: datetime | None = Field(default=None, alias="startedAt")
|
283
|
+
"""Timestamp of run start."""
|
284
|
+
|
309
285
|
docker: str | None = None
|
286
|
+
"""Docker image."""
|
287
|
+
|
310
288
|
cuda: str | None = None
|
289
|
+
"""CUDA version."""
|
290
|
+
|
311
291
|
args: list[str] = Field(default_factory=list)
|
292
|
+
"""Command-line arguments."""
|
293
|
+
|
312
294
|
state: str | None = None
|
295
|
+
"""Run state."""
|
296
|
+
|
313
297
|
program: str | None = None
|
298
|
+
"""Program name."""
|
299
|
+
|
314
300
|
code_path: str | None = Field(default=None, alias="codePath")
|
301
|
+
"""Path to code."""
|
302
|
+
|
315
303
|
git: GitRepoRecord | None = None
|
304
|
+
"""Git repository information."""
|
305
|
+
|
316
306
|
email: str | None = None
|
307
|
+
"""Email address."""
|
308
|
+
|
317
309
|
root: str | None = None
|
310
|
+
"""Root directory."""
|
311
|
+
|
318
312
|
host: str | None = None
|
313
|
+
"""Host name."""
|
314
|
+
|
319
315
|
username: str | None = None
|
316
|
+
"""Username."""
|
317
|
+
|
320
318
|
executable: str | None = None
|
319
|
+
"""Python executable path."""
|
320
|
+
|
321
321
|
code_path_local: str | None = Field(default=None, alias="codePathLocal")
|
322
|
+
"""Local code path."""
|
323
|
+
|
322
324
|
colab: str | None = None
|
325
|
+
"""Colab URL."""
|
326
|
+
|
323
327
|
cpu_count: int | None = Field(default=None, alias="cpuCount")
|
328
|
+
"""CPU count."""
|
329
|
+
|
324
330
|
cpu_count_logical: int | None = Field(default=None, alias="cpuCountLogical")
|
331
|
+
"""Logical CPU count."""
|
332
|
+
|
325
333
|
gpu_type: str | None = Field(default=None, alias="gpuType")
|
334
|
+
"""GPU type."""
|
335
|
+
|
326
336
|
gpu_count: int | None = Field(default=None, alias="gpuCount")
|
337
|
+
"""GPU count."""
|
338
|
+
|
327
339
|
disk: dict[str, DiskInfo] = Field(default_factory=dict)
|
340
|
+
"""Disk information."""
|
341
|
+
|
328
342
|
memory: MemoryInfo | None = None
|
343
|
+
"""Memory information."""
|
344
|
+
|
329
345
|
cpu: CpuInfo | None = None
|
346
|
+
"""CPU information."""
|
347
|
+
|
330
348
|
apple: AppleInfo | None = None
|
349
|
+
"""Apple silicon information."""
|
350
|
+
|
331
351
|
gpu_nvidia: list[GpuNvidiaInfo] = Field(default_factory=list, alias="gpuNvidia")
|
352
|
+
"""NVIDIA GPU information."""
|
353
|
+
|
332
354
|
gpu_amd: list[GpuAmdInfo] = Field(default_factory=list, alias="gpuAmd")
|
355
|
+
"""AMD GPU information."""
|
356
|
+
|
333
357
|
slurm: dict[str, str] = Field(default_factory=dict)
|
358
|
+
"""Slurm environment information."""
|
359
|
+
|
334
360
|
cuda_version: str | None = Field(default=None, alias="cudaVersion")
|
361
|
+
"""CUDA version."""
|
362
|
+
|
335
363
|
trainium: TrainiumInfo | None = None
|
364
|
+
"""Trainium information."""
|
365
|
+
|
336
366
|
tpu: TPUInfo | None = None
|
367
|
+
"""TPU information."""
|
337
368
|
|
338
369
|
def __init__(self, **data):
|
339
370
|
super().__init__(**data)
|
wandb/sdk/wandb_require.py
CHANGED
@@ -9,8 +9,10 @@ Example:
|
|
9
9
|
wandb.require("incremental-artifacts@beta")
|
10
10
|
"""
|
11
11
|
|
12
|
+
from __future__ import annotations
|
13
|
+
|
12
14
|
import os
|
13
|
-
from typing import
|
15
|
+
from typing import Iterable
|
14
16
|
|
15
17
|
import wandb
|
16
18
|
from wandb.env import _REQUIRE_LEGACY_SERVICE
|
@@ -21,9 +23,9 @@ from wandb.sdk import wandb_run
|
|
21
23
|
class _Requires:
|
22
24
|
"""Internal feature class."""
|
23
25
|
|
24
|
-
_features:
|
26
|
+
_features: tuple[str, ...]
|
25
27
|
|
26
|
-
def __init__(self, features:
|
28
|
+
def __init__(self, features: str | Iterable[str]) -> None:
|
27
29
|
self._features = (
|
28
30
|
tuple([features]) if isinstance(features, str) else tuple(features)
|
29
31
|
)
|
@@ -67,17 +69,21 @@ class _Requires:
|
|
67
69
|
|
68
70
|
|
69
71
|
def require(
|
70
|
-
requirement:
|
71
|
-
experiment:
|
72
|
+
requirement: str | Iterable[str] | None = None,
|
73
|
+
experiment: str | Iterable[str] | None = None,
|
72
74
|
) -> None:
|
73
75
|
"""Indicate which experimental features are used by the script.
|
74
76
|
|
77
|
+
This should be called before any other `wandb` functions, ideally right
|
78
|
+
after importing `wandb`.
|
79
|
+
|
75
80
|
Args:
|
76
|
-
requirement:
|
77
|
-
|
81
|
+
requirement: The name of a feature to require or an iterable of
|
82
|
+
feature names.
|
83
|
+
experiment: An alias for `requirement`.
|
78
84
|
|
79
85
|
Raises:
|
80
|
-
wandb.errors.UnsupportedError:
|
86
|
+
wandb.errors.UnsupportedError: If a feature name is unknown.
|
81
87
|
"""
|
82
88
|
features = requirement or experiment
|
83
89
|
if not features:
|
wandb/sdk/wandb_run.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import asyncio
|
3
4
|
import atexit
|
4
5
|
import functools
|
5
6
|
import glob
|
@@ -43,6 +44,7 @@ from wandb.proto.wandb_internal_pb2 import (
|
|
43
44
|
)
|
44
45
|
from wandb.sdk.artifacts.artifact import Artifact
|
45
46
|
from wandb.sdk.internal import job_builder
|
47
|
+
from wandb.sdk.lib import asyncio_compat
|
46
48
|
from wandb.sdk.lib.import_hooks import (
|
47
49
|
register_post_import_hook,
|
48
50
|
unregister_post_import_hook,
|
@@ -82,7 +84,12 @@ from .lib import (
|
|
82
84
|
telemetry,
|
83
85
|
)
|
84
86
|
from .lib.exit_hooks import ExitHooks
|
85
|
-
from .
|
87
|
+
from .mailbox import (
|
88
|
+
HandleAbandonedError,
|
89
|
+
MailboxClosedError,
|
90
|
+
MailboxHandle,
|
91
|
+
wait_with_progress,
|
92
|
+
)
|
86
93
|
from .wandb_alerts import AlertLevel
|
87
94
|
from .wandb_metadata import Metadata
|
88
95
|
from .wandb_settings import Settings
|
@@ -150,11 +157,11 @@ class RunStatusChecker:
|
|
150
157
|
"""
|
151
158
|
|
152
159
|
_stop_status_lock: threading.Lock
|
153
|
-
_stop_status_handle: MailboxHandle | None
|
160
|
+
_stop_status_handle: MailboxHandle[Result] | None
|
154
161
|
_network_status_lock: threading.Lock
|
155
|
-
_network_status_handle: MailboxHandle | None
|
162
|
+
_network_status_handle: MailboxHandle[Result] | None
|
156
163
|
_internal_messages_lock: threading.Lock
|
157
|
-
_internal_messages_handle: MailboxHandle | None
|
164
|
+
_internal_messages_handle: MailboxHandle[Result] | None
|
158
165
|
|
159
166
|
def __init__(
|
160
167
|
self,
|
@@ -202,7 +209,7 @@ class RunStatusChecker:
|
|
202
209
|
@staticmethod
|
203
210
|
def _abandon_status_check(
|
204
211
|
lock: threading.Lock,
|
205
|
-
handle: MailboxHandle | None,
|
212
|
+
handle: MailboxHandle[Result] | None,
|
206
213
|
):
|
207
214
|
with lock:
|
208
215
|
if handle:
|
@@ -217,35 +224,36 @@ class RunStatusChecker:
|
|
217
224
|
request: Any,
|
218
225
|
process: Any,
|
219
226
|
) -> None:
|
220
|
-
local_handle: MailboxHandle | None = None
|
227
|
+
local_handle: MailboxHandle[Result] | None = None
|
221
228
|
join_requested = False
|
222
229
|
while not join_requested:
|
223
230
|
time_probe = time.monotonic()
|
224
231
|
if not local_handle:
|
225
|
-
|
232
|
+
try:
|
233
|
+
local_handle = request()
|
234
|
+
except MailboxClosedError:
|
235
|
+
# This can happen if the service process dies.
|
236
|
+
break
|
226
237
|
assert local_handle
|
227
238
|
|
228
239
|
with lock:
|
229
240
|
if self._join_event.is_set():
|
230
241
|
break
|
231
242
|
set_handle(local_handle)
|
243
|
+
|
232
244
|
try:
|
233
|
-
result = local_handle.
|
234
|
-
except
|
235
|
-
#
|
236
|
-
# from the internal process but the internal process could
|
237
|
-
# be shutdown at any time. In this case assume that the
|
238
|
-
# thread should exit silently. This is possible
|
239
|
-
# because we do not have an atexit handler for the user
|
240
|
-
# process which quiesces active threads.
|
245
|
+
result = local_handle.wait_or(timeout=timeout)
|
246
|
+
except HandleAbandonedError:
|
247
|
+
# This can happen if the service process dies.
|
241
248
|
break
|
249
|
+
except TimeoutError:
|
250
|
+
result = None
|
251
|
+
|
242
252
|
with lock:
|
243
253
|
set_handle(None)
|
244
254
|
|
245
255
|
if result:
|
246
256
|
process(result)
|
247
|
-
# if request finished, clear the handle to send on the next interval
|
248
|
-
local_handle.abandon()
|
249
257
|
local_handle = None
|
250
258
|
|
251
259
|
time_elapsed = time.monotonic() - time_probe
|
@@ -534,7 +542,7 @@ class Run:
|
|
534
542
|
|
535
543
|
_sampled_history: SampledHistoryResponse | None
|
536
544
|
_final_summary: GetSummaryResponse | None
|
537
|
-
_poll_exit_handle: MailboxHandle | None
|
545
|
+
_poll_exit_handle: MailboxHandle[Result] | None
|
538
546
|
_poll_exit_response: PollExitResponse | None
|
539
547
|
_internal_messages_response: InternalMessagesResponse | None
|
540
548
|
|
@@ -1300,12 +1308,6 @@ class Run:
|
|
1300
1308
|
if self._backend and self._backend.interface:
|
1301
1309
|
self._backend.interface.publish_summary(self, summary_record)
|
1302
1310
|
|
1303
|
-
def _on_progress_get_summary(self, handle: MailboxProgress) -> None:
|
1304
|
-
pass
|
1305
|
-
# TODO(jhr): enable printing for get_summary in later mailbox dev phase
|
1306
|
-
# line = "Waiting for run.summary data..."
|
1307
|
-
# self._printer.display(line)
|
1308
|
-
|
1309
1311
|
def _summary_get_current_summary_callback(self) -> dict[str, Any]:
|
1310
1312
|
if self._is_finished:
|
1311
1313
|
# TODO: WB-18420: fetch summary from backend and stage it before run is finished
|
@@ -1314,12 +1316,12 @@ class Run:
|
|
1314
1316
|
if not self._backend or not self._backend.interface:
|
1315
1317
|
return {}
|
1316
1318
|
handle = self._backend.interface.deliver_get_summary()
|
1317
|
-
|
1318
|
-
|
1319
|
-
|
1320
|
-
|
1321
|
-
if not result:
|
1319
|
+
|
1320
|
+
try:
|
1321
|
+
result = handle.wait_or(timeout=self._settings.summary_timeout)
|
1322
|
+
except TimeoutError:
|
1322
1323
|
return {}
|
1324
|
+
|
1323
1325
|
get_summary_response = result.response.get_summary_response
|
1324
1326
|
return proto_util.dict_from_proto_list(get_summary_response.item)
|
1325
1327
|
|
@@ -1330,7 +1332,7 @@ class Run:
|
|
1330
1332
|
def _datatypes_callback(self, fname: str) -> None:
|
1331
1333
|
if not self._backend or not self._backend.interface:
|
1332
1334
|
return
|
1333
|
-
files: FilesDict = dict(files=[(GlobStr(
|
1335
|
+
files: FilesDict = dict(files=[(GlobStr(fname), "now")])
|
1334
1336
|
self._backend.interface.publish_files(files)
|
1335
1337
|
|
1336
1338
|
def _pop_all_charts(
|
@@ -1503,7 +1505,7 @@ class Run:
|
|
1503
1505
|
if run_obj.notes:
|
1504
1506
|
self._settings.run_notes = run_obj.notes
|
1505
1507
|
if run_obj.tags:
|
1506
|
-
self._settings.run_tags = run_obj.tags
|
1508
|
+
self._settings.run_tags = tuple(run_obj.tags)
|
1507
1509
|
if run_obj.sweep_id:
|
1508
1510
|
self._settings.sweep_id = run_obj.sweep_id
|
1509
1511
|
if run_obj.host:
|
@@ -1643,7 +1645,7 @@ class Run:
|
|
1643
1645
|
[guides to logging](https://docs.wandb.ai/guides/track/log) for examples,
|
1644
1646
|
from 3D molecular structures and segmentation masks to PR curves and histograms.
|
1645
1647
|
You can use `wandb.Table` to log structured data. See our
|
1646
|
-
[guide to logging tables](https://docs.wandb.ai/guides/tables/tables-walkthrough)
|
1648
|
+
[guide to logging tables](https://docs.wandb.ai/guides/models/tables/tables-walkthrough)
|
1647
1649
|
for details.
|
1648
1650
|
|
1649
1651
|
The W&B UI organizes metrics with a forward slash (`/`) in their name
|
@@ -2111,15 +2113,6 @@ class Run:
|
|
2111
2113
|
with telemetry.context(run=self) as tel:
|
2112
2114
|
tel.feature.finish = True
|
2113
2115
|
|
2114
|
-
# Pop this run (hopefully) from the run stack, to support the "reinit"
|
2115
|
-
# functionality of wandb.init().
|
2116
|
-
#
|
2117
|
-
# TODO: It's not clear how _global_run_stack could have length other
|
2118
|
-
# than 1 at this point in the code. If you're reading this, consider
|
2119
|
-
# refactoring this thing.
|
2120
|
-
if self._wl and len(self._wl._global_run_stack) > 0:
|
2121
|
-
self._wl._global_run_stack.pop()
|
2122
|
-
|
2123
2116
|
# Run hooks that need to happen before the last messages to the
|
2124
2117
|
# internal service, like Jupyter hooks.
|
2125
2118
|
for hook in self._teardown_hooks:
|
@@ -2175,8 +2168,7 @@ class Run:
|
|
2175
2168
|
return RunStatus()
|
2176
2169
|
|
2177
2170
|
handle_run_status = self._backend.interface.deliver_request_run_status()
|
2178
|
-
result = handle_run_status.
|
2179
|
-
assert result
|
2171
|
+
result = handle_run_status.wait_or(timeout=None)
|
2180
2172
|
sync_data = result.response.run_status_response
|
2181
2173
|
|
2182
2174
|
sync_time = None
|
@@ -2316,8 +2308,6 @@ class Run:
|
|
2316
2308
|
raise ValueError("unhandled console")
|
2317
2309
|
try:
|
2318
2310
|
# save stdout and stderr before installing new write functions
|
2319
|
-
out_redir.save()
|
2320
|
-
err_redir.save()
|
2321
2311
|
out_redir.install()
|
2322
2312
|
err_redir.install()
|
2323
2313
|
self._out_redir = out_redir
|
@@ -2563,31 +2553,38 @@ class Run:
|
|
2563
2553
|
else:
|
2564
2554
|
return artifact
|
2565
2555
|
|
2566
|
-
def
|
2567
|
-
handle = probe_handle.get_mailbox_handle()
|
2568
|
-
if handle:
|
2569
|
-
result = handle.wait(timeout=0, release=False)
|
2570
|
-
if not result:
|
2571
|
-
return
|
2572
|
-
probe_handle.set_probe_result(result)
|
2573
|
-
assert self._backend and self._backend.interface
|
2574
|
-
handle = self._backend.interface.deliver_poll_exit()
|
2575
|
-
probe_handle.set_mailbox_handle(handle)
|
2576
|
-
|
2577
|
-
def _on_progress_exit(
|
2556
|
+
async def _display_finish_stats(
|
2578
2557
|
self,
|
2579
2558
|
progress_printer: progress.ProgressPrinter,
|
2580
|
-
progress_handle: MailboxProgress,
|
2581
2559
|
) -> None:
|
2582
|
-
|
2583
|
-
if not probe_handles or len(probe_handles) != 1:
|
2584
|
-
return
|
2560
|
+
last_result: Result | None = None
|
2585
2561
|
|
2586
|
-
|
2587
|
-
|
2588
|
-
|
2562
|
+
async def loop_update_printer() -> None:
|
2563
|
+
while True:
|
2564
|
+
if last_result:
|
2565
|
+
progress_printer.update(
|
2566
|
+
[last_result.response.poll_exit_response],
|
2567
|
+
)
|
2568
|
+
await asyncio.sleep(0.1)
|
2569
|
+
|
2570
|
+
async def loop_poll_exit() -> None:
|
2571
|
+
nonlocal last_result
|
2572
|
+
assert self._backend and self._backend.interface
|
2573
|
+
|
2574
|
+
while True:
|
2575
|
+
handle = self._backend.interface.deliver_poll_exit()
|
2576
|
+
|
2577
|
+
time_start = time.monotonic()
|
2578
|
+
last_result = await handle.wait_async(timeout=None)
|
2589
2579
|
|
2590
|
-
|
2580
|
+
# Update at most once a second.
|
2581
|
+
time_elapsed = time.monotonic() - time_start
|
2582
|
+
if time_elapsed < 1:
|
2583
|
+
await asyncio.sleep(1 - time_elapsed)
|
2584
|
+
|
2585
|
+
async with asyncio_compat.open_task_group() as task_group:
|
2586
|
+
task_group.start_soon(loop_update_printer())
|
2587
|
+
task_group.start_soon(loop_poll_exit())
|
2591
2588
|
|
2592
2589
|
def _on_finish(self) -> None:
|
2593
2590
|
trigger.call("on_finished")
|
@@ -2604,25 +2601,24 @@ class Run:
|
|
2604
2601
|
else:
|
2605
2602
|
exit_handle = self._backend.interface.deliver_finish_without_exit()
|
2606
2603
|
|
2607
|
-
exit_handle.add_probe(on_probe=self._on_probe_exit)
|
2608
|
-
|
2609
2604
|
with progress.progress_printer(
|
2610
2605
|
self._printer,
|
2611
|
-
|
2606
|
+
default_text="Finishing up...",
|
2612
2607
|
) as progress_printer:
|
2613
2608
|
# Wait for the run to complete.
|
2614
|
-
|
2615
|
-
|
2616
|
-
|
2617
|
-
|
2609
|
+
wait_with_progress(
|
2610
|
+
exit_handle,
|
2611
|
+
timeout=None,
|
2612
|
+
progress_after=1,
|
2613
|
+
display_progress=functools.partial(
|
2614
|
+
self._display_finish_stats,
|
2618
2615
|
progress_printer,
|
2619
2616
|
),
|
2620
2617
|
)
|
2621
2618
|
|
2622
2619
|
# Print some final statistics.
|
2623
2620
|
poll_exit_handle = self._backend.interface.deliver_poll_exit()
|
2624
|
-
result = poll_exit_handle.
|
2625
|
-
assert result
|
2621
|
+
result = poll_exit_handle.wait_or(timeout=None)
|
2626
2622
|
progress.print_sync_dedupe_stats(
|
2627
2623
|
self._printer,
|
2628
2624
|
result.response.poll_exit_response,
|
@@ -2630,8 +2626,7 @@ class Run:
|
|
2630
2626
|
|
2631
2627
|
self._poll_exit_response = result.response.poll_exit_response
|
2632
2628
|
internal_messages_handle = self._backend.interface.deliver_internal_messages()
|
2633
|
-
result = internal_messages_handle.
|
2634
|
-
assert result
|
2629
|
+
result = internal_messages_handle.wait_or(timeout=None)
|
2635
2630
|
self._internal_messages_response = result.response.internal_messages_response
|
2636
2631
|
|
2637
2632
|
# dispatch all our final requests
|
@@ -2641,12 +2636,10 @@ class Run:
|
|
2641
2636
|
self._backend.interface.deliver_request_sampled_history()
|
2642
2637
|
)
|
2643
2638
|
|
2644
|
-
result = sampled_history_handle.
|
2645
|
-
assert result
|
2639
|
+
result = sampled_history_handle.wait_or(timeout=None)
|
2646
2640
|
self._sampled_history = result.response.sampled_history_response
|
2647
2641
|
|
2648
|
-
result = final_summary_handle.
|
2649
|
-
assert result
|
2642
|
+
result = final_summary_handle.wait_or(timeout=None)
|
2650
2643
|
self._final_summary = result.response.get_summary_response
|
2651
2644
|
|
2652
2645
|
if self._backend:
|
@@ -2952,13 +2945,10 @@ class Run:
|
|
2952
2945
|
wandb.termwarn(
|
2953
2946
|
"Artifact TTL will be disabled for source artifacts that are linked to portfolios."
|
2954
2947
|
)
|
2955
|
-
result = handle.
|
2956
|
-
|
2957
|
-
|
2958
|
-
|
2959
|
-
response = result.response.link_artifact_response
|
2960
|
-
if response.error_message:
|
2961
|
-
wandb.termerror(response.error_message)
|
2948
|
+
result = handle.wait_or(timeout=None)
|
2949
|
+
response = result.response.link_artifact_response
|
2950
|
+
if response.error_message:
|
2951
|
+
wandb.termerror(response.error_message)
|
2962
2952
|
|
2963
2953
|
@_run_decorator._noop_on_finish()
|
2964
2954
|
@_run_decorator._attach
|
@@ -3258,7 +3248,7 @@ class Run:
|
|
3258
3248
|
self._assert_can_log_artifact(artifact)
|
3259
3249
|
if self._backend and self._backend.interface:
|
3260
3250
|
if not self._settings._offline:
|
3261
|
-
|
3251
|
+
handle = self._backend.interface.deliver_artifact(
|
3262
3252
|
self,
|
3263
3253
|
artifact,
|
3264
3254
|
aliases,
|
@@ -3268,7 +3258,7 @@ class Run:
|
|
3268
3258
|
is_user_created=is_user_created,
|
3269
3259
|
use_after_commit=use_after_commit,
|
3270
3260
|
)
|
3271
|
-
artifact.
|
3261
|
+
artifact._set_save_handle(handle, self._public_api().client)
|
3272
3262
|
else:
|
3273
3263
|
self._backend.interface.publish_artifact(
|
3274
3264
|
self,
|
@@ -3670,16 +3660,18 @@ class Run:
|
|
3670
3660
|
return {}
|
3671
3661
|
|
3672
3662
|
handle = self._backend.interface.deliver_get_system_metrics()
|
3673
|
-
result = handle.wait(timeout=1)
|
3674
3663
|
|
3675
|
-
|
3664
|
+
try:
|
3665
|
+
result = handle.wait_or(timeout=1)
|
3666
|
+
except TimeoutError:
|
3667
|
+
return {}
|
3668
|
+
else:
|
3676
3669
|
try:
|
3677
3670
|
response = result.response.get_system_metrics_response
|
3678
|
-
if response
|
3679
|
-
return pb_to_dict(response)
|
3671
|
+
return pb_to_dict(response) if response else {}
|
3680
3672
|
except Exception as e:
|
3681
3673
|
logger.error("Error getting system metrics: %s", e)
|
3682
|
-
|
3674
|
+
return {}
|
3683
3675
|
|
3684
3676
|
@property
|
3685
3677
|
@_run_decorator._attach
|
@@ -3698,10 +3690,11 @@ class Run:
|
|
3698
3690
|
self.__metadata._set_callback(self._metadata_callback)
|
3699
3691
|
|
3700
3692
|
handle = self._backend.interface.deliver_get_system_metadata()
|
3701
|
-
result = handle.wait(timeout=1)
|
3702
3693
|
|
3703
|
-
|
3704
|
-
|
3694
|
+
try:
|
3695
|
+
result = handle.wait_or(timeout=1)
|
3696
|
+
except TimeoutError:
|
3697
|
+
logger.error("Error getting run metadata: timeout")
|
3705
3698
|
return None
|
3706
3699
|
|
3707
3700
|
try:
|
wandb/sdk/wandb_settings.py
CHANGED
@@ -165,6 +165,9 @@ class Settings(BaseModel, validate_assignment=True):
|
|
165
165
|
entity: str | None = None
|
166
166
|
"""The W&B entity, such as a user or a team."""
|
167
167
|
|
168
|
+
organization: str | None = None
|
169
|
+
"""The W&B organization."""
|
170
|
+
|
168
171
|
force: bool = False
|
169
172
|
"""Whether to pass the `force` flag to `wandb.login()`."""
|
170
173
|
|
@@ -558,22 +561,25 @@ class Settings(BaseModel, validate_assignment=True):
|
|
558
561
|
x_save_requirements: bool = True
|
559
562
|
"""Flag to save the requirements file."""
|
560
563
|
|
564
|
+
x_server_side_derived_summary: bool = False
|
565
|
+
"""Flag to delegate automatic computation of summary from history to the server.
|
566
|
+
|
567
|
+
This does not disable user-provided summary updates.
|
568
|
+
"""
|
569
|
+
|
561
570
|
x_service_transport: str | None = None
|
562
571
|
"""Transport method for communication with the wandb service."""
|
563
572
|
|
564
573
|
x_service_wait: float = 30.0
|
565
574
|
"""Time in seconds to wait for the wandb-core internal service to start."""
|
566
575
|
|
567
|
-
x_show_operation_stats: bool = True
|
568
|
-
"""Whether to show statistics about internal operations such as data uploads."""
|
569
|
-
|
570
576
|
x_start_time: float | None = None
|
571
577
|
"""The start time of the run in seconds since the Unix epoch."""
|
572
578
|
|
573
579
|
x_stats_pid: int = os.getpid()
|
574
580
|
"""PID of the process that started the wandb-core process to collect system stats for."""
|
575
581
|
|
576
|
-
x_stats_sampling_interval: float = Field(default=
|
582
|
+
x_stats_sampling_interval: float = Field(default=15.0)
|
577
583
|
"""Sampling interval for the system monitor in seconds."""
|
578
584
|
|
579
585
|
x_stats_neuron_monitor_config_path: str | None = None
|
wandb/sdk/wandb_setup.py
CHANGED
@@ -28,7 +28,6 @@ from .lib import config_util, server
|
|
28
28
|
|
29
29
|
if TYPE_CHECKING:
|
30
30
|
from wandb.sdk.lib.service_connection import ServiceConnection
|
31
|
-
from wandb.sdk.wandb_run import Run
|
32
31
|
from wandb.sdk.wandb_settings import Settings
|
33
32
|
|
34
33
|
|
@@ -90,9 +89,6 @@ class _WandbSetup:
|
|
90
89
|
self._server: server.Server | None = None
|
91
90
|
self._pid = pid
|
92
91
|
|
93
|
-
# keep track of multiple runs, so we can unwind with join()s
|
94
|
-
self._global_run_stack: list[Run] = []
|
95
|
-
|
96
92
|
# TODO(jhr): defer strict checks until settings are fully initialized
|
97
93
|
# and logging is ready
|
98
94
|
self._logger: Logger = _EarlyLogger()
|
@@ -298,18 +294,33 @@ def singleton() -> _WandbSetup | None:
|
|
298
294
|
return None
|
299
295
|
|
300
296
|
|
301
|
-
def _setup(
|
302
|
-
|
297
|
+
def _setup(
|
298
|
+
settings: Settings | None = None,
|
299
|
+
start_service: bool = True,
|
300
|
+
) -> _WandbSetup:
|
301
|
+
"""Set up library context.
|
302
|
+
|
303
|
+
Args:
|
304
|
+
settings: Global settings to set, or updates to the global settings
|
305
|
+
if the singleton has already been initialized.
|
306
|
+
start_service: Whether to start up the service process.
|
307
|
+
NOTE: A service process will only be started if allowed by the
|
308
|
+
global settings (after the given updates). The service will not
|
309
|
+
start up if the mode resolves to "disabled".
|
310
|
+
"""
|
303
311
|
global _singleton
|
304
312
|
|
305
313
|
pid = os.getpid()
|
306
314
|
|
307
315
|
if _singleton and _singleton._pid == pid:
|
308
316
|
_singleton._update(settings=settings)
|
309
|
-
return _singleton
|
310
317
|
else:
|
311
318
|
_singleton = _WandbSetup(settings=settings, pid=pid)
|
312
|
-
|
319
|
+
|
320
|
+
if start_service and not _singleton.settings._noop:
|
321
|
+
_singleton.ensure_service()
|
322
|
+
|
323
|
+
return _singleton
|
313
324
|
|
314
325
|
|
315
326
|
def setup(settings: Settings | None = None) -> _WandbSetup:
|