wandb 0.19.6rc4__py3-none-macosx_11_0_arm64.whl → 0.19.8__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. wandb/__init__.py +1 -1
  2. wandb/__init__.pyi +56 -6
  3. wandb/apis/public/_generated/__init__.py +21 -0
  4. wandb/apis/public/_generated/base.py +128 -0
  5. wandb/apis/public/_generated/enums.py +4 -0
  6. wandb/apis/public/_generated/input_types.py +4 -0
  7. wandb/apis/public/_generated/operations.py +15 -0
  8. wandb/apis/public/_generated/server_features_query.py +27 -0
  9. wandb/apis/public/_generated/typing_compat.py +14 -0
  10. wandb/apis/public/api.py +192 -6
  11. wandb/apis/public/artifacts.py +13 -45
  12. wandb/apis/public/registries.py +573 -0
  13. wandb/apis/public/utils.py +36 -0
  14. wandb/bin/gpu_stats +0 -0
  15. wandb/bin/wandb-core +0 -0
  16. wandb/cli/cli.py +11 -20
  17. wandb/data_types.py +1 -1
  18. wandb/env.py +10 -0
  19. wandb/filesync/dir_watcher.py +2 -1
  20. wandb/proto/v3/wandb_internal_pb2.py +243 -222
  21. wandb/proto/v3/wandb_server_pb2.py +4 -4
  22. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  23. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  24. wandb/proto/v4/wandb_internal_pb2.py +226 -222
  25. wandb/proto/v4/wandb_server_pb2.py +4 -4
  26. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  27. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  28. wandb/proto/v5/wandb_internal_pb2.py +226 -222
  29. wandb/proto/v5/wandb_server_pb2.py +4 -4
  30. wandb/proto/v5/wandb_settings_pb2.py +2 -2
  31. wandb/proto/v5/wandb_telemetry_pb2.py +10 -10
  32. wandb/sdk/artifacts/_graphql_fragments.py +126 -0
  33. wandb/sdk/artifacts/artifact.py +51 -95
  34. wandb/sdk/backend/backend.py +17 -6
  35. wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +14 -6
  36. wandb/sdk/data_types/helper_types/image_mask.py +12 -6
  37. wandb/sdk/data_types/saved_model.py +35 -46
  38. wandb/sdk/data_types/video.py +7 -16
  39. wandb/sdk/interface/interface.py +87 -49
  40. wandb/sdk/interface/interface_queue.py +5 -15
  41. wandb/sdk/interface/interface_relay.py +7 -22
  42. wandb/sdk/interface/interface_shared.py +65 -136
  43. wandb/sdk/interface/interface_sock.py +3 -21
  44. wandb/sdk/interface/router.py +42 -68
  45. wandb/sdk/interface/router_queue.py +13 -11
  46. wandb/sdk/interface/router_relay.py +26 -13
  47. wandb/sdk/interface/router_sock.py +12 -16
  48. wandb/sdk/internal/handler.py +4 -3
  49. wandb/sdk/internal/internal_api.py +12 -1
  50. wandb/sdk/internal/sender.py +3 -19
  51. wandb/sdk/lib/apikey.py +87 -26
  52. wandb/sdk/lib/asyncio_compat.py +210 -0
  53. wandb/sdk/lib/console_capture.py +172 -0
  54. wandb/sdk/lib/progress.py +78 -16
  55. wandb/sdk/lib/redirect.py +102 -76
  56. wandb/sdk/lib/service_connection.py +37 -17
  57. wandb/sdk/lib/sock_client.py +6 -56
  58. wandb/sdk/mailbox/__init__.py +23 -0
  59. wandb/sdk/mailbox/mailbox.py +135 -0
  60. wandb/sdk/mailbox/mailbox_handle.py +127 -0
  61. wandb/sdk/mailbox/response_handle.py +167 -0
  62. wandb/sdk/mailbox/wait_with_progress.py +135 -0
  63. wandb/sdk/service/server_sock.py +9 -3
  64. wandb/sdk/service/streams.py +75 -78
  65. wandb/sdk/verify/verify.py +54 -2
  66. wandb/sdk/wandb_init.py +72 -75
  67. wandb/sdk/wandb_login.py +7 -4
  68. wandb/sdk/wandb_metadata.py +65 -34
  69. wandb/sdk/wandb_require.py +14 -8
  70. wandb/sdk/wandb_run.py +90 -97
  71. wandb/sdk/wandb_settings.py +10 -4
  72. wandb/sdk/wandb_setup.py +19 -8
  73. wandb/sdk/wandb_sync.py +2 -10
  74. wandb/util.py +3 -1
  75. {wandb-0.19.6rc4.dist-info → wandb-0.19.8.dist-info}/METADATA +2 -2
  76. {wandb-0.19.6rc4.dist-info → wandb-0.19.8.dist-info}/RECORD +79 -66
  77. wandb/sdk/interface/message_future.py +0 -27
  78. wandb/sdk/interface/message_future_poll.py +0 -50
  79. wandb/sdk/lib/mailbox.py +0 -442
  80. {wandb-0.19.6rc4.dist-info → wandb-0.19.8.dist-info}/WHEEL +0 -0
  81. {wandb-0.19.6rc4.dist-info → wandb-0.19.8.dist-info}/entry_points.txt +0 -0
  82. {wandb-0.19.6rc4.dist-info → wandb-0.19.8.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/wandb_metadata.py CHANGED
@@ -236,39 +236,6 @@ class Metadata(BaseModel, validate_assignment=True):
236
236
 
237
237
  NOTE: Definitions must be kept in sync with wandb_internal.proto::MetadataRequest.
238
238
 
239
- Attributes:
240
- os: Operating system.
241
- python: Python version.
242
- heartbeat_at: Timestamp of last heartbeat.
243
- started_at: Timestamp of run start.
244
- docker: Docker image.
245
- cuda: CUDA version.
246
- args: Command-line arguments.
247
- state: Run state.
248
- program: Program name.
249
- code_path: Path to code.
250
- git: Git repository information.
251
- email: Email address.
252
- root: Root directory.
253
- host: Host name.
254
- username: Username.
255
- executable: Python executable path.
256
- code_path_local: Local code path.
257
- colab: Colab URL.
258
- cpu_count: CPU count.
259
- cpu_count_logical: Logical CPU count.
260
- gpu_type: GPU type.
261
- disk: Disk information.
262
- memory: Memory information.
263
- cpu: CPU information.
264
- apple: Apple silicon information.
265
- gpu_nvidia: NVIDIA GPU information.
266
- gpu_amd: AMD GPU information.
267
- slurm: Slurm environment information.
268
- cuda_version: CUDA version.
269
- trainium: Trainium information.
270
- tpu: TPU information.
271
-
272
239
  Examples:
273
240
  Update Run metadata:
274
241
 
@@ -296,44 +263,108 @@ class Metadata(BaseModel, validate_assignment=True):
296
263
  ```
297
264
  """
298
265
 
299
- # TODO: Pydantic configuration.
300
266
  model_config = ConfigDict(
301
267
  extra="ignore", # ignore extra fields
302
268
  validate_default=True, # validate default values
269
+ use_attribute_docstrings=True, # for field descriptions
270
+ revalidate_instances="always",
303
271
  )
304
272
 
305
273
  os: str | None = None
274
+ """Operating system."""
275
+
306
276
  python: str | None = None
277
+ """Python version."""
278
+
307
279
  heartbeat_at: datetime | None = Field(default=None, alias="heartbeatAt")
280
+ """Timestamp of last heartbeat."""
281
+
308
282
  started_at: datetime | None = Field(default=None, alias="startedAt")
283
+ """Timestamp of run start."""
284
+
309
285
  docker: str | None = None
286
+ """Docker image."""
287
+
310
288
  cuda: str | None = None
289
+ """CUDA version."""
290
+
311
291
  args: list[str] = Field(default_factory=list)
292
+ """Command-line arguments."""
293
+
312
294
  state: str | None = None
295
+ """Run state."""
296
+
313
297
  program: str | None = None
298
+ """Program name."""
299
+
314
300
  code_path: str | None = Field(default=None, alias="codePath")
301
+ """Path to code."""
302
+
315
303
  git: GitRepoRecord | None = None
304
+ """Git repository information."""
305
+
316
306
  email: str | None = None
307
+ """Email address."""
308
+
317
309
  root: str | None = None
310
+ """Root directory."""
311
+
318
312
  host: str | None = None
313
+ """Host name."""
314
+
319
315
  username: str | None = None
316
+ """Username."""
317
+
320
318
  executable: str | None = None
319
+ """Python executable path."""
320
+
321
321
  code_path_local: str | None = Field(default=None, alias="codePathLocal")
322
+ """Local code path."""
323
+
322
324
  colab: str | None = None
325
+ """Colab URL."""
326
+
323
327
  cpu_count: int | None = Field(default=None, alias="cpuCount")
328
+ """CPU count."""
329
+
324
330
  cpu_count_logical: int | None = Field(default=None, alias="cpuCountLogical")
331
+ """Logical CPU count."""
332
+
325
333
  gpu_type: str | None = Field(default=None, alias="gpuType")
334
+ """GPU type."""
335
+
326
336
  gpu_count: int | None = Field(default=None, alias="gpuCount")
337
+ """GPU count."""
338
+
327
339
  disk: dict[str, DiskInfo] = Field(default_factory=dict)
340
+ """Disk information."""
341
+
328
342
  memory: MemoryInfo | None = None
343
+ """Memory information."""
344
+
329
345
  cpu: CpuInfo | None = None
346
+ """CPU information."""
347
+
330
348
  apple: AppleInfo | None = None
349
+ """Apple silicon information."""
350
+
331
351
  gpu_nvidia: list[GpuNvidiaInfo] = Field(default_factory=list, alias="gpuNvidia")
352
+ """NVIDIA GPU information."""
353
+
332
354
  gpu_amd: list[GpuAmdInfo] = Field(default_factory=list, alias="gpuAmd")
355
+ """AMD GPU information."""
356
+
333
357
  slurm: dict[str, str] = Field(default_factory=dict)
358
+ """Slurm environment information."""
359
+
334
360
  cuda_version: str | None = Field(default=None, alias="cudaVersion")
361
+ """CUDA version."""
362
+
335
363
  trainium: TrainiumInfo | None = None
364
+ """Trainium information."""
365
+
336
366
  tpu: TPUInfo | None = None
367
+ """TPU information."""
337
368
 
338
369
  def __init__(self, **data):
339
370
  super().__init__(**data)
wandb/sdk/wandb_require.py CHANGED
@@ -9,8 +9,10 @@ Example:
9
9
  wandb.require("incremental-artifacts@beta")
10
10
  """
11
11
 
12
+ from __future__ import annotations
13
+
12
14
  import os
13
- from typing import Optional, Sequence, Union
15
+ from typing import Iterable
14
16
 
15
17
  import wandb
16
18
  from wandb.env import _REQUIRE_LEGACY_SERVICE
@@ -21,9 +23,9 @@ from wandb.sdk import wandb_run
21
23
  class _Requires:
22
24
  """Internal feature class."""
23
25
 
24
- _features: Sequence[str]
26
+ _features: tuple[str, ...]
25
27
 
26
- def __init__(self, features: Union[str, Sequence[str]]) -> None:
28
+ def __init__(self, features: str | Iterable[str]) -> None:
27
29
  self._features = (
28
30
  tuple([features]) if isinstance(features, str) else tuple(features)
29
31
  )
@@ -67,17 +69,21 @@ class _Requires:
67
69
 
68
70
 
69
71
  def require(
70
- requirement: Optional[Union[str, Sequence[str]]] = None,
71
- experiment: Optional[Union[str, Sequence[str]]] = None,
72
+ requirement: str | Iterable[str] | None = None,
73
+ experiment: str | Iterable[str] | None = None,
72
74
  ) -> None:
73
75
  """Indicate which experimental features are used by the script.
74
76
 
77
+ This should be called before any other `wandb` functions, ideally right
78
+ after importing `wandb`.
79
+
75
80
  Args:
76
- requirement: (str or list) Features to require
77
- experiment: (str or list) Features to require
81
+ requirement: The name of a feature to require or an iterable of
82
+ feature names.
83
+ experiment: An alias for `requirement`.
78
84
 
79
85
  Raises:
80
- wandb.errors.UnsupportedError: if not supported
86
+ wandb.errors.UnsupportedError: If a feature name is unknown.
81
87
  """
82
88
  features = requirement or experiment
83
89
  if not features:
wandb/sdk/wandb_run.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import asyncio
3
4
  import atexit
4
5
  import functools
5
6
  import glob
@@ -43,6 +44,7 @@ from wandb.proto.wandb_internal_pb2 import (
43
44
  )
44
45
  from wandb.sdk.artifacts.artifact import Artifact
45
46
  from wandb.sdk.internal import job_builder
47
+ from wandb.sdk.lib import asyncio_compat
46
48
  from wandb.sdk.lib.import_hooks import (
47
49
  register_post_import_hook,
48
50
  unregister_post_import_hook,
@@ -82,7 +84,12 @@ from .lib import (
82
84
  telemetry,
83
85
  )
84
86
  from .lib.exit_hooks import ExitHooks
85
- from .lib.mailbox import MailboxError, MailboxHandle, MailboxProbe, MailboxProgress
87
+ from .mailbox import (
88
+ HandleAbandonedError,
89
+ MailboxClosedError,
90
+ MailboxHandle,
91
+ wait_with_progress,
92
+ )
86
93
  from .wandb_alerts import AlertLevel
87
94
  from .wandb_metadata import Metadata
88
95
  from .wandb_settings import Settings
@@ -150,11 +157,11 @@ class RunStatusChecker:
150
157
  """
151
158
 
152
159
  _stop_status_lock: threading.Lock
153
- _stop_status_handle: MailboxHandle | None
160
+ _stop_status_handle: MailboxHandle[Result] | None
154
161
  _network_status_lock: threading.Lock
155
- _network_status_handle: MailboxHandle | None
162
+ _network_status_handle: MailboxHandle[Result] | None
156
163
  _internal_messages_lock: threading.Lock
157
- _internal_messages_handle: MailboxHandle | None
164
+ _internal_messages_handle: MailboxHandle[Result] | None
158
165
 
159
166
  def __init__(
160
167
  self,
@@ -202,7 +209,7 @@ class RunStatusChecker:
202
209
  @staticmethod
203
210
  def _abandon_status_check(
204
211
  lock: threading.Lock,
205
- handle: MailboxHandle | None,
212
+ handle: MailboxHandle[Result] | None,
206
213
  ):
207
214
  with lock:
208
215
  if handle:
@@ -217,35 +224,36 @@ class RunStatusChecker:
217
224
  request: Any,
218
225
  process: Any,
219
226
  ) -> None:
220
- local_handle: MailboxHandle | None = None
227
+ local_handle: MailboxHandle[Result] | None = None
221
228
  join_requested = False
222
229
  while not join_requested:
223
230
  time_probe = time.monotonic()
224
231
  if not local_handle:
225
- local_handle = request()
232
+ try:
233
+ local_handle = request()
234
+ except MailboxClosedError:
235
+ # This can happen if the service process dies.
236
+ break
226
237
  assert local_handle
227
238
 
228
239
  with lock:
229
240
  if self._join_event.is_set():
230
241
  break
231
242
  set_handle(local_handle)
243
+
232
244
  try:
233
- result = local_handle.wait(timeout=timeout, release=False)
234
- except MailboxError:
235
- # background threads are oportunistically getting results
236
- # from the internal process but the internal process could
237
- # be shutdown at any time. In this case assume that the
238
- # thread should exit silently. This is possible
239
- # because we do not have an atexit handler for the user
240
- # process which quiesces active threads.
245
+ result = local_handle.wait_or(timeout=timeout)
246
+ except HandleAbandonedError:
247
+ # This can happen if the service process dies.
241
248
  break
249
+ except TimeoutError:
250
+ result = None
251
+
242
252
  with lock:
243
253
  set_handle(None)
244
254
 
245
255
  if result:
246
256
  process(result)
247
- # if request finished, clear the handle to send on the next interval
248
- local_handle.abandon()
249
257
  local_handle = None
250
258
 
251
259
  time_elapsed = time.monotonic() - time_probe
@@ -534,7 +542,7 @@ class Run:
534
542
 
535
543
  _sampled_history: SampledHistoryResponse | None
536
544
  _final_summary: GetSummaryResponse | None
537
- _poll_exit_handle: MailboxHandle | None
545
+ _poll_exit_handle: MailboxHandle[Result] | None
538
546
  _poll_exit_response: PollExitResponse | None
539
547
  _internal_messages_response: InternalMessagesResponse | None
540
548
 
@@ -1300,12 +1308,6 @@ class Run:
1300
1308
  if self._backend and self._backend.interface:
1301
1309
  self._backend.interface.publish_summary(self, summary_record)
1302
1310
 
1303
- def _on_progress_get_summary(self, handle: MailboxProgress) -> None:
1304
- pass
1305
- # TODO(jhr): enable printing for get_summary in later mailbox dev phase
1306
- # line = "Waiting for run.summary data..."
1307
- # self._printer.display(line)
1308
-
1309
1311
  def _summary_get_current_summary_callback(self) -> dict[str, Any]:
1310
1312
  if self._is_finished:
1311
1313
  # TODO: WB-18420: fetch summary from backend and stage it before run is finished
@@ -1314,12 +1316,12 @@ class Run:
1314
1316
  if not self._backend or not self._backend.interface:
1315
1317
  return {}
1316
1318
  handle = self._backend.interface.deliver_get_summary()
1317
- result = handle.wait(
1318
- timeout=self._settings.summary_timeout,
1319
- on_progress=self._on_progress_get_summary,
1320
- )
1321
- if not result:
1319
+
1320
+ try:
1321
+ result = handle.wait_or(timeout=self._settings.summary_timeout)
1322
+ except TimeoutError:
1322
1323
  return {}
1324
+
1323
1325
  get_summary_response = result.response.get_summary_response
1324
1326
  return proto_util.dict_from_proto_list(get_summary_response.item)
1325
1327
 
@@ -1330,7 +1332,7 @@ class Run:
1330
1332
  def _datatypes_callback(self, fname: str) -> None:
1331
1333
  if not self._backend or not self._backend.interface:
1332
1334
  return
1333
- files: FilesDict = dict(files=[(GlobStr(glob.escape(fname)), "now")])
1335
+ files: FilesDict = dict(files=[(GlobStr(fname), "now")])
1334
1336
  self._backend.interface.publish_files(files)
1335
1337
 
1336
1338
  def _pop_all_charts(
@@ -1503,7 +1505,7 @@ class Run:
1503
1505
  if run_obj.notes:
1504
1506
  self._settings.run_notes = run_obj.notes
1505
1507
  if run_obj.tags:
1506
- self._settings.run_tags = run_obj.tags
1508
+ self._settings.run_tags = tuple(run_obj.tags)
1507
1509
  if run_obj.sweep_id:
1508
1510
  self._settings.sweep_id = run_obj.sweep_id
1509
1511
  if run_obj.host:
@@ -1643,7 +1645,7 @@ class Run:
1643
1645
  [guides to logging](https://docs.wandb.ai/guides/track/log) for examples,
1644
1646
  from 3D molecular structures and segmentation masks to PR curves and histograms.
1645
1647
  You can use `wandb.Table` to log structured data. See our
1646
- [guide to logging tables](https://docs.wandb.ai/guides/tables/tables-walkthrough)
1648
+ [guide to logging tables](https://docs.wandb.ai/guides/models/tables/tables-walkthrough)
1647
1649
  for details.
1648
1650
 
1649
1651
  The W&B UI organizes metrics with a forward slash (`/`) in their name
@@ -2111,15 +2113,6 @@ class Run:
2111
2113
  with telemetry.context(run=self) as tel:
2112
2114
  tel.feature.finish = True
2113
2115
 
2114
- # Pop this run (hopefully) from the run stack, to support the "reinit"
2115
- # functionality of wandb.init().
2116
- #
2117
- # TODO: It's not clear how _global_run_stack could have length other
2118
- # than 1 at this point in the code. If you're reading this, consider
2119
- # refactoring this thing.
2120
- if self._wl and len(self._wl._global_run_stack) > 0:
2121
- self._wl._global_run_stack.pop()
2122
-
2123
2116
  # Run hooks that need to happen before the last messages to the
2124
2117
  # internal service, like Jupyter hooks.
2125
2118
  for hook in self._teardown_hooks:
@@ -2175,8 +2168,7 @@ class Run:
2175
2168
  return RunStatus()
2176
2169
 
2177
2170
  handle_run_status = self._backend.interface.deliver_request_run_status()
2178
- result = handle_run_status.wait(timeout=-1)
2179
- assert result
2171
+ result = handle_run_status.wait_or(timeout=None)
2180
2172
  sync_data = result.response.run_status_response
2181
2173
 
2182
2174
  sync_time = None
@@ -2316,8 +2308,6 @@ class Run:
2316
2308
  raise ValueError("unhandled console")
2317
2309
  try:
2318
2310
  # save stdout and stderr before installing new write functions
2319
- out_redir.save()
2320
- err_redir.save()
2321
2311
  out_redir.install()
2322
2312
  err_redir.install()
2323
2313
  self._out_redir = out_redir
@@ -2563,31 +2553,38 @@ class Run:
2563
2553
  else:
2564
2554
  return artifact
2565
2555
 
2566
- def _on_probe_exit(self, probe_handle: MailboxProbe) -> None:
2567
- handle = probe_handle.get_mailbox_handle()
2568
- if handle:
2569
- result = handle.wait(timeout=0, release=False)
2570
- if not result:
2571
- return
2572
- probe_handle.set_probe_result(result)
2573
- assert self._backend and self._backend.interface
2574
- handle = self._backend.interface.deliver_poll_exit()
2575
- probe_handle.set_mailbox_handle(handle)
2576
-
2577
- def _on_progress_exit(
2556
+ async def _display_finish_stats(
2578
2557
  self,
2579
2558
  progress_printer: progress.ProgressPrinter,
2580
- progress_handle: MailboxProgress,
2581
2559
  ) -> None:
2582
- probe_handles = progress_handle.get_probe_handles()
2583
- if not probe_handles or len(probe_handles) != 1:
2584
- return
2560
+ last_result: Result | None = None
2585
2561
 
2586
- result = probe_handles[0].get_probe_result()
2587
- if not result:
2588
- return
2562
+ async def loop_update_printer() -> None:
2563
+ while True:
2564
+ if last_result:
2565
+ progress_printer.update(
2566
+ [last_result.response.poll_exit_response],
2567
+ )
2568
+ await asyncio.sleep(0.1)
2569
+
2570
+ async def loop_poll_exit() -> None:
2571
+ nonlocal last_result
2572
+ assert self._backend and self._backend.interface
2573
+
2574
+ while True:
2575
+ handle = self._backend.interface.deliver_poll_exit()
2576
+
2577
+ time_start = time.monotonic()
2578
+ last_result = await handle.wait_async(timeout=None)
2589
2579
 
2590
- progress_printer.update([result.response.poll_exit_response])
2580
+ # Update at most once a second.
2581
+ time_elapsed = time.monotonic() - time_start
2582
+ if time_elapsed < 1:
2583
+ await asyncio.sleep(1 - time_elapsed)
2584
+
2585
+ async with asyncio_compat.open_task_group() as task_group:
2586
+ task_group.start_soon(loop_update_printer())
2587
+ task_group.start_soon(loop_poll_exit())
2591
2588
 
2592
2589
  def _on_finish(self) -> None:
2593
2590
  trigger.call("on_finished")
@@ -2604,25 +2601,24 @@ class Run:
2604
2601
  else:
2605
2602
  exit_handle = self._backend.interface.deliver_finish_without_exit()
2606
2603
 
2607
- exit_handle.add_probe(on_probe=self._on_probe_exit)
2608
-
2609
2604
  with progress.progress_printer(
2610
2605
  self._printer,
2611
- self._settings,
2606
+ default_text="Finishing up...",
2612
2607
  ) as progress_printer:
2613
2608
  # Wait for the run to complete.
2614
- _ = exit_handle.wait(
2615
- timeout=-1,
2616
- on_progress=functools.partial(
2617
- self._on_progress_exit,
2609
+ wait_with_progress(
2610
+ exit_handle,
2611
+ timeout=None,
2612
+ progress_after=1,
2613
+ display_progress=functools.partial(
2614
+ self._display_finish_stats,
2618
2615
  progress_printer,
2619
2616
  ),
2620
2617
  )
2621
2618
 
2622
2619
  # Print some final statistics.
2623
2620
  poll_exit_handle = self._backend.interface.deliver_poll_exit()
2624
- result = poll_exit_handle.wait(timeout=-1)
2625
- assert result
2621
+ result = poll_exit_handle.wait_or(timeout=None)
2626
2622
  progress.print_sync_dedupe_stats(
2627
2623
  self._printer,
2628
2624
  result.response.poll_exit_response,
@@ -2630,8 +2626,7 @@ class Run:
2630
2626
 
2631
2627
  self._poll_exit_response = result.response.poll_exit_response
2632
2628
  internal_messages_handle = self._backend.interface.deliver_internal_messages()
2633
- result = internal_messages_handle.wait(timeout=-1)
2634
- assert result
2629
+ result = internal_messages_handle.wait_or(timeout=None)
2635
2630
  self._internal_messages_response = result.response.internal_messages_response
2636
2631
 
2637
2632
  # dispatch all our final requests
@@ -2641,12 +2636,10 @@ class Run:
2641
2636
  self._backend.interface.deliver_request_sampled_history()
2642
2637
  )
2643
2638
 
2644
- result = sampled_history_handle.wait(timeout=-1)
2645
- assert result
2639
+ result = sampled_history_handle.wait_or(timeout=None)
2646
2640
  self._sampled_history = result.response.sampled_history_response
2647
2641
 
2648
- result = final_summary_handle.wait(timeout=-1)
2649
- assert result
2642
+ result = final_summary_handle.wait_or(timeout=None)
2650
2643
  self._final_summary = result.response.get_summary_response
2651
2644
 
2652
2645
  if self._backend:
@@ -2952,13 +2945,10 @@ class Run:
2952
2945
  wandb.termwarn(
2953
2946
  "Artifact TTL will be disabled for source artifacts that are linked to portfolios."
2954
2947
  )
2955
- result = handle.wait(timeout=-1)
2956
- if result is None:
2957
- handle.abandon()
2958
- else:
2959
- response = result.response.link_artifact_response
2960
- if response.error_message:
2961
- wandb.termerror(response.error_message)
2948
+ result = handle.wait_or(timeout=None)
2949
+ response = result.response.link_artifact_response
2950
+ if response.error_message:
2951
+ wandb.termerror(response.error_message)
2962
2952
 
2963
2953
  @_run_decorator._noop_on_finish()
2964
2954
  @_run_decorator._attach
@@ -3258,7 +3248,7 @@ class Run:
3258
3248
  self._assert_can_log_artifact(artifact)
3259
3249
  if self._backend and self._backend.interface:
3260
3250
  if not self._settings._offline:
3261
- future = self._backend.interface.communicate_artifact(
3251
+ handle = self._backend.interface.deliver_artifact(
3262
3252
  self,
3263
3253
  artifact,
3264
3254
  aliases,
@@ -3268,7 +3258,7 @@ class Run:
3268
3258
  is_user_created=is_user_created,
3269
3259
  use_after_commit=use_after_commit,
3270
3260
  )
3271
- artifact._set_save_future(future, self._public_api().client)
3261
+ artifact._set_save_handle(handle, self._public_api().client)
3272
3262
  else:
3273
3263
  self._backend.interface.publish_artifact(
3274
3264
  self,
@@ -3670,16 +3660,18 @@ class Run:
3670
3660
  return {}
3671
3661
 
3672
3662
  handle = self._backend.interface.deliver_get_system_metrics()
3673
- result = handle.wait(timeout=1)
3674
3663
 
3675
- if result:
3664
+ try:
3665
+ result = handle.wait_or(timeout=1)
3666
+ except TimeoutError:
3667
+ return {}
3668
+ else:
3676
3669
  try:
3677
3670
  response = result.response.get_system_metrics_response
3678
- if response:
3679
- return pb_to_dict(response)
3671
+ return pb_to_dict(response) if response else {}
3680
3672
  except Exception as e:
3681
3673
  logger.error("Error getting system metrics: %s", e)
3682
- return {}
3674
+ return {}
3683
3675
 
3684
3676
  @property
3685
3677
  @_run_decorator._attach
@@ -3698,10 +3690,11 @@ class Run:
3698
3690
  self.__metadata._set_callback(self._metadata_callback)
3699
3691
 
3700
3692
  handle = self._backend.interface.deliver_get_system_metadata()
3701
- result = handle.wait(timeout=1)
3702
3693
 
3703
- if not result:
3704
- logger.error("Error getting run metadata: no result")
3694
+ try:
3695
+ result = handle.wait_or(timeout=1)
3696
+ except TimeoutError:
3697
+ logger.error("Error getting run metadata: timeout")
3705
3698
  return None
3706
3699
 
3707
3700
  try:
wandb/sdk/wandb_settings.py CHANGED
@@ -165,6 +165,9 @@ class Settings(BaseModel, validate_assignment=True):
165
165
  entity: str | None = None
166
166
  """The W&B entity, such as a user or a team."""
167
167
 
168
+ organization: str | None = None
169
+ """The W&B organization."""
170
+
168
171
  force: bool = False
169
172
  """Whether to pass the `force` flag to `wandb.login()`."""
170
173
 
@@ -558,22 +561,25 @@ class Settings(BaseModel, validate_assignment=True):
558
561
  x_save_requirements: bool = True
559
562
  """Flag to save the requirements file."""
560
563
 
564
+ x_server_side_derived_summary: bool = False
565
+ """Flag to delegate automatic computation of summary from history to the server.
566
+
567
+ This does not disable user-provided summary updates.
568
+ """
569
+
561
570
  x_service_transport: str | None = None
562
571
  """Transport method for communication with the wandb service."""
563
572
 
564
573
  x_service_wait: float = 30.0
565
574
  """Time in seconds to wait for the wandb-core internal service to start."""
566
575
 
567
- x_show_operation_stats: bool = True
568
- """Whether to show statistics about internal operations such as data uploads."""
569
-
570
576
  x_start_time: float | None = None
571
577
  """The start time of the run in seconds since the Unix epoch."""
572
578
 
573
579
  x_stats_pid: int = os.getpid()
574
580
  """PID of the process that started the wandb-core process to collect system stats for."""
575
581
 
576
- x_stats_sampling_interval: float = Field(default=10.0)
582
+ x_stats_sampling_interval: float = Field(default=15.0)
577
583
  """Sampling interval for the system monitor in seconds."""
578
584
 
579
585
  x_stats_neuron_monitor_config_path: str | None = None
wandb/sdk/wandb_setup.py CHANGED
@@ -28,7 +28,6 @@ from .lib import config_util, server
28
28
 
29
29
  if TYPE_CHECKING:
30
30
  from wandb.sdk.lib.service_connection import ServiceConnection
31
- from wandb.sdk.wandb_run import Run
32
31
  from wandb.sdk.wandb_settings import Settings
33
32
 
34
33
 
@@ -90,9 +89,6 @@ class _WandbSetup:
90
89
  self._server: server.Server | None = None
91
90
  self._pid = pid
92
91
 
93
- # keep track of multiple runs, so we can unwind with join()s
94
- self._global_run_stack: list[Run] = []
95
-
96
92
  # TODO(jhr): defer strict checks until settings are fully initialized
97
93
  # and logging is ready
98
94
  self._logger: Logger = _EarlyLogger()
@@ -298,18 +294,33 @@ def singleton() -> _WandbSetup | None:
298
294
  return None
299
295
 
300
296
 
301
- def _setup(settings: Settings | None = None) -> _WandbSetup:
302
- """Set up library context."""
297
+ def _setup(
298
+ settings: Settings | None = None,
299
+ start_service: bool = True,
300
+ ) -> _WandbSetup:
301
+ """Set up library context.
302
+
303
+ Args:
304
+ settings: Global settings to set, or updates to the global settings
305
+ if the singleton has already been initialized.
306
+ start_service: Whether to start up the service process.
307
+ NOTE: A service process will only be started if allowed by the
308
+ global settings (after the given updates). The service will not
309
+ start up if the mode resolves to "disabled".
310
+ """
303
311
  global _singleton
304
312
 
305
313
  pid = os.getpid()
306
314
 
307
315
  if _singleton and _singleton._pid == pid:
308
316
  _singleton._update(settings=settings)
309
- return _singleton
310
317
  else:
311
318
  _singleton = _WandbSetup(settings=settings, pid=pid)
312
- return _singleton
319
+
320
+ if start_service and not _singleton.settings._noop:
321
+ _singleton.ensure_service()
322
+
323
+ return _singleton
313
324
 
314
325
 
315
326
  def setup(settings: Settings | None = None) -> _WandbSetup: