wandb 0.20.1__py3-none-win32.whl → 0.20.2rc20250616__py3-none-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. wandb/__init__.py +3 -6
  2. wandb/__init__.pyi +1 -1
  3. wandb/analytics/sentry.py +2 -2
  4. wandb/apis/importers/internals/internal.py +0 -3
  5. wandb/apis/public/api.py +2 -2
  6. wandb/apis/public/registries/{utils.py → _utils.py} +12 -12
  7. wandb/apis/public/registries/registries_search.py +2 -2
  8. wandb/apis/public/registries/registry.py +19 -18
  9. wandb/bin/gpu_stats.exe +0 -0
  10. wandb/bin/wandb-core +0 -0
  11. wandb/cli/beta.py +1 -7
  12. wandb/cli/cli.py +0 -30
  13. wandb/env.py +0 -6
  14. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  15. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  16. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  17. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  18. wandb/proto/v5/wandb_settings_pb2.py +2 -2
  19. wandb/proto/v5/wandb_telemetry_pb2.py +10 -10
  20. wandb/proto/v6/wandb_settings_pb2.py +2 -2
  21. wandb/proto/v6/wandb_telemetry_pb2.py +10 -10
  22. wandb/sdk/artifacts/storage_handlers/s3_handler.py +42 -1
  23. wandb/sdk/backend/backend.py +1 -1
  24. wandb/sdk/internal/handler.py +1 -69
  25. wandb/sdk/lib/printer.py +6 -7
  26. wandb/sdk/lib/progress.py +1 -3
  27. wandb/sdk/lib/service/ipc_support.py +13 -0
  28. wandb/sdk/lib/{service_connection.py → service/service_connection.py} +20 -56
  29. wandb/sdk/lib/service/service_port_file.py +105 -0
  30. wandb/sdk/lib/service/service_process.py +111 -0
  31. wandb/sdk/lib/service/service_token.py +164 -0
  32. wandb/sdk/lib/sock_client.py +8 -12
  33. wandb/sdk/wandb_init.py +0 -3
  34. wandb/sdk/wandb_require.py +9 -20
  35. wandb/sdk/wandb_run.py +0 -24
  36. wandb/sdk/wandb_settings.py +0 -9
  37. wandb/sdk/wandb_setup.py +2 -13
  38. {wandb-0.20.1.dist-info → wandb-0.20.2rc20250616.dist-info}/METADATA +1 -3
  39. {wandb-0.20.1.dist-info → wandb-0.20.2rc20250616.dist-info}/RECORD +42 -68
  40. wandb/sdk/internal/flow_control.py +0 -263
  41. wandb/sdk/internal/internal.py +0 -401
  42. wandb/sdk/internal/internal_util.py +0 -97
  43. wandb/sdk/internal/system/__init__.py +0 -0
  44. wandb/sdk/internal/system/assets/__init__.py +0 -25
  45. wandb/sdk/internal/system/assets/aggregators.py +0 -31
  46. wandb/sdk/internal/system/assets/asset_registry.py +0 -20
  47. wandb/sdk/internal/system/assets/cpu.py +0 -163
  48. wandb/sdk/internal/system/assets/disk.py +0 -210
  49. wandb/sdk/internal/system/assets/gpu.py +0 -416
  50. wandb/sdk/internal/system/assets/gpu_amd.py +0 -233
  51. wandb/sdk/internal/system/assets/interfaces.py +0 -205
  52. wandb/sdk/internal/system/assets/ipu.py +0 -177
  53. wandb/sdk/internal/system/assets/memory.py +0 -166
  54. wandb/sdk/internal/system/assets/network.py +0 -125
  55. wandb/sdk/internal/system/assets/open_metrics.py +0 -293
  56. wandb/sdk/internal/system/assets/tpu.py +0 -154
  57. wandb/sdk/internal/system/assets/trainium.py +0 -393
  58. wandb/sdk/internal/system/env_probe_helpers.py +0 -13
  59. wandb/sdk/internal/system/system_info.py +0 -248
  60. wandb/sdk/internal/system/system_monitor.py +0 -224
  61. wandb/sdk/internal/writer.py +0 -204
  62. wandb/sdk/lib/service_token.py +0 -93
  63. wandb/sdk/service/__init__.py +0 -0
  64. wandb/sdk/service/_startup_debug.py +0 -22
  65. wandb/sdk/service/port_file.py +0 -53
  66. wandb/sdk/service/server.py +0 -107
  67. wandb/sdk/service/server_sock.py +0 -286
  68. wandb/sdk/service/service.py +0 -252
  69. wandb/sdk/service/streams.py +0 -425
  70. {wandb-0.20.1.dist-info → wandb-0.20.2rc20250616.dist-info}/WHEEL +0 -0
  71. {wandb-0.20.1.dist-info → wandb-0.20.2rc20250616.dist-info}/entry_points.txt +0 -0
  72. {wandb-0.20.1.dist-info → wandb-0.20.2rc20250616.dist-info}/licenses/LICENSE +0 -0
@@ -1,425 +0,0 @@
1
- """streams: class that manages internal threads for each run.
2
-
3
- StreamThread: Thread that runs internal.wandb_internal()
4
- StreamRecord: All the external state for the internal thread (queues, etc)
5
- StreamAction: Lightweight record for stream ops for thread safety
6
- StreamMux: Container for dictionary of stream threads per runid
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- import asyncio
12
- import functools
13
- import queue
14
- import threading
15
- import time
16
- from threading import Event
17
- from typing import Any, Callable, NoReturn
18
-
19
- import psutil
20
-
21
- from wandb.proto import wandb_internal_pb2 as pb
22
- from wandb.sdk.interface.interface_relay import InterfaceRelay
23
- from wandb.sdk.interface.router_relay import MessageRelayRouter
24
- from wandb.sdk.internal.internal import wandb_internal
25
- from wandb.sdk.internal.settings_static import SettingsStatic
26
- from wandb.sdk.lib import asyncio_compat, progress
27
- from wandb.sdk.lib import printer as printerlib
28
- from wandb.sdk.mailbox import Mailbox, MailboxHandle, wait_all_with_progress
29
- from wandb.sdk.wandb_run import Run
30
-
31
-
32
class StreamThread(threading.Thread):
    """Daemon thread that runs the internal process entry point.

    The callable and its keyword arguments are stored on the instance and
    invoked exactly once from ``run()``.
    """

    def __init__(self, target: Callable, kwargs: dict[str, Any]) -> None:
        """Store the entry point and configure the thread.

        Args:
            target: Callable executed by ``run()``.
            kwargs: Keyword arguments passed to ``target``.
        """
        super().__init__()
        self._target, self._kwargs = target, kwargs
        self.name = "StreamThr"
        self.daemon = True

    def run(self) -> None:
        """Invoke the stored callable with the stored keyword arguments."""
        # TODO: catch exceptions and report errors to scheduler
        entry_point, call_kwargs = self._target, self._kwargs
        entry_point(**call_kwargs)
45
-
46
-
47
class StreamRecord:
    """All the external state for one internal stream thread (queues, etc).

    Creates the record/result/relay queues and a mailbox, and wires them into
    a ``MessageRelayRouter`` and an ``InterfaceRelay``. The thread itself is
    attached later through ``start_thread()``.
    """

    _record_q: queue.Queue[pb.Record]
    _result_q: queue.Queue[pb.Result]
    _relay_q: queue.Queue[pb.Result]
    _iface: InterfaceRelay
    # Stays None until start_thread() runs, so join() can be called safely on
    # a record whose thread was never started.
    _thread: StreamThread | None = None
    _settings: SettingsStatic
    _started: bool

    def __init__(self, settings: SettingsStatic) -> None:
        """Build the queues, mailbox, router and interface for one stream.

        Args:
            settings: Settings stored on the record for this stream.
        """
        self._started = False
        self._thread = None
        self._mailbox = Mailbox()
        self._record_q = queue.Queue()
        self._result_q = queue.Queue()
        self._relay_q = queue.Queue()
        self._router = MessageRelayRouter(
            request_queue=self._record_q,
            response_queue=self._result_q,
            relay_queue=self._relay_q,
            mailbox=self._mailbox,
        )
        self._iface = InterfaceRelay(
            record_q=self._record_q,
            result_q=self._result_q,
            relay_q=self._relay_q,
            mailbox=self._mailbox,
        )
        self._settings = settings

    def start_thread(self, thread: StreamThread) -> None:
        """Attach and start ``thread``, then block until it answers a status request."""
        self._thread = thread
        thread.start()
        self._wait_thread_active()

    def _wait_thread_active(self) -> None:
        """Deliver a status request and wait, without timeout, for its result."""
        self._iface.deliver_status().wait_or(timeout=None)

    def join(self) -> None:
        """Join the interface, the router and, if one was started, the thread."""
        self._iface.join()
        self._router.join()
        if self._thread is not None:
            self._thread.join()

    def drop(self) -> None:
        """Set the interface's ``_drop`` flag."""
        self._iface._drop = True

    @property
    def interface(self) -> InterfaceRelay:
        """The ``InterfaceRelay`` bound to this stream's queues."""
        return self._iface

    def mark_started(self) -> None:
        """Record that the stream has been started."""
        self._started = True

    def update(self, settings: SettingsStatic) -> None:
        """Replace the stored settings with ``settings``."""
        # Note: Currently just overriding the _settings attribute
        # once we use Settings Class we might want to properly update it
        self._settings = settings
104
-
105
-
106
class StreamAction:
    """Lightweight record for one stream operation, used for thread safety.

    Carries the action name, the target stream id, an optional payload and an
    event that is set once the action has been handled.
    """

    _action: str
    _stream_id: str
    _processed: Event
    _data: Any

    def __init__(self, action: str, stream_id: str, data: Any | None = None):
        """Store the action description and create an unset "processed" event.

        Args:
            action: Name of the operation.
            stream_id: Identifier of the stream the operation targets.
            data: Optional payload for the operation.
        """
        self._processed = Event()
        self._data = data
        self._stream_id = stream_id
        self._action = action

    def __repr__(self) -> str:
        return f"StreamAction({self._action},{self._stream_id})"

    def wait_handled(self) -> None:
        """Block until ``set_handled()`` has been called."""
        self._processed.wait()

    def set_handled(self) -> None:
        """Mark the action as handled, releasing any waiter."""
        self._processed.set()

    @property
    def stream_id(self) -> str:
        """Identifier of the stream this action targets."""
        return self._stream_id
130
-
131
-
132
class StreamMux:
    """Container for the dictionary of stream records, keyed by stream id.

    Public mutators (``add_stream``, ``start_stream``, ...) enqueue a
    ``StreamAction`` and block until it has been handled. The actions are
    consumed and applied one at a time by ``loop()``. Read accessors take
    ``_streams_lock`` and inspect the dictionary directly.
    """

    _streams_lock: threading.Lock
    _streams: dict[str, StreamRecord]
    _port: int | None
    _pid: int | None
    _action_q: queue.Queue[StreamAction]
    _stopped: Event
    # time.monotonic() value of the last parent-pid liveness check, if any.
    _pid_checked_ts: float | None

    def __init__(self) -> None:
        self._streams_lock = threading.Lock()
        self._streams = {}
        self._port = None
        self._pid = None
        self._stopped = Event()
        self._action_q = queue.Queue()
        self._pid_checked_ts = None

    def _get_stopped_event(self) -> Event:
        """Return the event that is set when the mux should stop."""
        # TODO: clean this up, there should be a better way to abstract this
        return self._stopped

    def set_port(self, port: int) -> None:
        """Store the port passed to stream threads created afterwards."""
        self._port = port

    def set_pid(self, pid: int) -> None:
        """Store the pid that is monitored for orphan detection."""
        self._pid = pid

    def _enqueue_and_wait(
        self,
        action_name: str,
        stream_id: str,
        data: Any | None = None,
    ) -> None:
        """Queue an action for the loop and block until it is marked handled."""
        action = StreamAction(action=action_name, stream_id=stream_id, data=data)
        self._action_q.put(action)
        action.wait_handled()

    def add_stream(self, stream_id: str, settings: SettingsStatic) -> None:
        """Request creation of a stream and wait until it has been handled."""
        self._enqueue_and_wait("add", stream_id, settings)

    def start_stream(self, stream_id: str) -> None:
        """Request that a stream be marked started and wait for handling."""
        self._enqueue_and_wait("start", stream_id)

    def update_stream(self, stream_id: str, settings: SettingsStatic) -> None:
        """Request a settings replacement for a stream and wait for handling."""
        self._enqueue_and_wait("update", stream_id, settings)

    def del_stream(self, stream_id: str) -> None:
        """Request removal and join of a stream and wait for handling."""
        self._enqueue_and_wait("del", stream_id)

    def drop_stream(self, stream_id: str) -> None:
        """Request that a stream be dropped, if present, and wait for handling."""
        self._enqueue_and_wait("drop", stream_id)

    def teardown(self, exit_code: int) -> None:
        """Request that all streams be finished with ``exit_code`` and wait."""
        self._enqueue_and_wait("teardown", "na", exit_code)

    def stream_names(self) -> list[str]:
        """Return a snapshot of the currently registered stream ids."""
        with self._streams_lock:
            return list(self._streams)

    def has_stream(self, stream_id: str) -> bool:
        """Return whether a stream with this id is registered."""
        with self._streams_lock:
            return stream_id in self._streams

    def get_stream(self, stream_id: str) -> StreamRecord:
        """Returns the StreamRecord for the ID.

        Raises:
            KeyError: If a corresponding StreamRecord does not exist.
        """
        with self._streams_lock:
            return self._streams[stream_id]

    def _process_add(self, action: StreamAction) -> None:
        """Create a stream record, start its thread, then register it."""
        settings = action._data
        stream = StreamRecord(settings)
        # run_id = action.stream_id  # will want to fix if a streamid != runid
        thread = StreamThread(
            target=wandb_internal,
            kwargs=dict(
                settings=settings,
                record_q=stream._record_q,
                result_q=stream._result_q,
                port=self._port,
                user_pid=self._pid,
            ),
        )
        stream.start_thread(thread)
        with self._streams_lock:
            self._streams[action._stream_id] = stream

    def _process_start(self, action: StreamAction) -> None:
        """Mark the targeted stream as started (KeyError if it is unknown)."""
        with self._streams_lock:
            self._streams[action._stream_id].mark_started()

    def _process_update(self, action: StreamAction) -> None:
        """Replace the targeted stream's settings (KeyError if it is unknown)."""
        with self._streams_lock:
            self._streams[action._stream_id].update(action._data)

    def _process_del(self, action: StreamAction) -> None:
        """Unregister the targeted stream and join it.

        Raises:
            KeyError: If the stream id is not registered.
        """
        with self._streams_lock:
            stream = self._streams.pop(action._stream_id)
        # Join outside the lock so the blocking wait does not stall readers
        # such as has_stream()/get_stream(); teardown does the same.
        stream.join()
        # TODO: we assume stream has already been shutdown. should we verify?

    def _process_drop(self, action: StreamAction) -> None:
        """Unregister, drop and join the targeted stream if it is registered."""
        with self._streams_lock:
            stream = self._streams.pop(action._stream_id, None)
        if stream is None:
            return
        # Drop and join outside the lock so the blocking wait does not stall
        # readers of the stream dictionary.
        stream.drop()
        stream.join()

    async def _finish_all_progress(
        self,
        progress_printer: progress.ProgressPrinter,
        streams_to_watch: dict[str, StreamRecord],
    ) -> None:
        """Poll the streams and display statistics about them.

        This never returns and must be cancelled.

        Args:
            progress_printer: Printer to use for displaying finish progress.
            streams_to_watch: Streams to poll for finish progress.
        """
        results: dict[str, pb.Result | None] = {}

        async def loop_poll_stream(
            stream_id: str,
            stream: StreamRecord,
        ) -> NoReturn:
            while True:
                start_time = time.monotonic()

                handle = stream.interface.deliver_poll_exit()
                results[stream_id] = await handle.wait_async(timeout=None)

                # Issue at most one poll per second for each stream.
                elapsed_time = time.monotonic() - start_time
                if elapsed_time < 1:
                    await asyncio.sleep(1 - elapsed_time)

        async def loop_update_printer() -> NoReturn:
            while True:
                poll_exit_responses: list[pb.PollExitResponse] = []
                for result in results.values():
                    if not result or not result.response:
                        continue
                    if poll_exit_response := result.response.poll_exit_response:
                        poll_exit_responses.append(poll_exit_response)

                progress_printer.update(poll_exit_responses)
                await asyncio.sleep(1)

        async with asyncio_compat.open_task_group() as task_group:
            for stream_id, stream in streams_to_watch.items():
                task_group.start_soon(loop_poll_stream(stream_id, stream))
            task_group.start_soon(loop_update_printer())

    def _finish_all(self, streams: dict[str, StreamRecord], exit_code: int) -> None:
        """Finish every given stream and print a footer for each started one.

        Started streams are sent an exit request and waited on with a progress
        display. Their final results are then collected and passed to
        ``Run._footer`` before the stream is joined. Streams that were never
        marked started are only joined.

        Args:
            streams: Streams to finish, keyed by stream id.
            exit_code: Exit code delivered to each started stream.
        """
        if not streams:
            return

        printer = printerlib.new_printer()

        # fixme: for now we have a single printer for all streams,
        # and jupyter is disabled if at least single stream's setting set `_jupyter` to false

        # only finish started streams, non started streams failed early
        started_streams: dict[str, StreamRecord] = {}
        not_started_streams: dict[str, StreamRecord] = {}
        for stream_id, stream in streams.items():
            bucket = started_streams if stream._started else not_started_streams
            bucket[stream_id] = stream

        exit_handles: list[MailboxHandle[pb.Result]] = [
            stream.interface.deliver_exit(exit_code)
            for stream in started_streams.values()
        ]

        with progress.progress_printer(
            printer,
            default_text="Finishing up...",
        ) as progress_printer:
            # todo: should we wait for the max timeout (?) of all exit handles or just wait forever?
            # timeout = max(stream._settings._exit_timeout for stream in streams.values())
            wait_all_with_progress(
                exit_handles,
                timeout=None,
                progress_after=1,
                display_progress=functools.partial(
                    self._finish_all_progress,
                    progress_printer,
                    started_streams,
                ),
            )

        # These could be done in parallel in the future
        for stream in started_streams.values():
            # dispatch all our final requests
            poll_exit_handle = stream.interface.deliver_poll_exit()
            final_summary_handle = stream.interface.deliver_get_summary()
            sampled_history_handle = stream.interface.deliver_request_sampled_history()
            internal_messages_handle = stream.interface.deliver_internal_messages()

            result = internal_messages_handle.wait_or(timeout=None)
            internal_messages_response = result.response.internal_messages_response

            result = poll_exit_handle.wait_or(timeout=None)
            poll_exit_response = result.response.poll_exit_response

            result = sampled_history_handle.wait_or(timeout=None)
            sampled_history = result.response.sampled_history_response

            result = final_summary_handle.wait_or(timeout=None)
            final_summary = result.response.get_summary_response

            Run._footer(
                sampled_history=sampled_history,
                final_summary=final_summary,
                poll_exit_response=poll_exit_response,
                internal_messages_response=internal_messages_response,
                settings=stream._settings,  # type: ignore
                printer=printer,
            )
            stream.join()

        # not started streams need to be cleaned up
        for stream in not_started_streams.values():
            stream.join()

    def _process_teardown(self, action: StreamAction) -> None:
        """Finish all registered streams, clear the registry and signal stop."""
        exit_code: int = action._data
        with self._streams_lock:
            # TODO: mark streams to prevent new modifications?
            streams_copy = self._streams.copy()
        # Finishing blocks on the streams, so it runs on the copy without the lock.
        self._finish_all(streams_copy, exit_code)
        with self._streams_lock:
            self._streams = {}
        self._stopped.set()

    def _process_action(self, action: StreamAction) -> None:
        """Dispatch ``action`` to the handler registered for its name.

        Raises:
            AssertionError: If the action name is not supported.
        """
        handlers = {
            "add": self._process_add,
            "update": self._process_update,
            "start": self._process_start,
            "del": self._process_del,
            "drop": self._process_drop,
            "teardown": self._process_teardown,
        }
        handler = handlers.get(action._action)
        if handler is None:
            raise AssertionError(f"Unsupported action: {action._action}")
        handler(action)

    def _check_orphaned(self) -> bool:
        """Return True if the monitored pid no longer exists.

        Returns False when no pid is set. The liveness check runs at most once
        every 2 seconds; calls inside that window also return False.
        """
        if not self._pid:
            return False
        # Monotonic clock: the throttle must not be affected by wall-clock jumps.
        now = time.monotonic()
        last_checked = self._pid_checked_ts
        # if we have checked already and it was less than 2 seconds ago
        if last_checked is not None and now < last_checked + 2:
            return False
        self._pid_checked_ts = now
        return not psutil.pid_exists(self._pid)

    def _loop(self) -> None:
        """Process queued actions until the stopped event is set."""
        while not self._stopped.is_set():
            if self._check_orphaned():
                # parent process is gone, let other threads know we need to shut down
                self._stopped.set()
            try:
                action = self._action_q.get(timeout=1)
            except queue.Empty:
                continue
            # NOTE(review): if _process_action raises, the action is never
            # marked handled, so the thread that queued it stays blocked in
            # wait_handled().
            self._process_action(action)
            action.set_handled()
            self._action_q.task_done()
        self._action_q.join()

    def loop(self) -> None:
        """Run the action-processing loop on the calling thread."""
        self._loop()

    def cleanup(self) -> None:
        """No-op."""
        pass