wandb 0.20.1__py3-none-any.whl → 0.20.2rc20250616__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. wandb/__init__.py +3 -6
  2. wandb/__init__.pyi +1 -1
  3. wandb/analytics/sentry.py +2 -2
  4. wandb/apis/importers/internals/internal.py +0 -3
  5. wandb/apis/public/api.py +2 -2
  6. wandb/apis/public/registries/{utils.py → _utils.py} +12 -12
  7. wandb/apis/public/registries/registries_search.py +2 -2
  8. wandb/apis/public/registries/registry.py +19 -18
  9. wandb/bin/gpu_stats +0 -0
  10. wandb/cli/beta.py +1 -7
  11. wandb/cli/cli.py +0 -30
  12. wandb/env.py +0 -6
  13. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  14. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  15. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  16. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  17. wandb/proto/v5/wandb_settings_pb2.py +2 -2
  18. wandb/proto/v5/wandb_telemetry_pb2.py +10 -10
  19. wandb/proto/v6/wandb_settings_pb2.py +2 -2
  20. wandb/proto/v6/wandb_telemetry_pb2.py +10 -10
  21. wandb/sdk/artifacts/storage_handlers/s3_handler.py +42 -1
  22. wandb/sdk/backend/backend.py +1 -1
  23. wandb/sdk/internal/handler.py +1 -69
  24. wandb/sdk/lib/printer.py +6 -7
  25. wandb/sdk/lib/progress.py +1 -3
  26. wandb/sdk/lib/service/ipc_support.py +13 -0
  27. wandb/sdk/lib/{service_connection.py → service/service_connection.py} +20 -56
  28. wandb/sdk/lib/service/service_port_file.py +105 -0
  29. wandb/sdk/lib/service/service_process.py +111 -0
  30. wandb/sdk/lib/service/service_token.py +164 -0
  31. wandb/sdk/lib/sock_client.py +8 -12
  32. wandb/sdk/wandb_init.py +0 -3
  33. wandb/sdk/wandb_require.py +9 -20
  34. wandb/sdk/wandb_run.py +0 -24
  35. wandb/sdk/wandb_settings.py +0 -9
  36. wandb/sdk/wandb_setup.py +2 -13
  37. {wandb-0.20.1.dist-info → wandb-0.20.2rc20250616.dist-info}/METADATA +1 -3
  38. {wandb-0.20.1.dist-info → wandb-0.20.2rc20250616.dist-info}/RECORD +41 -67
  39. wandb/sdk/internal/flow_control.py +0 -263
  40. wandb/sdk/internal/internal.py +0 -401
  41. wandb/sdk/internal/internal_util.py +0 -97
  42. wandb/sdk/internal/system/__init__.py +0 -0
  43. wandb/sdk/internal/system/assets/__init__.py +0 -25
  44. wandb/sdk/internal/system/assets/aggregators.py +0 -31
  45. wandb/sdk/internal/system/assets/asset_registry.py +0 -20
  46. wandb/sdk/internal/system/assets/cpu.py +0 -163
  47. wandb/sdk/internal/system/assets/disk.py +0 -210
  48. wandb/sdk/internal/system/assets/gpu.py +0 -416
  49. wandb/sdk/internal/system/assets/gpu_amd.py +0 -233
  50. wandb/sdk/internal/system/assets/interfaces.py +0 -205
  51. wandb/sdk/internal/system/assets/ipu.py +0 -177
  52. wandb/sdk/internal/system/assets/memory.py +0 -166
  53. wandb/sdk/internal/system/assets/network.py +0 -125
  54. wandb/sdk/internal/system/assets/open_metrics.py +0 -293
  55. wandb/sdk/internal/system/assets/tpu.py +0 -154
  56. wandb/sdk/internal/system/assets/trainium.py +0 -393
  57. wandb/sdk/internal/system/env_probe_helpers.py +0 -13
  58. wandb/sdk/internal/system/system_info.py +0 -248
  59. wandb/sdk/internal/system/system_monitor.py +0 -224
  60. wandb/sdk/internal/writer.py +0 -204
  61. wandb/sdk/lib/service_token.py +0 -93
  62. wandb/sdk/service/__init__.py +0 -0
  63. wandb/sdk/service/_startup_debug.py +0 -22
  64. wandb/sdk/service/port_file.py +0 -53
  65. wandb/sdk/service/server.py +0 -107
  66. wandb/sdk/service/server_sock.py +0 -286
  67. wandb/sdk/service/service.py +0 -252
  68. wandb/sdk/service/streams.py +0 -425
  69. {wandb-0.20.1.dist-info → wandb-0.20.2rc20250616.dist-info}/WHEEL +0 -0
  70. {wandb-0.20.1.dist-info → wandb-0.20.2rc20250616.dist-info}/entry_points.txt +0 -0
  71. {wandb-0.20.1.dist-info → wandb-0.20.2rc20250616.dist-info}/licenses/LICENSE +0 -0
@@ -1,107 +0,0 @@
1
- """wandb server.
2
-
3
- Start up socket transport servers.
4
- """
5
-
6
- import logging
7
- import os
8
- import sys
9
- from typing import Optional
10
-
11
- import wandb
12
-
13
- from . import _startup_debug, port_file
14
- from .server_sock import SocketServer
15
- from .streams import StreamMux
16
-
17
-
18
- class WandbServer:
19
- _pid: Optional[int]
20
- _sock_port: Optional[int]
21
- _debug: bool
22
- _sock_server: Optional[SocketServer]
23
- _startup_debug_enabled: bool
24
-
25
- def __init__(
26
- self,
27
- sock_port: Optional[int] = None,
28
- port_fname: Optional[str] = None,
29
- address: Optional[str] = None,
30
- pid: Optional[int] = None,
31
- debug: bool = True,
32
- ) -> None:
33
- self._sock_port = sock_port
34
- self._port_fname = port_fname
35
- self._address = address
36
- self._pid = pid
37
- self._debug = debug
38
- self._sock_server = None
39
- self._startup_debug_enabled = _startup_debug.is_enabled()
40
-
41
- if debug:
42
- logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
43
-
44
- def _inform_used_ports(self, sock_port: Optional[int]) -> None:
45
- if not self._port_fname:
46
- return
47
- pf = port_file.PortFile(sock_port=sock_port)
48
- pf.write(self._port_fname)
49
-
50
- def _start_sock(self, mux: StreamMux) -> int:
51
- address: str = self._address or "127.0.0.1"
52
- port: int = self._sock_port or 0
53
- self._sock_server = SocketServer(mux=mux, address=address, port=port)
54
- try:
55
- self._sock_server.start()
56
- port = self._sock_server.port
57
- if self._pid:
58
- mux.set_pid(self._pid)
59
- except KeyboardInterrupt:
60
- mux.cleanup()
61
- raise
62
- except Exception:
63
- mux.cleanup()
64
- raise
65
- return port
66
-
67
- def _stop_servers(self) -> None:
68
- if self._sock_server:
69
- self._sock_server.stop()
70
-
71
- def _startup_debug_print(self, message: str) -> None:
72
- if not self._startup_debug_enabled:
73
- return
74
- _startup_debug.print_message(message)
75
-
76
- def _setup_proctitle(self, sock_port: Optional[int]) -> None:
77
- # TODO: the internal_process should have a better way to have access to
78
- # settings.
79
- disable_setproctitle = os.environ.get("WANDB_X_DISABLE_SETPROCTITLE")
80
- if disable_setproctitle:
81
- return
82
-
83
- setproctitle = wandb.util.get_optional_module("setproctitle")
84
- if setproctitle:
85
- service_ver = 2
86
- pid = str(self._pid or 0)
87
- transport = "s" if sock_port else "g"
88
- port = sock_port or 0
89
- # this format is similar to the service token, but it's purely informative now
90
- # (consider unifying this in the future)
91
- service_id = f"{service_ver}-{pid}-{transport}-{port}"
92
- proc_title = f"wandb-service({service_id})"
93
- self._startup_debug_print("before_setproctitle")
94
- setproctitle.setproctitle(proc_title)
95
- self._startup_debug_print("after_setproctitle")
96
-
97
- def serve(self) -> None:
98
- mux = StreamMux()
99
- self._startup_debug_print("before_network")
100
- sock_port = self._start_sock(mux=mux)
101
- self._startup_debug_print("after_network")
102
- self._inform_used_ports(sock_port=sock_port)
103
- self._startup_debug_print("after_inform")
104
- self._setup_proctitle(sock_port=sock_port)
105
- self._startup_debug_print("before_loop")
106
- mux.loop()
107
- self._stop_servers()
@@ -1,286 +0,0 @@
1
- import queue
2
- import socket
3
- import threading
4
- import time
5
- from typing import TYPE_CHECKING, Any, Callable, Dict, Optional
6
-
7
- import wandb
8
- from wandb.proto import wandb_internal_pb2 as pb
9
- from wandb.proto import wandb_server_pb2 as spb
10
- from wandb.sdk.internal.settings_static import SettingsStatic
11
-
12
- from ..lib.sock_client import SockClient, SockClientClosedError
13
- from .streams import StreamMux
14
-
15
- if TYPE_CHECKING:
16
- from threading import Event
17
-
18
- from ..interface.interface_relay import InterfaceRelay
19
-
20
-
21
- class ClientDict:
22
- _client_dict: Dict[str, SockClient]
23
- _lock: threading.Lock
24
-
25
- def __init__(self) -> None:
26
- self._client_dict = {}
27
- self._lock = threading.Lock()
28
-
29
- def get_client(self, client_id: str) -> Optional[SockClient]:
30
- with self._lock:
31
- client = self._client_dict.get(client_id)
32
- return client
33
-
34
- def add_client(self, client: SockClient) -> None:
35
- with self._lock:
36
- self._client_dict[client._sockid] = client
37
-
38
- def del_client(self, client: SockClient) -> None:
39
- with self._lock:
40
- del self._client_dict[client._sockid]
41
-
42
-
43
- class SockServerInterfaceReaderThread(threading.Thread):
44
- _socket_client: SockClient
45
- _stopped: "Event"
46
-
47
- def __init__(
48
- self,
49
- clients: ClientDict,
50
- iface: "InterfaceRelay",
51
- stopped: "Event",
52
- ) -> None:
53
- self._iface = iface
54
- self._clients = clients
55
- threading.Thread.__init__(self)
56
- self.name = "SockSrvIntRdThr"
57
- self._stopped = stopped
58
-
59
- def run(self) -> None:
60
- while not self._stopped.is_set():
61
- try:
62
- result = self._iface.relay_q.get(timeout=1)
63
- except queue.Empty:
64
- continue
65
- except OSError:
66
- # handle is closed
67
- break
68
- except ValueError:
69
- # queue is closed
70
- break
71
- sockid = result.control.relay_id
72
- assert sockid
73
- sock_client = self._clients.get_client(sockid)
74
- assert sock_client
75
- sresp = spb.ServerResponse()
76
- sresp.request_id = result.control.mailbox_slot
77
- sresp.result_communicate.CopyFrom(result)
78
- sock_client.send_server_response(sresp)
79
-
80
-
81
- class SockServerReadThread(threading.Thread):
82
- _sock_client: SockClient
83
- _mux: StreamMux
84
- _stopped: "Event"
85
- _clients: ClientDict
86
-
87
- def __init__(
88
- self, conn: socket.socket, mux: StreamMux, clients: ClientDict
89
- ) -> None:
90
- self._mux = mux
91
- threading.Thread.__init__(self)
92
- self.name = "SockSrvRdThr"
93
- sock_client = SockClient()
94
- sock_client.set_socket(conn)
95
- self._sock_client = sock_client
96
- self._stopped = mux._get_stopped_event()
97
- self._clients = clients
98
-
99
- def run(self) -> None:
100
- while not self._stopped.is_set():
101
- try:
102
- sreq = self._sock_client.read_server_request()
103
- except SockClientClosedError:
104
- # socket has been closed
105
- # TODO: shut down other threads serving this socket?
106
- break
107
- assert sreq, "read_server_request should never timeout"
108
- sreq_type = sreq.WhichOneof("server_request_type")
109
- shandler_str = "server_" + sreq_type # type: ignore
110
- shandler: Callable[[spb.ServerRequest], None] = getattr( # type: ignore
111
- self, shandler_str, None
112
- )
113
- assert shandler, f"unknown handle: {shandler_str}" # type: ignore
114
- shandler(sreq)
115
-
116
- def stop(self) -> None:
117
- try:
118
- # See shutdown notes in class SocketServer for a discussion about this mechanism
119
- self._sock_client.shutdown(socket.SHUT_RDWR)
120
- except OSError:
121
- pass
122
- self._sock_client.close()
123
-
124
- def server_inform_init(self, sreq: "spb.ServerRequest") -> None:
125
- request = sreq.inform_init
126
- stream_id = request._info.stream_id
127
- settings = SettingsStatic(request.settings)
128
- self._mux.add_stream(stream_id, settings=settings)
129
-
130
- iface = self._mux.get_stream(stream_id).interface
131
- self._clients.add_client(self._sock_client)
132
- iface_reader_thread = SockServerInterfaceReaderThread(
133
- clients=self._clients,
134
- iface=iface,
135
- stopped=self._stopped,
136
- )
137
- iface_reader_thread.start()
138
-
139
- def server_inform_start(self, sreq: "spb.ServerRequest") -> None:
140
- request = sreq.inform_start
141
- stream_id = request._info.stream_id
142
- settings = SettingsStatic(request.settings)
143
- self._mux.update_stream(stream_id, settings=settings)
144
- self._mux.start_stream(stream_id)
145
-
146
- def server_inform_attach(self, sreq: "spb.ServerRequest") -> None:
147
- request = sreq.inform_attach
148
- stream_id = request._info.stream_id
149
-
150
- self._clients.add_client(self._sock_client)
151
- inform_attach_response = spb.ServerInformAttachResponse()
152
- inform_attach_response.settings.CopyFrom(
153
- self._mux._streams[stream_id]._settings._proto,
154
- )
155
- response = spb.ServerResponse(
156
- request_id=sreq.request_id,
157
- inform_attach_response=inform_attach_response,
158
- )
159
- self._sock_client.send_server_response(response)
160
-
161
- def server_record_communicate(self, sreq: "spb.ServerRequest") -> None:
162
- self._put_record(sreq.record_communicate)
163
-
164
- def server_record_publish(self, sreq: "spb.ServerRequest") -> None:
165
- self._put_record(sreq.record_publish)
166
-
167
- def _put_record(self, record: "pb.Record") -> None:
168
- # encode relay information so the right socket picks up the data
169
- record.control.relay_id = self._sock_client._sockid
170
- stream_id = record._info.stream_id
171
-
172
- try:
173
- iface = self._mux.get_stream(stream_id).interface
174
-
175
- except KeyError:
176
- # We should log the error but cannot because it may print to console
177
- # due to how logging is set up. This error usually happens if
178
- # a record is sent when no run is active, but during this time the
179
- # logger prints to the console.
180
- pass
181
-
182
- else:
183
- assert iface.record_q
184
- iface.record_q.put(record)
185
-
186
- def server_inform_finish(self, sreq: "spb.ServerRequest") -> None:
187
- request = sreq.inform_finish
188
- stream_id = request._info.stream_id
189
- self._mux.drop_stream(stream_id)
190
-
191
- def server_inform_teardown(self, sreq: "spb.ServerRequest") -> None:
192
- request = sreq.inform_teardown
193
- exit_code = request.exit_code
194
- self._mux.teardown(exit_code)
195
-
196
-
197
- class SockAcceptThread(threading.Thread):
198
- _sock: socket.socket
199
- _mux: StreamMux
200
- _stopped: "Event"
201
- _clients: ClientDict
202
-
203
- def __init__(self, sock: socket.socket, mux: StreamMux) -> None:
204
- self._sock = sock
205
- self._mux = mux
206
- self._stopped = mux._get_stopped_event()
207
- threading.Thread.__init__(self)
208
- self.name = "SockAcceptThr"
209
- self._clients = ClientDict()
210
-
211
- def run(self) -> None:
212
- read_threads = []
213
-
214
- while not self._stopped.is_set():
215
- try:
216
- conn, addr = self._sock.accept()
217
- except ConnectionAbortedError:
218
- break
219
- except OSError:
220
- # on shutdown
221
- break
222
- sr = SockServerReadThread(conn=conn, mux=self._mux, clients=self._clients)
223
- sr.start()
224
- read_threads.append(sr)
225
-
226
- for rt in read_threads:
227
- rt.stop()
228
-
229
-
230
- class DebugThread(threading.Thread):
231
- def __init__(self, mux: "StreamMux") -> None:
232
- threading.Thread.__init__(self)
233
- self.daemon = True
234
- self.name = "DebugThr"
235
-
236
- def run(self) -> None:
237
- while True:
238
- time.sleep(30)
239
- for thread in threading.enumerate():
240
- wandb.termwarn(f"DEBUG: {thread.name}")
241
-
242
-
243
- class SocketServer:
244
- _mux: StreamMux
245
- _address: str
246
- _port: int
247
- _sock: socket.socket
248
-
249
- def __init__(self, mux: Any, address: str, port: int) -> None:
250
- self._mux = mux
251
- self._address = address
252
- self._port = port
253
- # This is the server socket that we accept new connections from
254
- self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
255
-
256
- def _bind(self) -> None:
257
- self._sock.bind((self._address, self._port))
258
- self._port = self._sock.getsockname()[1]
259
-
260
- @property
261
- def port(self) -> int:
262
- return self._port
263
-
264
- def start(self) -> None:
265
- self._bind()
266
- self._sock.listen(5)
267
- self._thread = SockAcceptThread(sock=self._sock, mux=self._mux)
268
- self._thread.start()
269
- # Note: Uncomment to figure out what thread is not exiting properly
270
- # self._dbg_thread = DebugThread(mux=self._mux)
271
- # self._dbg_thread.start()
272
-
273
- def stop(self) -> None:
274
- if self._sock:
275
- # we need to stop the SockAcceptThread
276
- try:
277
- # TODO(jhr): consider a more graceful shutdown in the future
278
- # socket.shutdown() is a more heavy handed approach to interrupting socket.accept()
279
- # in the future we might want to consider a more graceful shutdown which would involve setting
280
- # a threading Event and then initiating one last connection just to close down the thread
281
- # The advantage of the heavy handed approach is that it does not depend on the threads functioning
282
- # properly, that is, if something has gone wrong, we probably want to use this hammer to shut things down
283
- self._sock.shutdown(socket.SHUT_RDWR)
284
- except OSError:
285
- pass
286
- self._sock.close()
@@ -1,252 +0,0 @@
1
- """Reliably launch and connect to backend server process (wandb service).
2
-
3
- Backend server process can be connected to using tcp sockets transport.
4
- """
5
-
6
- import datetime
7
- import os
8
- import pathlib
9
- import platform
10
- import shutil
11
- import subprocess
12
- import sys
13
- import tempfile
14
- import time
15
- from typing import TYPE_CHECKING, Any, Dict, Optional
16
-
17
- from wandb import _sentry
18
- from wandb.env import (
19
- core_debug,
20
- dcgm_profiling_enabled,
21
- error_reporting_enabled,
22
- is_require_legacy_service,
23
- )
24
- from wandb.errors import Error, WandbCoreNotAvailableError
25
- from wandb.errors.term import termlog, termwarn
26
- from wandb.util import get_core_path, get_module
27
-
28
- from . import _startup_debug, port_file
29
-
30
- if TYPE_CHECKING:
31
- from wandb.sdk.wandb_settings import Settings
32
-
33
-
34
- class ServiceStartProcessError(Error):
35
- """Raised when a known error occurs when launching wandb service."""
36
-
37
-
38
- class ServiceStartTimeoutError(Error):
39
- """Raised when service start times out."""
40
-
41
-
42
- class ServiceStartPortError(Error):
43
- """Raised when service start fails to find a port."""
44
-
45
-
46
- class _Service:
47
- _settings: "Settings"
48
- _sock_port: Optional[int]
49
- _internal_proc: Optional[subprocess.Popen]
50
- _startup_debug_enabled: bool
51
-
52
- def __init__(
53
- self,
54
- settings: "Settings",
55
- ) -> None:
56
- self._settings = settings
57
- self._stub = None
58
- self._sock_port = None
59
- self._internal_proc = None
60
- self._startup_debug_enabled = _startup_debug.is_enabled()
61
-
62
- _sentry.configure_scope(tags=dict(settings), process_context="service")
63
-
64
- def _startup_debug_print(self, message: str) -> None:
65
- if not self._startup_debug_enabled:
66
- return
67
- _startup_debug.print_message(message)
68
-
69
- def _wait_for_ports(
70
- self, fname: str, proc: Optional[subprocess.Popen] = None
71
- ) -> None:
72
- """Wait for the service to write the port file and then read it.
73
-
74
- Args:
75
- fname: The path to the port file.
76
- proc: The process to wait for.
77
-
78
- Raises:
79
- ServiceStartTimeoutError: If the service takes too long to start.
80
- ServiceStartPortError: If the service writes an invalid port file or unable to read it.
81
- ServiceStartProcessError: If the service process exits unexpectedly.
82
-
83
- """
84
- time_max = time.monotonic() + self._settings.x_service_wait
85
- while time.monotonic() < time_max:
86
- if proc and proc.poll():
87
- # process finished
88
- # define these variables for sentry context grab:
89
- # command = proc.args
90
- # sys_executable = sys.executable
91
- # which_python = shutil.which("python3")
92
- # proc_out = proc.stdout.read()
93
- # proc_err = proc.stderr.read()
94
- context = dict(
95
- command=proc.args,
96
- sys_executable=sys.executable,
97
- which_python=shutil.which("python3"),
98
- proc_out=proc.stdout.read() if proc.stdout else "",
99
- proc_err=proc.stderr.read() if proc.stderr else "",
100
- )
101
- raise ServiceStartProcessError(
102
- f"The wandb service process exited with {proc.returncode}. "
103
- "Ensure that `sys.executable` is a valid python interpreter. "
104
- "You can override it with the `_executable` setting "
105
- "or with the `WANDB_X_EXECUTABLE` environment variable."
106
- f"\n{context}",
107
- context=context,
108
- )
109
- if not os.path.isfile(fname):
110
- time.sleep(0.2)
111
- continue
112
- try:
113
- pf = port_file.PortFile()
114
- pf.read(fname)
115
- if not pf.is_valid:
116
- time.sleep(0.2)
117
- continue
118
- self._sock_port = pf.sock_port
119
- except Exception as e:
120
- # todo: point at the docs. this could be due to a number of reasons,
121
- # for example, being unable to write to the port file etc.
122
- raise ServiceStartPortError(
123
- f"Failed to allocate port for wandb service: {e}."
124
- )
125
- return
126
- raise ServiceStartTimeoutError(
127
- "Timed out waiting for wandb service to start after "
128
- f"{self._settings.x_service_wait} seconds. "
129
- "Try increasing the timeout with the `_service_wait` setting."
130
- )
131
-
132
- def _launch_server(self) -> None:
133
- """Launch server and set ports."""
134
- # References for starting processes
135
- # - https://github.com/wandb/wandb/blob/archive/old-cli/wandb/__init__.py
136
- # - https://stackoverflow.com/questions/1196074/how-to-start-a-background-process-in-python
137
- self._startup_debug_print("launch")
138
-
139
- kwargs: Dict[str, Any] = dict(close_fds=True)
140
- # flags to handle keyboard interrupt signal that is causing a hang
141
- if platform.system() == "Windows":
142
- kwargs.update(creationflags=subprocess.CREATE_NEW_PROCESS_GROUP) # type: ignore [attr-defined]
143
- else:
144
- kwargs.update(start_new_session=True)
145
-
146
- pid = str(os.getpid())
147
-
148
- with tempfile.TemporaryDirectory() as tmpdir:
149
- fname = os.path.join(tmpdir, f"port-{pid}.txt")
150
-
151
- executable = self._settings.x_executable
152
- exec_cmd_list = [executable, "-m"]
153
-
154
- service_args = []
155
-
156
- if not is_require_legacy_service():
157
- try:
158
- core_path = get_core_path()
159
- except WandbCoreNotAvailableError as e:
160
- _sentry.reraise(e)
161
-
162
- service_args.extend([core_path])
163
-
164
- if not error_reporting_enabled():
165
- service_args.append("--no-observability")
166
-
167
- if core_debug(default="False"):
168
- service_args.extend(["--log-level", "-4"])
169
-
170
- if dcgm_profiling_enabled():
171
- service_args.append("--enable-dcgm-profiling")
172
-
173
- exec_cmd_list = []
174
- else:
175
- service_args.extend(["wandb", "service", "--debug"])
176
- termwarn(
177
- "Using legacy-service, which is deprecated. If this is"
178
- " unintentional, you can fix it by ensuring you do not call"
179
- " `wandb.require('legacy-service')` and do not set the"
180
- " WANDB_X_REQUIRE_LEGACY_SERVICE environment"
181
- " variable."
182
- )
183
-
184
- service_args += [
185
- "--port-filename",
186
- fname,
187
- "--pid",
188
- pid,
189
- ]
190
-
191
- if os.environ.get("WANDB_SERVICE_PROFILE") == "memray":
192
- _ = get_module(
193
- "memray",
194
- required=(
195
- "wandb service memory profiling requires memray, "
196
- "install with `pip install memray`"
197
- ),
198
- )
199
-
200
- time_tag = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
201
- output_file = f"wandb_service.memray.{time_tag}.bin"
202
- cli_executable = (
203
- pathlib.Path(__file__).parent.parent.parent.parent
204
- / "tools"
205
- / "cli.py"
206
- )
207
- exec_cmd_list = [
208
- executable,
209
- "-m",
210
- "memray",
211
- "run",
212
- "-o",
213
- output_file,
214
- ]
215
- service_args[0] = str(cli_executable)
216
- termlog(
217
- f"wandb service memory profiling enabled, output file: {output_file}"
218
- )
219
- termlog(
220
- f"Convert to flamegraph with: `python -m memray flamegraph {output_file}`"
221
- )
222
-
223
- try:
224
- internal_proc = subprocess.Popen(
225
- exec_cmd_list + service_args, # type: ignore[arg-type]
226
- env=os.environ,
227
- **kwargs,
228
- )
229
- except Exception as e:
230
- _sentry.reraise(e)
231
-
232
- self._startup_debug_print("wait_ports")
233
- try:
234
- self._wait_for_ports(fname, proc=internal_proc)
235
- except Exception as e:
236
- _sentry.reraise(e)
237
- self._startup_debug_print("wait_ports_done")
238
- self._internal_proc = internal_proc
239
- self._startup_debug_print("launch_done")
240
-
241
- def start(self) -> None:
242
- self._launch_server()
243
-
244
- @property
245
- def sock_port(self) -> Optional[int]:
246
- return self._sock_port
247
-
248
- def join(self) -> int:
249
- ret = 0
250
- if self._internal_proc:
251
- ret = self._internal_proc.wait()
252
- return ret