wandb 0.20.1rc20250604__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wandb/__init__.py +3 -6
- wandb/__init__.pyi +24 -23
- wandb/analytics/sentry.py +2 -2
- wandb/apis/importers/internals/internal.py +0 -3
- wandb/apis/internal.py +3 -0
- wandb/apis/paginator.py +17 -4
- wandb/apis/public/api.py +85 -4
- wandb/apis/public/artifacts.py +10 -8
- wandb/apis/public/files.py +5 -5
- wandb/apis/public/projects.py +44 -3
- wandb/apis/public/registries/{utils.py → _utils.py} +12 -12
- wandb/apis/public/registries/registries_search.py +2 -2
- wandb/apis/public/registries/registry.py +19 -18
- wandb/apis/public/reports.py +64 -8
- wandb/apis/public/runs.py +16 -23
- wandb/automations/__init__.py +10 -10
- wandb/automations/_filters/run_metrics.py +0 -2
- wandb/automations/_utils.py +0 -2
- wandb/automations/actions.py +0 -2
- wandb/automations/automations.py +0 -2
- wandb/automations/events.py +0 -2
- wandb/bin/gpu_stats +0 -0
- wandb/cli/beta.py +1 -7
- wandb/cli/cli.py +0 -30
- wandb/env.py +0 -6
- wandb/integration/catboost/catboost.py +6 -2
- wandb/integration/kfp/kfp_patch.py +3 -1
- wandb/integration/sb3/sb3.py +3 -3
- wandb/integration/ultralytics/callback.py +6 -2
- wandb/plot/__init__.py +2 -0
- wandb/plot/bar.py +30 -29
- wandb/plot/confusion_matrix.py +75 -71
- wandb/plot/histogram.py +26 -25
- wandb/plot/line.py +33 -32
- wandb/plot/line_series.py +100 -103
- wandb/plot/pr_curve.py +33 -32
- wandb/plot/roc_curve.py +38 -38
- wandb/plot/scatter.py +27 -27
- wandb/proto/v3/wandb_internal_pb2.py +366 -385
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_internal_pb2.py +352 -356
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v5/wandb_internal_pb2.py +352 -356
- wandb/proto/v5/wandb_settings_pb2.py +2 -2
- wandb/proto/v5/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v6/wandb_internal_pb2.py +352 -356
- wandb/proto/v6/wandb_settings_pb2.py +2 -2
- wandb/proto/v6/wandb_telemetry_pb2.py +10 -10
- wandb/sdk/artifacts/_generated/__init__.py +12 -1
- wandb/sdk/artifacts/_generated/input_types.py +20 -2
- wandb/sdk/artifacts/_generated/link_artifact.py +21 -0
- wandb/sdk/artifacts/_generated/operations.py +9 -0
- wandb/sdk/artifacts/_validators.py +40 -2
- wandb/sdk/artifacts/artifact.py +163 -21
- wandb/sdk/artifacts/storage_handlers/s3_handler.py +42 -1
- wandb/sdk/backend/backend.py +1 -1
- wandb/sdk/data_types/base_types/media.py +9 -7
- wandb/sdk/data_types/base_types/wb_value.py +6 -6
- wandb/sdk/data_types/saved_model.py +3 -3
- wandb/sdk/data_types/table.py +41 -41
- wandb/sdk/data_types/trace_tree.py +12 -12
- wandb/sdk/interface/interface.py +8 -19
- wandb/sdk/interface/interface_shared.py +7 -16
- wandb/sdk/internal/datastore.py +18 -18
- wandb/sdk/internal/handler.py +4 -74
- wandb/sdk/internal/internal_api.py +54 -0
- wandb/sdk/internal/sender.py +23 -3
- wandb/sdk/internal/sender_config.py +9 -0
- wandb/sdk/launch/_project_spec.py +3 -3
- wandb/sdk/launch/agent/agent.py +3 -3
- wandb/sdk/launch/agent/job_status_tracker.py +3 -1
- wandb/sdk/launch/utils.py +3 -3
- wandb/sdk/lib/console_capture.py +66 -19
- wandb/sdk/lib/printer.py +6 -7
- wandb/sdk/lib/progress.py +1 -3
- wandb/sdk/lib/service/ipc_support.py +13 -0
- wandb/sdk/lib/{service_connection.py → service/service_connection.py} +20 -56
- wandb/sdk/lib/service/service_port_file.py +105 -0
- wandb/sdk/lib/service/service_process.py +111 -0
- wandb/sdk/lib/service/service_token.py +164 -0
- wandb/sdk/lib/sock_client.py +8 -12
- wandb/sdk/wandb_init.py +1 -5
- wandb/sdk/wandb_require.py +9 -21
- wandb/sdk/wandb_run.py +23 -137
- wandb/sdk/wandb_settings.py +233 -80
- wandb/sdk/wandb_setup.py +2 -13
- {wandb-0.20.1rc20250604.dist-info → wandb-0.21.0.dist-info}/METADATA +1 -3
- {wandb-0.20.1rc20250604.dist-info → wandb-0.21.0.dist-info}/RECORD +93 -119
- wandb/sdk/internal/flow_control.py +0 -263
- wandb/sdk/internal/internal.py +0 -401
- wandb/sdk/internal/internal_util.py +0 -97
- wandb/sdk/internal/system/__init__.py +0 -0
- wandb/sdk/internal/system/assets/__init__.py +0 -25
- wandb/sdk/internal/system/assets/aggregators.py +0 -31
- wandb/sdk/internal/system/assets/asset_registry.py +0 -20
- wandb/sdk/internal/system/assets/cpu.py +0 -163
- wandb/sdk/internal/system/assets/disk.py +0 -210
- wandb/sdk/internal/system/assets/gpu.py +0 -416
- wandb/sdk/internal/system/assets/gpu_amd.py +0 -233
- wandb/sdk/internal/system/assets/interfaces.py +0 -205
- wandb/sdk/internal/system/assets/ipu.py +0 -177
- wandb/sdk/internal/system/assets/memory.py +0 -166
- wandb/sdk/internal/system/assets/network.py +0 -125
- wandb/sdk/internal/system/assets/open_metrics.py +0 -293
- wandb/sdk/internal/system/assets/tpu.py +0 -154
- wandb/sdk/internal/system/assets/trainium.py +0 -393
- wandb/sdk/internal/system/env_probe_helpers.py +0 -13
- wandb/sdk/internal/system/system_info.py +0 -248
- wandb/sdk/internal/system/system_monitor.py +0 -224
- wandb/sdk/internal/writer.py +0 -204
- wandb/sdk/lib/service_token.py +0 -93
- wandb/sdk/service/__init__.py +0 -0
- wandb/sdk/service/_startup_debug.py +0 -22
- wandb/sdk/service/port_file.py +0 -53
- wandb/sdk/service/server.py +0 -107
- wandb/sdk/service/server_sock.py +0 -286
- wandb/sdk/service/service.py +0 -252
- wandb/sdk/service/streams.py +0 -425
- wandb/sdk/wandb_metadata.py +0 -623
- {wandb-0.20.1rc20250604.dist-info → wandb-0.21.0.dist-info}/WHEEL +0 -0
- {wandb-0.20.1rc20250604.dist-info → wandb-0.21.0.dist-info}/entry_points.txt +0 -0
- {wandb-0.20.1rc20250604.dist-info → wandb-0.21.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,7 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import atexit
|
4
|
-
import os
|
5
4
|
from typing import Callable
|
6
5
|
|
7
6
|
from wandb.proto import wandb_internal_pb2 as pb
|
@@ -11,77 +10,42 @@ from wandb.sdk import wandb_settings
|
|
11
10
|
from wandb.sdk.interface.interface import InterfaceBase
|
12
11
|
from wandb.sdk.interface.interface_sock import InterfaceSock
|
13
12
|
from wandb.sdk.interface.router_sock import MessageSockRouter
|
14
|
-
from wandb.sdk.lib import service_token
|
15
13
|
from wandb.sdk.lib.exit_hooks import ExitHooks
|
16
14
|
from wandb.sdk.lib.sock_client import SockClient, SockClientClosedError
|
17
15
|
from wandb.sdk.mailbox import HandleAbandonedError, Mailbox, MailboxClosedError
|
18
|
-
from wandb.sdk.service import service
|
19
16
|
|
20
|
-
|
21
|
-
class WandbServiceConnectionError(Exception):
|
22
|
-
"""Raised on failure to connect to the service process."""
|
17
|
+
from . import service_process, service_token
|
23
18
|
|
24
19
|
|
25
20
|
class WandbAttachFailedError(Exception):
|
26
|
-
"""
|
21
|
+
"""Failed to attach to a run."""
|
27
22
|
|
28
23
|
|
29
24
|
def connect_to_service(
|
30
25
|
settings: wandb_settings.Settings,
|
31
26
|
) -> ServiceConnection:
|
32
|
-
"""
|
33
|
-
|
34
|
-
if conn:
|
35
|
-
return conn
|
36
|
-
|
37
|
-
return _start_and_connect_service(settings)
|
38
|
-
|
39
|
-
|
40
|
-
def _try_connect_to_existing_service() -> ServiceConnection | None:
|
41
|
-
"""Attempts to connect to an existing service process."""
|
42
|
-
token = service_token.get_service_token()
|
43
|
-
if not token:
|
44
|
-
return None
|
45
|
-
|
46
|
-
# Only localhost sockets are supported below.
|
47
|
-
assert token.host == "localhost"
|
48
|
-
client = SockClient()
|
27
|
+
"""Connect to the service process, starting one up if necessary."""
|
28
|
+
token = service_token.from_env()
|
49
29
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
except Exception as e:
|
55
|
-
raise WandbServiceConnectionError(
|
56
|
-
"Failed to connect to internal service."
|
57
|
-
) from e
|
58
|
-
|
59
|
-
return ServiceConnection(client=client, proc=None)
|
30
|
+
if token:
|
31
|
+
return ServiceConnection(client=token.connect(), proc=None)
|
32
|
+
else:
|
33
|
+
return _start_and_connect_service(settings)
|
60
34
|
|
61
35
|
|
62
36
|
def _start_and_connect_service(
|
63
37
|
settings: wandb_settings.Settings,
|
64
38
|
) -> ServiceConnection:
|
65
|
-
"""
|
39
|
+
"""Start a service process and returns a connection to it.
|
66
40
|
|
67
41
|
An atexit hook is registered to tear down the service process and wait for
|
68
42
|
it to complete. The hook does not run in processes started using the
|
69
43
|
multiprocessing module.
|
70
44
|
"""
|
71
|
-
proc =
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
assert port
|
76
|
-
client = SockClient()
|
77
|
-
client.connect(port)
|
78
|
-
|
79
|
-
service_token.set_service_token(
|
80
|
-
parent_pid=os.getpid(),
|
81
|
-
transport="tcp",
|
82
|
-
host="localhost",
|
83
|
-
port=port,
|
84
|
-
)
|
45
|
+
proc = service_process.start(settings)
|
46
|
+
|
47
|
+
client = proc.token.connect()
|
48
|
+
proc.token.save_to_env()
|
85
49
|
|
86
50
|
hooks = ExitHooks()
|
87
51
|
hooks.hook()
|
@@ -106,7 +70,7 @@ class ServiceConnection:
|
|
106
70
|
def __init__(
|
107
71
|
self,
|
108
72
|
client: SockClient,
|
109
|
-
proc:
|
73
|
+
proc: service_process.ServiceProcess | None,
|
110
74
|
cleanup: Callable[[], None] | None = None,
|
111
75
|
):
|
112
76
|
"""Returns a new ServiceConnection.
|
@@ -132,7 +96,7 @@ class ServiceConnection:
|
|
132
96
|
return InterfaceSock(self._client, self._mailbox, stream_id=stream_id)
|
133
97
|
|
134
98
|
def send_record(self, record: pb.Record) -> None:
|
135
|
-
"""
|
99
|
+
"""Send data to the service."""
|
136
100
|
self._client.send_record_publish(record)
|
137
101
|
|
138
102
|
def inform_init(
|
@@ -140,14 +104,14 @@ class ServiceConnection:
|
|
140
104
|
settings: wandb_settings_pb2.Settings,
|
141
105
|
run_id: str,
|
142
106
|
) -> None:
|
143
|
-
"""
|
107
|
+
"""Send an init request to the service."""
|
144
108
|
request = spb.ServerInformInitRequest()
|
145
109
|
request.settings.CopyFrom(settings)
|
146
110
|
request._info.stream_id = run_id
|
147
111
|
self._client.send_server_request(spb.ServerRequest(inform_init=request))
|
148
112
|
|
149
113
|
def inform_finish(self, run_id: str) -> None:
|
150
|
-
"""
|
114
|
+
"""Send an finish request to the service."""
|
151
115
|
request = spb.ServerInformFinishRequest()
|
152
116
|
request._info.stream_id = run_id
|
153
117
|
self._client.send_server_request(spb.ServerRequest(inform_finish=request))
|
@@ -156,7 +120,7 @@ class ServiceConnection:
|
|
156
120
|
self,
|
157
121
|
attach_id: str,
|
158
122
|
) -> wandb_settings_pb2.Settings:
|
159
|
-
"""
|
123
|
+
"""Send an attach request to the service.
|
160
124
|
|
161
125
|
Raises a WandbAttachFailedError if attaching is not possible.
|
162
126
|
"""
|
@@ -188,7 +152,7 @@ class ServiceConnection:
|
|
188
152
|
settings: wandb_settings_pb2.Settings,
|
189
153
|
run_id: str,
|
190
154
|
) -> None:
|
191
|
-
"""
|
155
|
+
"""Send a start request to the service."""
|
192
156
|
request = spb.ServerInformStartRequest()
|
193
157
|
request.settings.CopyFrom(settings)
|
194
158
|
request._info.stream_id = run_id
|
@@ -221,7 +185,7 @@ class ServiceConnection:
|
|
221
185
|
return None
|
222
186
|
|
223
187
|
# Clear the service token to prevent new connections to the process.
|
224
|
-
service_token.
|
188
|
+
service_token.clear_service_in_env()
|
225
189
|
|
226
190
|
self._client.send_server_request(
|
227
191
|
spb.ServerRequest(
|
@@ -0,0 +1,105 @@
|
|
1
|
+
"""Module for figuring out how to connect to the service process."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
import os
|
6
|
+
import pathlib
|
7
|
+
import re
|
8
|
+
import subprocess
|
9
|
+
import time
|
10
|
+
|
11
|
+
import wandb
|
12
|
+
|
13
|
+
from . import ipc_support, service_token
|
14
|
+
|
15
|
+
# Time functions are monkeypatched in unit tests.
|
16
|
+
_MONOTONIC = time.monotonic
|
17
|
+
_SLEEP = time.sleep
|
18
|
+
|
19
|
+
|
20
|
+
class ServicePollForTokenError(wandb.Error):
    """Failed to discover how to connect to the service.

    Raised while polling the service's port file: when the service process
    exits before a token is read, when the polling timeout elapses, or when
    the finished port file lists no known connection method.
    """
|
22
|
+
|
23
|
+
|
24
|
+
def poll_for_token(
    file: pathlib.Path,
    proc: subprocess.Popen,
    *,
    timeout: float,
) -> service_token.ServiceToken:
    """Wait for the service to publish its connection details.

    On every iteration this first checks that the service process is still
    running, then tries to parse the port file, and finally sleeps for at
    most 0.2 seconds (never past the deadline) before trying again.

    Args:
        file: Path of the port file the service is expected to write.
        proc: The process that is supposed to write the file.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        A token specifying how to connect to the service process.

    Raises:
        ServicePollForTokenError: if the process exits before a token is
            read, the timeout elapses, or the port file contains no known
            connection method.
    """
    deadline = _MONOTONIC() + timeout

    while _MONOTONIC() < deadline:
        exit_code = proc.poll()
        if exit_code is not None:
            # The service died before publishing a port; attach whatever
            # output pipes are available to help with debugging.
            captured_out = proc.stdout.read() if proc.stdout else ""
            captured_err = proc.stderr.read() if proc.stderr else ""
            raise ServicePollForTokenError(
                f"wandb-core exited with code {exit_code}",
                context={
                    "command": proc.args,
                    "proc_out": captured_out,
                    "proc_err": captured_err,
                },
            )

        token = _poll_once(file)
        if token:
            return token

        # Clamp the nap to [0, 0.2] seconds so it never overshoots the deadline.
        remaining = deadline - _MONOTONIC()
        _SLEEP(max(0, min(0.2, remaining)))

    raise ServicePollForTokenError(
        f"Failed to read port info after {timeout} seconds.",
    )
|
66
|
+
|
67
|
+
|
68
|
+
_UNIX_NAME_RE = re.compile(r"unix=(.+)")
|
69
|
+
_TCP_PORT_RE = re.compile(r"sock=(\d+)")
|
70
|
+
|
71
|
+
|
72
|
+
def _poll_once(file: pathlib.Path) -> service_token.ServiceToken | None:
    """Try to read the port file.

    The file is treated as complete only once its last line is "EOF".
    A file that is missing, unreadable, still empty, or not yet terminated
    by "EOF" means the service has not finished writing it.

    Args:
        file: Path of the port file written by the service process.

    Returns:
        A connection token on success. Otherwise, returns None.

    Raises:
        ServicePollForTokenError: if the file is complete but contains no
            known connection method.
    """
    try:
        text = file.read_text()
    except OSError:
        return None

    lines = text.splitlines()
    # An empty file has no last line; treat it as "not written yet" instead
    # of letting lines[-1] raise IndexError.
    if not lines or lines[-1] != "EOF":
        return None

    for line in lines:
        # Prefer a Unix socket when the platform supports it; otherwise fall
        # back to the TCP port entry.
        if ipc_support.SUPPORTS_UNIX and (match := _UNIX_NAME_RE.fullmatch(line)):
            return service_token.UnixServiceToken(
                parent_pid=os.getpid(),
                path=match.group(1),
            )
        elif match := _TCP_PORT_RE.fullmatch(line):
            return service_token.TCPServiceToken(
                parent_pid=os.getpid(),
                port=int(match.group(1)),
            )

    raise ServicePollForTokenError(
        f"No known connection method in {file}:\n{text}",
    )
|
@@ -0,0 +1,111 @@
|
|
1
|
+
"""Module for starting up the service process (wandb-core)."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
import os
|
6
|
+
import pathlib
|
7
|
+
import platform
|
8
|
+
import subprocess
|
9
|
+
import tempfile
|
10
|
+
from typing import TYPE_CHECKING
|
11
|
+
|
12
|
+
from wandb import _sentry
|
13
|
+
from wandb.env import core_debug, dcgm_profiling_enabled, error_reporting_enabled
|
14
|
+
from wandb.errors import WandbCoreNotAvailableError
|
15
|
+
from wandb.sdk.lib.service import ipc_support
|
16
|
+
from wandb.util import get_core_path
|
17
|
+
|
18
|
+
from . import service_port_file, service_token
|
19
|
+
|
20
|
+
if TYPE_CHECKING:
|
21
|
+
from wandb.sdk.wandb_settings import Settings
|
22
|
+
|
23
|
+
|
24
|
+
def start(settings: Settings) -> ServiceProcess:
    """Launch the internal service process.

    Configures the Sentry scope with the given settings before launching, and
    routes any launch failure through ``_sentry.reraise``.

    Args:
        settings: The settings used to tag the Sentry scope and to launch
            the service.

    Returns:
        A handle to the process.
    """
    _sentry.configure_scope(tags=dict(settings), process_context="service")

    try:
        process = _launch_server(settings)
    except Exception as exc:
        _sentry.reraise(exc)
    else:
        return process
|
36
|
+
|
37
|
+
|
38
|
+
class ServiceProcess:
    """Pairs a running internal service process with its connection token."""

    def __init__(
        self,
        *,
        connection_token: service_token.ServiceToken,
        process: subprocess.Popen,
    ) -> None:
        """Wrap an already-started service process.

        Args:
            connection_token: The token used to connect to the process.
            process: The handle of the started process.
        """
        self._process = process
        self._token = connection_token

    @property
    def token(self) -> service_token.ServiceToken:
        """The token for connecting to this process."""
        return self._token

    def join(self) -> int:
        """Block until the process exits, then return its exit code."""
        exit_code = self._process.wait()
        return exit_code
|
58
|
+
|
59
|
+
|
60
|
+
def _launch_server(settings: Settings) -> ServiceProcess:
    """Start wandb-core and wait for it to report how to connect.

    The service is started in a new process group on Windows and in a new
    session elsewhere. It is told to write its connection details to a port
    file inside a temporary directory, which is then polled for up to
    ``settings.x_service_wait`` seconds.

    Args:
        settings: Supplies the polling timeout (``x_service_wait``).

    Returns:
        A handle combining the started process and its connection token.
    """
    on_windows = platform.system() == "Windows"
    creationflags: int = (
        subprocess.CREATE_NEW_PROCESS_GROUP  # type: ignore[attr-defined]
        if on_windows
        else 0
    )
    start_new_session = not on_windows

    pid = str(os.getpid())

    with tempfile.TemporaryDirectory() as tmpdir:
        port_file = pathlib.Path(tmpdir, f"port-{pid}.txt")

        try:
            core_path = get_core_path()
        except WandbCoreNotAvailableError as exc:
            _sentry.reraise(exc)

        # The executable comes first, followed by optional flags.
        command: list[str] = [core_path]

        if not error_reporting_enabled():
            command.append("--no-observability")

        if core_debug(default="False"):
            command += ["--log-level", "-4"]

        if dcgm_profiling_enabled():
            command.append("--enable-dcgm-profiling")

        command += ["--port-filename", str(port_file), "--pid", pid]

        # Without Unix socket support, ask the service for a localhost listener.
        if not ipc_support.SUPPORTS_UNIX:
            command.append("--listen-on-localhost")

        proc = subprocess.Popen(
            command,
            env=os.environ,
            close_fds=True,
            creationflags=creationflags,
            start_new_session=start_new_session,
        )

        # The port file lives in the temporary directory, so it must be read
        # before the directory is cleaned up.
        token = service_port_file.poll_for_token(
            port_file,
            proc,
            timeout=settings.x_service_wait,
        )

    return ServiceProcess(connection_token=token, process=proc)
|
@@ -0,0 +1,164 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import abc
|
4
|
+
import os
|
5
|
+
import re
|
6
|
+
import socket
|
7
|
+
|
8
|
+
from typing_extensions import final, override
|
9
|
+
|
10
|
+
from wandb import env
|
11
|
+
from wandb.sdk.lib.service import ipc_support
|
12
|
+
from wandb.sdk.lib.sock_client import SockClient
|
13
|
+
|
14
|
+
_CURRENT_VERSION = "3"
|
15
|
+
|
16
|
+
# Token formats:
|
17
|
+
_UNIX_TOKEN_RE = re.compile(rf"{_CURRENT_VERSION}-(\d+)-unix-(.+)")
|
18
|
+
_TCP_TOKEN_RE = re.compile(rf"{_CURRENT_VERSION}-(\d+)-tcp-localhost-(\d+)")
|
19
|
+
|
20
|
+
|
21
|
+
class WandbServiceConnectionError(Exception):
    """Failed to connect to the service process.

    Raised by the ``connect`` methods of the service tokens in this module
    when Unix sockets are unsupported or the socket connection fails.
    """
|
23
|
+
|
24
|
+
|
25
|
+
def clear_service_in_env() -> None:
    """Clear the environment variable that stores the service token.

    Does nothing if the variable is not set.
    """
    # pop() with a default avoids a KeyError when the variable is absent.
    os.environ.pop(env.SERVICE, None)
|
28
|
+
|
29
|
+
|
30
|
+
def from_env() -> ServiceToken | None:
    """Load the service token stored in the environment, if any.

    The value is offered to each known token format in turn: Unix socket
    first, then TCP.

    Returns:
        The parsed token, or None if the environment variable is unset or
        empty.

    Raises:
        ValueError: If the environment variable is set but matches no known
            token format.
    """
    raw = os.environ.get(env.SERVICE)
    if not raw:
        return None

    for token_type in (UnixServiceToken, TCPServiceToken):
        parsed = token_type.from_env_string(raw)
        if parsed:
            return parsed

    raise ValueError(f"Failed to parse {env.SERVICE}={raw!r}")
|
50
|
+
|
51
|
+
|
52
|
+
class ServiceToken(abc.ABC):
    """Describes how to reach a running service process."""

    @abc.abstractmethod
    def connect(self) -> SockClient:
        """Open a connection to the service process.

        Returns:
            A socket object for communicating with the service.

        Raises:
            WandbServiceConnectionError: on failure to connect.
        """

    def save_to_env(self) -> None:
        """Store this token in the current process's environment variables."""
        encoded = self._as_env_string()
        os.environ[env.SERVICE] = encoded

    @abc.abstractmethod
    def _as_env_string(self) -> str:
        """Return the string form of this token stored in the environment."""
|
73
|
+
|
74
|
+
|
75
|
+
@final
class UnixServiceToken(ServiceToken):
    """Connects to the service using a Unix domain socket."""

    def __init__(self, *, parent_pid: int, path: str) -> None:
        """Create a Unix socket token.

        Args:
            parent_pid: The PID recorded in the token's environment string.
            path: The path of the Unix domain socket to connect to.
        """
        self._parent_pid = parent_pid
        self._path = path

    @override
    def connect(self) -> SockClient:
        """Connect to the service over the Unix domain socket.

        Returns:
            A SockClient wrapping the connected socket.

        Raises:
            WandbServiceConnectionError: if Unix sockets are unsupported or
                the connection attempt fails.
        """
        if not ipc_support.SUPPORTS_UNIX:
            raise WandbServiceConnectionError("AF_UNIX socket not supported")

        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)

        try:
            # TODO: This may block indefinitely if the service is unhealthy.
            sock.connect(self._path)
        except Exception as e:
            # Release the file descriptor right away instead of waiting for
            # garbage collection of the unconnected socket.
            sock.close()
            raise WandbServiceConnectionError(
                f"Failed to connect to service on socket {self._path}",
            ) from e

        return SockClient(sock)

    @override
    def _as_env_string(self) -> str:
        """Return "<version>-<parent pid>-unix-<socket path>"."""
        return "-".join(
            (
                _CURRENT_VERSION,
                str(self._parent_pid),
                "unix",
                str(self._path),
            )
        )

    @staticmethod
    def from_env_string(token: str) -> UnixServiceToken | None:
        """Returns a Unix service token parsed from the env var.

        Returns None if the string is not in the Unix token format.
        """
        match = _UNIX_TOKEN_RE.fullmatch(token)
        if not match:
            return None

        parent_pid, path = match.groups()
        return UnixServiceToken(parent_pid=int(parent_pid), path=path)
|
120
|
+
|
121
|
+
|
122
|
+
@final
class TCPServiceToken(ServiceToken):
    """Connects to the service using TCP over a localhost socket."""

    def __init__(self, *, parent_pid: int, port: int) -> None:
        """Create a TCP token.

        Args:
            parent_pid: The PID recorded in the token's environment string.
            port: The localhost TCP port to connect to.
        """
        self._parent_pid = parent_pid
        self._port = port

    @override
    def connect(self) -> SockClient:
        """Connect to the service on its localhost TCP port.

        Returns:
            A SockClient wrapping the connected socket.

        Raises:
            WandbServiceConnectionError: if the connection attempt fails.
        """
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        try:
            # TODO: This may block indefinitely if the service is unhealthy.
            sock.connect(("localhost", self._port))
        except Exception as e:
            # Release the file descriptor right away instead of waiting for
            # garbage collection of the unconnected socket.
            sock.close()
            raise WandbServiceConnectionError(
                f"Failed to connect to service on port {self._port}",
            ) from e

        return SockClient(sock)

    @override
    def _as_env_string(self) -> str:
        """Return "<version>-<parent pid>-tcp-localhost-<port>"."""
        return "-".join(
            (
                _CURRENT_VERSION,
                str(self._parent_pid),
                "tcp",
                "localhost",
                str(self._port),
            )
        )

    @staticmethod
    def from_env_string(token: str) -> TCPServiceToken | None:
        """Returns a TCP service token parsed from the env var.

        Returns None if the string is not in the TCP token format.
        """
        match = _TCP_TOKEN_RE.fullmatch(token)
        if not match:
            return None

        parent_pid, port = match.groups()
        return TCPServiceToken(parent_pid=int(parent_pid), port=int(port))
|
wandb/sdk/lib/sock_client.py
CHANGED
@@ -79,17 +79,17 @@ class SockBuffer:
|
|
79
79
|
|
80
80
|
|
81
81
|
class SockClient:
|
82
|
-
_sock: socket.socket
|
83
|
-
_sockid: str
|
84
|
-
_retry_delay: float
|
85
|
-
_lock: "threading.Lock"
|
86
|
-
_bufsize: int
|
87
|
-
_buffer: SockBuffer
|
88
|
-
|
89
82
|
# current header is magic byte "W" followed by 4 byte length of the message
|
90
83
|
HEADLEN = 1 + 4
|
91
84
|
|
92
|
-
def __init__(self) -> None:
|
85
|
+
def __init__(self, sock: socket.socket) -> None:
|
86
|
+
"""Create a SockClient.
|
87
|
+
|
88
|
+
Args:
|
89
|
+
sock: A connected socket.
|
90
|
+
"""
|
91
|
+
self._sock = sock
|
92
|
+
|
93
93
|
# TODO: use safe uuid's (python3.7+) or emulate this
|
94
94
|
self._sockid = uuid.uuid4().hex
|
95
95
|
self._retry_delay = 0.1
|
@@ -97,10 +97,6 @@ class SockClient:
|
|
97
97
|
self._bufsize = 4096
|
98
98
|
self._buffer = SockBuffer()
|
99
99
|
|
100
|
-
def connect(self, port: int) -> None:
|
101
|
-
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
102
|
-
s.connect(("localhost", port))
|
103
|
-
self._sock = s
|
104
100
|
self._detect_bufsize()
|
105
101
|
|
106
102
|
def _detect_bufsize(self) -> None:
|
wandb/sdk/wandb_init.py
CHANGED
@@ -740,7 +740,6 @@ class _WandbInit:
|
|
740
740
|
drun._config.update(config.sweep_no_artifacts)
|
741
741
|
drun._config.update(config.base_no_artifacts)
|
742
742
|
drun.summary = SummaryDisabled() # type: ignore
|
743
|
-
drun._Run__metadata = wandb.sdk.wandb_metadata.Metadata()
|
744
743
|
|
745
744
|
# methods
|
746
745
|
drun.log = lambda data, *_, **__: drun.summary.update(data) # type: ignore[method-assign]
|
@@ -925,8 +924,6 @@ class _WandbInit:
|
|
925
924
|
tel.feature.flow_control_disabled = True
|
926
925
|
if settings.x_flow_control_custom:
|
927
926
|
tel.feature.flow_control_custom = True
|
928
|
-
if not settings.x_require_legacy_service:
|
929
|
-
tel.feature.core = True
|
930
927
|
if settings._shared:
|
931
928
|
wandb.termwarn(
|
932
929
|
"The `shared` mode feature is experimental and may change. "
|
@@ -1367,7 +1364,7 @@ def init( # noqa: C901
|
|
1367
1364
|
the UI.
|
1368
1365
|
If resuming a run, the tags provided here will replace any existing
|
1369
1366
|
tags. To add tags to a resumed run without overwriting the current
|
1370
|
-
tags, use `run.tags +=
|
1367
|
+
tags, use `run.tags += ("new_tag",)` after calling `run = wandb.init()`.
|
1371
1368
|
config: Sets `wandb.config`, a dictionary-like object for storing input
|
1372
1369
|
parameters to your run, such as model hyperparameters or data
|
1373
1370
|
preprocessing settings.
|
@@ -1621,4 +1618,3 @@ def init( # noqa: C901
|
|
1621
1618
|
# Need to build delay into this sentry capture because our exit hooks
|
1622
1619
|
# mess with sentry's ability to send out errors before the program ends.
|
1623
1620
|
wandb._sentry.reraise(e)
|
1624
|
-
raise AssertionError() # should never get here
|
wandb/sdk/wandb_require.py
CHANGED
@@ -11,13 +11,10 @@ Example:
|
|
11
11
|
|
12
12
|
from __future__ import annotations
|
13
13
|
|
14
|
-
import os
|
15
14
|
from typing import Iterable
|
16
15
|
|
17
16
|
import wandb
|
18
|
-
from wandb.env import _REQUIRE_LEGACY_SERVICE
|
19
17
|
from wandb.errors import UnsupportedError
|
20
|
-
from wandb.sdk import wandb_run
|
21
18
|
|
22
19
|
|
23
20
|
class _Requires:
|
@@ -33,22 +30,21 @@ class _Requires:
|
|
33
30
|
def require_require(self) -> None:
|
34
31
|
pass
|
35
32
|
|
36
|
-
def _require_service(self) -> None:
|
37
|
-
wandb.teardown = wandb._teardown # type: ignore
|
38
|
-
wandb.attach = wandb._attach # type: ignore
|
39
|
-
wandb_run.Run.detach = wandb_run.Run._detach # type: ignore
|
40
|
-
|
41
33
|
def require_service(self) -> None:
|
42
|
-
|
34
|
+
# Legacy no-op kept solely for backward compatibility:
|
35
|
+
# some integrations (e.g. PyTorch Lightning) still call
|
36
|
+
# `wandb.require('service')`, which routes here.
|
37
|
+
wandb.termwarn(
|
38
|
+
"`wandb.require('service')` is a no-op as it is now the default behavior."
|
39
|
+
)
|
43
40
|
|
44
41
|
def require_core(self) -> None:
|
42
|
+
# Legacy no-op kept solely for backward compatibility:
|
43
|
+
# many public codebases still call `wandb.require('core')`.
|
45
44
|
wandb.termwarn(
|
46
|
-
"`wandb.require('core')` is
|
45
|
+
"`wandb.require('core')` is a no-op as it is now the default behavior."
|
47
46
|
)
|
48
47
|
|
49
|
-
def require_legacy_service(self) -> None:
|
50
|
-
os.environ[_REQUIRE_LEGACY_SERVICE] = "true"
|
51
|
-
|
52
48
|
def apply(self) -> None:
|
53
49
|
"""Call require_* method for supported features."""
|
54
50
|
last_message: str = ""
|
@@ -64,7 +60,6 @@ class _Requires:
|
|
64
60
|
func()
|
65
61
|
|
66
62
|
if last_message:
|
67
|
-
wandb.termwarn("Supported requirements are: `legacy-service`, `service`.")
|
68
63
|
raise UnsupportedError(last_message)
|
69
64
|
|
70
65
|
|
@@ -91,10 +86,3 @@ def require(
|
|
91
86
|
|
92
87
|
f = _Requires(features=features)
|
93
88
|
f.apply()
|
94
|
-
|
95
|
-
|
96
|
-
def _import_module_hook() -> None:
|
97
|
-
"""On wandb import, setup anything needed based on parent process require calls."""
|
98
|
-
# TODO: optimize by caching which pids this has been done for or use real import hooks
|
99
|
-
# TODO: make this more generic, but for now this works
|
100
|
-
require("service")
|