wandb 0.20.1__py3-none-win32.whl → 0.20.2rc20250616__py3-none-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. wandb/__init__.py +3 -6
  2. wandb/__init__.pyi +1 -1
  3. wandb/analytics/sentry.py +2 -2
  4. wandb/apis/importers/internals/internal.py +0 -3
  5. wandb/apis/public/api.py +2 -2
  6. wandb/apis/public/registries/{utils.py → _utils.py} +12 -12
  7. wandb/apis/public/registries/registries_search.py +2 -2
  8. wandb/apis/public/registries/registry.py +19 -18
  9. wandb/bin/gpu_stats.exe +0 -0
  10. wandb/bin/wandb-core +0 -0
  11. wandb/cli/beta.py +1 -7
  12. wandb/cli/cli.py +0 -30
  13. wandb/env.py +0 -6
  14. wandb/proto/v3/wandb_settings_pb2.py +2 -2
  15. wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
  16. wandb/proto/v4/wandb_settings_pb2.py +2 -2
  17. wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
  18. wandb/proto/v5/wandb_settings_pb2.py +2 -2
  19. wandb/proto/v5/wandb_telemetry_pb2.py +10 -10
  20. wandb/proto/v6/wandb_settings_pb2.py +2 -2
  21. wandb/proto/v6/wandb_telemetry_pb2.py +10 -10
  22. wandb/sdk/artifacts/storage_handlers/s3_handler.py +42 -1
  23. wandb/sdk/backend/backend.py +1 -1
  24. wandb/sdk/internal/handler.py +1 -69
  25. wandb/sdk/lib/printer.py +6 -7
  26. wandb/sdk/lib/progress.py +1 -3
  27. wandb/sdk/lib/service/ipc_support.py +13 -0
  28. wandb/sdk/lib/{service_connection.py → service/service_connection.py} +20 -56
  29. wandb/sdk/lib/service/service_port_file.py +105 -0
  30. wandb/sdk/lib/service/service_process.py +111 -0
  31. wandb/sdk/lib/service/service_token.py +164 -0
  32. wandb/sdk/lib/sock_client.py +8 -12
  33. wandb/sdk/wandb_init.py +0 -3
  34. wandb/sdk/wandb_require.py +9 -20
  35. wandb/sdk/wandb_run.py +0 -24
  36. wandb/sdk/wandb_settings.py +0 -9
  37. wandb/sdk/wandb_setup.py +2 -13
  38. {wandb-0.20.1.dist-info → wandb-0.20.2rc20250616.dist-info}/METADATA +1 -3
  39. {wandb-0.20.1.dist-info → wandb-0.20.2rc20250616.dist-info}/RECORD +42 -68
  40. wandb/sdk/internal/flow_control.py +0 -263
  41. wandb/sdk/internal/internal.py +0 -401
  42. wandb/sdk/internal/internal_util.py +0 -97
  43. wandb/sdk/internal/system/__init__.py +0 -0
  44. wandb/sdk/internal/system/assets/__init__.py +0 -25
  45. wandb/sdk/internal/system/assets/aggregators.py +0 -31
  46. wandb/sdk/internal/system/assets/asset_registry.py +0 -20
  47. wandb/sdk/internal/system/assets/cpu.py +0 -163
  48. wandb/sdk/internal/system/assets/disk.py +0 -210
  49. wandb/sdk/internal/system/assets/gpu.py +0 -416
  50. wandb/sdk/internal/system/assets/gpu_amd.py +0 -233
  51. wandb/sdk/internal/system/assets/interfaces.py +0 -205
  52. wandb/sdk/internal/system/assets/ipu.py +0 -177
  53. wandb/sdk/internal/system/assets/memory.py +0 -166
  54. wandb/sdk/internal/system/assets/network.py +0 -125
  55. wandb/sdk/internal/system/assets/open_metrics.py +0 -293
  56. wandb/sdk/internal/system/assets/tpu.py +0 -154
  57. wandb/sdk/internal/system/assets/trainium.py +0 -393
  58. wandb/sdk/internal/system/env_probe_helpers.py +0 -13
  59. wandb/sdk/internal/system/system_info.py +0 -248
  60. wandb/sdk/internal/system/system_monitor.py +0 -224
  61. wandb/sdk/internal/writer.py +0 -204
  62. wandb/sdk/lib/service_token.py +0 -93
  63. wandb/sdk/service/__init__.py +0 -0
  64. wandb/sdk/service/_startup_debug.py +0 -22
  65. wandb/sdk/service/port_file.py +0 -53
  66. wandb/sdk/service/server.py +0 -107
  67. wandb/sdk/service/server_sock.py +0 -286
  68. wandb/sdk/service/service.py +0 -252
  69. wandb/sdk/service/streams.py +0 -425
  70. {wandb-0.20.1.dist-info → wandb-0.20.2rc20250616.dist-info}/WHEEL +0 -0
  71. {wandb-0.20.1.dist-info → wandb-0.20.2rc20250616.dist-info}/entry_points.txt +0 -0
  72. {wandb-0.20.1.dist-info → wandb-0.20.2rc20250616.dist-info}/licenses/LICENSE +0 -0
@@ -1,97 +0,0 @@
1
- """Internal utility routines.
2
-
3
- Collection of classes to support the internal process.
4
-
5
- """
6
-
7
- import logging
8
- import queue
9
- import sys
10
- import threading
11
- import time
12
- from typing import TYPE_CHECKING, Optional, Tuple, Type, Union
13
-
14
- if TYPE_CHECKING:
15
- from queue import Queue
16
- from threading import Event
17
- from types import TracebackType
18
-
19
- from wandb.proto.wandb_internal_pb2 import Record, Result
20
-
21
- ExceptionType = Union[
22
- Tuple[Type[BaseException], BaseException, TracebackType],
23
- Tuple[None, None, None],
24
- ]
25
-
26
-
27
- logger = logging.getLogger(__name__)
28
-
29
-
30
class ExceptionThread(threading.Thread):
    """Thread base class that records an exception raised by its body.

    Subclasses override ``_run``. When ``_run`` raises an ``Exception``, the
    ``sys.exc_info()`` triple is stored on the thread and the ``stopped``
    event handed to the constructor is set.
    """

    __stopped: "Event"
    __exception: Optional["ExceptionType"]

    def __init__(self, stopped: "Event") -> None:
        threading.Thread.__init__(self)
        self.__exception = None
        self.__stopped = stopped

    def _run(self) -> None:
        """Thread body; subclasses must provide an implementation."""
        raise NotImplementedError

    def run(self) -> None:
        """Run ``_run`` and capture any ``Exception`` it raises."""
        try:
            self._run()
        except Exception:
            self.__exception = sys.exc_info()
        finally:
            # Only signal the event when a failure was actually captured.
            failed = self.__exception is not None
            if failed and self.__stopped:
                self.__stopped.set()

    def get_exception(self) -> Optional["ExceptionType"]:
        """Return the captured ``sys.exc_info()`` triple, or None if none was captured."""
        return self.__exception
55
-
56
-
57
class RecordLoopThread(ExceptionThread):
    """Thread that drains a record queue until a stop event is set.

    Subclasses implement the ``_setup``, ``_process``, ``_finish`` and
    ``_debounce`` hooks. The loop in ``_run`` calls ``_setup`` once, then
    repeatedly pulls records from the input queue and passes each one to
    ``_process``; ``_debounce`` is invoked whenever at least
    ``debounce_interval_ms`` milliseconds have passed since the last
    invocation (checked once per loop iteration). ``_finish`` is called after
    the loop exits normally.
    """

    def __init__(
        self,
        input_record_q: "Queue[Record]",
        result_q: "Queue[Result]",
        stopped: "Event",
        debounce_interval_ms: "float" = 1000,
    ) -> None:
        ExceptionThread.__init__(self, stopped=stopped)
        self._input_record_q = input_record_q
        self._result_q = result_q
        self._stopped = stopped
        self._debounce_interval_ms = debounce_interval_ms

    def _setup(self) -> None:
        """Hook called once before the loop starts."""
        raise NotImplementedError

    def _process(self, record: "Record") -> None:
        """Hook called for every record taken from the input queue."""
        raise NotImplementedError

    def _finish(self) -> None:
        """Hook called once after the loop exits."""
        raise NotImplementedError

    def _debounce(self) -> None:
        """Hook called periodically, at most once per debounce interval."""
        raise NotImplementedError

    def _run(self) -> None:
        self._setup()
        # Use the monotonic clock for interval measurement: wall-clock time
        # (time.time) can jump backwards or forwards when the system clock is
        # adjusted, which would stall or prematurely trigger the debounce.
        start = time.monotonic()
        while not self._stopped.is_set():
            if time.monotonic() - start >= self._debounce_interval_ms / 1000.0:
                self._debounce()
                start = time.monotonic()
            try:
                # Bounded wait so the stop event is re-checked at least once a second.
                record = self._input_record_q.get(timeout=1)
            except queue.Empty:
                continue
            self._process(record)
        self._finish()
File without changes
@@ -1,25 +0,0 @@
1
- __all__ = (
2
- "asset_registry",
3
- "CPU",
4
- "Disk",
5
- "GPU",
6
- "GPUAMD",
7
- "IPU",
8
- "Memory",
9
- "Network",
10
- "OpenMetrics",
11
- "TPU",
12
- "Trainium",
13
- )
14
-
15
- from .asset_registry import asset_registry
16
- from .cpu import CPU
17
- from .disk import Disk
18
- from .gpu import GPU
19
- from .gpu_amd import GPUAMD
20
- from .ipu import IPU
21
- from .memory import Memory
22
- from .network import Network
23
- from .open_metrics import OpenMetrics
24
- from .tpu import TPU
25
- from .trainium import Trainium
@@ -1,31 +0,0 @@
1
- from typing import Sequence, Union
2
-
3
Number = Union[int, float]


def aggregate_mean(samples: Sequence[Number], precision: int = 2) -> float:
    """Return the arithmetic mean of ``samples`` rounded to ``precision`` digits.

    Raises:
        ZeroDivisionError: if ``samples`` is empty.
    """
    return round(sum(samples) / len(samples), precision)


def aggregate_last(samples: Sequence[Number], precision: int = 2) -> Union[float, int]:
    """Return the last sample; ints are returned as-is, other values are rounded.

    Raises:
        IndexError: if ``samples`` is empty.
    """
    last = samples[-1]
    if isinstance(last, int):
        return last
    return round(last, precision)


def aggregate_max(samples: Sequence[Number], precision: int = 2) -> Union[float, int]:
    """Return the largest sample; an int result is returned as-is, otherwise rounded.

    The rounding decision is based on the type of the result itself, so mixed
    int/float input is rounded correctly regardless of sample order.

    Raises:
        ValueError: if ``samples`` is empty.
    """
    largest = max(samples)
    if isinstance(largest, int):
        return largest
    return round(largest, precision)


def aggregate_min(samples: Sequence[Number], precision: int = 2) -> Union[float, int]:
    """Return the smallest sample; an int result is returned as-is, otherwise rounded.

    The rounding decision is based on the type of the result itself, so mixed
    int/float input is rounded correctly regardless of sample order.

    Raises:
        ValueError: if ``samples`` is empty.
    """
    smallest = min(samples)
    if isinstance(smallest, int):
        return smallest
    return round(smallest, precision)


def aggregate_sum(samples: Sequence[Number], precision: int = 2) -> Union[float, int]:
    """Return the sum of ``samples``; an int total is returned as-is, otherwise rounded.

    The rounding decision is based on the type of the total itself, so mixed
    int/float input is rounded correctly regardless of sample order. An empty
    sequence sums to ``0``.
    """
    total = sum(samples)
    if isinstance(total, int):
        return total
    return round(total, precision)
@@ -1,20 +0,0 @@
1
- from typing import Iterator, List, Type
2
-
3
- from .interfaces import Asset
4
-
5
-
6
class AssetRegistry:
    """Ordered collection of asset classes.

    ``register`` returns the class it was given, so it can be applied as a
    class decorator. Iterating the registry yields only those registered
    classes whose ``is_available()`` returns a truthy value.
    """

    def __init__(self) -> None:
        self._registry: List[Type[Asset]] = []

    def register(self, asset: Type[Asset]) -> Type[Asset]:
        """Record ``asset`` in the registry and hand it back unchanged."""
        self._registry.append(asset)
        return asset

    def __iter__(self) -> Iterator[Type[Asset]]:
        """Lazily yield registered assets that report themselves available."""
        yield from (
            candidate for candidate in self._registry if candidate.is_available()
        )


asset_registry = AssetRegistry()
@@ -1,163 +0,0 @@
1
- import threading
2
- from collections import deque
3
- from typing import TYPE_CHECKING, List, Optional
4
-
5
- try:
6
- import psutil
7
- except ImportError:
8
- psutil = None
9
- from .aggregators import aggregate_last, aggregate_mean
10
- from .asset_registry import asset_registry
11
- from .interfaces import Interface, Metric, MetricsMonitor
12
-
13
- if TYPE_CHECKING:
14
- from typing import Deque
15
-
16
- from wandb.sdk.internal.settings_static import SettingsStatic
17
-
18
-
19
- # CPU Metrics
20
-
21
-
22
class ProcessCpuPercent:
    """CPU usage of the process in percent normalized by the number of CPUs."""

    # name = "process_cpu_percent"
    name = "cpu"

    def __init__(self, pid: int) -> None:
        self.pid = pid
        self.samples: Deque[float] = deque([])
        # Created lazily on the first call to sample().
        self.process: Optional[psutil.Process] = None

    def sample(self) -> None:
        """Append the process CPU percentage divided by the CPU count."""
        # TODO: eventually store (timestamp, cpu_percent) pairs instead of
        # bare percentages.
        if self.process is None:
            self.process = psutil.Process(self.pid)

        # psutil.cpu_count() is documented to return None when the count
        # cannot be determined; fall back to 1 rather than failing with a
        # TypeError on the division.
        cpu_count = psutil.cpu_count() or 1
        self.samples.append(self.process.cpu_percent() / cpu_count)

    def clear(self) -> None:
        """Drop all collected samples."""
        self.samples.clear()

    def aggregate(self) -> dict:
        """Return ``{name: mean of samples}``, or an empty dict without samples."""
        # TODO: create a statistics class with helper methods to compute
        # mean, median, min, max, etc.
        if not self.samples:
            return {}
        aggregate = aggregate_mean(self.samples)
        return {self.name: aggregate}
56
-
57
-
58
class CpuPercent:
    """CPU usage of the system in percent per core."""

    name = "cpu.{i}.cpu_percent"

    def __init__(self, interval: Optional[float] = None) -> None:
        self.interval = interval
        self.samples: Deque[List[float]] = deque([])

    def sample(self) -> None:
        """Record one per-core reading from ``psutil.cpu_percent``."""
        per_core = psutil.cpu_percent(interval=self.interval, percpu=True)
        self.samples.append(per_core)

    def clear(self) -> None:
        """Drop all collected samples."""
        self.samples.clear()

    def aggregate(self) -> dict:
        """Return the mean reading per core, keyed by the formatted metric name.

        The number of cores is taken from the first collected sample. An empty
        dict is returned when nothing has been sampled.
        """
        if not self.samples:
            return {}
        core_count = len(self.samples[0])
        return {
            self.name.format(i=core): aggregate_mean(
                [reading[core] for reading in self.samples]
            )
            for core in range(core_count)
        }
83
-
84
-
85
class ProcessCpuThreads:
    """Number of threads used by the process."""

    name = "proc.cpu.threads"

    def __init__(self, pid: int) -> None:
        self.pid = pid
        self.process: Optional[psutil.Process] = None
        self.samples: Deque[int] = deque([])

    def sample(self) -> None:
        """Record the current thread count of the monitored process."""
        # The psutil handle is created on first use and reused afterwards.
        if self.process is None:
            self.process = psutil.Process(self.pid)

        thread_count = self.process.num_threads()
        self.samples.append(thread_count)

    def clear(self) -> None:
        """Drop all collected samples."""
        self.samples.clear()

    def aggregate(self) -> dict:
        """Return ``{name: latest sample}``, or an empty dict without samples."""
        if self.samples:
            return {self.name: aggregate_last(self.samples)}
        return {}
108
-
109
-
110
@asset_registry.register
class CPU:
    """CPU asset: bundles the process CPU metrics and a monitor that runs them.

    Available only when ``psutil`` could be imported.
    """

    def __init__(
        self,
        interface: "Interface",
        settings: "SettingsStatic",
        shutdown_event: threading.Event,
    ) -> None:
        self.name: str = self.__class__.__name__.lower()
        self.metrics: List[Metric] = [
            ProcessCpuPercent(settings.x_stats_pid),
            # CpuPercent(),
            ProcessCpuThreads(settings.x_stats_pid),
        ]
        self.metrics_monitor: MetricsMonitor = MetricsMonitor(
            self.name,
            self.metrics,
            interface,
            settings,
            shutdown_event,
        )

    @classmethod
    def is_available(cls) -> bool:
        """Return True when ``psutil`` was imported successfully."""
        return psutil is not None

    def probe(self) -> dict:
        """Return static CPU information (core counts and, if available, frequencies)."""
        asset_info = {
            "cpu_count": psutil.cpu_count(logical=False),
            "cpu_count_logical": psutil.cpu_count(logical=True),
        }
        try:
            # Read the frequency once so current/min/max come from a single
            # snapshot instead of three separate calls.
            overall = psutil.cpu_freq()
            asset_info["cpu_freq"] = {
                "current": overall.current,
                "min": overall.min,
                "max": overall.max,
            }
            asset_info["cpu_freq_per_core"] = [
                {
                    "current": freq.current,
                    "min": freq.min,
                    "max": freq.max,
                }
                for freq in psutil.cpu_freq(percpu=True)
            ]
        except Exception:
            # Best effort: frequency information is optional, so any failure
            # here simply leaves the corresponding keys out of the result.
            pass
        return asset_info

    def start(self) -> None:
        """Start the underlying metrics monitor."""
        self.metrics_monitor.start()

    def finish(self) -> None:
        """Finish the underlying metrics monitor."""
        self.metrics_monitor.finish()
@@ -1,210 +0,0 @@
1
- import threading
2
- from collections import deque
3
- from typing import TYPE_CHECKING, List, Optional
4
-
5
- try:
6
- import psutil
7
- except ImportError:
8
- psutil = None
9
-
10
- from wandb.errors.term import termwarn
11
-
12
- from .aggregators import aggregate_mean
13
- from .asset_registry import asset_registry
14
- from .interfaces import Interface, Metric, MetricsMonitor
15
-
16
- if TYPE_CHECKING:
17
- from typing import Deque
18
-
19
- from wandb.sdk.internal.settings_static import SettingsStatic
20
-
21
-
22
class DiskUsagePercent:
    """Total system disk usage in percent."""

    name = "disk.{path}.usagePercent"
    samples: "Deque[List[float]]"

    def __init__(self, paths: List[str]) -> None:
        self.samples = deque([])
        # Keep only the paths that can actually be queried; warn about the rest.
        self.paths: List[str] = []
        for candidate in paths:
            try:
                psutil.disk_usage(candidate)
            except Exception as e:
                termwarn(f"Could not access disk path {candidate}: {e}", repeat=False)
            else:
                self.paths.append(candidate)

    def sample(self) -> None:
        """Record one usage-percent reading per monitored path."""
        readings: List[float] = [
            psutil.disk_usage(location).percent for location in self.paths
        ]
        if readings:
            self.samples.append(readings)

    def clear(self) -> None:
        """Drop all collected samples."""
        self.samples.clear()

    def aggregate(self) -> dict:
        """Return the mean usage percent per path, or an empty dict without samples."""
        if not self.samples:
            return {}
        disk_metrics = {}
        for idx, raw_path in enumerate(self.paths):
            mean_usage = aggregate_mean([reading[idx] for reading in self.samples])
            # ugly hack to please the frontend:
            key = self.name.format(path=raw_path.replace("/", "\\"))
            disk_metrics[key] = mean_usage

        return disk_metrics
61
-
62
-
63
class DiskUsage:
    """Total system disk usage in GB."""

    name = "disk.{path}.usageGB"
    samples: "Deque[List[float]]"

    def __init__(self, paths: List[str]) -> None:
        self.samples = deque([])
        # Keep only the paths that can actually be queried; warn about the rest.
        self.paths: List[str] = []
        for candidate in paths:
            try:
                psutil.disk_usage(candidate)
            except Exception as e:
                termwarn(f"Could not access disk path {candidate}: {e}", repeat=False)
            else:
                self.paths.append(candidate)

    def sample(self) -> None:
        """Record one used-space reading (bytes / 1024**3) per monitored path."""
        readings: List[float] = [
            psutil.disk_usage(location).used / 1024 / 1024 / 1024
            for location in self.paths
        ]
        if readings:
            self.samples.append(readings)

    def clear(self) -> None:
        """Drop all collected samples."""
        self.samples.clear()

    def aggregate(self) -> dict:
        """Return the mean used space per path, or an empty dict without samples."""
        if not self.samples:
            return {}
        disk_metrics = {}
        for idx, raw_path in enumerate(self.paths):
            mean_usage = aggregate_mean([reading[idx] for reading in self.samples])
            # ugly hack to please the frontend:
            key = self.name.format(path=raw_path.replace("/", "\\"))
            disk_metrics[key] = mean_usage

        return disk_metrics
101
-
102
-
103
class DiskIn:
    """Total system disk read in MB."""

    name = "disk.in"
    samples: "Deque[float]"

    def __init__(self) -> None:
        self.samples = deque([])
        # Baseline byte counter, captured on the first successful sample.
        self.read_init: Optional[int] = None

    def sample(self) -> None:
        """Append the bytes read since the first sample, converted to MB."""
        # Read the counters once per sample so the baseline and the reading
        # come from the same snapshot.
        counters = psutil.disk_io_counters()
        if counters is None:
            # psutil documents that disk_io_counters() may return None
            # (e.g. on a diskless machine); there is nothing to record then.
            return
        read_bytes = counters.read_bytes
        if self.read_init is None:
            # initialize the read_init value on first sample
            self.read_init = read_bytes
        self.samples.append((read_bytes - self.read_init) / 1024 / 1024)

    def clear(self) -> None:
        """Drop all collected samples."""
        self.samples.clear()

    def aggregate(self) -> dict:
        """Return ``{name: mean of samples}``, or an empty dict without samples."""
        if not self.samples:
            return {}
        aggregate = aggregate_mean(self.samples)
        return {self.name: aggregate}
129
-
130
-
131
class DiskOut:
    """Total system disk write in MB."""

    name = "disk.out"
    samples: "Deque[float]"

    def __init__(self) -> None:
        self.samples = deque([])
        # Baseline byte counter, captured on the first successful sample.
        self.write_init: Optional[int] = None

    def sample(self) -> None:
        """Append the bytes written since the first sample, converted to MB."""
        # Read the counters once per sample so the baseline and the reading
        # come from the same snapshot.
        counters = psutil.disk_io_counters()
        if counters is None:
            # psutil documents that disk_io_counters() may return None
            # (e.g. on a diskless machine); there is nothing to record then.
            return
        write_bytes = counters.write_bytes
        if self.write_init is None:
            # init on first sample
            self.write_init = write_bytes
        self.samples.append((write_bytes - self.write_init) / 1024 / 1024)

    def clear(self) -> None:
        """Drop all collected samples."""
        self.samples.clear()

    def aggregate(self) -> dict:
        """Return ``{name: mean of samples}``, or an empty dict without samples."""
        if not self.samples:
            return {}
        aggregate = aggregate_mean(self.samples)
        return {self.name: aggregate}
157
-
158
-
159
@asset_registry.register
class Disk:
    """Disk asset: bundles the disk usage / IO metrics and a monitor that runs them.

    The monitored paths come from ``settings.x_stats_disk_paths`` and default
    to ``["/"]`` when that setting is empty. Available only when ``psutil``
    could be imported.
    """

    def __init__(
        self,
        interface: "Interface",
        settings: "SettingsStatic",
        shutdown_event: threading.Event,
    ) -> None:
        self.name = self.__class__.__name__.lower()
        self.settings = settings
        self.metrics: List[Metric] = [
            DiskUsagePercent(list(settings.x_stats_disk_paths or ["/"])),
            DiskUsage(list(settings.x_stats_disk_paths or ["/"])),
            DiskIn(),
            DiskOut(),
        ]
        self.metrics_monitor = MetricsMonitor(
            self.name,
            self.metrics,
            interface,
            settings,
            shutdown_event,
        )

    @classmethod
    def is_available(cls) -> bool:
        """Return True when ``psutil`` was imported successfully."""
        return psutil is not None

    def probe(self) -> dict:
        """Return total and used bytes for each configured disk path.

        Paths that cannot be queried are reported with a warning and left out
        of the result.
        """
        disk_paths = list(self.settings.x_stats_disk_paths or ["/"])
        disk_metrics = {}
        for disk_path in disk_paths:
            try:
                # Query once so total and used come from the same snapshot.
                usage = psutil.disk_usage(disk_path)
                disk_metrics[disk_path] = {
                    # total disk space in Bytes:
                    "total": usage.total,
                    # total disk space used in Bytes:
                    "used": usage.used,
                }
            except Exception as e:
                termwarn(f"Could not access disk path {disk_path}: {e}", repeat=False)

        return {self.name: disk_metrics}

    def start(self) -> None:
        """Start the underlying metrics monitor."""
        self.metrics_monitor.start()

    def finish(self) -> None:
        """Finish the underlying metrics monitor."""
        self.metrics_monitor.finish()
- self.metrics_monitor.finish()