wandb 0.19.1__py3-none-any.whl → 0.19.2__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- wandb/__init__.py +1 -1
- wandb/__init__.pyi +3 -5
- wandb/agents/pyagent.py +1 -1
- wandb/apis/importers/wandb.py +1 -1
- wandb/apis/public/files.py +1 -1
- wandb/apis/public/jobs.py +1 -1
- wandb/apis/public/runs.py +2 -7
- wandb/apis/reports/v1/__init__.py +1 -1
- wandb/apis/reports/v2/__init__.py +1 -1
- wandb/apis/workspaces/__init__.py +1 -1
- wandb/bin/gpu_stats +0 -0
- wandb/cli/beta.py +7 -4
- wandb/cli/cli.py +5 -7
- wandb/docker/__init__.py +4 -4
- wandb/integration/fastai/__init__.py +4 -6
- wandb/integration/keras/keras.py +5 -3
- wandb/integration/metaflow/metaflow.py +7 -7
- wandb/integration/prodigy/prodigy.py +3 -11
- wandb/integration/sagemaker/__init__.py +5 -3
- wandb/integration/sagemaker/config.py +17 -8
- wandb/integration/sagemaker/files.py +0 -1
- wandb/integration/sagemaker/resources.py +47 -18
- wandb/integration/torch/wandb_torch.py +1 -1
- wandb/proto/v3/wandb_internal_pb2.py +273 -235
- wandb/proto/v4/wandb_internal_pb2.py +222 -214
- wandb/proto/v5/wandb_internal_pb2.py +222 -214
- wandb/sdk/artifacts/artifact.py +3 -9
- wandb/sdk/backend/backend.py +1 -1
- wandb/sdk/data_types/base_types/wb_value.py +1 -1
- wandb/sdk/data_types/graph.py +2 -2
- wandb/sdk/data_types/saved_model.py +1 -1
- wandb/sdk/data_types/video.py +1 -1
- wandb/sdk/interface/interface.py +25 -25
- wandb/sdk/interface/interface_shared.py +21 -5
- wandb/sdk/internal/handler.py +19 -1
- wandb/sdk/internal/internal.py +1 -1
- wandb/sdk/internal/internal_api.py +4 -5
- wandb/sdk/internal/sample.py +2 -2
- wandb/sdk/internal/sender.py +1 -2
- wandb/sdk/internal/settings_static.py +3 -1
- wandb/sdk/internal/system/assets/disk.py +4 -4
- wandb/sdk/internal/system/assets/gpu.py +1 -1
- wandb/sdk/internal/system/assets/memory.py +1 -1
- wandb/sdk/internal/system/system_info.py +1 -1
- wandb/sdk/internal/system/system_monitor.py +3 -1
- wandb/sdk/internal/tb_watcher.py +1 -1
- wandb/sdk/launch/_project_spec.py +3 -3
- wandb/sdk/launch/builder/abstract.py +1 -1
- wandb/sdk/lib/apikey.py +2 -3
- wandb/sdk/lib/fsm.py +1 -1
- wandb/sdk/lib/gitlib.py +1 -1
- wandb/sdk/lib/gql_request.py +1 -1
- wandb/sdk/lib/interrupt.py +37 -0
- wandb/sdk/lib/lazyloader.py +1 -1
- wandb/sdk/lib/service_connection.py +1 -1
- wandb/sdk/lib/telemetry.py +1 -1
- wandb/sdk/service/_startup_debug.py +1 -1
- wandb/sdk/service/server_sock.py +3 -2
- wandb/sdk/service/service.py +1 -1
- wandb/sdk/service/streams.py +19 -17
- wandb/sdk/verify/verify.py +13 -13
- wandb/sdk/wandb_init.py +95 -104
- wandb/sdk/wandb_login.py +1 -1
- wandb/sdk/wandb_metadata.py +547 -0
- wandb/sdk/wandb_run.py +127 -35
- wandb/sdk/wandb_settings.py +5 -36
- wandb/sdk/wandb_setup.py +83 -82
- wandb/sdk/wandb_sweep.py +2 -2
- wandb/sdk/wandb_sync.py +15 -18
- wandb/sync/sync.py +10 -10
- wandb/util.py +11 -3
- wandb/wandb_agent.py +11 -16
- wandb/wandb_controller.py +7 -7
- {wandb-0.19.1.dist-info → wandb-0.19.2.dist-info}/METADATA +5 -3
- {wandb-0.19.1.dist-info → wandb-0.19.2.dist-info}/RECORD +78 -76
- {wandb-0.19.1.dist-info → wandb-0.19.2.dist-info}/WHEEL +1 -1
- {wandb-0.19.1.dist-info → wandb-0.19.2.dist-info}/entry_points.txt +0 -0
- {wandb-0.19.1.dist-info → wandb-0.19.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,547 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import sys
|
4
|
+
from contextlib import contextmanager
|
5
|
+
from datetime import datetime, timezone
|
6
|
+
from typing import Any, Callable
|
7
|
+
|
8
|
+
from google.protobuf.timestamp_pb2 import Timestamp
|
9
|
+
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
10
|
+
|
11
|
+
from wandb.proto import wandb_internal_pb2
|
12
|
+
|
13
|
+
if sys.version_info >= (3, 11):
|
14
|
+
from typing import Self
|
15
|
+
else:
|
16
|
+
from typing_extensions import Self
|
17
|
+
|
18
|
+
|
19
|
+
class DiskInfo(BaseModel, validate_assignment=True):
    """Pydantic counterpart of the ``DiskInfo`` protobuf message.

    Both fields are optional. A field that is ``None`` (or ``0``) is written
    to the protobuf message as ``0``.
    """

    total: int | None = None
    used: int | None = None

    def to_proto(self) -> wandb_internal_pb2.DiskInfo:
        """Build the protobuf message, substituting ``0`` for missing values."""
        total = self.total or 0
        used = self.used or 0
        return wandb_internal_pb2.DiskInfo(total=total, used=used)

    @classmethod
    def from_proto(cls, proto: wandb_internal_pb2.DiskInfo) -> DiskInfo:
        """Create a model from a protobuf message, copying each field as-is."""
        values = {"total": proto.total, "used": proto.used}
        return cls(**values)
|
32
|
+
|
33
|
+
|
34
|
+
class MemoryInfo(BaseModel, validate_assignment=True):
    """Pydantic counterpart of the ``MemoryInfo`` protobuf message.

    ``total`` is optional; ``None`` (or ``0``) is written to the protobuf
    message as ``0``.
    """

    total: int | None = None

    def to_proto(self) -> wandb_internal_pb2.MemoryInfo:
        """Build the protobuf message, substituting ``0`` for a missing total."""
        total = self.total or 0
        return wandb_internal_pb2.MemoryInfo(total=total)

    @classmethod
    def from_proto(cls, proto: wandb_internal_pb2.MemoryInfo) -> MemoryInfo:
        """Create a model from a protobuf message, copying ``total`` as-is."""
        values = {"total": proto.total}
        return cls(**values)
|
43
|
+
|
44
|
+
|
45
|
+
class CpuInfo(BaseModel, validate_assignment=True):
    """Pydantic counterpart of the ``CpuInfo`` protobuf message.

    Both counts are optional. A count that is ``None`` (or ``0``) is written
    to the protobuf message as ``0``.
    """

    count: int | None = None
    count_logical: int | None = None

    def to_proto(self) -> wandb_internal_pb2.CpuInfo:
        """Build the protobuf message, substituting ``0`` for missing counts."""
        count = self.count or 0
        count_logical = self.count_logical or 0
        return wandb_internal_pb2.CpuInfo(count=count, count_logical=count_logical)

    @classmethod
    def from_proto(cls, proto: wandb_internal_pb2.CpuInfo) -> CpuInfo:
        """Create a model from a protobuf message, copying each field as-is."""
        values = {"count": proto.count, "count_logical": proto.count_logical}
        return cls(**values)
|
58
|
+
|
59
|
+
|
60
|
+
class AppleInfo(BaseModel, validate_assignment=True):
    """Pydantic counterpart of the ``AppleInfo`` protobuf message.

    Every field is optional. When converting to protobuf, a missing or falsy
    string becomes ``""`` and a missing or falsy integer becomes ``0``.
    """

    name: str | None = None
    ecpu_cores: int | None = None
    pcpu_cores: int | None = None
    gpu_cores: int | None = None
    memory_gb: int | None = None
    swap_total_bytes: int | None = None
    ram_total_bytes: int | None = None

    def to_proto(self) -> wandb_internal_pb2.AppleInfo:
        """Build the protobuf message, filling in blanks for missing values."""
        # Each field name is paired with the value used when it is unset.
        blanks = {
            "name": "",
            "ecpu_cores": 0,
            "pcpu_cores": 0,
            "gpu_cores": 0,
            "memory_gb": 0,
            "swap_total_bytes": 0,
            "ram_total_bytes": 0,
        }
        kwargs = {
            field: getattr(self, field) or blank for field, blank in blanks.items()
        }
        return wandb_internal_pb2.AppleInfo(**kwargs)

    @classmethod
    def from_proto(cls, proto: wandb_internal_pb2.AppleInfo) -> AppleInfo:
        """Create a model from a protobuf message, copying each field as-is."""
        field_names = (
            "name",
            "ecpu_cores",
            "pcpu_cores",
            "gpu_cores",
            "memory_gb",
            "swap_total_bytes",
            "ram_total_bytes",
        )
        values = {field: getattr(proto, field) for field in field_names}
        return cls(**values)
|
91
|
+
|
92
|
+
|
93
|
+
class GpuNvidiaInfo(BaseModel, validate_assignment=True):
    """Pydantic counterpart of the ``GpuNvidiaInfo`` protobuf message.

    Every field is optional. When converting to protobuf, a missing or falsy
    string becomes ``""`` and a missing or falsy integer becomes ``0``.
    """

    name: str | None = None
    memory_total: int | None = None
    cuda_cores: int | None = None
    architecture: str | None = None

    def to_proto(self) -> wandb_internal_pb2.GpuNvidiaInfo:
        """Build the protobuf message, filling in blanks for missing values."""
        # Each field name is paired with the value used when it is unset.
        blanks = {
            "name": "",
            "memory_total": 0,
            "cuda_cores": 0,
            "architecture": "",
        }
        kwargs = {
            field: getattr(self, field) or blank for field, blank in blanks.items()
        }
        return wandb_internal_pb2.GpuNvidiaInfo(**kwargs)

    @classmethod
    def from_proto(cls, proto: wandb_internal_pb2.GpuNvidiaInfo) -> GpuNvidiaInfo:
        """Create a model from a protobuf message, copying each field as-is."""
        field_names = ("name", "memory_total", "cuda_cores", "architecture")
        values = {field: getattr(proto, field) for field in field_names}
        return cls(**values)
|
115
|
+
|
116
|
+
|
117
|
+
class GpuAmdInfo(BaseModel, validate_assignment=True):
    """Pydantic counterpart of the ``GpuAmdInfo`` protobuf message.

    Every field is an optional string. When converting to protobuf, a missing
    or empty value is written as ``""``.
    """

    id: str | None = None
    unique_id: str | None = None
    vbios_version: str | None = None
    performance_level: str | None = None
    gpu_overdrive: str | None = None
    gpu_memory_overdrive: str | None = None
    max_power: str | None = None
    series: str | None = None
    model: str | None = None
    vendor: str | None = None
    sku: str | None = None
    sclk_range: str | None = None
    mclk_range: str | None = None

    def to_proto(self) -> wandb_internal_pb2.GpuAmdInfo:
        """Build the protobuf message, substituting ``""`` for missing values."""
        field_names = (
            "id",
            "unique_id",
            "vbios_version",
            "performance_level",
            "gpu_overdrive",
            "gpu_memory_overdrive",
            "max_power",
            "series",
            "model",
            "vendor",
            "sku",
            "sclk_range",
            "mclk_range",
        )
        kwargs = {field: getattr(self, field) or "" for field in field_names}
        return wandb_internal_pb2.GpuAmdInfo(**kwargs)

    @classmethod
    def from_proto(cls, proto: wandb_internal_pb2.GpuAmdInfo) -> GpuAmdInfo:
        """Create a model from a protobuf message, copying each field as-is."""
        field_names = (
            "id",
            "unique_id",
            "vbios_version",
            "performance_level",
            "gpu_overdrive",
            "gpu_memory_overdrive",
            "max_power",
            "series",
            "model",
            "vendor",
            "sku",
            "sclk_range",
            "mclk_range",
        )
        values = {field: getattr(proto, field) for field in field_names}
        return cls(**values)
|
166
|
+
|
167
|
+
|
168
|
+
class TrainiumInfo(BaseModel, validate_assignment=True):
    """Pydantic counterpart of the ``TrainiumInfo`` protobuf message.

    Every field is optional. When converting to protobuf, a missing or falsy
    string becomes ``""`` and a missing or falsy integer becomes ``0``.
    """

    name: str | None = None
    vendor: str | None = None
    neuron_device_count: int | None = None
    neuroncore_per_device_count: int | None = None

    def to_proto(self) -> wandb_internal_pb2.TrainiumInfo:
        """Build the protobuf message, filling in blanks for missing values."""
        message_kwargs = {
            "name": self.name or "",
            "vendor": self.vendor or "",
            "neuron_device_count": self.neuron_device_count or 0,
            "neuroncore_per_device_count": self.neuroncore_per_device_count or 0,
        }
        return wandb_internal_pb2.TrainiumInfo(**message_kwargs)

    @classmethod
    def from_proto(cls, proto: wandb_internal_pb2.TrainiumInfo) -> TrainiumInfo:
        """Create a model from a protobuf message, copying each field as-is."""
        field_names = (
            "name",
            "vendor",
            "neuron_device_count",
            "neuroncore_per_device_count",
        )
        values = {field: getattr(proto, field) for field in field_names}
        return cls(**values)
|
190
|
+
|
191
|
+
|
192
|
+
class TPUInfo(BaseModel, validate_assignment=True):
    """Pydantic counterpart of the ``TPUInfo`` protobuf message.

    Every field is optional. When converting to protobuf, a missing or falsy
    string becomes ``""`` and a missing or falsy integer becomes ``0``.
    """

    name: str | None = None
    hbm_gib: int | None = None
    devices_per_chip: int | None = None
    count: int | None = None

    def to_proto(self) -> wandb_internal_pb2.TPUInfo:
        """Build the protobuf message, filling in blanks for missing values."""
        message_kwargs = {
            "name": self.name or "",
            "hbm_gib": self.hbm_gib or 0,
            "devices_per_chip": self.devices_per_chip or 0,
            "count": self.count or 0,
        }
        return wandb_internal_pb2.TPUInfo(**message_kwargs)

    @classmethod
    def from_proto(cls, proto: wandb_internal_pb2.TPUInfo) -> TPUInfo:
        """Create a model from a protobuf message, copying each field as-is."""
        field_names = ("name", "hbm_gib", "devices_per_chip", "count")
        values = {field: getattr(proto, field) for field in field_names}
        return cls(**values)
|
214
|
+
|
215
|
+
|
216
|
+
class GitRepoRecord(BaseModel, validate_assignment=True):
    """Pydantic counterpart of the ``GitRepoRecord`` protobuf message.

    ``remote_url`` is declared with the alias ``remote``, which is the keyword
    used when constructing the model. Missing or empty values are written to
    the protobuf message as ``""``.
    """

    remote_url: str | None = Field(default=None, alias="remote")
    commit: str | None = None

    def to_proto(self) -> wandb_internal_pb2.GitRepoRecord:
        """Build the protobuf message, substituting ``""`` for missing values."""
        url = self.remote_url or ""
        sha = self.commit or ""
        return wandb_internal_pb2.GitRepoRecord(remote_url=url, commit=sha)

    @classmethod
    def from_proto(cls, proto: wandb_internal_pb2.GitRepoRecord) -> GitRepoRecord:
        """Create a model from a protobuf message, copying each field as-is."""
        # The remote URL is passed under the field's alias, ``remote``.
        values = {"remote": proto.remote_url, "commit": proto.commit}
        return cls(**values)
|
229
|
+
|
230
|
+
|
231
|
+
# Scalar fields whose names are identical on ``Metadata`` and on the
# ``MetadataRequest`` protobuf message, in the order they are copied.
_METADATA_SCALAR_FIELDS = (
    "os",
    "python",
    "docker",
    "cuda",
    "state",
    "program",
    "code_path",
    "email",
    "root",
    "host",
    "username",
    "executable",
    "code_path_local",
    "colab",
    "cpu_count",
    "cpu_count_logical",
    "gpu_type",
    "gpu_count",
    "cuda_version",
)

# Timestamp-valued fields, converted between ``datetime`` and ``Timestamp``.
_METADATA_TIMESTAMP_FIELDS = ("heartbeat_at", "started_at")

# Singular nested-message fields and the model class that mirrors each one.
_METADATA_MESSAGE_MODELS = {
    "git": GitRepoRecord,
    "memory": MemoryInfo,
    "cpu": CpuInfo,
    "apple": AppleInfo,
    "trainium": TrainiumInfo,
    "tpu": TPUInfo,
}


class Metadata(BaseModel, validate_assignment=True):
    """Metadata about the run environment.

    NOTE: Definitions must be kept in sync with wandb_internal.proto::MetadataRequest.

    Attributes:
        os (str, optional): Operating system.
        python (str, optional): Python version.
        heartbeat_at (datetime, optional): Timestamp of last heartbeat.
        started_at (datetime, optional): Timestamp of run start.
        docker (str, optional): Docker image.
        cuda (str, optional): CUDA version.
        args (List[str]): Command-line arguments.
        state (str, optional): Run state.
        program (str, optional): Program name.
        code_path (str, optional): Path to code.
        git (GitRepoRecord, optional): Git repository information.
        email (str, optional): Email address.
        root (str, optional): Root directory.
        host (str, optional): Host name.
        username (str, optional): Username.
        executable (str, optional): Python executable path.
        code_path_local (str, optional): Local code path.
        colab (str, optional): Colab URL.
        cpu_count (int, optional): CPU count.
        cpu_count_logical (int, optional): Logical CPU count.
        gpu_type (str, optional): GPU type.
        gpu_count (int, optional): GPU count.
        disk (Dict[str, DiskInfo]): Disk information.
        memory (MemoryInfo, optional): Memory information.
        cpu (CpuInfo, optional): CPU information.
        apple (AppleInfo, optional): Apple silicon information.
        gpu_nvidia (List[GpuNvidiaInfo]): NVIDIA GPU information.
        gpu_amd (List[GpuAmdInfo]): AMD GPU information.
        slurm (Dict[str, str]): Slurm environment information.
        cuda_version (str, optional): CUDA version.
        trainium (TrainiumInfo, optional): Trainium information.
        tpu (TPUInfo, optional): TPU information.
    """

    # TODO: Pydantic configuration.
    model_config = ConfigDict(
        extra="ignore",  # ignore extra fields
        validate_default=True,  # validate default values
    )

    os: str | None = None
    python: str | None = None
    heartbeat_at: datetime | None = Field(default=None, alias="heartbeatAt")
    started_at: datetime | None = Field(default=None, alias="startedAt")
    docker: str | None = None
    cuda: str | None = None
    args: list[str] = Field(default_factory=list)
    state: str | None = None
    program: str | None = None
    code_path: str | None = Field(default=None, alias="codePath")
    git: GitRepoRecord | None = None
    email: str | None = None
    root: str | None = None
    host: str | None = None
    username: str | None = None
    executable: str | None = None
    code_path_local: str | None = Field(default=None, alias="codePathLocal")
    colab: str | None = None
    cpu_count: int | None = Field(default=None, alias="cpuCount")
    cpu_count_logical: int | None = Field(default=None, alias="cpuCountLogical")
    gpu_type: str | None = Field(default=None, alias="gpuType")
    gpu_count: int | None = Field(default=None, alias="gpuCount")
    disk: dict[str, DiskInfo] = Field(default_factory=dict)
    memory: MemoryInfo | None = None
    cpu: CpuInfo | None = None
    apple: AppleInfo | None = None
    gpu_nvidia: list[GpuNvidiaInfo] = Field(default_factory=list, alias="gpuNvidia")
    gpu_amd: list[GpuAmdInfo] = Field(default_factory=list, alias="gpuAmd")
    slurm: dict[str, str] = Field(default_factory=dict)
    cuda_version: str | None = Field(default=None, alias="cudaVersion")
    trainium: TrainiumInfo | None = None
    tpu: TPUInfo | None = None

    def __init__(self, **data: Any) -> None:
        """Validate ``data`` into the model and start with no update callback."""
        super().__init__(**data)

        # Callback for post-update. This is used in the Run object to trigger
        # a metadata update after the object is modified.
        self._post_update_callback: Callable | None = None  # type: ignore

    def _set_callback(self, callback: Callable) -> None:
        """Register ``callback``; it is called with ``self.to_proto()``."""
        self._post_update_callback = callback

    @contextmanager
    def disable_callback(self):
        """Temporarily disable callback.

        The previously registered callback is restored on exit, even if the
        body of the ``with`` block raises.
        """
        original_callback = self._post_update_callback
        self._post_update_callback = None
        try:
            yield
        finally:
            self._post_update_callback = original_callback

    @model_validator(mode="after")
    def _callback(self) -> Self:
        """Invoke the post-update callback, if any, with the protobuf form."""
        # getattr() with a default is used because this validator also runs
        # during construction, before __init__ has created the attribute.
        if getattr(self, "_post_update_callback", None) is not None:
            self._post_update_callback(self.to_proto())  # type: ignore

        return self

    @classmethod
    def _datetime_to_timestamp(cls, dt: datetime | None) -> Timestamp | None:
        """Convert a datetime to a protobuf Timestamp.

        Timezone-aware datetimes are converted to UTC first. Naive datetimes
        are handed to ``datetime.timestamp()`` unchanged, which interprets
        them as local time.

        Args:
            dt: The datetime to convert, or ``None``.

        Returns:
            The equivalent ``Timestamp``, or ``None`` when ``dt`` is ``None``.
        """
        if dt is None:
            return None
        ts = Timestamp()
        # Convert to UTC if the datetime has a timezone
        if dt.tzinfo is not None:
            dt = dt.astimezone(timezone.utc)
        # Convert to seconds and nanos. The sub-second part is dropped before
        # taking the timestamp so that ``seconds`` is the whole second at or
        # before ``dt``. Truncating a fractional timestamp with int() rounds
        # toward zero, which, combined with the non-negative ``nanos``, would
        # place pre-epoch datetimes one second too late.
        ts.seconds = int(dt.replace(microsecond=0).timestamp())
        ts.nanos = dt.microsecond * 1000
        return ts

    @classmethod
    def _timestamp_to_datetime(cls, ts: Timestamp | None) -> datetime | None:
        """Convert a protobuf Timestamp to a timezone-aware UTC datetime.

        Precision below one microsecond is discarded.

        Args:
            ts: The timestamp to convert, or ``None``.

        Returns:
            The equivalent UTC ``datetime``, or ``None`` when ``ts`` is ``None``.
        """
        if ts is None:
            return None
        # Create UTC datetime from seconds and add microseconds
        dt = datetime.fromtimestamp(ts.seconds, tz=timezone.utc)
        return dt.replace(microsecond=ts.nanos // 1000)

    def to_proto(self) -> wandb_internal_pb2.MetadataRequest:  # noqa: C901
        """Convert the metadata to a protobuf message.

        Fields that are ``None`` and collections that are empty are left
        unset on the returned message.
        """
        proto = wandb_internal_pb2.MetadataRequest()

        # A flag to indicate that the metadata has been modified by the user.
        # Updates to the metadata object originating from the user take precedence
        # over automatic updates.
        proto._user_modified = True

        # Handle all scalar fields
        for name in _METADATA_SCALAR_FIELDS:
            value = getattr(self, name)
            if value is not None:
                setattr(proto, name, value)

        # Handle timestamp fields
        for name in _METADATA_TIMESTAMP_FIELDS:
            value = getattr(self, name)
            if value is not None:
                getattr(proto, name).CopyFrom(self._datetime_to_timestamp(value))

        # Handle nested message fields
        for name in _METADATA_MESSAGE_MODELS:
            value = getattr(self, name)
            if value is not None:
                getattr(proto, name).CopyFrom(value.to_proto())

        # Handle repeated fields
        if self.args:
            proto.args.extend(self.args)
        if self.gpu_nvidia:
            proto.gpu_nvidia.extend(gpu.to_proto() for gpu in self.gpu_nvidia)
        if self.gpu_amd:
            proto.gpu_amd.extend(gpu.to_proto() for gpu in self.gpu_amd)

        # Handle map fields
        if self.disk:
            for k, v in self.disk.items():
                proto.disk[k].CopyFrom(v.to_proto())
        if self.slurm:
            proto.slurm.update(self.slurm)

        return proto

    def update_from_proto(  # noqa: C901
        self,
        proto: wandb_internal_pb2.MetadataRequest,
        skip_existing: bool = False,
    ) -> None:
        """Update the metadata from a protobuf message.

        Scalar fields are copied only when truthy on ``proto`` and message
        fields only when present; repeated and map fields are always copied,
        so an empty collection on ``proto`` yields an empty collection here.
        Each field is assigned individually with ``setattr``.

        Args:
            proto (wandb_internal_pb2.MetadataRequest): The protobuf message.
            skip_existing (bool, optional): Skip updating fields that are already
                set. A field counts as set when it is neither ``None`` nor an
                empty list/dict.
        """
        data: dict[str, Any] = {}

        # Handle all scalar fields. An empty string or zero is treated as
        # "not provided".
        for name in _METADATA_SCALAR_FIELDS:
            value = getattr(proto, name)
            if value:
                data[name] = value

        # Handle timestamp fields (these are messages, so use HasField)
        for name in _METADATA_TIMESTAMP_FIELDS:
            if proto.HasField(name):
                data[name] = self._timestamp_to_datetime(getattr(proto, name))

        # Handle nested message fields (these have presence)
        for name, model in _METADATA_MESSAGE_MODELS.items():
            if proto.HasField(name):
                data[name] = model.from_proto(getattr(proto, name))

        # Handle repeated fields
        data["args"] = list(proto.args)
        data["gpu_nvidia"] = [GpuNvidiaInfo.from_proto(gpu) for gpu in proto.gpu_nvidia]
        data["gpu_amd"] = [GpuAmdInfo.from_proto(gpu) for gpu in proto.gpu_amd]

        # Handle map fields
        data["disk"] = {k: DiskInfo.from_proto(v) for k, v in proto.disk.items()}
        data["slurm"] = dict(proto.slurm)

        for name, value in data.items():
            if skip_existing:
                current = getattr(self, name)
                # Collection fields default to an empty list/dict rather than
                # None, so an empty collection must count as "not set";
                # otherwise these fields could never be filled in from proto.
                is_unset = current is None or (
                    isinstance(current, (list, dict)) and not current
                )
                # Also skip no-op assignments (empty onto empty) so that they
                # do not trigger validation and the post-update callback.
                if not is_unset or current == value:
                    continue
            setattr(self, name, value)
|