wandb 0.19.2__py3-none-any.whl → 0.19.4rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wandb/__init__.py +1 -1
- wandb/__init__.pyi +13 -3
- wandb/bin/gpu_stats +0 -0
- wandb/integration/metaflow/metaflow.py +7 -9
- wandb/sdk/interface/interface.py +2 -8
- wandb/sdk/internal/tb_watcher.py +3 -1
- wandb/sdk/wandb_init.py +395 -238
- wandb/sdk/wandb_run.py +11 -19
- wandb/sdk/wandb_settings.py +2 -27
- {wandb-0.19.2.dist-info → wandb-0.19.4rc1.dist-info}/METADATA +1 -1
- {wandb-0.19.2.dist-info → wandb-0.19.4rc1.dist-info}/RECORD +14 -14
- {wandb-0.19.2.dist-info → wandb-0.19.4rc1.dist-info}/WHEEL +0 -0
- {wandb-0.19.2.dist-info → wandb-0.19.4rc1.dist-info}/entry_points.txt +0 -0
- {wandb-0.19.2.dist-info → wandb-0.19.4rc1.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/wandb_init.py
CHANGED
@@ -11,9 +11,11 @@ For more on using `wandb.init()`, including code snippets, check out our
|
|
11
11
|
from __future__ import annotations
|
12
12
|
|
13
13
|
import copy
|
14
|
+
import dataclasses
|
14
15
|
import json
|
15
16
|
import logging
|
16
17
|
import os
|
18
|
+
import pathlib
|
17
19
|
import platform
|
18
20
|
import sys
|
19
21
|
import tempfile
|
@@ -48,14 +50,6 @@ from .wandb_settings import Settings
|
|
48
50
|
if TYPE_CHECKING:
|
49
51
|
from wandb.proto import wandb_internal_pb2 as pb
|
50
52
|
|
51
|
-
logger: wandb_setup.Logger | None = None # logger configured during wandb.init()
|
52
|
-
|
53
|
-
|
54
|
-
def _set_logger(log_object: wandb_setup.Logger | None) -> None:
|
55
|
-
"""Configure module logger."""
|
56
|
-
global logger
|
57
|
-
logger = log_object
|
58
|
-
|
59
53
|
|
60
54
|
def _huggingface_version() -> str | None:
|
61
55
|
if "transformers" in sys.modules:
|
@@ -112,27 +106,83 @@ def _handle_launch_config(settings: Settings) -> dict[str, Any]:
|
|
112
106
|
return launch_run_config
|
113
107
|
|
114
108
|
|
109
|
+
@dataclasses.dataclass(frozen=True)
|
110
|
+
class _ConfigParts:
|
111
|
+
base_no_artifacts: dict[str, Any]
|
112
|
+
"""The run config passed to `init()` minus any artifact-valued keys."""
|
113
|
+
|
114
|
+
sweep_no_artifacts: dict[str, Any]
|
115
|
+
"""The config loaded as part of a sweep minus any artifact-valued keys."""
|
116
|
+
|
117
|
+
launch_no_artifacts: dict[str, Any]
|
118
|
+
"""The config loaded as part of Launch minus any artifact-valued keys."""
|
119
|
+
|
120
|
+
artifacts: dict[str, Any]
|
121
|
+
"""Artifact keys removed from config dictionaries.
|
122
|
+
|
123
|
+
Due to implementation details of how a Run is constructed,
|
124
|
+
artifacts must be inserted into its config after initialization.
|
125
|
+
"""
|
126
|
+
|
127
|
+
|
115
128
|
class _WandbInit:
|
116
|
-
|
129
|
+
def __init__(
|
130
|
+
self,
|
131
|
+
wl: wandb_setup._WandbSetup,
|
132
|
+
telemetry: telemetry.TelemetryRecord,
|
133
|
+
) -> None:
|
134
|
+
self._wl = wl
|
135
|
+
|
136
|
+
self._telemetry = telemetry
|
137
|
+
"""Telemetry gathered before creating a run.
|
138
|
+
|
139
|
+
After the run is created, `telemetry.context()` is used instead.
|
140
|
+
"""
|
117
141
|
|
118
|
-
def __init__(self) -> None:
|
119
142
|
self.kwargs = None
|
120
|
-
self.settings: Settings | None = None
|
121
|
-
self.sweep_config: dict[str, Any] = {}
|
122
|
-
self.launch_config: dict[str, Any] = {}
|
123
|
-
self.config: dict[str, Any] = {}
|
124
143
|
self.run: Run | None = None
|
125
144
|
self.backend: Backend | None = None
|
126
145
|
|
127
146
|
self._teardown_hooks: list[TeardownHook] = []
|
128
|
-
self._wl = wandb.setup()
|
129
147
|
self.notebook: wandb.jupyter.Notebook | None = None # type: ignore
|
130
148
|
self.printer = printer.new_printer()
|
131
149
|
|
132
|
-
self._init_telemetry_obj = telemetry.TelemetryRecord()
|
133
|
-
|
134
150
|
self.deprecated_features_used: dict[str, str] = dict()
|
135
151
|
|
152
|
+
@property
|
153
|
+
def _logger(self) -> wandb_setup.Logger:
|
154
|
+
return self._wl._get_logger()
|
155
|
+
|
156
|
+
def maybe_login(self, init_settings: Settings) -> None:
|
157
|
+
"""Log in if we are not creating an offline or disabled run.
|
158
|
+
|
159
|
+
This may change the W&B singleton settings.
|
160
|
+
|
161
|
+
Args:
|
162
|
+
init_settings: Settings passed to `wandb.init()` or set via
|
163
|
+
keyword arguments.
|
164
|
+
"""
|
165
|
+
# Allow settings passed to init() to override inferred values.
|
166
|
+
#
|
167
|
+
# Calling login() may change settings on the singleton,
|
168
|
+
# so these may not be the final run settings.
|
169
|
+
run_settings = self._wl.settings.model_copy()
|
170
|
+
run_settings.update_from_settings(init_settings)
|
171
|
+
|
172
|
+
# NOTE: _noop or _offline can become true after _login().
|
173
|
+
# _noop happens if _login hits a timeout.
|
174
|
+
# _offline can be selected by the user at the login prompt.
|
175
|
+
if run_settings._noop or run_settings._offline:
|
176
|
+
return
|
177
|
+
|
178
|
+
wandb_login._login(
|
179
|
+
anonymous=run_settings.anonymous,
|
180
|
+
force=run_settings.force,
|
181
|
+
_disable_warning=True,
|
182
|
+
_silent=run_settings.quiet or run_settings.silent,
|
183
|
+
_entity=run_settings.entity,
|
184
|
+
)
|
185
|
+
|
136
186
|
def warn_env_vars_change_after_setup(self) -> None:
|
137
187
|
"""Warn if environment variables change after wandb singleton is initialized.
|
138
188
|
|
@@ -202,24 +252,15 @@ class _WandbInit:
|
|
202
252
|
warn("run_id", init_settings.run_id)
|
203
253
|
init_settings.run_id = None
|
204
254
|
|
205
|
-
def
|
206
|
-
|
207
|
-
init_settings: Settings,
|
208
|
-
config: dict | str | None = None,
|
209
|
-
config_exclude_keys: list[str] | None = None,
|
210
|
-
config_include_keys: list[str] | None = None,
|
211
|
-
allow_val_change: bool | None = None,
|
212
|
-
monitor_gym: bool | None = None,
|
213
|
-
) -> None:
|
214
|
-
"""Complete setup for `wandb.init()`.
|
255
|
+
def make_run_settings(self, init_settings: Settings) -> Settings:
|
256
|
+
"""Returns the run's settings.
|
215
257
|
|
216
|
-
|
258
|
+
Args:
|
259
|
+
init_settings: Settings passed to `wandb.init()` or set via
|
260
|
+
keyword arguments.
|
217
261
|
"""
|
218
262
|
self.warn_env_vars_change_after_setup()
|
219
263
|
|
220
|
-
_set_logger(self._wl._get_logger())
|
221
|
-
assert logger
|
222
|
-
|
223
264
|
self.clear_run_path_if_sweep_or_launch(init_settings)
|
224
265
|
|
225
266
|
# Inherit global settings.
|
@@ -231,20 +272,129 @@ class _WandbInit:
|
|
231
272
|
# Infer the run ID from SageMaker.
|
232
273
|
if not settings.sagemaker_disable and sagemaker.is_using_sagemaker():
|
233
274
|
if sagemaker.set_run_id(settings):
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
if
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
275
|
+
self._logger.info("set run ID and group based on SageMaker")
|
276
|
+
self._telemetry.feature.sagemaker = True
|
277
|
+
|
278
|
+
# get status of code saving before applying user settings
|
279
|
+
save_code_pre_user_settings = settings.save_code
|
280
|
+
if not settings._offline and not settings._noop:
|
281
|
+
user_settings = self._wl._load_user_settings()
|
282
|
+
if user_settings is not None:
|
283
|
+
settings.update_from_dict(user_settings)
|
284
|
+
|
285
|
+
# ensure that user settings don't set saving to true
|
286
|
+
# if user explicitly set these to false in UI
|
287
|
+
if save_code_pre_user_settings is False:
|
288
|
+
settings.save_code = False
|
289
|
+
|
290
|
+
# TODO: remove this once we refactor the client. This is a temporary
|
291
|
+
# fix to make sure that we use the same project name for wandb-core.
|
292
|
+
# The reason this is not going through the settings object is to
|
293
|
+
# avoid failure cases in other parts of the code that will be
|
294
|
+
# removed with the switch to wandb-core.
|
295
|
+
if settings.project is None:
|
296
|
+
settings.project = wandb.util.auto_project_name(settings.program)
|
297
|
+
|
298
|
+
settings.x_start_time = time.time()
|
299
|
+
|
300
|
+
return settings
|
301
|
+
|
302
|
+
def _load_autoresume_run_id(self, resume_file: pathlib.Path) -> str | None:
|
303
|
+
"""Returns the run_id stored in the auto-resume file, if any.
|
247
304
|
|
305
|
+
Returns None if the file does not exist or is not in a valid format.
|
306
|
+
|
307
|
+
Args:
|
308
|
+
resume_file: The file path to use for resume='auto' mode.
|
309
|
+
"""
|
310
|
+
if not resume_file.exists():
|
311
|
+
return None
|
312
|
+
|
313
|
+
with resume_file.open() as f:
|
314
|
+
try:
|
315
|
+
return json.load(f)["run_id"]
|
316
|
+
|
317
|
+
except json.JSONDecodeError as e:
|
318
|
+
self._logger.exception(
|
319
|
+
f"could not decode {resume_file}, ignoring",
|
320
|
+
exc_info=e,
|
321
|
+
)
|
322
|
+
return None
|
323
|
+
|
324
|
+
except KeyError:
|
325
|
+
self._logger.error(
|
326
|
+
f"resume file at {resume_file} did not store a run_id"
|
327
|
+
)
|
328
|
+
return None
|
329
|
+
|
330
|
+
def _save_autoresume_run_id(
|
331
|
+
self,
|
332
|
+
*,
|
333
|
+
resume_file: pathlib.Path,
|
334
|
+
run_id: str,
|
335
|
+
) -> None:
|
336
|
+
"""Write the run ID to the auto-resume file."""
|
337
|
+
resume_file.parent.mkdir(exist_ok=True)
|
338
|
+
with resume_file.open("w") as f:
|
339
|
+
json.dump({"run_id": run_id}, f)
|
340
|
+
|
341
|
+
def set_run_id(self, settings: Settings) -> None:
|
342
|
+
"""Set the run ID and possibly save it to the auto-resume file.
|
343
|
+
|
344
|
+
After this, `settings.run_id` is guaranteed to be set.
|
345
|
+
|
346
|
+
Args:
|
347
|
+
settings: The run's settings derived from the environment
|
348
|
+
and explicit values passed to `wandb.init()`.
|
349
|
+
"""
|
350
|
+
if settings.resume == "auto" and settings.resume_fname:
|
351
|
+
resume_path = pathlib.Path(settings.resume_fname)
|
352
|
+
else:
|
353
|
+
resume_path = None
|
354
|
+
|
355
|
+
if resume_path:
|
356
|
+
previous_id = self._load_autoresume_run_id(resume_path)
|
357
|
+
|
358
|
+
if not previous_id:
|
359
|
+
pass
|
360
|
+
elif settings.run_id is None:
|
361
|
+
self._logger.info(f"loaded run ID from {resume_path}")
|
362
|
+
settings.run_id = previous_id
|
363
|
+
elif settings.run_id != previous_id:
|
364
|
+
wandb.termwarn(
|
365
|
+
f"Ignoring ID {previous_id} loaded due to resume='auto'"
|
366
|
+
f" because the run ID is set to {settings.run_id}.",
|
367
|
+
)
|
368
|
+
|
369
|
+
# If no run ID was inferred, explicitly set, or loaded from an
|
370
|
+
# auto-resume file, then we generate a new ID.
|
371
|
+
if settings.run_id is None:
|
372
|
+
settings.run_id = runid.generate_id()
|
373
|
+
|
374
|
+
if resume_path:
|
375
|
+
self._save_autoresume_run_id(
|
376
|
+
resume_file=resume_path,
|
377
|
+
run_id=settings.run_id,
|
378
|
+
)
|
379
|
+
|
380
|
+
def make_run_config(
|
381
|
+
self,
|
382
|
+
settings: Settings,
|
383
|
+
config: dict | str | None = None,
|
384
|
+
config_exclude_keys: list[str] | None = None,
|
385
|
+
config_include_keys: list[str] | None = None,
|
386
|
+
) -> _ConfigParts:
|
387
|
+
"""Construct the run's config.
|
388
|
+
|
389
|
+
Args:
|
390
|
+
settings: The run's finalized settings.
|
391
|
+
config: The config passed to `init()`.
|
392
|
+
config_exclude_keys: Deprecated. Keys to filter out from `config`.
|
393
|
+
config_include_keys: Deprecated. Keys to include from `config`.
|
394
|
+
|
395
|
+
Returns:
|
396
|
+
Initial values for the run's config.
|
397
|
+
"""
|
248
398
|
# TODO: remove this once officially deprecated
|
249
399
|
if config_exclude_keys:
|
250
400
|
self.deprecated_features_used["config_exclude_keys"] = (
|
@@ -260,123 +410,77 @@ class _WandbInit:
|
|
260
410
|
exclude=config_exclude_keys,
|
261
411
|
)
|
262
412
|
|
263
|
-
|
264
|
-
|
265
|
-
|
413
|
+
result = _ConfigParts(
|
414
|
+
base_no_artifacts=dict(),
|
415
|
+
sweep_no_artifacts=dict(),
|
416
|
+
launch_no_artifacts=dict(),
|
417
|
+
artifacts=dict(),
|
418
|
+
)
|
266
419
|
|
267
420
|
if not settings.sagemaker_disable and sagemaker.is_using_sagemaker():
|
268
421
|
sagemaker_config = sagemaker.parse_sm_config()
|
269
|
-
self._split_artifacts_from_config(
|
270
|
-
|
271
|
-
|
272
|
-
|
422
|
+
self._split_artifacts_from_config(
|
423
|
+
sagemaker_config,
|
424
|
+
config_target=result.base_no_artifacts,
|
425
|
+
artifacts=result.artifacts,
|
426
|
+
)
|
427
|
+
self._telemetry.feature.sagemaker = True
|
273
428
|
|
274
429
|
if self._wl._config:
|
275
|
-
self._split_artifacts_from_config(
|
430
|
+
self._split_artifacts_from_config(
|
431
|
+
self._wl._config,
|
432
|
+
config_target=result.base_no_artifacts,
|
433
|
+
artifacts=result.artifacts,
|
434
|
+
)
|
276
435
|
|
277
436
|
if config and isinstance(config, dict):
|
278
|
-
self._split_artifacts_from_config(
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
if sweep_config:
|
283
|
-
self._split_artifacts_from_config(sweep_config, self.sweep_config)
|
284
|
-
|
285
|
-
if monitor_gym and len(wandb.patched["gym"]) == 0:
|
286
|
-
wandb.gym.monitor() # type: ignore
|
287
|
-
|
288
|
-
if wandb.patched["tensorboard"]:
|
289
|
-
with telemetry.context(obj=self._init_telemetry_obj) as tel:
|
290
|
-
tel.feature.tensorboard_patch = True
|
291
|
-
|
292
|
-
if settings.sync_tensorboard:
|
293
|
-
if len(wandb.patched["tensorboard"]) == 0:
|
294
|
-
wandb.tensorboard.patch() # type: ignore
|
295
|
-
with telemetry.context(obj=self._init_telemetry_obj) as tel:
|
296
|
-
tel.feature.tensorboard_sync = True
|
297
|
-
|
298
|
-
if not settings._offline and not settings._noop:
|
299
|
-
wandb_login._login(
|
300
|
-
anonymous=settings.anonymous,
|
301
|
-
force=settings.force,
|
302
|
-
_disable_warning=True,
|
303
|
-
_silent=settings.quiet or settings.silent,
|
304
|
-
_entity=settings.entity,
|
437
|
+
self._split_artifacts_from_config(
|
438
|
+
config,
|
439
|
+
config_target=result.base_no_artifacts,
|
440
|
+
artifacts=result.artifacts,
|
305
441
|
)
|
306
442
|
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
"base_url": self._wl.settings.base_url,
|
314
|
-
"force": self._wl.settings.force,
|
315
|
-
"login_timeout": self._wl.settings.login_timeout,
|
316
|
-
}.items()
|
317
|
-
if v is not None
|
318
|
-
}
|
319
|
-
if login_settings:
|
320
|
-
settings.update_from_dict(login_settings)
|
321
|
-
|
322
|
-
# handle custom resume logic
|
323
|
-
settings.handle_resume_logic()
|
324
|
-
|
325
|
-
# get status of code saving before applying user settings
|
326
|
-
save_code_pre_user_settings = settings.save_code
|
327
|
-
if not settings._offline and not settings._noop:
|
328
|
-
user_settings = self._wl._load_user_settings()
|
329
|
-
if user_settings is not None:
|
330
|
-
settings.update_from_dict(user_settings)
|
331
|
-
|
332
|
-
# ensure that user settings don't set saving to true
|
333
|
-
# if user explicitly set these to false in UI
|
334
|
-
if save_code_pre_user_settings is False:
|
335
|
-
settings.save_code = False
|
336
|
-
|
337
|
-
# TODO: remove this once we refactor the client. This is a temporary
|
338
|
-
# fix to make sure that we use the same project name for wandb-core.
|
339
|
-
# The reason this is not going through the settings object is to
|
340
|
-
# avoid failure cases in other parts of the code that will be
|
341
|
-
# removed with the switch to wandb-core.
|
342
|
-
if settings.project is None:
|
343
|
-
settings.project = wandb.util.auto_project_name(settings.program)
|
344
|
-
|
345
|
-
settings.x_start_time = time.time()
|
346
|
-
|
347
|
-
if not settings._noop:
|
348
|
-
self._log_setup(settings)
|
443
|
+
if self._wl._sweep_config:
|
444
|
+
self._split_artifacts_from_config(
|
445
|
+
self._wl._sweep_config,
|
446
|
+
config_target=result.sweep_no_artifacts,
|
447
|
+
artifacts=result.artifacts,
|
448
|
+
)
|
349
449
|
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
450
|
+
if launch_config := _handle_launch_config(settings):
|
451
|
+
self._split_artifacts_from_config(
|
452
|
+
launch_config,
|
453
|
+
config_target=result.launch_no_artifacts,
|
454
|
+
artifacts=result.artifacts,
|
455
|
+
)
|
355
456
|
|
356
|
-
|
457
|
+
return result
|
357
458
|
|
358
459
|
def teardown(self) -> None:
|
359
460
|
# TODO: currently this is only called on failed wandb.init attempts
|
360
461
|
# normally this happens on the run object
|
361
|
-
|
362
|
-
logger.info("tearing down wandb.init")
|
462
|
+
self._logger.info("tearing down wandb.init")
|
363
463
|
for hook in self._teardown_hooks:
|
364
464
|
hook.call()
|
365
465
|
|
366
466
|
def _split_artifacts_from_config(
|
367
|
-
self,
|
467
|
+
self,
|
468
|
+
config_source: dict,
|
469
|
+
config_target: dict,
|
470
|
+
artifacts: dict,
|
368
471
|
) -> None:
|
369
472
|
for k, v in config_source.items():
|
370
473
|
if _is_artifact_representation(v):
|
371
|
-
|
474
|
+
artifacts[k] = v
|
372
475
|
else:
|
373
476
|
config_target.setdefault(k, v)
|
374
477
|
|
375
|
-
def
|
376
|
-
"""
|
478
|
+
def _create_logger(self, log_fname: str) -> logging.Logger:
|
479
|
+
"""Returns a logger configured to write to a file.
|
377
480
|
|
378
|
-
This adds a run_id to the log, in case of multiple processes on the same
|
379
|
-
Currently, there is no way to disable logging after it's
|
481
|
+
This adds a run_id to the log, in case of multiple processes on the same
|
482
|
+
machine. Currently, there is no way to disable logging after it's
|
483
|
+
enabled.
|
380
484
|
"""
|
381
485
|
handler = logging.FileHandler(log_fname)
|
382
486
|
handler.setLevel(logging.INFO)
|
@@ -387,7 +491,8 @@ class _WandbInit:
|
|
387
491
|
)
|
388
492
|
|
389
493
|
handler.setFormatter(formatter)
|
390
|
-
|
494
|
+
|
495
|
+
logger = logging.getLogger("wandb")
|
391
496
|
logger.propagate = False
|
392
497
|
logger.addHandler(handler)
|
393
498
|
# TODO: make me configurable
|
@@ -399,6 +504,8 @@ class _WandbInit:
|
|
399
504
|
)
|
400
505
|
)
|
401
506
|
|
507
|
+
return logger
|
508
|
+
|
402
509
|
def _safe_symlink(
|
403
510
|
self, base: str, target: str, name: str, delete: bool = False
|
404
511
|
) -> None:
|
@@ -429,14 +536,14 @@ class _WandbInit:
|
|
429
536
|
if self.notebook.save_ipynb(): # type: ignore
|
430
537
|
assert self.run is not None
|
431
538
|
res = self.run.log_code(root=None)
|
432
|
-
|
539
|
+
self._logger.info("saved code: %s", res) # type: ignore
|
433
540
|
if self.backend.interface is not None:
|
434
|
-
|
541
|
+
self._logger.info("pausing backend") # type: ignore
|
435
542
|
self.backend.interface.publish_pause()
|
436
543
|
|
437
544
|
def _resume_backend(self, *args: Any, **kwargs: Any) -> None: # noqa
|
438
545
|
if self.backend is not None and self.backend.interface is not None:
|
439
|
-
|
546
|
+
self._logger.info("resuming backend") # type: ignore
|
440
547
|
self.backend.interface.publish_resume()
|
441
548
|
|
442
549
|
def _jupyter_teardown(self) -> None:
|
@@ -447,8 +554,8 @@ class _WandbInit:
|
|
447
554
|
if self.notebook.save_ipynb():
|
448
555
|
assert self.run is not None
|
449
556
|
res = self.run.log_code(root=None)
|
450
|
-
|
451
|
-
|
557
|
+
self._logger.info("saved code and history: %s", res) # type: ignore
|
558
|
+
self._logger.info("cleaning up jupyter logic") # type: ignore
|
452
559
|
# because of how we bind our methods we manually find them to unregister
|
453
560
|
for hook in ipython.events.callbacks["pre_run_cell"]:
|
454
561
|
if "_resume_backend" in hook.__name__:
|
@@ -459,14 +566,14 @@ class _WandbInit:
|
|
459
566
|
ipython.display_pub.publish = ipython.display_pub._orig_publish
|
460
567
|
del ipython.display_pub._orig_publish
|
461
568
|
|
462
|
-
def
|
569
|
+
def monkeypatch_ipython(self, settings: Settings) -> None:
|
463
570
|
"""Add hooks, and session history saving."""
|
464
571
|
self.notebook = wandb.jupyter.Notebook(settings) # type: ignore
|
465
572
|
ipython = self.notebook.shell
|
466
573
|
|
467
574
|
# Monkey patch ipython publish to capture displayed outputs
|
468
575
|
if not hasattr(ipython.display_pub, "_orig_publish"):
|
469
|
-
|
576
|
+
self._logger.info("configuring jupyter hooks %s", self) # type: ignore
|
470
577
|
ipython.display_pub._orig_publish = ipython.display_pub.publish
|
471
578
|
# Registering resume and pause hooks
|
472
579
|
|
@@ -485,7 +592,7 @@ class _WandbInit:
|
|
485
592
|
|
486
593
|
ipython.display_pub.publish = publish
|
487
594
|
|
488
|
-
def
|
595
|
+
def setup_run_log_directory(self, settings: Settings) -> None:
|
489
596
|
"""Set up logging from settings."""
|
490
597
|
filesystem.mkdir_exists_ok(os.path.dirname(settings.log_user))
|
491
598
|
filesystem.mkdir_exists_ok(os.path.dirname(settings.log_internal))
|
@@ -513,25 +620,22 @@ class _WandbInit:
|
|
513
620
|
delete=True,
|
514
621
|
)
|
515
622
|
|
516
|
-
|
517
|
-
self.
|
518
|
-
|
519
|
-
assert self._wl
|
520
|
-
assert logger
|
521
|
-
|
522
|
-
self._wl._early_logger_flush(logger)
|
523
|
-
logger.info(f"Logging user logs to {settings.log_user}")
|
524
|
-
logger.info(f"Logging internal logs to {settings.log_internal}")
|
623
|
+
self._wl._early_logger_flush(self._create_logger(settings.log_user))
|
624
|
+
self._logger.info(f"Logging user logs to {settings.log_user}")
|
625
|
+
self._logger.info(f"Logging internal logs to {settings.log_internal}")
|
525
626
|
|
526
|
-
def
|
627
|
+
def make_disabled_run(self, config: _ConfigParts) -> Run:
|
527
628
|
"""Returns a Run-like object where all methods are no-ops.
|
528
629
|
|
529
|
-
This method is used when
|
530
|
-
|
630
|
+
This method is used when the `mode` setting is set to "disabled", such as
|
631
|
+
by wandb.init(mode="disabled") or by setting the WANDB_MODE environment
|
632
|
+
variable to "disabled".
|
633
|
+
|
634
|
+
It creates a Run object that mimics the behavior of a normal Run but doesn't
|
531
635
|
communicate with the W&B servers.
|
532
636
|
|
533
|
-
The returned Run object has all expected attributes and methods, but they
|
534
|
-
no-op versions that don't perform any actual logging or communication.
|
637
|
+
The returned Run object has all expected attributes and methods, but they
|
638
|
+
are no-op versions that don't perform any actual logging or communication.
|
535
639
|
"""
|
536
640
|
run_id = runid.generate_id()
|
537
641
|
drun = Run(
|
@@ -549,8 +653,8 @@ class _WandbInit:
|
|
549
653
|
)
|
550
654
|
# config, summary, and metadata objects
|
551
655
|
drun._config = wandb.sdk.wandb_config.Config()
|
552
|
-
drun._config.update(
|
553
|
-
drun._config.update(
|
656
|
+
drun._config.update(config.sweep_no_artifacts)
|
657
|
+
drun._config.update(config.base_no_artifacts)
|
554
658
|
drun.summary = SummaryDisabled() # type: ignore
|
555
659
|
drun._Run__metadata = wandb.sdk.wandb_metadata.Metadata()
|
556
660
|
|
@@ -635,24 +739,19 @@ class _WandbInit:
|
|
635
739
|
percent_done = handle.percent_done
|
636
740
|
self.printer.progress_update(line, percent_done=percent_done)
|
637
741
|
|
638
|
-
def init(self) -> Run: # noqa: C901
|
639
|
-
|
640
|
-
raise RuntimeError("Logger not initialized")
|
641
|
-
logger.info("calling init triggers")
|
742
|
+
def init(self, settings: Settings, config: _ConfigParts) -> Run: # noqa: C901
|
743
|
+
self._logger.info("calling init triggers")
|
642
744
|
trigger.call("on_init")
|
643
745
|
|
644
|
-
assert self.settings is not None
|
645
746
|
assert self._wl is not None
|
646
747
|
|
647
|
-
|
648
|
-
f"wandb.init called with sweep_config: {
|
748
|
+
self._logger.info(
|
749
|
+
f"wandb.init called with sweep_config: {config.sweep_no_artifacts}"
|
750
|
+
f"\nconfig: {config.base_no_artifacts}"
|
649
751
|
)
|
650
752
|
|
651
|
-
if self.settings._noop:
|
652
|
-
return self._make_run_disabled()
|
653
753
|
if (
|
654
|
-
|
655
|
-
or (self.settings._jupyter and self.settings.reinit is not False)
|
754
|
+
settings.reinit or (settings._jupyter and settings.reinit is not False)
|
656
755
|
) and len(self._wl._global_run_stack) > 0:
|
657
756
|
if len(self._wl._global_run_stack) > 1:
|
658
757
|
wandb.termwarn(
|
@@ -663,63 +762,66 @@ class _WandbInit:
|
|
663
762
|
)
|
664
763
|
|
665
764
|
latest_run = self._wl._global_run_stack[-1]
|
666
|
-
|
765
|
+
self._logger.info(f"found existing run on stack: {latest_run.id}")
|
667
766
|
latest_run.finish()
|
668
767
|
elif wandb.run is not None and os.getpid() == wandb.run._init_pid:
|
669
|
-
|
768
|
+
self._logger.info("wandb.init() called when a run is still active")
|
769
|
+
|
770
|
+
# NOTE: Updates telemetry on the pre-existing run.
|
670
771
|
with telemetry.context() as tel:
|
671
772
|
tel.feature.init_return_run = True
|
773
|
+
|
672
774
|
return wandb.run
|
673
775
|
|
674
|
-
|
776
|
+
self._logger.info("starting backend")
|
675
777
|
|
676
|
-
if not
|
778
|
+
if not settings.x_disable_service:
|
677
779
|
service = self._wl.ensure_service()
|
678
|
-
|
780
|
+
self._logger.info("sending inform_init request")
|
679
781
|
service.inform_init(
|
680
|
-
settings=
|
681
|
-
run_id=
|
782
|
+
settings=settings.to_proto(),
|
783
|
+
run_id=settings.run_id, # type: ignore
|
682
784
|
)
|
683
785
|
else:
|
684
786
|
service = None
|
685
787
|
|
686
788
|
mailbox = Mailbox()
|
687
789
|
backend = Backend(
|
688
|
-
settings=
|
790
|
+
settings=settings,
|
689
791
|
service=service,
|
690
792
|
mailbox=mailbox,
|
691
793
|
)
|
692
794
|
backend.ensure_launched()
|
693
|
-
|
795
|
+
self._logger.info("backend started and connected")
|
694
796
|
|
695
797
|
# resuming needs access to the server, check server_status()?
|
696
798
|
run = Run(
|
697
|
-
config=
|
698
|
-
settings=
|
699
|
-
sweep_config=
|
700
|
-
launch_config=
|
799
|
+
config=config.base_no_artifacts,
|
800
|
+
settings=settings,
|
801
|
+
sweep_config=config.sweep_no_artifacts,
|
802
|
+
launch_config=config.launch_no_artifacts,
|
701
803
|
)
|
702
804
|
|
703
805
|
# Populate initial telemetry
|
704
|
-
with telemetry.context(run=run, obj=self.
|
806
|
+
with telemetry.context(run=run, obj=self._telemetry) as tel:
|
705
807
|
tel.cli_version = wandb.__version__
|
706
808
|
tel.python_version = platform.python_version()
|
707
809
|
tel.platform = f"{platform.system()}-{platform.machine()}".lower()
|
708
810
|
hf_version = _huggingface_version()
|
709
811
|
if hf_version:
|
710
812
|
tel.huggingface_version = hf_version
|
711
|
-
if
|
813
|
+
if settings._jupyter:
|
712
814
|
tel.env.jupyter = True
|
713
|
-
if
|
815
|
+
if settings._ipython:
|
714
816
|
tel.env.ipython = True
|
715
|
-
if
|
817
|
+
if settings._colab:
|
716
818
|
tel.env.colab = True
|
717
|
-
if
|
819
|
+
if settings._kaggle:
|
718
820
|
tel.env.kaggle = True
|
719
|
-
if
|
821
|
+
if settings._windows:
|
720
822
|
tel.env.windows = True
|
721
823
|
|
722
|
-
if
|
824
|
+
if settings.launch:
|
723
825
|
tel.feature.launch = True
|
724
826
|
|
725
827
|
for module_name in telemetry.list_telemetry_imports(only_imported=True):
|
@@ -727,8 +829,8 @@ class _WandbInit:
|
|
727
829
|
|
728
830
|
# probe the active start method
|
729
831
|
active_start_method: str | None = None
|
730
|
-
if
|
731
|
-
active_start_method =
|
832
|
+
if settings.start_method == "thread":
|
833
|
+
active_start_method = settings.start_method
|
732
834
|
else:
|
733
835
|
active_start_method = getattr(
|
734
836
|
backend._multiprocessing, "get_start_method", lambda: None
|
@@ -746,7 +848,7 @@ class _WandbInit:
|
|
746
848
|
if os.environ.get("PEX"):
|
747
849
|
tel.env.pex = True
|
748
850
|
|
749
|
-
if
|
851
|
+
if settings._aws_lambda:
|
750
852
|
tel.env.aws_lambda = True
|
751
853
|
|
752
854
|
if os.environ.get(wandb.env._DISABLE_SERVICE):
|
@@ -754,13 +856,13 @@ class _WandbInit:
|
|
754
856
|
|
755
857
|
if service:
|
756
858
|
tel.feature.service = True
|
757
|
-
if
|
859
|
+
if settings.x_flow_control_disabled:
|
758
860
|
tel.feature.flow_control_disabled = True
|
759
|
-
if
|
861
|
+
if settings.x_flow_control_custom:
|
760
862
|
tel.feature.flow_control_custom = True
|
761
|
-
if not
|
863
|
+
if not settings.x_require_legacy_service:
|
762
864
|
tel.feature.core = True
|
763
|
-
if
|
865
|
+
if settings._shared:
|
764
866
|
wandb.termwarn(
|
765
867
|
"The `_shared` feature is experimental and may change. "
|
766
868
|
"Please contact support@wandb.com for guidance and to report any issues."
|
@@ -769,7 +871,7 @@ class _WandbInit:
|
|
769
871
|
|
770
872
|
tel.env.maybe_mp = _maybe_mp_process(backend)
|
771
873
|
|
772
|
-
if not
|
874
|
+
if not settings.label_disable:
|
773
875
|
if self.notebook:
|
774
876
|
run._label_probe_notebook(self.notebook)
|
775
877
|
else:
|
@@ -783,7 +885,7 @@ class _WandbInit:
|
|
783
885
|
run=run,
|
784
886
|
)
|
785
887
|
|
786
|
-
|
888
|
+
self._logger.info("updated telemetry")
|
787
889
|
|
788
890
|
run._set_library(self._wl)
|
789
891
|
run._set_backend(backend)
|
@@ -797,25 +899,23 @@ class _WandbInit:
|
|
797
899
|
# Using GitRepo() blocks & can be slow, depending on user's current git setup.
|
798
900
|
# We don't want to block run initialization/start request, so populate run's git
|
799
901
|
# info beforehand.
|
800
|
-
if not (
|
902
|
+
if not (settings.disable_git or settings.x_disable_machine_info):
|
801
903
|
run._populate_git_info()
|
802
904
|
|
803
905
|
run_result: pb.RunUpdateResult | None = None
|
804
906
|
|
805
|
-
if
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
wandb.termwarn(
|
811
|
-
"`resume` will be ignored since W&B syncing is set to `offline`. "
|
812
|
-
f"Starting a new run with run id {run.id}."
|
813
|
-
)
|
907
|
+
if settings._offline and settings.resume:
|
908
|
+
wandb.termwarn(
|
909
|
+
"`resume` will be ignored since W&B syncing is set to `offline`. "
|
910
|
+
f"Starting a new run with run id {run.id}."
|
911
|
+
)
|
814
912
|
error: wandb.Error | None = None
|
815
913
|
|
816
|
-
timeout =
|
914
|
+
timeout = settings.init_timeout
|
817
915
|
|
818
|
-
|
916
|
+
self._logger.info(
|
917
|
+
f"communicating run to backend with {timeout} second timeout",
|
918
|
+
)
|
819
919
|
|
820
920
|
run_init_handle = backend.interface.deliver_run(run)
|
821
921
|
result = run_init_handle.wait(
|
@@ -842,7 +942,7 @@ class _WandbInit:
|
|
842
942
|
error = ProtobufErrorHandler.to_exception(run_result.error)
|
843
943
|
|
844
944
|
if error is not None:
|
845
|
-
|
945
|
+
self._logger.error(f"encountered error: {error}")
|
846
946
|
if not service:
|
847
947
|
# Shutdown the backend and get rid of the logger
|
848
948
|
# we don't need to do console cleanup at this point
|
@@ -860,19 +960,19 @@ class _WandbInit:
|
|
860
960
|
)
|
861
961
|
|
862
962
|
if run_result.run.resumed:
|
863
|
-
|
963
|
+
self._logger.info("run resumed")
|
864
964
|
with telemetry.context(run=run) as tel:
|
865
965
|
tel.feature.resumed = run_result.run.resumed
|
866
966
|
run._set_run_obj(run_result.run)
|
867
967
|
|
868
|
-
|
968
|
+
self._logger.info("starting run threads in backend")
|
869
969
|
# initiate run (stats and metadata probing)
|
870
970
|
|
871
971
|
if service:
|
872
|
-
assert
|
972
|
+
assert settings.run_id
|
873
973
|
service.inform_start(
|
874
|
-
settings=
|
875
|
-
run_id=
|
974
|
+
settings=settings.to_proto(),
|
975
|
+
run_id=settings.run_id,
|
876
976
|
)
|
877
977
|
|
878
978
|
assert backend.interface
|
@@ -889,15 +989,15 @@ class _WandbInit:
|
|
889
989
|
|
890
990
|
run._handle_launch_artifact_overrides()
|
891
991
|
if (
|
892
|
-
|
893
|
-
and
|
894
|
-
and os.path.exists(
|
992
|
+
settings.launch
|
993
|
+
and settings.launch_config_path
|
994
|
+
and os.path.exists(settings.launch_config_path)
|
895
995
|
):
|
896
|
-
run.save(
|
996
|
+
run.save(settings.launch_config_path)
|
897
997
|
# put artifacts in run config here
|
898
998
|
# since doing so earlier will cause an error
|
899
999
|
# as the run is not upserted
|
900
|
-
for k, v in
|
1000
|
+
for k, v in config.artifacts.items():
|
901
1001
|
run.config.update({k: v}, allow_val_change=True)
|
902
1002
|
job_artifact = run._launch_artifact_mapping.get(
|
903
1003
|
wandb.util.LAUNCH_JOB_ARTIFACT_SLOT_NAME
|
@@ -907,7 +1007,7 @@ class _WandbInit:
|
|
907
1007
|
|
908
1008
|
self.backend = backend
|
909
1009
|
run._on_start()
|
910
|
-
|
1010
|
+
self._logger.info("run started, returning control to user process")
|
911
1011
|
return run
|
912
1012
|
|
913
1013
|
|
@@ -938,10 +1038,7 @@ def _attach(
|
|
938
1038
|
wandb._assert_is_user_process() # type: ignore
|
939
1039
|
|
940
1040
|
_wl = wandb.setup()
|
941
|
-
|
942
|
-
_set_logger(_wl._get_logger())
|
943
|
-
if logger is None:
|
944
|
-
raise UsageError("logger is not initialized")
|
1041
|
+
logger = _wl._get_logger()
|
945
1042
|
|
946
1043
|
service = _wl.ensure_service()
|
947
1044
|
|
@@ -992,6 +1089,26 @@ def _attach(
|
|
992
1089
|
return run
|
993
1090
|
|
994
1091
|
|
1092
|
+
def _monkeypatch_openai_gym() -> None:
|
1093
|
+
"""Patch OpenAI gym to log to the global `wandb.run`."""
|
1094
|
+
if len(wandb.patched["gym"]) > 0:
|
1095
|
+
return
|
1096
|
+
|
1097
|
+
from wandb.integration import gym
|
1098
|
+
|
1099
|
+
gym.monitor()
|
1100
|
+
|
1101
|
+
|
1102
|
+
def _monkeypatch_tensorboard() -> None:
|
1103
|
+
"""Patch TensorBoard to log to the global `wandb.run`."""
|
1104
|
+
if len(wandb.patched["tensorboard"]) > 0:
|
1105
|
+
return
|
1106
|
+
|
1107
|
+
from wandb.integration import tensorboard as tb_module
|
1108
|
+
|
1109
|
+
tb_module.patch()
|
1110
|
+
|
1111
|
+
|
995
1112
|
def init( # noqa: C901
|
996
1113
|
entity: str | None = None,
|
997
1114
|
project: str | None = None,
|
@@ -1229,6 +1346,8 @@ def init( # noqa: C901
|
|
1229
1346
|
"""
|
1230
1347
|
wandb._assert_is_user_process() # type: ignore
|
1231
1348
|
|
1349
|
+
init_telemetry = telemetry.TelemetryRecord()
|
1350
|
+
|
1232
1351
|
init_settings = Settings()
|
1233
1352
|
if isinstance(settings, dict):
|
1234
1353
|
init_settings = Settings(**settings)
|
@@ -1276,27 +1395,65 @@ def init( # noqa: C901
|
|
1276
1395
|
if resume_from is not None:
|
1277
1396
|
init_settings.resume_from = resume_from # type: ignore
|
1278
1397
|
|
1398
|
+
if config is not None:
|
1399
|
+
init_telemetry.feature.set_init_config = True
|
1400
|
+
|
1401
|
+
wl: wandb_setup._WandbSetup | None = None
|
1402
|
+
|
1279
1403
|
try:
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1404
|
+
wl = wandb.setup()
|
1405
|
+
|
1406
|
+
wi = _WandbInit(wl, init_telemetry)
|
1407
|
+
|
1408
|
+
wi.maybe_login(init_settings)
|
1409
|
+
run_settings = wi.make_run_settings(init_settings)
|
1410
|
+
|
1411
|
+
if run_settings.run_id is not None:
|
1412
|
+
init_telemetry.feature.set_init_id = True
|
1413
|
+
if run_settings.run_name is not None:
|
1414
|
+
init_telemetry.feature.set_init_name = True
|
1415
|
+
if run_settings.run_tags is not None:
|
1416
|
+
init_telemetry.feature.set_init_tags = True
|
1417
|
+
if run_settings._offline:
|
1418
|
+
init_telemetry.feature.offline = True
|
1419
|
+
|
1420
|
+
wi.set_run_id(run_settings)
|
1421
|
+
|
1422
|
+
run_config = wi.make_run_config(
|
1423
|
+
settings=run_settings,
|
1283
1424
|
config=config,
|
1284
1425
|
config_exclude_keys=config_exclude_keys,
|
1285
1426
|
config_include_keys=config_include_keys,
|
1286
|
-
allow_val_change=allow_val_change,
|
1287
|
-
monitor_gym=monitor_gym,
|
1288
1427
|
)
|
1289
|
-
|
1428
|
+
|
1429
|
+
if run_settings._noop:
|
1430
|
+
return wi.make_disabled_run(run_config)
|
1431
|
+
|
1432
|
+
wi.setup_run_log_directory(run_settings)
|
1433
|
+
if run_settings._jupyter:
|
1434
|
+
wi.monkeypatch_ipython(run_settings)
|
1435
|
+
|
1436
|
+
if monitor_gym:
|
1437
|
+
_monkeypatch_openai_gym()
|
1438
|
+
|
1439
|
+
if wandb.patched["tensorboard"]:
|
1440
|
+
# NOTE: The user may have called the patch function directly.
|
1441
|
+
init_telemetry.feature.tensorboard_patch = True
|
1442
|
+
if run_settings.sync_tensorboard:
|
1443
|
+
_monkeypatch_tensorboard()
|
1444
|
+
init_telemetry.feature.tensorboard_sync = True
|
1445
|
+
|
1446
|
+
return wi.init(run_settings, run_config)
|
1290
1447
|
|
1291
1448
|
except KeyboardInterrupt as e:
|
1292
|
-
if
|
1293
|
-
|
1449
|
+
if wl:
|
1450
|
+
wl._get_logger().warning("interrupted", exc_info=e)
|
1294
1451
|
|
1295
1452
|
raise
|
1296
1453
|
|
1297
1454
|
except Exception as e:
|
1298
|
-
if
|
1299
|
-
|
1455
|
+
if wl:
|
1456
|
+
wl._get_logger().exception("error in wandb.init()", exc_info=e)
|
1300
1457
|
|
1301
1458
|
# Need to build delay into this sentry capture because our exit hooks
|
1302
1459
|
# mess with sentry's ability to send out errors before the program ends.
|