wandb 0.19.2__py3-none-win_amd64.whl → 0.19.4rc1__py3-none-win_amd64.whl
Sign up to get free protection for your applications and to get access to all the features.
- wandb/__init__.py +1 -1
- wandb/__init__.pyi +13 -3
- wandb/bin/gpu_stats.exe +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/integration/metaflow/metaflow.py +7 -9
- wandb/sdk/interface/interface.py +2 -8
- wandb/sdk/internal/tb_watcher.py +3 -1
- wandb/sdk/wandb_init.py +395 -238
- wandb/sdk/wandb_run.py +11 -19
- wandb/sdk/wandb_settings.py +2 -27
- {wandb-0.19.2.dist-info → wandb-0.19.4rc1.dist-info}/METADATA +1 -1
- {wandb-0.19.2.dist-info → wandb-0.19.4rc1.dist-info}/RECORD +15 -15
- {wandb-0.19.2.dist-info → wandb-0.19.4rc1.dist-info}/WHEEL +0 -0
- {wandb-0.19.2.dist-info → wandb-0.19.4rc1.dist-info}/entry_points.txt +0 -0
- {wandb-0.19.2.dist-info → wandb-0.19.4rc1.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/wandb_init.py
CHANGED
@@ -11,9 +11,11 @@ For more on using `wandb.init()`, including code snippets, check out our
|
|
11
11
|
from __future__ import annotations
|
12
12
|
|
13
13
|
import copy
|
14
|
+
import dataclasses
|
14
15
|
import json
|
15
16
|
import logging
|
16
17
|
import os
|
18
|
+
import pathlib
|
17
19
|
import platform
|
18
20
|
import sys
|
19
21
|
import tempfile
|
@@ -48,14 +50,6 @@ from .wandb_settings import Settings
|
|
48
50
|
if TYPE_CHECKING:
|
49
51
|
from wandb.proto import wandb_internal_pb2 as pb
|
50
52
|
|
51
|
-
logger: wandb_setup.Logger | None = None # logger configured during wandb.init()
|
52
|
-
|
53
|
-
|
54
|
-
def _set_logger(log_object: wandb_setup.Logger | None) -> None:
|
55
|
-
"""Configure module logger."""
|
56
|
-
global logger
|
57
|
-
logger = log_object
|
58
|
-
|
59
53
|
|
60
54
|
def _huggingface_version() -> str | None:
|
61
55
|
if "transformers" in sys.modules:
|
@@ -112,27 +106,83 @@ def _handle_launch_config(settings: Settings) -> dict[str, Any]:
|
|
112
106
|
return launch_run_config
|
113
107
|
|
114
108
|
|
109
|
+
@dataclasses.dataclass(frozen=True)
|
110
|
+
class _ConfigParts:
|
111
|
+
base_no_artifacts: dict[str, Any]
|
112
|
+
"""The run config passed to `init()` minus any artifact-valued keys."""
|
113
|
+
|
114
|
+
sweep_no_artifacts: dict[str, Any]
|
115
|
+
"""The config loaded as part of a sweep minus any artifact-valued keys."""
|
116
|
+
|
117
|
+
launch_no_artifacts: dict[str, Any]
|
118
|
+
"""The config loaded as part of Launch minus any artifact-valued keys."""
|
119
|
+
|
120
|
+
artifacts: dict[str, Any]
|
121
|
+
"""Artifact keys removed from config dictionaries.
|
122
|
+
|
123
|
+
Due to implementation details of how a Run is constructed,
|
124
|
+
artifacts must be inserted into its config after initialization.
|
125
|
+
"""
|
126
|
+
|
127
|
+
|
115
128
|
class _WandbInit:
|
116
|
-
|
129
|
+
def __init__(
|
130
|
+
self,
|
131
|
+
wl: wandb_setup._WandbSetup,
|
132
|
+
telemetry: telemetry.TelemetryRecord,
|
133
|
+
) -> None:
|
134
|
+
self._wl = wl
|
135
|
+
|
136
|
+
self._telemetry = telemetry
|
137
|
+
"""Telemetry gathered before creating a run.
|
138
|
+
|
139
|
+
After the run is created, `telemetry.context()` is used instead.
|
140
|
+
"""
|
117
141
|
|
118
|
-
def __init__(self) -> None:
|
119
142
|
self.kwargs = None
|
120
|
-
self.settings: Settings | None = None
|
121
|
-
self.sweep_config: dict[str, Any] = {}
|
122
|
-
self.launch_config: dict[str, Any] = {}
|
123
|
-
self.config: dict[str, Any] = {}
|
124
143
|
self.run: Run | None = None
|
125
144
|
self.backend: Backend | None = None
|
126
145
|
|
127
146
|
self._teardown_hooks: list[TeardownHook] = []
|
128
|
-
self._wl = wandb.setup()
|
129
147
|
self.notebook: wandb.jupyter.Notebook | None = None # type: ignore
|
130
148
|
self.printer = printer.new_printer()
|
131
149
|
|
132
|
-
self._init_telemetry_obj = telemetry.TelemetryRecord()
|
133
|
-
|
134
150
|
self.deprecated_features_used: dict[str, str] = dict()
|
135
151
|
|
152
|
+
@property
|
153
|
+
def _logger(self) -> wandb_setup.Logger:
|
154
|
+
return self._wl._get_logger()
|
155
|
+
|
156
|
+
def maybe_login(self, init_settings: Settings) -> None:
|
157
|
+
"""Log in if we are not creating an offline or disabled run.
|
158
|
+
|
159
|
+
This may change the W&B singleton settings.
|
160
|
+
|
161
|
+
Args:
|
162
|
+
init_settings: Settings passed to `wandb.init()` or set via
|
163
|
+
keyword arguments.
|
164
|
+
"""
|
165
|
+
# Allow settings passed to init() to override inferred values.
|
166
|
+
#
|
167
|
+
# Calling login() may change settings on the singleton,
|
168
|
+
# so these may not be the final run settings.
|
169
|
+
run_settings = self._wl.settings.model_copy()
|
170
|
+
run_settings.update_from_settings(init_settings)
|
171
|
+
|
172
|
+
# NOTE: _noop or _offline can become true after _login().
|
173
|
+
# _noop happens if _login hits a timeout.
|
174
|
+
# _offline can be selected by the user at the login prompt.
|
175
|
+
if run_settings._noop or run_settings._offline:
|
176
|
+
return
|
177
|
+
|
178
|
+
wandb_login._login(
|
179
|
+
anonymous=run_settings.anonymous,
|
180
|
+
force=run_settings.force,
|
181
|
+
_disable_warning=True,
|
182
|
+
_silent=run_settings.quiet or run_settings.silent,
|
183
|
+
_entity=run_settings.entity,
|
184
|
+
)
|
185
|
+
|
136
186
|
def warn_env_vars_change_after_setup(self) -> None:
|
137
187
|
"""Warn if environment variables change after wandb singleton is initialized.
|
138
188
|
|
@@ -202,24 +252,15 @@ class _WandbInit:
|
|
202
252
|
warn("run_id", init_settings.run_id)
|
203
253
|
init_settings.run_id = None
|
204
254
|
|
205
|
-
def
|
206
|
-
|
207
|
-
init_settings: Settings,
|
208
|
-
config: dict | str | None = None,
|
209
|
-
config_exclude_keys: list[str] | None = None,
|
210
|
-
config_include_keys: list[str] | None = None,
|
211
|
-
allow_val_change: bool | None = None,
|
212
|
-
monitor_gym: bool | None = None,
|
213
|
-
) -> None:
|
214
|
-
"""Complete setup for `wandb.init()`.
|
255
|
+
def make_run_settings(self, init_settings: Settings) -> Settings:
|
256
|
+
"""Returns the run's settings.
|
215
257
|
|
216
|
-
|
258
|
+
Args:
|
259
|
+
init_settings: Settings passed to `wandb.init()` or set via
|
260
|
+
keyword arguments.
|
217
261
|
"""
|
218
262
|
self.warn_env_vars_change_after_setup()
|
219
263
|
|
220
|
-
_set_logger(self._wl._get_logger())
|
221
|
-
assert logger
|
222
|
-
|
223
264
|
self.clear_run_path_if_sweep_or_launch(init_settings)
|
224
265
|
|
225
266
|
# Inherit global settings.
|
@@ -231,20 +272,129 @@ class _WandbInit:
|
|
231
272
|
# Infer the run ID from SageMaker.
|
232
273
|
if not settings.sagemaker_disable and sagemaker.is_using_sagemaker():
|
233
274
|
if sagemaker.set_run_id(settings):
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
if
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
275
|
+
self._logger.info("set run ID and group based on SageMaker")
|
276
|
+
self._telemetry.feature.sagemaker = True
|
277
|
+
|
278
|
+
# get status of code saving before applying user settings
|
279
|
+
save_code_pre_user_settings = settings.save_code
|
280
|
+
if not settings._offline and not settings._noop:
|
281
|
+
user_settings = self._wl._load_user_settings()
|
282
|
+
if user_settings is not None:
|
283
|
+
settings.update_from_dict(user_settings)
|
284
|
+
|
285
|
+
# ensure that user settings don't set saving to true
|
286
|
+
# if user explicitly set these to false in UI
|
287
|
+
if save_code_pre_user_settings is False:
|
288
|
+
settings.save_code = False
|
289
|
+
|
290
|
+
# TODO: remove this once we refactor the client. This is a temporary
|
291
|
+
# fix to make sure that we use the same project name for wandb-core.
|
292
|
+
# The reason this is not going through the settings object is to
|
293
|
+
# avoid failure cases in other parts of the code that will be
|
294
|
+
# removed with the switch to wandb-core.
|
295
|
+
if settings.project is None:
|
296
|
+
settings.project = wandb.util.auto_project_name(settings.program)
|
297
|
+
|
298
|
+
settings.x_start_time = time.time()
|
299
|
+
|
300
|
+
return settings
|
301
|
+
|
302
|
+
def _load_autoresume_run_id(self, resume_file: pathlib.Path) -> str | None:
|
303
|
+
"""Returns the run_id stored in the auto-resume file, if any.
|
247
304
|
|
305
|
+
Returns None if the file does not exist or is not in a valid format.
|
306
|
+
|
307
|
+
Args:
|
308
|
+
resume_file: The file path to use for resume='auto' mode.
|
309
|
+
"""
|
310
|
+
if not resume_file.exists():
|
311
|
+
return None
|
312
|
+
|
313
|
+
with resume_file.open() as f:
|
314
|
+
try:
|
315
|
+
return json.load(f)["run_id"]
|
316
|
+
|
317
|
+
except json.JSONDecodeError as e:
|
318
|
+
self._logger.exception(
|
319
|
+
f"could not decode {resume_file}, ignoring",
|
320
|
+
exc_info=e,
|
321
|
+
)
|
322
|
+
return None
|
323
|
+
|
324
|
+
except KeyError:
|
325
|
+
self._logger.error(
|
326
|
+
f"resume file at {resume_file} did not store a run_id"
|
327
|
+
)
|
328
|
+
return None
|
329
|
+
|
330
|
+
def _save_autoresume_run_id(
|
331
|
+
self,
|
332
|
+
*,
|
333
|
+
resume_file: pathlib.Path,
|
334
|
+
run_id: str,
|
335
|
+
) -> None:
|
336
|
+
"""Write the run ID to the auto-resume file."""
|
337
|
+
resume_file.parent.mkdir(exist_ok=True)
|
338
|
+
with resume_file.open("w") as f:
|
339
|
+
json.dump({"run_id": run_id}, f)
|
340
|
+
|
341
|
+
def set_run_id(self, settings: Settings) -> None:
|
342
|
+
"""Set the run ID and possibly save it to the auto-resume file.
|
343
|
+
|
344
|
+
After this, `settings.run_id` is guaranteed to be set.
|
345
|
+
|
346
|
+
Args:
|
347
|
+
settings: The run's settings derived from the environment
|
348
|
+
and explicit values passed to `wandb.init()`.
|
349
|
+
"""
|
350
|
+
if settings.resume == "auto" and settings.resume_fname:
|
351
|
+
resume_path = pathlib.Path(settings.resume_fname)
|
352
|
+
else:
|
353
|
+
resume_path = None
|
354
|
+
|
355
|
+
if resume_path:
|
356
|
+
previous_id = self._load_autoresume_run_id(resume_path)
|
357
|
+
|
358
|
+
if not previous_id:
|
359
|
+
pass
|
360
|
+
elif settings.run_id is None:
|
361
|
+
self._logger.info(f"loaded run ID from {resume_path}")
|
362
|
+
settings.run_id = previous_id
|
363
|
+
elif settings.run_id != previous_id:
|
364
|
+
wandb.termwarn(
|
365
|
+
f"Ignoring ID {previous_id} loaded due to resume='auto'"
|
366
|
+
f" because the run ID is set to {settings.run_id}.",
|
367
|
+
)
|
368
|
+
|
369
|
+
# If no run ID was inferred, explicitly set, or loaded from an
|
370
|
+
# auto-resume file, then we generate a new ID.
|
371
|
+
if settings.run_id is None:
|
372
|
+
settings.run_id = runid.generate_id()
|
373
|
+
|
374
|
+
if resume_path:
|
375
|
+
self._save_autoresume_run_id(
|
376
|
+
resume_file=resume_path,
|
377
|
+
run_id=settings.run_id,
|
378
|
+
)
|
379
|
+
|
380
|
+
def make_run_config(
|
381
|
+
self,
|
382
|
+
settings: Settings,
|
383
|
+
config: dict | str | None = None,
|
384
|
+
config_exclude_keys: list[str] | None = None,
|
385
|
+
config_include_keys: list[str] | None = None,
|
386
|
+
) -> _ConfigParts:
|
387
|
+
"""Construct the run's config.
|
388
|
+
|
389
|
+
Args:
|
390
|
+
settings: The run's finalized settings.
|
391
|
+
config: The config passed to `init()`.
|
392
|
+
config_exclude_keys: Deprecated. Keys to filter out from `config`.
|
393
|
+
config_include_keys: Deprecated. Keys to include from `config`.
|
394
|
+
|
395
|
+
Returns:
|
396
|
+
Initial values for the run's config.
|
397
|
+
"""
|
248
398
|
# TODO: remove this once officially deprecated
|
249
399
|
if config_exclude_keys:
|
250
400
|
self.deprecated_features_used["config_exclude_keys"] = (
|
@@ -260,123 +410,77 @@ class _WandbInit:
|
|
260
410
|
exclude=config_exclude_keys,
|
261
411
|
)
|
262
412
|
|
263
|
-
|
264
|
-
|
265
|
-
|
413
|
+
result = _ConfigParts(
|
414
|
+
base_no_artifacts=dict(),
|
415
|
+
sweep_no_artifacts=dict(),
|
416
|
+
launch_no_artifacts=dict(),
|
417
|
+
artifacts=dict(),
|
418
|
+
)
|
266
419
|
|
267
420
|
if not settings.sagemaker_disable and sagemaker.is_using_sagemaker():
|
268
421
|
sagemaker_config = sagemaker.parse_sm_config()
|
269
|
-
self._split_artifacts_from_config(
|
270
|
-
|
271
|
-
|
272
|
-
|
422
|
+
self._split_artifacts_from_config(
|
423
|
+
sagemaker_config,
|
424
|
+
config_target=result.base_no_artifacts,
|
425
|
+
artifacts=result.artifacts,
|
426
|
+
)
|
427
|
+
self._telemetry.feature.sagemaker = True
|
273
428
|
|
274
429
|
if self._wl._config:
|
275
|
-
self._split_artifacts_from_config(
|
430
|
+
self._split_artifacts_from_config(
|
431
|
+
self._wl._config,
|
432
|
+
config_target=result.base_no_artifacts,
|
433
|
+
artifacts=result.artifacts,
|
434
|
+
)
|
276
435
|
|
277
436
|
if config and isinstance(config, dict):
|
278
|
-
self._split_artifacts_from_config(
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
if sweep_config:
|
283
|
-
self._split_artifacts_from_config(sweep_config, self.sweep_config)
|
284
|
-
|
285
|
-
if monitor_gym and len(wandb.patched["gym"]) == 0:
|
286
|
-
wandb.gym.monitor() # type: ignore
|
287
|
-
|
288
|
-
if wandb.patched["tensorboard"]:
|
289
|
-
with telemetry.context(obj=self._init_telemetry_obj) as tel:
|
290
|
-
tel.feature.tensorboard_patch = True
|
291
|
-
|
292
|
-
if settings.sync_tensorboard:
|
293
|
-
if len(wandb.patched["tensorboard"]) == 0:
|
294
|
-
wandb.tensorboard.patch() # type: ignore
|
295
|
-
with telemetry.context(obj=self._init_telemetry_obj) as tel:
|
296
|
-
tel.feature.tensorboard_sync = True
|
297
|
-
|
298
|
-
if not settings._offline and not settings._noop:
|
299
|
-
wandb_login._login(
|
300
|
-
anonymous=settings.anonymous,
|
301
|
-
force=settings.force,
|
302
|
-
_disable_warning=True,
|
303
|
-
_silent=settings.quiet or settings.silent,
|
304
|
-
_entity=settings.entity,
|
437
|
+
self._split_artifacts_from_config(
|
438
|
+
config,
|
439
|
+
config_target=result.base_no_artifacts,
|
440
|
+
artifacts=result.artifacts,
|
305
441
|
)
|
306
442
|
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
"base_url": self._wl.settings.base_url,
|
314
|
-
"force": self._wl.settings.force,
|
315
|
-
"login_timeout": self._wl.settings.login_timeout,
|
316
|
-
}.items()
|
317
|
-
if v is not None
|
318
|
-
}
|
319
|
-
if login_settings:
|
320
|
-
settings.update_from_dict(login_settings)
|
321
|
-
|
322
|
-
# handle custom resume logic
|
323
|
-
settings.handle_resume_logic()
|
324
|
-
|
325
|
-
# get status of code saving before applying user settings
|
326
|
-
save_code_pre_user_settings = settings.save_code
|
327
|
-
if not settings._offline and not settings._noop:
|
328
|
-
user_settings = self._wl._load_user_settings()
|
329
|
-
if user_settings is not None:
|
330
|
-
settings.update_from_dict(user_settings)
|
331
|
-
|
332
|
-
# ensure that user settings don't set saving to true
|
333
|
-
# if user explicitly set these to false in UI
|
334
|
-
if save_code_pre_user_settings is False:
|
335
|
-
settings.save_code = False
|
336
|
-
|
337
|
-
# TODO: remove this once we refactor the client. This is a temporary
|
338
|
-
# fix to make sure that we use the same project name for wandb-core.
|
339
|
-
# The reason this is not going through the settings object is to
|
340
|
-
# avoid failure cases in other parts of the code that will be
|
341
|
-
# removed with the switch to wandb-core.
|
342
|
-
if settings.project is None:
|
343
|
-
settings.project = wandb.util.auto_project_name(settings.program)
|
344
|
-
|
345
|
-
settings.x_start_time = time.time()
|
346
|
-
|
347
|
-
if not settings._noop:
|
348
|
-
self._log_setup(settings)
|
443
|
+
if self._wl._sweep_config:
|
444
|
+
self._split_artifacts_from_config(
|
445
|
+
self._wl._sweep_config,
|
446
|
+
config_target=result.sweep_no_artifacts,
|
447
|
+
artifacts=result.artifacts,
|
448
|
+
)
|
349
449
|
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
450
|
+
if launch_config := _handle_launch_config(settings):
|
451
|
+
self._split_artifacts_from_config(
|
452
|
+
launch_config,
|
453
|
+
config_target=result.launch_no_artifacts,
|
454
|
+
artifacts=result.artifacts,
|
455
|
+
)
|
355
456
|
|
356
|
-
|
457
|
+
return result
|
357
458
|
|
358
459
|
def teardown(self) -> None:
|
359
460
|
# TODO: currently this is only called on failed wandb.init attempts
|
360
461
|
# normally this happens on the run object
|
361
|
-
|
362
|
-
logger.info("tearing down wandb.init")
|
462
|
+
self._logger.info("tearing down wandb.init")
|
363
463
|
for hook in self._teardown_hooks:
|
364
464
|
hook.call()
|
365
465
|
|
366
466
|
def _split_artifacts_from_config(
|
367
|
-
self,
|
467
|
+
self,
|
468
|
+
config_source: dict,
|
469
|
+
config_target: dict,
|
470
|
+
artifacts: dict,
|
368
471
|
) -> None:
|
369
472
|
for k, v in config_source.items():
|
370
473
|
if _is_artifact_representation(v):
|
371
|
-
|
474
|
+
artifacts[k] = v
|
372
475
|
else:
|
373
476
|
config_target.setdefault(k, v)
|
374
477
|
|
375
|
-
def
|
376
|
-
"""
|
478
|
+
def _create_logger(self, log_fname: str) -> logging.Logger:
|
479
|
+
"""Returns a logger configured to write to a file.
|
377
480
|
|
378
|
-
This adds a run_id to the log, in case of multiple processes on the same
|
379
|
-
Currently, there is no way to disable logging after it's
|
481
|
+
This adds a run_id to the log, in case of multiple processes on the same
|
482
|
+
machine. Currently, there is no way to disable logging after it's
|
483
|
+
enabled.
|
380
484
|
"""
|
381
485
|
handler = logging.FileHandler(log_fname)
|
382
486
|
handler.setLevel(logging.INFO)
|
@@ -387,7 +491,8 @@ class _WandbInit:
|
|
387
491
|
)
|
388
492
|
|
389
493
|
handler.setFormatter(formatter)
|
390
|
-
|
494
|
+
|
495
|
+
logger = logging.getLogger("wandb")
|
391
496
|
logger.propagate = False
|
392
497
|
logger.addHandler(handler)
|
393
498
|
# TODO: make me configurable
|
@@ -399,6 +504,8 @@ class _WandbInit:
|
|
399
504
|
)
|
400
505
|
)
|
401
506
|
|
507
|
+
return logger
|
508
|
+
|
402
509
|
def _safe_symlink(
|
403
510
|
self, base: str, target: str, name: str, delete: bool = False
|
404
511
|
) -> None:
|
@@ -429,14 +536,14 @@ class _WandbInit:
|
|
429
536
|
if self.notebook.save_ipynb(): # type: ignore
|
430
537
|
assert self.run is not None
|
431
538
|
res = self.run.log_code(root=None)
|
432
|
-
|
539
|
+
self._logger.info("saved code: %s", res) # type: ignore
|
433
540
|
if self.backend.interface is not None:
|
434
|
-
|
541
|
+
self._logger.info("pausing backend") # type: ignore
|
435
542
|
self.backend.interface.publish_pause()
|
436
543
|
|
437
544
|
def _resume_backend(self, *args: Any, **kwargs: Any) -> None: # noqa
|
438
545
|
if self.backend is not None and self.backend.interface is not None:
|
439
|
-
|
546
|
+
self._logger.info("resuming backend") # type: ignore
|
440
547
|
self.backend.interface.publish_resume()
|
441
548
|
|
442
549
|
def _jupyter_teardown(self) -> None:
|
@@ -447,8 +554,8 @@ class _WandbInit:
|
|
447
554
|
if self.notebook.save_ipynb():
|
448
555
|
assert self.run is not None
|
449
556
|
res = self.run.log_code(root=None)
|
450
|
-
|
451
|
-
|
557
|
+
self._logger.info("saved code and history: %s", res) # type: ignore
|
558
|
+
self._logger.info("cleaning up jupyter logic") # type: ignore
|
452
559
|
# because of how we bind our methods we manually find them to unregister
|
453
560
|
for hook in ipython.events.callbacks["pre_run_cell"]:
|
454
561
|
if "_resume_backend" in hook.__name__:
|
@@ -459,14 +566,14 @@ class _WandbInit:
|
|
459
566
|
ipython.display_pub.publish = ipython.display_pub._orig_publish
|
460
567
|
del ipython.display_pub._orig_publish
|
461
568
|
|
462
|
-
def
|
569
|
+
def monkeypatch_ipython(self, settings: Settings) -> None:
|
463
570
|
"""Add hooks, and session history saving."""
|
464
571
|
self.notebook = wandb.jupyter.Notebook(settings) # type: ignore
|
465
572
|
ipython = self.notebook.shell
|
466
573
|
|
467
574
|
# Monkey patch ipython publish to capture displayed outputs
|
468
575
|
if not hasattr(ipython.display_pub, "_orig_publish"):
|
469
|
-
|
576
|
+
self._logger.info("configuring jupyter hooks %s", self) # type: ignore
|
470
577
|
ipython.display_pub._orig_publish = ipython.display_pub.publish
|
471
578
|
# Registering resume and pause hooks
|
472
579
|
|
@@ -485,7 +592,7 @@ class _WandbInit:
|
|
485
592
|
|
486
593
|
ipython.display_pub.publish = publish
|
487
594
|
|
488
|
-
def
|
595
|
+
def setup_run_log_directory(self, settings: Settings) -> None:
|
489
596
|
"""Set up logging from settings."""
|
490
597
|
filesystem.mkdir_exists_ok(os.path.dirname(settings.log_user))
|
491
598
|
filesystem.mkdir_exists_ok(os.path.dirname(settings.log_internal))
|
@@ -513,25 +620,22 @@ class _WandbInit:
|
|
513
620
|
delete=True,
|
514
621
|
)
|
515
622
|
|
516
|
-
|
517
|
-
self.
|
518
|
-
|
519
|
-
assert self._wl
|
520
|
-
assert logger
|
521
|
-
|
522
|
-
self._wl._early_logger_flush(logger)
|
523
|
-
logger.info(f"Logging user logs to {settings.log_user}")
|
524
|
-
logger.info(f"Logging internal logs to {settings.log_internal}")
|
623
|
+
self._wl._early_logger_flush(self._create_logger(settings.log_user))
|
624
|
+
self._logger.info(f"Logging user logs to {settings.log_user}")
|
625
|
+
self._logger.info(f"Logging internal logs to {settings.log_internal}")
|
525
626
|
|
526
|
-
def
|
627
|
+
def make_disabled_run(self, config: _ConfigParts) -> Run:
|
527
628
|
"""Returns a Run-like object where all methods are no-ops.
|
528
629
|
|
529
|
-
This method is used when
|
530
|
-
|
630
|
+
This method is used when the `mode` setting is set to "disabled", such as
|
631
|
+
by wandb.init(mode="disabled") or by setting the WANDB_MODE environment
|
632
|
+
variable to "disabled".
|
633
|
+
|
634
|
+
It creates a Run object that mimics the behavior of a normal Run but doesn't
|
531
635
|
communicate with the W&B servers.
|
532
636
|
|
533
|
-
The returned Run object has all expected attributes and methods, but they
|
534
|
-
no-op versions that don't perform any actual logging or communication.
|
637
|
+
The returned Run object has all expected attributes and methods, but they
|
638
|
+
are no-op versions that don't perform any actual logging or communication.
|
535
639
|
"""
|
536
640
|
run_id = runid.generate_id()
|
537
641
|
drun = Run(
|
@@ -549,8 +653,8 @@ class _WandbInit:
|
|
549
653
|
)
|
550
654
|
# config, summary, and metadata objects
|
551
655
|
drun._config = wandb.sdk.wandb_config.Config()
|
552
|
-
drun._config.update(
|
553
|
-
drun._config.update(
|
656
|
+
drun._config.update(config.sweep_no_artifacts)
|
657
|
+
drun._config.update(config.base_no_artifacts)
|
554
658
|
drun.summary = SummaryDisabled() # type: ignore
|
555
659
|
drun._Run__metadata = wandb.sdk.wandb_metadata.Metadata()
|
556
660
|
|
@@ -635,24 +739,19 @@ class _WandbInit:
|
|
635
739
|
percent_done = handle.percent_done
|
636
740
|
self.printer.progress_update(line, percent_done=percent_done)
|
637
741
|
|
638
|
-
def init(self) -> Run: # noqa: C901
|
639
|
-
|
640
|
-
raise RuntimeError("Logger not initialized")
|
641
|
-
logger.info("calling init triggers")
|
742
|
+
def init(self, settings: Settings, config: _ConfigParts) -> Run: # noqa: C901
|
743
|
+
self._logger.info("calling init triggers")
|
642
744
|
trigger.call("on_init")
|
643
745
|
|
644
|
-
assert self.settings is not None
|
645
746
|
assert self._wl is not None
|
646
747
|
|
647
|
-
|
648
|
-
f"wandb.init called with sweep_config: {
|
748
|
+
self._logger.info(
|
749
|
+
f"wandb.init called with sweep_config: {config.sweep_no_artifacts}"
|
750
|
+
f"\nconfig: {config.base_no_artifacts}"
|
649
751
|
)
|
650
752
|
|
651
|
-
if self.settings._noop:
|
652
|
-
return self._make_run_disabled()
|
653
753
|
if (
|
654
|
-
|
655
|
-
or (self.settings._jupyter and self.settings.reinit is not False)
|
754
|
+
settings.reinit or (settings._jupyter and settings.reinit is not False)
|
656
755
|
) and len(self._wl._global_run_stack) > 0:
|
657
756
|
if len(self._wl._global_run_stack) > 1:
|
658
757
|
wandb.termwarn(
|
@@ -663,63 +762,66 @@ class _WandbInit:
|
|
663
762
|
)
|
664
763
|
|
665
764
|
latest_run = self._wl._global_run_stack[-1]
|
666
|
-
|
765
|
+
self._logger.info(f"found existing run on stack: {latest_run.id}")
|
667
766
|
latest_run.finish()
|
668
767
|
elif wandb.run is not None and os.getpid() == wandb.run._init_pid:
|
669
|
-
|
768
|
+
self._logger.info("wandb.init() called when a run is still active")
|
769
|
+
|
770
|
+
# NOTE: Updates telemetry on the pre-existing run.
|
670
771
|
with telemetry.context() as tel:
|
671
772
|
tel.feature.init_return_run = True
|
773
|
+
|
672
774
|
return wandb.run
|
673
775
|
|
674
|
-
|
776
|
+
self._logger.info("starting backend")
|
675
777
|
|
676
|
-
if not
|
778
|
+
if not settings.x_disable_service:
|
677
779
|
service = self._wl.ensure_service()
|
678
|
-
|
780
|
+
self._logger.info("sending inform_init request")
|
679
781
|
service.inform_init(
|
680
|
-
settings=
|
681
|
-
run_id=
|
782
|
+
settings=settings.to_proto(),
|
783
|
+
run_id=settings.run_id, # type: ignore
|
682
784
|
)
|
683
785
|
else:
|
684
786
|
service = None
|
685
787
|
|
686
788
|
mailbox = Mailbox()
|
687
789
|
backend = Backend(
|
688
|
-
settings=
|
790
|
+
settings=settings,
|
689
791
|
service=service,
|
690
792
|
mailbox=mailbox,
|
691
793
|
)
|
692
794
|
backend.ensure_launched()
|
693
|
-
|
795
|
+
self._logger.info("backend started and connected")
|
694
796
|
|
695
797
|
# resuming needs access to the server, check server_status()?
|
696
798
|
run = Run(
|
697
|
-
config=
|
698
|
-
settings=
|
699
|
-
sweep_config=
|
700
|
-
launch_config=
|
799
|
+
config=config.base_no_artifacts,
|
800
|
+
settings=settings,
|
801
|
+
sweep_config=config.sweep_no_artifacts,
|
802
|
+
launch_config=config.launch_no_artifacts,
|
701
803
|
)
|
702
804
|
|
703
805
|
# Populate initial telemetry
|
704
|
-
with telemetry.context(run=run, obj=self.
|
806
|
+
with telemetry.context(run=run, obj=self._telemetry) as tel:
|
705
807
|
tel.cli_version = wandb.__version__
|
706
808
|
tel.python_version = platform.python_version()
|
707
809
|
tel.platform = f"{platform.system()}-{platform.machine()}".lower()
|
708
810
|
hf_version = _huggingface_version()
|
709
811
|
if hf_version:
|
710
812
|
tel.huggingface_version = hf_version
|
711
|
-
if
|
813
|
+
if settings._jupyter:
|
712
814
|
tel.env.jupyter = True
|
713
|
-
if
|
815
|
+
if settings._ipython:
|
714
816
|
tel.env.ipython = True
|
715
|
-
if
|
817
|
+
if settings._colab:
|
716
818
|
tel.env.colab = True
|
717
|
-
if
|
819
|
+
if settings._kaggle:
|
718
820
|
tel.env.kaggle = True
|
719
|
-
if
|
821
|
+
if settings._windows:
|
720
822
|
tel.env.windows = True
|
721
823
|
|
722
|
-
if
|
824
|
+
if settings.launch:
|
723
825
|
tel.feature.launch = True
|
724
826
|
|
725
827
|
for module_name in telemetry.list_telemetry_imports(only_imported=True):
|
@@ -727,8 +829,8 @@ class _WandbInit:
|
|
727
829
|
|
728
830
|
# probe the active start method
|
729
831
|
active_start_method: str | None = None
|
730
|
-
if
|
731
|
-
active_start_method =
|
832
|
+
if settings.start_method == "thread":
|
833
|
+
active_start_method = settings.start_method
|
732
834
|
else:
|
733
835
|
active_start_method = getattr(
|
734
836
|
backend._multiprocessing, "get_start_method", lambda: None
|
@@ -746,7 +848,7 @@ class _WandbInit:
|
|
746
848
|
if os.environ.get("PEX"):
|
747
849
|
tel.env.pex = True
|
748
850
|
|
749
|
-
if
|
851
|
+
if settings._aws_lambda:
|
750
852
|
tel.env.aws_lambda = True
|
751
853
|
|
752
854
|
if os.environ.get(wandb.env._DISABLE_SERVICE):
|
@@ -754,13 +856,13 @@ class _WandbInit:
|
|
754
856
|
|
755
857
|
if service:
|
756
858
|
tel.feature.service = True
|
757
|
-
if
|
859
|
+
if settings.x_flow_control_disabled:
|
758
860
|
tel.feature.flow_control_disabled = True
|
759
|
-
if
|
861
|
+
if settings.x_flow_control_custom:
|
760
862
|
tel.feature.flow_control_custom = True
|
761
|
-
if not
|
863
|
+
if not settings.x_require_legacy_service:
|
762
864
|
tel.feature.core = True
|
763
|
-
if
|
865
|
+
if settings._shared:
|
764
866
|
wandb.termwarn(
|
765
867
|
"The `_shared` feature is experimental and may change. "
|
766
868
|
"Please contact support@wandb.com for guidance and to report any issues."
|
@@ -769,7 +871,7 @@ class _WandbInit:
|
|
769
871
|
|
770
872
|
tel.env.maybe_mp = _maybe_mp_process(backend)
|
771
873
|
|
772
|
-
if not
|
874
|
+
if not settings.label_disable:
|
773
875
|
if self.notebook:
|
774
876
|
run._label_probe_notebook(self.notebook)
|
775
877
|
else:
|
@@ -783,7 +885,7 @@ class _WandbInit:
|
|
783
885
|
run=run,
|
784
886
|
)
|
785
887
|
|
786
|
-
|
888
|
+
self._logger.info("updated telemetry")
|
787
889
|
|
788
890
|
run._set_library(self._wl)
|
789
891
|
run._set_backend(backend)
|
@@ -797,25 +899,23 @@ class _WandbInit:
|
|
797
899
|
# Using GitRepo() blocks & can be slow, depending on user's current git setup.
|
798
900
|
# We don't want to block run initialization/start request, so populate run's git
|
799
901
|
# info beforehand.
|
800
|
-
if not (
|
902
|
+
if not (settings.disable_git or settings.x_disable_machine_info):
|
801
903
|
run._populate_git_info()
|
802
904
|
|
803
905
|
run_result: pb.RunUpdateResult | None = None
|
804
906
|
|
805
|
-
if
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
wandb.termwarn(
|
811
|
-
"`resume` will be ignored since W&B syncing is set to `offline`. "
|
812
|
-
f"Starting a new run with run id {run.id}."
|
813
|
-
)
|
907
|
+
if settings._offline and settings.resume:
|
908
|
+
wandb.termwarn(
|
909
|
+
"`resume` will be ignored since W&B syncing is set to `offline`. "
|
910
|
+
f"Starting a new run with run id {run.id}."
|
911
|
+
)
|
814
912
|
error: wandb.Error | None = None
|
815
913
|
|
816
|
-
timeout =
|
914
|
+
timeout = settings.init_timeout
|
817
915
|
|
818
|
-
|
916
|
+
self._logger.info(
|
917
|
+
f"communicating run to backend with {timeout} second timeout",
|
918
|
+
)
|
819
919
|
|
820
920
|
run_init_handle = backend.interface.deliver_run(run)
|
821
921
|
result = run_init_handle.wait(
|
@@ -842,7 +942,7 @@ class _WandbInit:
|
|
842
942
|
error = ProtobufErrorHandler.to_exception(run_result.error)
|
843
943
|
|
844
944
|
if error is not None:
|
845
|
-
|
945
|
+
self._logger.error(f"encountered error: {error}")
|
846
946
|
if not service:
|
847
947
|
# Shutdown the backend and get rid of the logger
|
848
948
|
# we don't need to do console cleanup at this point
|
@@ -860,19 +960,19 @@ class _WandbInit:
|
|
860
960
|
)
|
861
961
|
|
862
962
|
if run_result.run.resumed:
|
863
|
-
|
963
|
+
self._logger.info("run resumed")
|
864
964
|
with telemetry.context(run=run) as tel:
|
865
965
|
tel.feature.resumed = run_result.run.resumed
|
866
966
|
run._set_run_obj(run_result.run)
|
867
967
|
|
868
|
-
|
968
|
+
self._logger.info("starting run threads in backend")
|
869
969
|
# initiate run (stats and metadata probing)
|
870
970
|
|
871
971
|
if service:
|
872
|
-
assert
|
972
|
+
assert settings.run_id
|
873
973
|
service.inform_start(
|
874
|
-
settings=
|
875
|
-
run_id=
|
974
|
+
settings=settings.to_proto(),
|
975
|
+
run_id=settings.run_id,
|
876
976
|
)
|
877
977
|
|
878
978
|
assert backend.interface
|
@@ -889,15 +989,15 @@ class _WandbInit:
|
|
889
989
|
|
890
990
|
run._handle_launch_artifact_overrides()
|
891
991
|
if (
|
892
|
-
|
893
|
-
and
|
894
|
-
and os.path.exists(
|
992
|
+
settings.launch
|
993
|
+
and settings.launch_config_path
|
994
|
+
and os.path.exists(settings.launch_config_path)
|
895
995
|
):
|
896
|
-
run.save(
|
996
|
+
run.save(settings.launch_config_path)
|
897
997
|
# put artifacts in run config here
|
898
998
|
# since doing so earlier will cause an error
|
899
999
|
# as the run is not upserted
|
900
|
-
for k, v in
|
1000
|
+
for k, v in config.artifacts.items():
|
901
1001
|
run.config.update({k: v}, allow_val_change=True)
|
902
1002
|
job_artifact = run._launch_artifact_mapping.get(
|
903
1003
|
wandb.util.LAUNCH_JOB_ARTIFACT_SLOT_NAME
|
@@ -907,7 +1007,7 @@ class _WandbInit:
|
|
907
1007
|
|
908
1008
|
self.backend = backend
|
909
1009
|
run._on_start()
|
910
|
-
|
1010
|
+
self._logger.info("run started, returning control to user process")
|
911
1011
|
return run
|
912
1012
|
|
913
1013
|
|
@@ -938,10 +1038,7 @@ def _attach(
|
|
938
1038
|
wandb._assert_is_user_process() # type: ignore
|
939
1039
|
|
940
1040
|
_wl = wandb.setup()
|
941
|
-
|
942
|
-
_set_logger(_wl._get_logger())
|
943
|
-
if logger is None:
|
944
|
-
raise UsageError("logger is not initialized")
|
1041
|
+
logger = _wl._get_logger()
|
945
1042
|
|
946
1043
|
service = _wl.ensure_service()
|
947
1044
|
|
@@ -992,6 +1089,26 @@ def _attach(
|
|
992
1089
|
return run
|
993
1090
|
|
994
1091
|
|
1092
|
+
def _monkeypatch_openai_gym() -> None:
|
1093
|
+
"""Patch OpenAI gym to log to the global `wandb.run`."""
|
1094
|
+
if len(wandb.patched["gym"]) > 0:
|
1095
|
+
return
|
1096
|
+
|
1097
|
+
from wandb.integration import gym
|
1098
|
+
|
1099
|
+
gym.monitor()
|
1100
|
+
|
1101
|
+
|
1102
|
+
def _monkeypatch_tensorboard() -> None:
|
1103
|
+
"""Patch TensorBoard to log to the global `wandb.run`."""
|
1104
|
+
if len(wandb.patched["tensorboard"]) > 0:
|
1105
|
+
return
|
1106
|
+
|
1107
|
+
from wandb.integration import tensorboard as tb_module
|
1108
|
+
|
1109
|
+
tb_module.patch()
|
1110
|
+
|
1111
|
+
|
995
1112
|
def init( # noqa: C901
|
996
1113
|
entity: str | None = None,
|
997
1114
|
project: str | None = None,
|
@@ -1229,6 +1346,8 @@ def init( # noqa: C901
|
|
1229
1346
|
"""
|
1230
1347
|
wandb._assert_is_user_process() # type: ignore
|
1231
1348
|
|
1349
|
+
init_telemetry = telemetry.TelemetryRecord()
|
1350
|
+
|
1232
1351
|
init_settings = Settings()
|
1233
1352
|
if isinstance(settings, dict):
|
1234
1353
|
init_settings = Settings(**settings)
|
@@ -1276,27 +1395,65 @@ def init( # noqa: C901
|
|
1276
1395
|
if resume_from is not None:
|
1277
1396
|
init_settings.resume_from = resume_from # type: ignore
|
1278
1397
|
|
1398
|
+
if config is not None:
|
1399
|
+
init_telemetry.feature.set_init_config = True
|
1400
|
+
|
1401
|
+
wl: wandb_setup._WandbSetup | None = None
|
1402
|
+
|
1279
1403
|
try:
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1404
|
+
wl = wandb.setup()
|
1405
|
+
|
1406
|
+
wi = _WandbInit(wl, init_telemetry)
|
1407
|
+
|
1408
|
+
wi.maybe_login(init_settings)
|
1409
|
+
run_settings = wi.make_run_settings(init_settings)
|
1410
|
+
|
1411
|
+
if run_settings.run_id is not None:
|
1412
|
+
init_telemetry.feature.set_init_id = True
|
1413
|
+
if run_settings.run_name is not None:
|
1414
|
+
init_telemetry.feature.set_init_name = True
|
1415
|
+
if run_settings.run_tags is not None:
|
1416
|
+
init_telemetry.feature.set_init_tags = True
|
1417
|
+
if run_settings._offline:
|
1418
|
+
init_telemetry.feature.offline = True
|
1419
|
+
|
1420
|
+
wi.set_run_id(run_settings)
|
1421
|
+
|
1422
|
+
run_config = wi.make_run_config(
|
1423
|
+
settings=run_settings,
|
1283
1424
|
config=config,
|
1284
1425
|
config_exclude_keys=config_exclude_keys,
|
1285
1426
|
config_include_keys=config_include_keys,
|
1286
|
-
allow_val_change=allow_val_change,
|
1287
|
-
monitor_gym=monitor_gym,
|
1288
1427
|
)
|
1289
|
-
|
1428
|
+
|
1429
|
+
if run_settings._noop:
|
1430
|
+
return wi.make_disabled_run(run_config)
|
1431
|
+
|
1432
|
+
wi.setup_run_log_directory(run_settings)
|
1433
|
+
if run_settings._jupyter:
|
1434
|
+
wi.monkeypatch_ipython(run_settings)
|
1435
|
+
|
1436
|
+
if monitor_gym:
|
1437
|
+
_monkeypatch_openai_gym()
|
1438
|
+
|
1439
|
+
if wandb.patched["tensorboard"]:
|
1440
|
+
# NOTE: The user may have called the patch function directly.
|
1441
|
+
init_telemetry.feature.tensorboard_patch = True
|
1442
|
+
if run_settings.sync_tensorboard:
|
1443
|
+
_monkeypatch_tensorboard()
|
1444
|
+
init_telemetry.feature.tensorboard_sync = True
|
1445
|
+
|
1446
|
+
return wi.init(run_settings, run_config)
|
1290
1447
|
|
1291
1448
|
except KeyboardInterrupt as e:
|
1292
|
-
if
|
1293
|
-
|
1449
|
+
if wl:
|
1450
|
+
wl._get_logger().warning("interrupted", exc_info=e)
|
1294
1451
|
|
1295
1452
|
raise
|
1296
1453
|
|
1297
1454
|
except Exception as e:
|
1298
|
-
if
|
1299
|
-
|
1455
|
+
if wl:
|
1456
|
+
wl._get_logger().exception("error in wandb.init()", exc_info=e)
|
1300
1457
|
|
1301
1458
|
# Need to build delay into this sentry capture because our exit hooks
|
1302
1459
|
# mess with sentry's ability to send out errors before the program ends.
|