wandb 0.19.1rc1__py3-none-win32.whl → 0.19.3__py3-none-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wandb/__init__.py +1 -7
- wandb/__init__.pyi +15 -7
- wandb/agents/pyagent.py +1 -1
- wandb/apis/importers/wandb.py +1 -1
- wandb/apis/public/files.py +1 -1
- wandb/apis/public/jobs.py +1 -1
- wandb/apis/public/runs.py +2 -7
- wandb/apis/reports/v1/__init__.py +1 -1
- wandb/apis/reports/v2/__init__.py +1 -1
- wandb/apis/workspaces/__init__.py +1 -1
- wandb/bin/gpu_stats.exe +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/beta.py +7 -4
- wandb/cli/cli.py +5 -7
- wandb/docker/__init__.py +4 -4
- wandb/integration/fastai/__init__.py +4 -6
- wandb/integration/keras/keras.py +5 -3
- wandb/integration/metaflow/metaflow.py +14 -16
- wandb/integration/prodigy/prodigy.py +3 -11
- wandb/integration/sagemaker/__init__.py +5 -3
- wandb/integration/sagemaker/config.py +17 -8
- wandb/integration/sagemaker/files.py +0 -1
- wandb/integration/sagemaker/resources.py +47 -18
- wandb/integration/torch/wandb_torch.py +1 -1
- wandb/proto/v3/wandb_internal_pb2.py +273 -235
- wandb/proto/v4/wandb_internal_pb2.py +222 -214
- wandb/proto/v5/wandb_internal_pb2.py +222 -214
- wandb/sdk/artifacts/artifact.py +3 -9
- wandb/sdk/backend/backend.py +1 -1
- wandb/sdk/data_types/base_types/wb_value.py +1 -1
- wandb/sdk/data_types/graph.py +2 -2
- wandb/sdk/data_types/saved_model.py +1 -1
- wandb/sdk/data_types/video.py +1 -1
- wandb/sdk/interface/interface.py +25 -25
- wandb/sdk/interface/interface_shared.py +21 -5
- wandb/sdk/internal/handler.py +19 -1
- wandb/sdk/internal/internal.py +1 -1
- wandb/sdk/internal/internal_api.py +4 -5
- wandb/sdk/internal/sample.py +2 -2
- wandb/sdk/internal/sender.py +1 -2
- wandb/sdk/internal/settings_static.py +3 -1
- wandb/sdk/internal/system/assets/disk.py +4 -4
- wandb/sdk/internal/system/assets/gpu.py +1 -1
- wandb/sdk/internal/system/assets/memory.py +1 -1
- wandb/sdk/internal/system/system_info.py +1 -1
- wandb/sdk/internal/system/system_monitor.py +3 -1
- wandb/sdk/internal/tb_watcher.py +1 -1
- wandb/sdk/launch/_project_spec.py +3 -3
- wandb/sdk/launch/builder/abstract.py +1 -1
- wandb/sdk/lib/apikey.py +2 -3
- wandb/sdk/lib/fsm.py +1 -1
- wandb/sdk/lib/gitlib.py +1 -1
- wandb/sdk/lib/gql_request.py +1 -1
- wandb/sdk/lib/interrupt.py +37 -0
- wandb/sdk/lib/lazyloader.py +1 -1
- wandb/sdk/lib/progress.py +7 -1
- wandb/sdk/lib/service_connection.py +1 -1
- wandb/sdk/lib/telemetry.py +1 -1
- wandb/sdk/service/_startup_debug.py +1 -1
- wandb/sdk/service/server_sock.py +3 -2
- wandb/sdk/service/service.py +1 -1
- wandb/sdk/service/streams.py +19 -17
- wandb/sdk/verify/verify.py +13 -13
- wandb/sdk/wandb_init.py +316 -246
- wandb/sdk/wandb_login.py +1 -1
- wandb/sdk/wandb_metadata.py +547 -0
- wandb/sdk/wandb_run.py +134 -39
- wandb/sdk/wandb_settings.py +7 -63
- wandb/sdk/wandb_setup.py +83 -82
- wandb/sdk/wandb_sweep.py +2 -2
- wandb/sdk/wandb_sync.py +15 -18
- wandb/sync/sync.py +10 -10
- wandb/util.py +11 -3
- wandb/wandb_agent.py +11 -16
- wandb/wandb_controller.py +7 -7
- {wandb-0.19.1rc1.dist-info → wandb-0.19.3.dist-info}/METADATA +3 -2
- {wandb-0.19.1rc1.dist-info → wandb-0.19.3.dist-info}/RECORD +80 -78
- {wandb-0.19.1rc1.dist-info → wandb-0.19.3.dist-info}/WHEEL +0 -0
- {wandb-0.19.1rc1.dist-info → wandb-0.19.3.dist-info}/entry_points.txt +0 -0
- {wandb-0.19.1rc1.dist-info → wandb-0.19.3.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/wandb_init.py
CHANGED
@@ -14,6 +14,7 @@ import copy
|
|
14
14
|
import json
|
15
15
|
import logging
|
16
16
|
import os
|
17
|
+
import pathlib
|
17
18
|
import platform
|
18
19
|
import sys
|
19
20
|
import tempfile
|
@@ -48,14 +49,6 @@ from .wandb_settings import Settings
|
|
48
49
|
if TYPE_CHECKING:
|
49
50
|
from wandb.proto import wandb_internal_pb2 as pb
|
50
51
|
|
51
|
-
logger: logging.Logger | None = None # logger configured during wandb.init()
|
52
|
-
|
53
|
-
|
54
|
-
def _set_logger(log_object: logging.Logger) -> None:
|
55
|
-
"""Configure module logger."""
|
56
|
-
global logger
|
57
|
-
logger = log_object
|
58
|
-
|
59
52
|
|
60
53
|
def _huggingface_version() -> str | None:
|
61
54
|
if "transformers" in sys.modules:
|
@@ -115,9 +108,10 @@ def _handle_launch_config(settings: Settings) -> dict[str, Any]:
|
|
115
108
|
class _WandbInit:
|
116
109
|
_init_telemetry_obj: telemetry.TelemetryRecord
|
117
110
|
|
118
|
-
def __init__(self) -> None:
|
111
|
+
def __init__(self, wl: wandb_setup._WandbSetup) -> None:
|
112
|
+
self._wl = wl
|
113
|
+
|
119
114
|
self.kwargs = None
|
120
|
-
self.settings: Settings | None = None
|
121
115
|
self.sweep_config: dict[str, Any] = {}
|
122
116
|
self.launch_config: dict[str, Any] = {}
|
123
117
|
self.config: dict[str, Any] = {}
|
@@ -125,7 +119,6 @@ class _WandbInit:
|
|
125
119
|
self.backend: Backend | None = None
|
126
120
|
|
127
121
|
self._teardown_hooks: list[TeardownHook] = []
|
128
|
-
self._wl: wandb_setup._WandbSetup | None = None
|
129
122
|
self.notebook: wandb.jupyter.Notebook | None = None # type: ignore
|
130
123
|
self.printer = printer.new_printer()
|
131
124
|
|
@@ -133,13 +126,47 @@ class _WandbInit:
|
|
133
126
|
|
134
127
|
self.deprecated_features_used: dict[str, str] = dict()
|
135
128
|
|
129
|
+
@property
|
130
|
+
def _logger(self) -> wandb_setup.Logger:
|
131
|
+
return self._wl._get_logger()
|
132
|
+
|
133
|
+
def maybe_login(self, init_settings: Settings) -> None:
|
134
|
+
"""Log in if we are not creating an offline or disabled run.
|
135
|
+
|
136
|
+
This may change the W&B singleton settings.
|
137
|
+
|
138
|
+
Args:
|
139
|
+
init_settings: Settings passed to `wandb.init()` or set via
|
140
|
+
keyword arguments.
|
141
|
+
"""
|
142
|
+
# Allow settings passed to init() to override inferred values.
|
143
|
+
#
|
144
|
+
# Calling login() may change settings on the singleton,
|
145
|
+
# so these may not be the final run settings.
|
146
|
+
run_settings = self._wl.settings.model_copy()
|
147
|
+
run_settings.update_from_settings(init_settings)
|
148
|
+
|
149
|
+
# NOTE: _noop or _offline can become true after _login().
|
150
|
+
# _noop happens if _login hits a timeout.
|
151
|
+
# _offline can be selected by the user at the login prompt.
|
152
|
+
if run_settings._noop or run_settings._offline:
|
153
|
+
return
|
154
|
+
|
155
|
+
wandb_login._login(
|
156
|
+
anonymous=run_settings.anonymous,
|
157
|
+
force=run_settings.force,
|
158
|
+
_disable_warning=True,
|
159
|
+
_silent=run_settings.quiet or run_settings.silent,
|
160
|
+
_entity=run_settings.entity,
|
161
|
+
)
|
162
|
+
|
136
163
|
def warn_env_vars_change_after_setup(self) -> None:
|
137
164
|
"""Warn if environment variables change after wandb singleton is initialized.
|
138
165
|
|
139
166
|
Any settings from environment variables set after the singleton is initialized
|
140
167
|
(via login/setup/etc.) will be ignored.
|
141
168
|
"""
|
142
|
-
singleton = wandb_setup.
|
169
|
+
singleton = wandb_setup.singleton()
|
143
170
|
if singleton is None:
|
144
171
|
return
|
145
172
|
|
@@ -167,66 +194,176 @@ class _WandbInit:
|
|
167
194
|
)
|
168
195
|
self.printer.display(line, level="warn")
|
169
196
|
|
170
|
-
def
|
197
|
+
def clear_run_path_if_sweep_or_launch(
|
171
198
|
self,
|
172
199
|
init_settings: Settings,
|
173
|
-
config: dict | str | None = None,
|
174
|
-
config_exclude_keys: list[str] | None = None,
|
175
|
-
config_include_keys: list[str] | None = None,
|
176
|
-
allow_val_change: bool | None = None,
|
177
|
-
monitor_gym: bool | None = None,
|
178
200
|
) -> None:
|
179
|
-
"""
|
201
|
+
"""Clear project/entity/run_id keys if in a Sweep or a Launch context.
|
180
202
|
|
181
|
-
|
203
|
+
Args:
|
204
|
+
init_settings: Settings specified in the call to `wandb.init()`.
|
182
205
|
"""
|
183
|
-
|
206
|
+
when_doing_thing = ""
|
184
207
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
# TODO: x_disable_service is deprecated, remove this once officially deprecated
|
190
|
-
if init_settings.x_disable_service:
|
191
|
-
setup_settings_dict["x_disable_service"] = init_settings.x_disable_service
|
192
|
-
setup_settings = (
|
193
|
-
wandb.Settings(**setup_settings_dict) if setup_settings_dict else None
|
194
|
-
)
|
208
|
+
if self._wl.settings.sweep_id:
|
209
|
+
when_doing_thing = "when running a sweep"
|
210
|
+
elif self._wl.settings.launch:
|
211
|
+
when_doing_thing = "when running from a wandb launch context"
|
195
212
|
|
196
|
-
|
213
|
+
if not when_doing_thing:
|
214
|
+
return
|
197
215
|
|
198
|
-
|
199
|
-
|
216
|
+
def warn(key: str, value: str) -> None:
|
217
|
+
self.printer.display(
|
218
|
+
f"Ignoring {key} {value!r} {when_doing_thing}.",
|
219
|
+
level="warn",
|
220
|
+
)
|
221
|
+
|
222
|
+
if init_settings.project is not None:
|
223
|
+
warn("project", init_settings.project)
|
224
|
+
init_settings.project = None
|
225
|
+
if init_settings.entity is not None:
|
226
|
+
warn("entity", init_settings.entity)
|
227
|
+
init_settings.entity = None
|
228
|
+
if init_settings.run_id is not None:
|
229
|
+
warn("run_id", init_settings.run_id)
|
230
|
+
init_settings.run_id = None
|
231
|
+
|
232
|
+
def compute_run_settings(self, init_settings: Settings) -> Settings:
|
233
|
+
"""Returns the run's settings.
|
234
|
+
|
235
|
+
Args:
|
236
|
+
init_settings: Settings passed to `wandb.init()` or set via
|
237
|
+
keyword arguments.
|
238
|
+
"""
|
239
|
+
self.warn_env_vars_change_after_setup()
|
200
240
|
|
201
|
-
|
202
|
-
settings = self._wl.settings.copy()
|
241
|
+
self.clear_run_path_if_sweep_or_launch(init_settings)
|
203
242
|
|
204
|
-
#
|
205
|
-
|
206
|
-
init_settings.sweep_id = settings.sweep_id
|
207
|
-
init_settings.handle_sweep_logic()
|
208
|
-
if settings.launch:
|
209
|
-
init_settings.launch = settings.launch
|
210
|
-
init_settings.handle_launch_logic()
|
243
|
+
# Inherit global settings.
|
244
|
+
settings = self._wl.settings.model_copy()
|
211
245
|
|
212
|
-
# Apply settings from wandb.init() call
|
246
|
+
# Apply settings from wandb.init() call.
|
213
247
|
settings.update_from_settings(init_settings)
|
214
248
|
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
249
|
+
# Infer the run ID from SageMaker.
|
250
|
+
if not settings.sagemaker_disable and sagemaker.is_using_sagemaker():
|
251
|
+
if sagemaker.set_run_id(settings):
|
252
|
+
self._logger.info("set run ID and group based on SageMaker")
|
253
|
+
with telemetry.context(obj=self._init_telemetry_obj) as tel:
|
254
|
+
tel.feature.sagemaker = True
|
255
|
+
|
256
|
+
# get status of code saving before applying user settings
|
257
|
+
save_code_pre_user_settings = settings.save_code
|
258
|
+
if not settings._offline and not settings._noop:
|
259
|
+
user_settings = self._wl._load_user_settings()
|
260
|
+
if user_settings is not None:
|
261
|
+
settings.update_from_dict(user_settings)
|
262
|
+
|
263
|
+
# ensure that user settings don't set saving to true
|
264
|
+
# if user explicitly set these to false in UI
|
265
|
+
if save_code_pre_user_settings is False:
|
266
|
+
settings.save_code = False
|
267
|
+
|
268
|
+
# TODO: remove this once we refactor the client. This is a temporary
|
269
|
+
# fix to make sure that we use the same project name for wandb-core.
|
270
|
+
# The reason this is not going through the settings object is to
|
271
|
+
# avoid failure cases in other parts of the code that will be
|
272
|
+
# removed with the switch to wandb-core.
|
273
|
+
if settings.project is None:
|
274
|
+
settings.project = wandb.util.auto_project_name(settings.program)
|
275
|
+
|
276
|
+
settings.x_start_time = time.time()
|
277
|
+
|
278
|
+
return settings
|
279
|
+
|
280
|
+
def _load_autoresume_run_id(self, resume_file: pathlib.Path) -> str | None:
|
281
|
+
"""Returns the run_id stored in the auto-resume file, if any.
|
282
|
+
|
283
|
+
Returns None if the file does not exist or is not in a valid format.
|
284
|
+
|
285
|
+
Args:
|
286
|
+
resume_file: The file path to use for resume='auto' mode.
|
287
|
+
"""
|
288
|
+
if not resume_file.exists():
|
289
|
+
return None
|
290
|
+
|
291
|
+
with resume_file.open() as f:
|
292
|
+
try:
|
293
|
+
return json.load(f)["run_id"]
|
294
|
+
|
295
|
+
except json.JSONDecodeError as e:
|
296
|
+
self._logger.exception(
|
297
|
+
f"could not decode {resume_file}, ignoring",
|
298
|
+
exc_info=e,
|
299
|
+
)
|
300
|
+
return None
|
301
|
+
|
302
|
+
except KeyError:
|
303
|
+
self._logger.error(
|
304
|
+
f"resume file at {resume_file} did not store a run_id"
|
305
|
+
)
|
306
|
+
return None
|
307
|
+
|
308
|
+
def _save_autoresume_run_id(
|
309
|
+
self,
|
310
|
+
*,
|
311
|
+
resume_file: pathlib.Path,
|
312
|
+
run_id: str,
|
313
|
+
) -> None:
|
314
|
+
"""Write the run ID to the auto-resume file."""
|
315
|
+
resume_file.parent.mkdir(exist_ok=True)
|
316
|
+
with resume_file.open("w") as f:
|
317
|
+
json.dump({"run_id": run_id}, f)
|
318
|
+
|
319
|
+
def set_run_id(self, settings: Settings) -> None:
|
320
|
+
"""Set the run ID and possibly save it to the auto-resume file.
|
321
|
+
|
322
|
+
After this, `settings.run_id` is guaranteed to be set.
|
323
|
+
|
324
|
+
Args:
|
325
|
+
settings: The run's settings derived from the environment
|
326
|
+
and explicit values passed to `wandb.init()`.
|
327
|
+
"""
|
328
|
+
if settings.resume == "auto" and settings.resume_fname:
|
329
|
+
resume_path = pathlib.Path(settings.resume_fname)
|
330
|
+
else:
|
331
|
+
resume_path = None
|
332
|
+
|
333
|
+
if resume_path:
|
334
|
+
previous_id = self._load_autoresume_run_id(resume_path)
|
335
|
+
|
336
|
+
if not previous_id:
|
337
|
+
pass
|
338
|
+
elif settings.run_id is None:
|
339
|
+
self._logger.info(f"loaded run ID from {resume_path}")
|
340
|
+
settings.run_id = previous_id
|
341
|
+
elif settings.run_id != previous_id:
|
342
|
+
wandb.termwarn(
|
343
|
+
f"Ignoring ID {previous_id} loaded due to resume='auto'"
|
344
|
+
f" because the run ID is set to {settings.run_id}.",
|
345
|
+
)
|
346
|
+
|
347
|
+
# If no run ID was inferred, explicitly set, or loaded from an
|
348
|
+
# auto-resume file, then we generate a new ID.
|
349
|
+
if settings.run_id is None:
|
350
|
+
settings.run_id = runid.generate_id()
|
229
351
|
|
352
|
+
if resume_path:
|
353
|
+
self._save_autoresume_run_id(
|
354
|
+
resume_file=resume_path,
|
355
|
+
run_id=settings.run_id,
|
356
|
+
)
|
357
|
+
|
358
|
+
def setup(
|
359
|
+
self,
|
360
|
+
settings: Settings,
|
361
|
+
config: dict | str | None = None,
|
362
|
+
config_exclude_keys: list[str] | None = None,
|
363
|
+
config_include_keys: list[str] | None = None,
|
364
|
+
monitor_gym: bool | None = None,
|
365
|
+
) -> None:
|
366
|
+
"""Compute the run's config and some telemetry."""
|
230
367
|
with telemetry.context(obj=self._init_telemetry_obj) as tel:
|
231
368
|
if config is not None:
|
232
369
|
tel.feature.set_init_config = True
|
@@ -252,23 +389,25 @@ class _WandbInit:
|
|
252
389
|
exclude=config_exclude_keys,
|
253
390
|
)
|
254
391
|
|
255
|
-
#
|
256
|
-
self.sweep_config = dict()
|
257
|
-
sweep_config = self._wl._sweep_config or dict()
|
392
|
+
# Construct the run's config.
|
258
393
|
self.config = dict()
|
259
394
|
self.init_artifact_config: dict[str, Any] = dict()
|
260
|
-
for config_data in (
|
261
|
-
sagemaker_config,
|
262
|
-
self._wl._config,
|
263
|
-
config,
|
264
|
-
):
|
265
|
-
if not config_data:
|
266
|
-
continue
|
267
|
-
# split out artifacts, since when inserted into
|
268
|
-
# config they will trigger use_artifact
|
269
|
-
# but the run is not yet upserted
|
270
|
-
self._split_artifacts_from_config(config_data, self.config) # type: ignore
|
271
395
|
|
396
|
+
if not settings.sagemaker_disable and sagemaker.is_using_sagemaker():
|
397
|
+
sagemaker_config = sagemaker.parse_sm_config()
|
398
|
+
self._split_artifacts_from_config(sagemaker_config, self.config)
|
399
|
+
|
400
|
+
with telemetry.context(obj=self._init_telemetry_obj) as tel:
|
401
|
+
tel.feature.sagemaker = True
|
402
|
+
|
403
|
+
if self._wl._config:
|
404
|
+
self._split_artifacts_from_config(self._wl._config, self.config)
|
405
|
+
|
406
|
+
if config and isinstance(config, dict):
|
407
|
+
self._split_artifacts_from_config(config, self.config)
|
408
|
+
|
409
|
+
self.sweep_config = dict()
|
410
|
+
sweep_config = self._wl._sweep_config or dict()
|
272
411
|
if sweep_config:
|
273
412
|
self._split_artifacts_from_config(sweep_config, self.sweep_config)
|
274
413
|
|
@@ -285,57 +424,6 @@ class _WandbInit:
|
|
285
424
|
with telemetry.context(obj=self._init_telemetry_obj) as tel:
|
286
425
|
tel.feature.tensorboard_sync = True
|
287
426
|
|
288
|
-
if not settings._offline and not settings._noop:
|
289
|
-
wandb_login._login(
|
290
|
-
anonymous=settings.anonymous,
|
291
|
-
force=settings.force,
|
292
|
-
_disable_warning=True,
|
293
|
-
_silent=settings.quiet or settings.silent,
|
294
|
-
_entity=settings.entity,
|
295
|
-
)
|
296
|
-
|
297
|
-
# apply updated global state after login was handled
|
298
|
-
wl = wandb.setup()
|
299
|
-
assert wl is not None
|
300
|
-
login_settings = {
|
301
|
-
k: v
|
302
|
-
for k, v in {
|
303
|
-
"anonymous": wl.settings.anonymous,
|
304
|
-
"api_key": wl.settings.api_key,
|
305
|
-
"base_url": wl.settings.base_url,
|
306
|
-
"force": wl.settings.force,
|
307
|
-
"login_timeout": wl.settings.login_timeout,
|
308
|
-
}.items()
|
309
|
-
if v is not None
|
310
|
-
}
|
311
|
-
if login_settings:
|
312
|
-
settings.update_from_dict(login_settings)
|
313
|
-
|
314
|
-
# handle custom resume logic
|
315
|
-
settings.handle_resume_logic()
|
316
|
-
|
317
|
-
# get status of code saving before applying user settings
|
318
|
-
save_code_pre_user_settings = settings.save_code
|
319
|
-
if not settings._offline and not settings._noop:
|
320
|
-
user_settings = self._wl._load_user_settings()
|
321
|
-
if user_settings is not None:
|
322
|
-
settings.update_from_dict(user_settings)
|
323
|
-
|
324
|
-
# ensure that user settings don't set saving to true
|
325
|
-
# if user explicitly set these to false in UI
|
326
|
-
if save_code_pre_user_settings is False:
|
327
|
-
settings.save_code = False
|
328
|
-
|
329
|
-
# TODO: remove this once we refactor the client. This is a temporary
|
330
|
-
# fix to make sure that we use the same project name for wandb-core.
|
331
|
-
# The reason this is not going throught the settings object is to
|
332
|
-
# avoid failure cases in other parts of the code that will be
|
333
|
-
# removed with the switch to wandb-core.
|
334
|
-
if settings.project is None:
|
335
|
-
settings.project = wandb.util.auto_project_name(settings.program)
|
336
|
-
|
337
|
-
settings.x_start_time = time.time()
|
338
|
-
|
339
427
|
if not settings._noop:
|
340
428
|
self._log_setup(settings)
|
341
429
|
|
@@ -345,13 +433,10 @@ class _WandbInit:
|
|
345
433
|
if launch_config:
|
346
434
|
self._split_artifacts_from_config(launch_config, self.launch_config)
|
347
435
|
|
348
|
-
self.settings = settings
|
349
|
-
|
350
436
|
def teardown(self) -> None:
|
351
437
|
# TODO: currently this is only called on failed wandb.init attempts
|
352
438
|
# normally this happens on the run object
|
353
|
-
|
354
|
-
logger.info("tearing down wandb.init")
|
439
|
+
self._logger.info("tearing down wandb.init")
|
355
440
|
for hook in self._teardown_hooks:
|
356
441
|
hook.call()
|
357
442
|
|
@@ -364,35 +449,24 @@ class _WandbInit:
|
|
364
449
|
else:
|
365
450
|
config_target.setdefault(k, v)
|
366
451
|
|
367
|
-
def
|
368
|
-
"""
|
452
|
+
def _create_logger(self, log_fname: str) -> logging.Logger:
|
453
|
+
"""Returns a logger configured to write to a file.
|
369
454
|
|
370
|
-
This adds a run_id to the log, in case of multiple processes on the same
|
371
|
-
Currently, there is no way to disable logging after it's
|
455
|
+
This adds a run_id to the log, in case of multiple processes on the same
|
456
|
+
machine. Currently, there is no way to disable logging after it's
|
457
|
+
enabled.
|
372
458
|
"""
|
373
459
|
handler = logging.FileHandler(log_fname)
|
374
460
|
handler.setLevel(logging.INFO)
|
375
461
|
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
if run_id:
|
382
|
-
formatter = logging.Formatter(
|
383
|
-
"%(asctime)s %(levelname)-7s %(threadName)-10s:%(process)d "
|
384
|
-
"[%(run_id)s:%(filename)s:%(funcName)s():%(lineno)s] %(message)s"
|
385
|
-
)
|
386
|
-
else:
|
387
|
-
formatter = logging.Formatter(
|
388
|
-
"%(asctime)s %(levelname)-7s %(threadName)-10s:%(process)d "
|
389
|
-
"[%(filename)s:%(funcName)s():%(lineno)s] %(message)s"
|
390
|
-
)
|
462
|
+
formatter = logging.Formatter(
|
463
|
+
"%(asctime)s %(levelname)-7s %(threadName)-10s:%(process)d "
|
464
|
+
"[%(filename)s:%(funcName)s():%(lineno)s] %(message)s"
|
465
|
+
)
|
391
466
|
|
392
467
|
handler.setFormatter(formatter)
|
393
|
-
|
394
|
-
|
395
|
-
assert logger is not None
|
468
|
+
|
469
|
+
logger = logging.getLogger("wandb")
|
396
470
|
logger.propagate = False
|
397
471
|
logger.addHandler(handler)
|
398
472
|
# TODO: make me configurable
|
@@ -404,10 +478,12 @@ class _WandbInit:
|
|
404
478
|
)
|
405
479
|
)
|
406
480
|
|
481
|
+
return logger
|
482
|
+
|
407
483
|
def _safe_symlink(
|
408
484
|
self, base: str, target: str, name: str, delete: bool = False
|
409
485
|
) -> None:
|
410
|
-
# TODO(jhr): do this with relpaths, but i
|
486
|
+
# TODO(jhr): do this with relpaths, but i can't figure it out on no sleep
|
411
487
|
if not hasattr(os, "symlink"):
|
412
488
|
return
|
413
489
|
|
@@ -434,14 +510,14 @@ class _WandbInit:
|
|
434
510
|
if self.notebook.save_ipynb(): # type: ignore
|
435
511
|
assert self.run is not None
|
436
512
|
res = self.run.log_code(root=None)
|
437
|
-
|
513
|
+
self._logger.info("saved code: %s", res) # type: ignore
|
438
514
|
if self.backend.interface is not None:
|
439
|
-
|
515
|
+
self._logger.info("pausing backend") # type: ignore
|
440
516
|
self.backend.interface.publish_pause()
|
441
517
|
|
442
518
|
def _resume_backend(self, *args: Any, **kwargs: Any) -> None: # noqa
|
443
519
|
if self.backend is not None and self.backend.interface is not None:
|
444
|
-
|
520
|
+
self._logger.info("resuming backend") # type: ignore
|
445
521
|
self.backend.interface.publish_resume()
|
446
522
|
|
447
523
|
def _jupyter_teardown(self) -> None:
|
@@ -452,8 +528,8 @@ class _WandbInit:
|
|
452
528
|
if self.notebook.save_ipynb():
|
453
529
|
assert self.run is not None
|
454
530
|
res = self.run.log_code(root=None)
|
455
|
-
|
456
|
-
|
531
|
+
self._logger.info("saved code and history: %s", res) # type: ignore
|
532
|
+
self._logger.info("cleaning up jupyter logic") # type: ignore
|
457
533
|
# because of how we bind our methods we manually find them to unregister
|
458
534
|
for hook in ipython.events.callbacks["pre_run_cell"]:
|
459
535
|
if "_resume_backend" in hook.__name__:
|
@@ -471,7 +547,7 @@ class _WandbInit:
|
|
471
547
|
|
472
548
|
# Monkey patch ipython publish to capture displayed outputs
|
473
549
|
if not hasattr(ipython.display_pub, "_orig_publish"):
|
474
|
-
|
550
|
+
self._logger.info("configuring jupyter hooks %s", self) # type: ignore
|
475
551
|
ipython.display_pub._orig_publish = ipython.display_pub.publish
|
476
552
|
# Registering resume and pause hooks
|
477
553
|
|
@@ -518,15 +594,9 @@ class _WandbInit:
|
|
518
594
|
delete=True,
|
519
595
|
)
|
520
596
|
|
521
|
-
|
522
|
-
self.
|
523
|
-
|
524
|
-
assert self._wl
|
525
|
-
assert logger
|
526
|
-
|
527
|
-
self._wl._early_logger_flush(logger)
|
528
|
-
logger.info(f"Logging user logs to {settings.log_user}")
|
529
|
-
logger.info(f"Logging internal logs to {settings.log_internal}")
|
597
|
+
self._wl._early_logger_flush(self._create_logger(settings.log_user))
|
598
|
+
self._logger.info(f"Logging user logs to {settings.log_user}")
|
599
|
+
self._logger.info(f"Logging internal logs to {settings.log_internal}")
|
530
600
|
|
531
601
|
def _make_run_disabled(self) -> Run:
|
532
602
|
"""Returns a Run-like object where all methods are no-ops.
|
@@ -552,11 +622,13 @@ class _WandbInit:
|
|
552
622
|
entity="dummy",
|
553
623
|
)
|
554
624
|
)
|
555
|
-
# config and
|
625
|
+
# config, summary, and metadata objects
|
556
626
|
drun._config = wandb.sdk.wandb_config.Config()
|
557
627
|
drun._config.update(self.sweep_config)
|
558
628
|
drun._config.update(self.config)
|
559
629
|
drun.summary = SummaryDisabled() # type: ignore
|
630
|
+
drun._Run__metadata = wandb.sdk.wandb_metadata.Metadata()
|
631
|
+
|
560
632
|
# methods
|
561
633
|
drun.log = lambda data, *_, **__: drun.summary.update(data) # type: ignore
|
562
634
|
drun.finish = lambda *_, **__: module.unset_globals() # type: ignore
|
@@ -638,24 +710,20 @@ class _WandbInit:
|
|
638
710
|
percent_done = handle.percent_done
|
639
711
|
self.printer.progress_update(line, percent_done=percent_done)
|
640
712
|
|
641
|
-
def init(self) -> Run: # noqa: C901
|
642
|
-
|
643
|
-
raise RuntimeError("Logger not initialized")
|
644
|
-
logger.info("calling init triggers")
|
713
|
+
def init(self, settings: Settings) -> Run: # noqa: C901
|
714
|
+
self._logger.info("calling init triggers")
|
645
715
|
trigger.call("on_init")
|
646
716
|
|
647
|
-
assert self.settings is not None
|
648
717
|
assert self._wl is not None
|
649
718
|
|
650
|
-
|
719
|
+
self._logger.info(
|
651
720
|
f"wandb.init called with sweep_config: {self.sweep_config}\nconfig: {self.config}"
|
652
721
|
)
|
653
722
|
|
654
|
-
if
|
723
|
+
if settings._noop:
|
655
724
|
return self._make_run_disabled()
|
656
725
|
if (
|
657
|
-
|
658
|
-
or (self.settings._jupyter and self.settings.reinit is not False)
|
726
|
+
settings.reinit or (settings._jupyter and settings.reinit is not False)
|
659
727
|
) and len(self._wl._global_run_stack) > 0:
|
660
728
|
if len(self._wl._global_run_stack) > 1:
|
661
729
|
wandb.termwarn(
|
@@ -666,40 +734,39 @@ class _WandbInit:
|
|
666
734
|
)
|
667
735
|
|
668
736
|
latest_run = self._wl._global_run_stack[-1]
|
669
|
-
|
737
|
+
self._logger.info(f"found existing run on stack: {latest_run.id}")
|
670
738
|
latest_run.finish()
|
671
|
-
elif
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
service = self._wl.service
|
683
|
-
if service:
|
684
|
-
logger.info("sending inform_init request")
|
739
|
+
elif wandb.run is not None and os.getpid() == wandb.run._init_pid:
|
740
|
+
self._logger.info("wandb.init() called when a run is still active")
|
741
|
+
with telemetry.context() as tel:
|
742
|
+
tel.feature.init_return_run = True
|
743
|
+
return wandb.run
|
744
|
+
|
745
|
+
self._logger.info("starting backend")
|
746
|
+
|
747
|
+
if not settings.x_disable_service:
|
748
|
+
service = self._wl.ensure_service()
|
749
|
+
self._logger.info("sending inform_init request")
|
685
750
|
service.inform_init(
|
686
|
-
settings=
|
687
|
-
run_id=
|
751
|
+
settings=settings.to_proto(),
|
752
|
+
run_id=settings.run_id, # type: ignore
|
688
753
|
)
|
754
|
+
else:
|
755
|
+
service = None
|
689
756
|
|
690
757
|
mailbox = Mailbox()
|
691
758
|
backend = Backend(
|
692
|
-
settings=
|
759
|
+
settings=settings,
|
693
760
|
service=service,
|
694
761
|
mailbox=mailbox,
|
695
762
|
)
|
696
763
|
backend.ensure_launched()
|
697
|
-
|
764
|
+
self._logger.info("backend started and connected")
|
698
765
|
|
699
766
|
# resuming needs access to the server, check server_status()?
|
700
767
|
run = Run(
|
701
768
|
config=self.config,
|
702
|
-
settings=
|
769
|
+
settings=settings,
|
703
770
|
sweep_config=self.sweep_config,
|
704
771
|
launch_config=self.launch_config,
|
705
772
|
)
|
@@ -712,18 +779,18 @@ class _WandbInit:
|
|
712
779
|
hf_version = _huggingface_version()
|
713
780
|
if hf_version:
|
714
781
|
tel.huggingface_version = hf_version
|
715
|
-
if
|
782
|
+
if settings._jupyter:
|
716
783
|
tel.env.jupyter = True
|
717
|
-
if
|
784
|
+
if settings._ipython:
|
718
785
|
tel.env.ipython = True
|
719
|
-
if
|
786
|
+
if settings._colab:
|
720
787
|
tel.env.colab = True
|
721
|
-
if
|
788
|
+
if settings._kaggle:
|
722
789
|
tel.env.kaggle = True
|
723
|
-
if
|
790
|
+
if settings._windows:
|
724
791
|
tel.env.windows = True
|
725
792
|
|
726
|
-
if
|
793
|
+
if settings.launch:
|
727
794
|
tel.feature.launch = True
|
728
795
|
|
729
796
|
for module_name in telemetry.list_telemetry_imports(only_imported=True):
|
@@ -731,8 +798,8 @@ class _WandbInit:
|
|
731
798
|
|
732
799
|
# probe the active start method
|
733
800
|
active_start_method: str | None = None
|
734
|
-
if
|
735
|
-
active_start_method =
|
801
|
+
if settings.start_method == "thread":
|
802
|
+
active_start_method = settings.start_method
|
736
803
|
else:
|
737
804
|
active_start_method = getattr(
|
738
805
|
backend._multiprocessing, "get_start_method", lambda: None
|
@@ -750,7 +817,7 @@ class _WandbInit:
|
|
750
817
|
if os.environ.get("PEX"):
|
751
818
|
tel.env.pex = True
|
752
819
|
|
753
|
-
if
|
820
|
+
if settings._aws_lambda:
|
754
821
|
tel.env.aws_lambda = True
|
755
822
|
|
756
823
|
if os.environ.get(wandb.env._DISABLE_SERVICE):
|
@@ -758,13 +825,13 @@ class _WandbInit:
|
|
758
825
|
|
759
826
|
if service:
|
760
827
|
tel.feature.service = True
|
761
|
-
if
|
828
|
+
if settings.x_flow_control_disabled:
|
762
829
|
tel.feature.flow_control_disabled = True
|
763
|
-
if
|
830
|
+
if settings.x_flow_control_custom:
|
764
831
|
tel.feature.flow_control_custom = True
|
765
|
-
if not
|
832
|
+
if not settings.x_require_legacy_service:
|
766
833
|
tel.feature.core = True
|
767
|
-
if
|
834
|
+
if settings._shared:
|
768
835
|
wandb.termwarn(
|
769
836
|
"The `_shared` feature is experimental and may change. "
|
770
837
|
"Please contact support@wandb.com for guidance and to report any issues."
|
@@ -773,7 +840,7 @@ class _WandbInit:
|
|
773
840
|
|
774
841
|
tel.env.maybe_mp = _maybe_mp_process(backend)
|
775
842
|
|
776
|
-
if not
|
843
|
+
if not settings.label_disable:
|
777
844
|
if self.notebook:
|
778
845
|
run._label_probe_notebook(self.notebook)
|
779
846
|
else:
|
@@ -787,7 +854,7 @@ class _WandbInit:
|
|
787
854
|
run=run,
|
788
855
|
)
|
789
856
|
|
790
|
-
|
857
|
+
self._logger.info("updated telemetry")
|
791
858
|
|
792
859
|
run._set_library(self._wl)
|
793
860
|
run._set_backend(backend)
|
@@ -801,25 +868,27 @@ class _WandbInit:
|
|
801
868
|
# Using GitRepo() blocks & can be slow, depending on user's current git setup.
|
802
869
|
# We don't want to block run initialization/start request, so populate run's git
|
803
870
|
# info beforehand.
|
804
|
-
if not (
|
871
|
+
if not (settings.disable_git or settings.x_disable_machine_info):
|
805
872
|
run._populate_git_info()
|
806
873
|
|
807
874
|
run_result: pb.RunUpdateResult | None = None
|
808
875
|
|
809
|
-
if
|
876
|
+
if settings._offline:
|
810
877
|
with telemetry.context(run=run) as tel:
|
811
878
|
tel.feature.offline = True
|
812
879
|
|
813
|
-
if
|
880
|
+
if settings.resume:
|
814
881
|
wandb.termwarn(
|
815
882
|
"`resume` will be ignored since W&B syncing is set to `offline`. "
|
816
883
|
f"Starting a new run with run id {run.id}."
|
817
884
|
)
|
818
885
|
error: wandb.Error | None = None
|
819
886
|
|
820
|
-
timeout =
|
887
|
+
timeout = settings.init_timeout
|
821
888
|
|
822
|
-
|
889
|
+
self._logger.info(
|
890
|
+
f"communicating run to backend with {timeout} second timeout",
|
891
|
+
)
|
823
892
|
|
824
893
|
run_init_handle = backend.interface.deliver_run(run)
|
825
894
|
result = run_init_handle.wait(
|
@@ -846,7 +915,7 @@ class _WandbInit:
|
|
846
915
|
error = ProtobufErrorHandler.to_exception(run_result.error)
|
847
916
|
|
848
917
|
if error is not None:
|
849
|
-
|
918
|
+
self._logger.error(f"encountered error: {error}")
|
850
919
|
if not service:
|
851
920
|
# Shutdown the backend and get rid of the logger
|
852
921
|
# we don't need to do console cleanup at this point
|
@@ -864,21 +933,19 @@ class _WandbInit:
|
|
864
933
|
)
|
865
934
|
|
866
935
|
if run_result.run.resumed:
|
867
|
-
|
936
|
+
self._logger.info("run resumed")
|
868
937
|
with telemetry.context(run=run) as tel:
|
869
938
|
tel.feature.resumed = run_result.run.resumed
|
870
939
|
run._set_run_obj(run_result.run)
|
871
940
|
|
872
|
-
|
873
|
-
|
874
|
-
logger.info("starting run threads in backend")
|
941
|
+
self._logger.info("starting run threads in backend")
|
875
942
|
# initiate run (stats and metadata probing)
|
876
943
|
|
877
944
|
if service:
|
878
|
-
assert
|
945
|
+
assert settings.run_id
|
879
946
|
service.inform_start(
|
880
|
-
settings=
|
881
|
-
run_id=
|
947
|
+
settings=settings.to_proto(),
|
948
|
+
run_id=settings.run_id,
|
882
949
|
)
|
883
950
|
|
884
951
|
assert backend.interface
|
@@ -895,11 +962,11 @@ class _WandbInit:
|
|
895
962
|
|
896
963
|
run._handle_launch_artifact_overrides()
|
897
964
|
if (
|
898
|
-
|
899
|
-
and
|
900
|
-
and os.path.exists(
|
965
|
+
settings.launch
|
966
|
+
and settings.launch_config_path
|
967
|
+
and os.path.exists(settings.launch_config_path)
|
901
968
|
):
|
902
|
-
run.save(
|
969
|
+
run.save(settings.launch_config_path)
|
903
970
|
# put artifacts in run config here
|
904
971
|
# since doing so earlier will cause an error
|
905
972
|
# as the run is not upserted
|
@@ -913,7 +980,7 @@ class _WandbInit:
|
|
913
980
|
|
914
981
|
self.backend = backend
|
915
982
|
run._on_start()
|
916
|
-
|
983
|
+
self._logger.info("run started, returning control to user process")
|
917
984
|
return run
|
918
985
|
|
919
986
|
|
@@ -943,16 +1010,10 @@ def _attach(
|
|
943
1010
|
)
|
944
1011
|
wandb._assert_is_user_process() # type: ignore
|
945
1012
|
|
946
|
-
_wl =
|
947
|
-
|
948
|
-
|
949
|
-
_set_logger(_wl._get_logger())
|
950
|
-
if logger is None:
|
951
|
-
raise UsageError("logger is not initialized")
|
1013
|
+
_wl = wandb.setup()
|
1014
|
+
logger = _wl._get_logger()
|
952
1015
|
|
953
|
-
service = _wl.
|
954
|
-
if not service:
|
955
|
-
raise UsageError(f"Unable to attach to run {attach_id} (no service process)")
|
1016
|
+
service = _wl.ensure_service()
|
956
1017
|
|
957
1018
|
try:
|
958
1019
|
attach_settings = service.inform_attach(attach_id=attach_id)
|
@@ -1081,7 +1142,7 @@ def init( # noqa: C901
|
|
1081
1142
|
entity: The username or team name under which the runs will be logged.
|
1082
1143
|
The entity must already exist, so ensure you’ve created your account
|
1083
1144
|
or team in the UI before starting to log runs. If not specified, the
|
1084
|
-
run will default your
|
1145
|
+
run will default your default entity. To change the default entity,
|
1085
1146
|
go to [your settings](https://wandb.ai/settings) and update the
|
1086
1147
|
"Default location to create new projects" under "Default team".
|
1087
1148
|
project: The name of the project under which this run will be logged.
|
@@ -1285,27 +1346,36 @@ def init( # noqa: C901
|
|
1285
1346
|
if resume_from is not None:
|
1286
1347
|
init_settings.resume_from = resume_from # type: ignore
|
1287
1348
|
|
1349
|
+
wl: wandb_setup._WandbSetup | None = None
|
1350
|
+
|
1288
1351
|
try:
|
1289
|
-
|
1352
|
+
wl = wandb.setup()
|
1353
|
+
|
1354
|
+
wi = _WandbInit(wl)
|
1355
|
+
|
1356
|
+
wi.maybe_login(init_settings)
|
1357
|
+
run_settings = wi.compute_run_settings(init_settings)
|
1358
|
+
wi.set_run_id(run_settings)
|
1359
|
+
|
1290
1360
|
wi.setup(
|
1291
|
-
|
1361
|
+
settings=run_settings,
|
1292
1362
|
config=config,
|
1293
1363
|
config_exclude_keys=config_exclude_keys,
|
1294
1364
|
config_include_keys=config_include_keys,
|
1295
|
-
allow_val_change=allow_val_change,
|
1296
1365
|
monitor_gym=monitor_gym,
|
1297
1366
|
)
|
1298
|
-
|
1367
|
+
|
1368
|
+
return wi.init(run_settings)
|
1299
1369
|
|
1300
1370
|
except KeyboardInterrupt as e:
|
1301
|
-
if
|
1302
|
-
|
1371
|
+
if wl:
|
1372
|
+
wl._get_logger().warning("interrupted", exc_info=e)
|
1303
1373
|
|
1304
1374
|
raise
|
1305
1375
|
|
1306
1376
|
except Exception as e:
|
1307
|
-
if
|
1308
|
-
|
1377
|
+
if wl:
|
1378
|
+
wl._get_logger().exception("error in wandb.init()", exc_info=e)
|
1309
1379
|
|
1310
1380
|
# Need to build delay into this sentry capture because our exit hooks
|
1311
1381
|
# mess with sentry's ability to send out errors before the program ends.
|