wandb 0.19.1rc1__py3-none-musllinux_1_2_aarch64.whl → 0.19.3__py3-none-musllinux_1_2_aarch64.whl
Sign up to get free protection for your applications and to get access to all the features.
- wandb/__init__.py +1 -7
- wandb/__init__.pyi +15 -7
- wandb/agents/pyagent.py +1 -1
- wandb/apis/importers/wandb.py +1 -1
- wandb/apis/public/files.py +1 -1
- wandb/apis/public/jobs.py +1 -1
- wandb/apis/public/runs.py +2 -7
- wandb/apis/reports/v1/__init__.py +1 -1
- wandb/apis/reports/v2/__init__.py +1 -1
- wandb/apis/workspaces/__init__.py +1 -1
- wandb/bin/gpu_stats +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/beta.py +7 -4
- wandb/cli/cli.py +5 -7
- wandb/docker/__init__.py +4 -4
- wandb/integration/fastai/__init__.py +4 -6
- wandb/integration/keras/keras.py +5 -3
- wandb/integration/metaflow/metaflow.py +14 -16
- wandb/integration/prodigy/prodigy.py +3 -11
- wandb/integration/sagemaker/__init__.py +5 -3
- wandb/integration/sagemaker/config.py +17 -8
- wandb/integration/sagemaker/files.py +0 -1
- wandb/integration/sagemaker/resources.py +47 -18
- wandb/integration/torch/wandb_torch.py +1 -1
- wandb/proto/v3/wandb_internal_pb2.py +273 -235
- wandb/proto/v4/wandb_internal_pb2.py +222 -214
- wandb/proto/v5/wandb_internal_pb2.py +222 -214
- wandb/sdk/artifacts/artifact.py +3 -9
- wandb/sdk/backend/backend.py +1 -1
- wandb/sdk/data_types/base_types/wb_value.py +1 -1
- wandb/sdk/data_types/graph.py +2 -2
- wandb/sdk/data_types/saved_model.py +1 -1
- wandb/sdk/data_types/video.py +1 -1
- wandb/sdk/interface/interface.py +25 -25
- wandb/sdk/interface/interface_shared.py +21 -5
- wandb/sdk/internal/handler.py +19 -1
- wandb/sdk/internal/internal.py +1 -1
- wandb/sdk/internal/internal_api.py +4 -5
- wandb/sdk/internal/sample.py +2 -2
- wandb/sdk/internal/sender.py +1 -2
- wandb/sdk/internal/settings_static.py +3 -1
- wandb/sdk/internal/system/assets/disk.py +4 -4
- wandb/sdk/internal/system/assets/gpu.py +1 -1
- wandb/sdk/internal/system/assets/memory.py +1 -1
- wandb/sdk/internal/system/system_info.py +1 -1
- wandb/sdk/internal/system/system_monitor.py +3 -1
- wandb/sdk/internal/tb_watcher.py +1 -1
- wandb/sdk/launch/_project_spec.py +3 -3
- wandb/sdk/launch/builder/abstract.py +1 -1
- wandb/sdk/lib/apikey.py +2 -3
- wandb/sdk/lib/fsm.py +1 -1
- wandb/sdk/lib/gitlib.py +1 -1
- wandb/sdk/lib/gql_request.py +1 -1
- wandb/sdk/lib/interrupt.py +37 -0
- wandb/sdk/lib/lazyloader.py +1 -1
- wandb/sdk/lib/progress.py +7 -1
- wandb/sdk/lib/service_connection.py +1 -1
- wandb/sdk/lib/telemetry.py +1 -1
- wandb/sdk/service/_startup_debug.py +1 -1
- wandb/sdk/service/server_sock.py +3 -2
- wandb/sdk/service/service.py +1 -1
- wandb/sdk/service/streams.py +19 -17
- wandb/sdk/verify/verify.py +13 -13
- wandb/sdk/wandb_init.py +316 -246
- wandb/sdk/wandb_login.py +1 -1
- wandb/sdk/wandb_metadata.py +547 -0
- wandb/sdk/wandb_run.py +134 -39
- wandb/sdk/wandb_settings.py +7 -63
- wandb/sdk/wandb_setup.py +83 -82
- wandb/sdk/wandb_sweep.py +2 -2
- wandb/sdk/wandb_sync.py +15 -18
- wandb/sync/sync.py +10 -10
- wandb/util.py +11 -3
- wandb/wandb_agent.py +11 -16
- wandb/wandb_controller.py +7 -7
- {wandb-0.19.1rc1.dist-info → wandb-0.19.3.dist-info}/METADATA +5 -3
- {wandb-0.19.1rc1.dist-info → wandb-0.19.3.dist-info}/RECORD +80 -78
- {wandb-0.19.1rc1.dist-info → wandb-0.19.3.dist-info}/WHEEL +1 -1
- {wandb-0.19.1rc1.dist-info → wandb-0.19.3.dist-info}/entry_points.txt +0 -0
- {wandb-0.19.1rc1.dist-info → wandb-0.19.3.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/wandb_init.py
CHANGED
@@ -14,6 +14,7 @@ import copy
|
|
14
14
|
import json
|
15
15
|
import logging
|
16
16
|
import os
|
17
|
+
import pathlib
|
17
18
|
import platform
|
18
19
|
import sys
|
19
20
|
import tempfile
|
@@ -48,14 +49,6 @@ from .wandb_settings import Settings
|
|
48
49
|
if TYPE_CHECKING:
|
49
50
|
from wandb.proto import wandb_internal_pb2 as pb
|
50
51
|
|
51
|
-
logger: logging.Logger | None = None # logger configured during wandb.init()
|
52
|
-
|
53
|
-
|
54
|
-
def _set_logger(log_object: logging.Logger) -> None:
|
55
|
-
"""Configure module logger."""
|
56
|
-
global logger
|
57
|
-
logger = log_object
|
58
|
-
|
59
52
|
|
60
53
|
def _huggingface_version() -> str | None:
|
61
54
|
if "transformers" in sys.modules:
|
@@ -115,9 +108,10 @@ def _handle_launch_config(settings: Settings) -> dict[str, Any]:
|
|
115
108
|
class _WandbInit:
|
116
109
|
_init_telemetry_obj: telemetry.TelemetryRecord
|
117
110
|
|
118
|
-
def __init__(self) -> None:
|
111
|
+
def __init__(self, wl: wandb_setup._WandbSetup) -> None:
|
112
|
+
self._wl = wl
|
113
|
+
|
119
114
|
self.kwargs = None
|
120
|
-
self.settings: Settings | None = None
|
121
115
|
self.sweep_config: dict[str, Any] = {}
|
122
116
|
self.launch_config: dict[str, Any] = {}
|
123
117
|
self.config: dict[str, Any] = {}
|
@@ -125,7 +119,6 @@ class _WandbInit:
|
|
125
119
|
self.backend: Backend | None = None
|
126
120
|
|
127
121
|
self._teardown_hooks: list[TeardownHook] = []
|
128
|
-
self._wl: wandb_setup._WandbSetup | None = None
|
129
122
|
self.notebook: wandb.jupyter.Notebook | None = None # type: ignore
|
130
123
|
self.printer = printer.new_printer()
|
131
124
|
|
@@ -133,13 +126,47 @@ class _WandbInit:
|
|
133
126
|
|
134
127
|
self.deprecated_features_used: dict[str, str] = dict()
|
135
128
|
|
129
|
+
@property
|
130
|
+
def _logger(self) -> wandb_setup.Logger:
|
131
|
+
return self._wl._get_logger()
|
132
|
+
|
133
|
+
def maybe_login(self, init_settings: Settings) -> None:
|
134
|
+
"""Log in if we are not creating an offline or disabled run.
|
135
|
+
|
136
|
+
This may change the W&B singleton settings.
|
137
|
+
|
138
|
+
Args:
|
139
|
+
init_settings: Settings passed to `wandb.init()` or set via
|
140
|
+
keyword arguments.
|
141
|
+
"""
|
142
|
+
# Allow settings passed to init() to override inferred values.
|
143
|
+
#
|
144
|
+
# Calling login() may change settings on the singleton,
|
145
|
+
# so these may not be the final run settings.
|
146
|
+
run_settings = self._wl.settings.model_copy()
|
147
|
+
run_settings.update_from_settings(init_settings)
|
148
|
+
|
149
|
+
# NOTE: _noop or _offline can become true after _login().
|
150
|
+
# _noop happens if _login hits a timeout.
|
151
|
+
# _offline can be selected by the user at the login prompt.
|
152
|
+
if run_settings._noop or run_settings._offline:
|
153
|
+
return
|
154
|
+
|
155
|
+
wandb_login._login(
|
156
|
+
anonymous=run_settings.anonymous,
|
157
|
+
force=run_settings.force,
|
158
|
+
_disable_warning=True,
|
159
|
+
_silent=run_settings.quiet or run_settings.silent,
|
160
|
+
_entity=run_settings.entity,
|
161
|
+
)
|
162
|
+
|
136
163
|
def warn_env_vars_change_after_setup(self) -> None:
|
137
164
|
"""Warn if environment variables change after wandb singleton is initialized.
|
138
165
|
|
139
166
|
Any settings from environment variables set after the singleton is initialized
|
140
167
|
(via login/setup/etc.) will be ignored.
|
141
168
|
"""
|
142
|
-
singleton = wandb_setup.
|
169
|
+
singleton = wandb_setup.singleton()
|
143
170
|
if singleton is None:
|
144
171
|
return
|
145
172
|
|
@@ -167,66 +194,176 @@ class _WandbInit:
|
|
167
194
|
)
|
168
195
|
self.printer.display(line, level="warn")
|
169
196
|
|
170
|
-
def
|
197
|
+
def clear_run_path_if_sweep_or_launch(
|
171
198
|
self,
|
172
199
|
init_settings: Settings,
|
173
|
-
config: dict | str | None = None,
|
174
|
-
config_exclude_keys: list[str] | None = None,
|
175
|
-
config_include_keys: list[str] | None = None,
|
176
|
-
allow_val_change: bool | None = None,
|
177
|
-
monitor_gym: bool | None = None,
|
178
200
|
) -> None:
|
179
|
-
"""
|
201
|
+
"""Clear project/entity/run_id keys if in a Sweep or a Launch context.
|
180
202
|
|
181
|
-
|
203
|
+
Args:
|
204
|
+
init_settings: Settings specified in the call to `wandb.init()`.
|
182
205
|
"""
|
183
|
-
|
206
|
+
when_doing_thing = ""
|
184
207
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
# TODO: x_disable_service is deprecated, remove this once officially deprecated
|
190
|
-
if init_settings.x_disable_service:
|
191
|
-
setup_settings_dict["x_disable_service"] = init_settings.x_disable_service
|
192
|
-
setup_settings = (
|
193
|
-
wandb.Settings(**setup_settings_dict) if setup_settings_dict else None
|
194
|
-
)
|
208
|
+
if self._wl.settings.sweep_id:
|
209
|
+
when_doing_thing = "when running a sweep"
|
210
|
+
elif self._wl.settings.launch:
|
211
|
+
when_doing_thing = "when running from a wandb launch context"
|
195
212
|
|
196
|
-
|
213
|
+
if not when_doing_thing:
|
214
|
+
return
|
197
215
|
|
198
|
-
|
199
|
-
|
216
|
+
def warn(key: str, value: str) -> None:
|
217
|
+
self.printer.display(
|
218
|
+
f"Ignoring {key} {value!r} {when_doing_thing}.",
|
219
|
+
level="warn",
|
220
|
+
)
|
221
|
+
|
222
|
+
if init_settings.project is not None:
|
223
|
+
warn("project", init_settings.project)
|
224
|
+
init_settings.project = None
|
225
|
+
if init_settings.entity is not None:
|
226
|
+
warn("entity", init_settings.entity)
|
227
|
+
init_settings.entity = None
|
228
|
+
if init_settings.run_id is not None:
|
229
|
+
warn("run_id", init_settings.run_id)
|
230
|
+
init_settings.run_id = None
|
231
|
+
|
232
|
+
def compute_run_settings(self, init_settings: Settings) -> Settings:
|
233
|
+
"""Returns the run's settings.
|
234
|
+
|
235
|
+
Args:
|
236
|
+
init_settings: Settings passed to `wandb.init()` or set via
|
237
|
+
keyword arguments.
|
238
|
+
"""
|
239
|
+
self.warn_env_vars_change_after_setup()
|
200
240
|
|
201
|
-
|
202
|
-
settings = self._wl.settings.copy()
|
241
|
+
self.clear_run_path_if_sweep_or_launch(init_settings)
|
203
242
|
|
204
|
-
#
|
205
|
-
|
206
|
-
init_settings.sweep_id = settings.sweep_id
|
207
|
-
init_settings.handle_sweep_logic()
|
208
|
-
if settings.launch:
|
209
|
-
init_settings.launch = settings.launch
|
210
|
-
init_settings.handle_launch_logic()
|
243
|
+
# Inherit global settings.
|
244
|
+
settings = self._wl.settings.model_copy()
|
211
245
|
|
212
|
-
# Apply settings from wandb.init() call
|
246
|
+
# Apply settings from wandb.init() call.
|
213
247
|
settings.update_from_settings(init_settings)
|
214
248
|
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
249
|
+
# Infer the run ID from SageMaker.
|
250
|
+
if not settings.sagemaker_disable and sagemaker.is_using_sagemaker():
|
251
|
+
if sagemaker.set_run_id(settings):
|
252
|
+
self._logger.info("set run ID and group based on SageMaker")
|
253
|
+
with telemetry.context(obj=self._init_telemetry_obj) as tel:
|
254
|
+
tel.feature.sagemaker = True
|
255
|
+
|
256
|
+
# get status of code saving before applying user settings
|
257
|
+
save_code_pre_user_settings = settings.save_code
|
258
|
+
if not settings._offline and not settings._noop:
|
259
|
+
user_settings = self._wl._load_user_settings()
|
260
|
+
if user_settings is not None:
|
261
|
+
settings.update_from_dict(user_settings)
|
262
|
+
|
263
|
+
# ensure that user settings don't set saving to true
|
264
|
+
# if user explicitly set these to false in UI
|
265
|
+
if save_code_pre_user_settings is False:
|
266
|
+
settings.save_code = False
|
267
|
+
|
268
|
+
# TODO: remove this once we refactor the client. This is a temporary
|
269
|
+
# fix to make sure that we use the same project name for wandb-core.
|
270
|
+
# The reason this is not going through the settings object is to
|
271
|
+
# avoid failure cases in other parts of the code that will be
|
272
|
+
# removed with the switch to wandb-core.
|
273
|
+
if settings.project is None:
|
274
|
+
settings.project = wandb.util.auto_project_name(settings.program)
|
275
|
+
|
276
|
+
settings.x_start_time = time.time()
|
277
|
+
|
278
|
+
return settings
|
279
|
+
|
280
|
+
def _load_autoresume_run_id(self, resume_file: pathlib.Path) -> str | None:
|
281
|
+
"""Returns the run_id stored in the auto-resume file, if any.
|
282
|
+
|
283
|
+
Returns None if the file does not exist or is not in a valid format.
|
284
|
+
|
285
|
+
Args:
|
286
|
+
resume_file: The file path to use for resume='auto' mode.
|
287
|
+
"""
|
288
|
+
if not resume_file.exists():
|
289
|
+
return None
|
290
|
+
|
291
|
+
with resume_file.open() as f:
|
292
|
+
try:
|
293
|
+
return json.load(f)["run_id"]
|
294
|
+
|
295
|
+
except json.JSONDecodeError as e:
|
296
|
+
self._logger.exception(
|
297
|
+
f"could not decode {resume_file}, ignoring",
|
298
|
+
exc_info=e,
|
299
|
+
)
|
300
|
+
return None
|
301
|
+
|
302
|
+
except KeyError:
|
303
|
+
self._logger.error(
|
304
|
+
f"resume file at {resume_file} did not store a run_id"
|
305
|
+
)
|
306
|
+
return None
|
307
|
+
|
308
|
+
def _save_autoresume_run_id(
|
309
|
+
self,
|
310
|
+
*,
|
311
|
+
resume_file: pathlib.Path,
|
312
|
+
run_id: str,
|
313
|
+
) -> None:
|
314
|
+
"""Write the run ID to the auto-resume file."""
|
315
|
+
resume_file.parent.mkdir(exist_ok=True)
|
316
|
+
with resume_file.open("w") as f:
|
317
|
+
json.dump({"run_id": run_id}, f)
|
318
|
+
|
319
|
+
def set_run_id(self, settings: Settings) -> None:
|
320
|
+
"""Set the run ID and possibly save it to the auto-resume file.
|
321
|
+
|
322
|
+
After this, `settings.run_id` is guaranteed to be set.
|
323
|
+
|
324
|
+
Args:
|
325
|
+
settings: The run's settings derived from the environment
|
326
|
+
and explicit values passed to `wandb.init()`.
|
327
|
+
"""
|
328
|
+
if settings.resume == "auto" and settings.resume_fname:
|
329
|
+
resume_path = pathlib.Path(settings.resume_fname)
|
330
|
+
else:
|
331
|
+
resume_path = None
|
332
|
+
|
333
|
+
if resume_path:
|
334
|
+
previous_id = self._load_autoresume_run_id(resume_path)
|
335
|
+
|
336
|
+
if not previous_id:
|
337
|
+
pass
|
338
|
+
elif settings.run_id is None:
|
339
|
+
self._logger.info(f"loaded run ID from {resume_path}")
|
340
|
+
settings.run_id = previous_id
|
341
|
+
elif settings.run_id != previous_id:
|
342
|
+
wandb.termwarn(
|
343
|
+
f"Ignoring ID {previous_id} loaded due to resume='auto'"
|
344
|
+
f" because the run ID is set to {settings.run_id}.",
|
345
|
+
)
|
346
|
+
|
347
|
+
# If no run ID was inferred, explicitly set, or loaded from an
|
348
|
+
# auto-resume file, then we generate a new ID.
|
349
|
+
if settings.run_id is None:
|
350
|
+
settings.run_id = runid.generate_id()
|
229
351
|
|
352
|
+
if resume_path:
|
353
|
+
self._save_autoresume_run_id(
|
354
|
+
resume_file=resume_path,
|
355
|
+
run_id=settings.run_id,
|
356
|
+
)
|
357
|
+
|
358
|
+
def setup(
|
359
|
+
self,
|
360
|
+
settings: Settings,
|
361
|
+
config: dict | str | None = None,
|
362
|
+
config_exclude_keys: list[str] | None = None,
|
363
|
+
config_include_keys: list[str] | None = None,
|
364
|
+
monitor_gym: bool | None = None,
|
365
|
+
) -> None:
|
366
|
+
"""Compute the run's config and some telemetry."""
|
230
367
|
with telemetry.context(obj=self._init_telemetry_obj) as tel:
|
231
368
|
if config is not None:
|
232
369
|
tel.feature.set_init_config = True
|
@@ -252,23 +389,25 @@ class _WandbInit:
|
|
252
389
|
exclude=config_exclude_keys,
|
253
390
|
)
|
254
391
|
|
255
|
-
#
|
256
|
-
self.sweep_config = dict()
|
257
|
-
sweep_config = self._wl._sweep_config or dict()
|
392
|
+
# Construct the run's config.
|
258
393
|
self.config = dict()
|
259
394
|
self.init_artifact_config: dict[str, Any] = dict()
|
260
|
-
for config_data in (
|
261
|
-
sagemaker_config,
|
262
|
-
self._wl._config,
|
263
|
-
config,
|
264
|
-
):
|
265
|
-
if not config_data:
|
266
|
-
continue
|
267
|
-
# split out artifacts, since when inserted into
|
268
|
-
# config they will trigger use_artifact
|
269
|
-
# but the run is not yet upserted
|
270
|
-
self._split_artifacts_from_config(config_data, self.config) # type: ignore
|
271
395
|
|
396
|
+
if not settings.sagemaker_disable and sagemaker.is_using_sagemaker():
|
397
|
+
sagemaker_config = sagemaker.parse_sm_config()
|
398
|
+
self._split_artifacts_from_config(sagemaker_config, self.config)
|
399
|
+
|
400
|
+
with telemetry.context(obj=self._init_telemetry_obj) as tel:
|
401
|
+
tel.feature.sagemaker = True
|
402
|
+
|
403
|
+
if self._wl._config:
|
404
|
+
self._split_artifacts_from_config(self._wl._config, self.config)
|
405
|
+
|
406
|
+
if config and isinstance(config, dict):
|
407
|
+
self._split_artifacts_from_config(config, self.config)
|
408
|
+
|
409
|
+
self.sweep_config = dict()
|
410
|
+
sweep_config = self._wl._sweep_config or dict()
|
272
411
|
if sweep_config:
|
273
412
|
self._split_artifacts_from_config(sweep_config, self.sweep_config)
|
274
413
|
|
@@ -285,57 +424,6 @@ class _WandbInit:
|
|
285
424
|
with telemetry.context(obj=self._init_telemetry_obj) as tel:
|
286
425
|
tel.feature.tensorboard_sync = True
|
287
426
|
|
288
|
-
if not settings._offline and not settings._noop:
|
289
|
-
wandb_login._login(
|
290
|
-
anonymous=settings.anonymous,
|
291
|
-
force=settings.force,
|
292
|
-
_disable_warning=True,
|
293
|
-
_silent=settings.quiet or settings.silent,
|
294
|
-
_entity=settings.entity,
|
295
|
-
)
|
296
|
-
|
297
|
-
# apply updated global state after login was handled
|
298
|
-
wl = wandb.setup()
|
299
|
-
assert wl is not None
|
300
|
-
login_settings = {
|
301
|
-
k: v
|
302
|
-
for k, v in {
|
303
|
-
"anonymous": wl.settings.anonymous,
|
304
|
-
"api_key": wl.settings.api_key,
|
305
|
-
"base_url": wl.settings.base_url,
|
306
|
-
"force": wl.settings.force,
|
307
|
-
"login_timeout": wl.settings.login_timeout,
|
308
|
-
}.items()
|
309
|
-
if v is not None
|
310
|
-
}
|
311
|
-
if login_settings:
|
312
|
-
settings.update_from_dict(login_settings)
|
313
|
-
|
314
|
-
# handle custom resume logic
|
315
|
-
settings.handle_resume_logic()
|
316
|
-
|
317
|
-
# get status of code saving before applying user settings
|
318
|
-
save_code_pre_user_settings = settings.save_code
|
319
|
-
if not settings._offline and not settings._noop:
|
320
|
-
user_settings = self._wl._load_user_settings()
|
321
|
-
if user_settings is not None:
|
322
|
-
settings.update_from_dict(user_settings)
|
323
|
-
|
324
|
-
# ensure that user settings don't set saving to true
|
325
|
-
# if user explicitly set these to false in UI
|
326
|
-
if save_code_pre_user_settings is False:
|
327
|
-
settings.save_code = False
|
328
|
-
|
329
|
-
# TODO: remove this once we refactor the client. This is a temporary
|
330
|
-
# fix to make sure that we use the same project name for wandb-core.
|
331
|
-
# The reason this is not going throught the settings object is to
|
332
|
-
# avoid failure cases in other parts of the code that will be
|
333
|
-
# removed with the switch to wandb-core.
|
334
|
-
if settings.project is None:
|
335
|
-
settings.project = wandb.util.auto_project_name(settings.program)
|
336
|
-
|
337
|
-
settings.x_start_time = time.time()
|
338
|
-
|
339
427
|
if not settings._noop:
|
340
428
|
self._log_setup(settings)
|
341
429
|
|
@@ -345,13 +433,10 @@ class _WandbInit:
|
|
345
433
|
if launch_config:
|
346
434
|
self._split_artifacts_from_config(launch_config, self.launch_config)
|
347
435
|
|
348
|
-
self.settings = settings
|
349
|
-
|
350
436
|
def teardown(self) -> None:
|
351
437
|
# TODO: currently this is only called on failed wandb.init attempts
|
352
438
|
# normally this happens on the run object
|
353
|
-
|
354
|
-
logger.info("tearing down wandb.init")
|
439
|
+
self._logger.info("tearing down wandb.init")
|
355
440
|
for hook in self._teardown_hooks:
|
356
441
|
hook.call()
|
357
442
|
|
@@ -364,35 +449,24 @@ class _WandbInit:
|
|
364
449
|
else:
|
365
450
|
config_target.setdefault(k, v)
|
366
451
|
|
367
|
-
def
|
368
|
-
"""
|
452
|
+
def _create_logger(self, log_fname: str) -> logging.Logger:
|
453
|
+
"""Returns a logger configured to write to a file.
|
369
454
|
|
370
|
-
This adds a run_id to the log, in case of multiple processes on the same
|
371
|
-
Currently, there is no way to disable logging after it's
|
455
|
+
This adds a run_id to the log, in case of multiple processes on the same
|
456
|
+
machine. Currently, there is no way to disable logging after it's
|
457
|
+
enabled.
|
372
458
|
"""
|
373
459
|
handler = logging.FileHandler(log_fname)
|
374
460
|
handler.setLevel(logging.INFO)
|
375
461
|
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
if run_id:
|
382
|
-
formatter = logging.Formatter(
|
383
|
-
"%(asctime)s %(levelname)-7s %(threadName)-10s:%(process)d "
|
384
|
-
"[%(run_id)s:%(filename)s:%(funcName)s():%(lineno)s] %(message)s"
|
385
|
-
)
|
386
|
-
else:
|
387
|
-
formatter = logging.Formatter(
|
388
|
-
"%(asctime)s %(levelname)-7s %(threadName)-10s:%(process)d "
|
389
|
-
"[%(filename)s:%(funcName)s():%(lineno)s] %(message)s"
|
390
|
-
)
|
462
|
+
formatter = logging.Formatter(
|
463
|
+
"%(asctime)s %(levelname)-7s %(threadName)-10s:%(process)d "
|
464
|
+
"[%(filename)s:%(funcName)s():%(lineno)s] %(message)s"
|
465
|
+
)
|
391
466
|
|
392
467
|
handler.setFormatter(formatter)
|
393
|
-
|
394
|
-
|
395
|
-
assert logger is not None
|
468
|
+
|
469
|
+
logger = logging.getLogger("wandb")
|
396
470
|
logger.propagate = False
|
397
471
|
logger.addHandler(handler)
|
398
472
|
# TODO: make me configurable
|
@@ -404,10 +478,12 @@ class _WandbInit:
|
|
404
478
|
)
|
405
479
|
)
|
406
480
|
|
481
|
+
return logger
|
482
|
+
|
407
483
|
def _safe_symlink(
|
408
484
|
self, base: str, target: str, name: str, delete: bool = False
|
409
485
|
) -> None:
|
410
|
-
# TODO(jhr): do this with relpaths, but i
|
486
|
+
# TODO(jhr): do this with relpaths, but i can't figure it out on no sleep
|
411
487
|
if not hasattr(os, "symlink"):
|
412
488
|
return
|
413
489
|
|
@@ -434,14 +510,14 @@ class _WandbInit:
|
|
434
510
|
if self.notebook.save_ipynb(): # type: ignore
|
435
511
|
assert self.run is not None
|
436
512
|
res = self.run.log_code(root=None)
|
437
|
-
|
513
|
+
self._logger.info("saved code: %s", res) # type: ignore
|
438
514
|
if self.backend.interface is not None:
|
439
|
-
|
515
|
+
self._logger.info("pausing backend") # type: ignore
|
440
516
|
self.backend.interface.publish_pause()
|
441
517
|
|
442
518
|
def _resume_backend(self, *args: Any, **kwargs: Any) -> None: # noqa
|
443
519
|
if self.backend is not None and self.backend.interface is not None:
|
444
|
-
|
520
|
+
self._logger.info("resuming backend") # type: ignore
|
445
521
|
self.backend.interface.publish_resume()
|
446
522
|
|
447
523
|
def _jupyter_teardown(self) -> None:
|
@@ -452,8 +528,8 @@ class _WandbInit:
|
|
452
528
|
if self.notebook.save_ipynb():
|
453
529
|
assert self.run is not None
|
454
530
|
res = self.run.log_code(root=None)
|
455
|
-
|
456
|
-
|
531
|
+
self._logger.info("saved code and history: %s", res) # type: ignore
|
532
|
+
self._logger.info("cleaning up jupyter logic") # type: ignore
|
457
533
|
# because of how we bind our methods we manually find them to unregister
|
458
534
|
for hook in ipython.events.callbacks["pre_run_cell"]:
|
459
535
|
if "_resume_backend" in hook.__name__:
|
@@ -471,7 +547,7 @@ class _WandbInit:
|
|
471
547
|
|
472
548
|
# Monkey patch ipython publish to capture displayed outputs
|
473
549
|
if not hasattr(ipython.display_pub, "_orig_publish"):
|
474
|
-
|
550
|
+
self._logger.info("configuring jupyter hooks %s", self) # type: ignore
|
475
551
|
ipython.display_pub._orig_publish = ipython.display_pub.publish
|
476
552
|
# Registering resume and pause hooks
|
477
553
|
|
@@ -518,15 +594,9 @@ class _WandbInit:
|
|
518
594
|
delete=True,
|
519
595
|
)
|
520
596
|
|
521
|
-
|
522
|
-
self.
|
523
|
-
|
524
|
-
assert self._wl
|
525
|
-
assert logger
|
526
|
-
|
527
|
-
self._wl._early_logger_flush(logger)
|
528
|
-
logger.info(f"Logging user logs to {settings.log_user}")
|
529
|
-
logger.info(f"Logging internal logs to {settings.log_internal}")
|
597
|
+
self._wl._early_logger_flush(self._create_logger(settings.log_user))
|
598
|
+
self._logger.info(f"Logging user logs to {settings.log_user}")
|
599
|
+
self._logger.info(f"Logging internal logs to {settings.log_internal}")
|
530
600
|
|
531
601
|
def _make_run_disabled(self) -> Run:
|
532
602
|
"""Returns a Run-like object where all methods are no-ops.
|
@@ -552,11 +622,13 @@ class _WandbInit:
|
|
552
622
|
entity="dummy",
|
553
623
|
)
|
554
624
|
)
|
555
|
-
# config and
|
625
|
+
# config, summary, and metadata objects
|
556
626
|
drun._config = wandb.sdk.wandb_config.Config()
|
557
627
|
drun._config.update(self.sweep_config)
|
558
628
|
drun._config.update(self.config)
|
559
629
|
drun.summary = SummaryDisabled() # type: ignore
|
630
|
+
drun._Run__metadata = wandb.sdk.wandb_metadata.Metadata()
|
631
|
+
|
560
632
|
# methods
|
561
633
|
drun.log = lambda data, *_, **__: drun.summary.update(data) # type: ignore
|
562
634
|
drun.finish = lambda *_, **__: module.unset_globals() # type: ignore
|
@@ -638,24 +710,20 @@ class _WandbInit:
|
|
638
710
|
percent_done = handle.percent_done
|
639
711
|
self.printer.progress_update(line, percent_done=percent_done)
|
640
712
|
|
641
|
-
def init(self) -> Run: # noqa: C901
|
642
|
-
|
643
|
-
raise RuntimeError("Logger not initialized")
|
644
|
-
logger.info("calling init triggers")
|
713
|
+
def init(self, settings: Settings) -> Run: # noqa: C901
|
714
|
+
self._logger.info("calling init triggers")
|
645
715
|
trigger.call("on_init")
|
646
716
|
|
647
|
-
assert self.settings is not None
|
648
717
|
assert self._wl is not None
|
649
718
|
|
650
|
-
|
719
|
+
self._logger.info(
|
651
720
|
f"wandb.init called with sweep_config: {self.sweep_config}\nconfig: {self.config}"
|
652
721
|
)
|
653
722
|
|
654
|
-
if
|
723
|
+
if settings._noop:
|
655
724
|
return self._make_run_disabled()
|
656
725
|
if (
|
657
|
-
|
658
|
-
or (self.settings._jupyter and self.settings.reinit is not False)
|
726
|
+
settings.reinit or (settings._jupyter and settings.reinit is not False)
|
659
727
|
) and len(self._wl._global_run_stack) > 0:
|
660
728
|
if len(self._wl._global_run_stack) > 1:
|
661
729
|
wandb.termwarn(
|
@@ -666,40 +734,39 @@ class _WandbInit:
|
|
666
734
|
)
|
667
735
|
|
668
736
|
latest_run = self._wl._global_run_stack[-1]
|
669
|
-
|
737
|
+
self._logger.info(f"found existing run on stack: {latest_run.id}")
|
670
738
|
latest_run.finish()
|
671
|
-
elif
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
service = self._wl.service
|
683
|
-
if service:
|
684
|
-
logger.info("sending inform_init request")
|
739
|
+
elif wandb.run is not None and os.getpid() == wandb.run._init_pid:
|
740
|
+
self._logger.info("wandb.init() called when a run is still active")
|
741
|
+
with telemetry.context() as tel:
|
742
|
+
tel.feature.init_return_run = True
|
743
|
+
return wandb.run
|
744
|
+
|
745
|
+
self._logger.info("starting backend")
|
746
|
+
|
747
|
+
if not settings.x_disable_service:
|
748
|
+
service = self._wl.ensure_service()
|
749
|
+
self._logger.info("sending inform_init request")
|
685
750
|
service.inform_init(
|
686
|
-
settings=
|
687
|
-
run_id=
|
751
|
+
settings=settings.to_proto(),
|
752
|
+
run_id=settings.run_id, # type: ignore
|
688
753
|
)
|
754
|
+
else:
|
755
|
+
service = None
|
689
756
|
|
690
757
|
mailbox = Mailbox()
|
691
758
|
backend = Backend(
|
692
|
-
settings=
|
759
|
+
settings=settings,
|
693
760
|
service=service,
|
694
761
|
mailbox=mailbox,
|
695
762
|
)
|
696
763
|
backend.ensure_launched()
|
697
|
-
|
764
|
+
self._logger.info("backend started and connected")
|
698
765
|
|
699
766
|
# resuming needs access to the server, check server_status()?
|
700
767
|
run = Run(
|
701
768
|
config=self.config,
|
702
|
-
settings=
|
769
|
+
settings=settings,
|
703
770
|
sweep_config=self.sweep_config,
|
704
771
|
launch_config=self.launch_config,
|
705
772
|
)
|
@@ -712,18 +779,18 @@ class _WandbInit:
|
|
712
779
|
hf_version = _huggingface_version()
|
713
780
|
if hf_version:
|
714
781
|
tel.huggingface_version = hf_version
|
715
|
-
if
|
782
|
+
if settings._jupyter:
|
716
783
|
tel.env.jupyter = True
|
717
|
-
if
|
784
|
+
if settings._ipython:
|
718
785
|
tel.env.ipython = True
|
719
|
-
if
|
786
|
+
if settings._colab:
|
720
787
|
tel.env.colab = True
|
721
|
-
if
|
788
|
+
if settings._kaggle:
|
722
789
|
tel.env.kaggle = True
|
723
|
-
if
|
790
|
+
if settings._windows:
|
724
791
|
tel.env.windows = True
|
725
792
|
|
726
|
-
if
|
793
|
+
if settings.launch:
|
727
794
|
tel.feature.launch = True
|
728
795
|
|
729
796
|
for module_name in telemetry.list_telemetry_imports(only_imported=True):
|
@@ -731,8 +798,8 @@ class _WandbInit:
|
|
731
798
|
|
732
799
|
# probe the active start method
|
733
800
|
active_start_method: str | None = None
|
734
|
-
if
|
735
|
-
active_start_method =
|
801
|
+
if settings.start_method == "thread":
|
802
|
+
active_start_method = settings.start_method
|
736
803
|
else:
|
737
804
|
active_start_method = getattr(
|
738
805
|
backend._multiprocessing, "get_start_method", lambda: None
|
@@ -750,7 +817,7 @@ class _WandbInit:
|
|
750
817
|
if os.environ.get("PEX"):
|
751
818
|
tel.env.pex = True
|
752
819
|
|
753
|
-
if
|
820
|
+
if settings._aws_lambda:
|
754
821
|
tel.env.aws_lambda = True
|
755
822
|
|
756
823
|
if os.environ.get(wandb.env._DISABLE_SERVICE):
|
@@ -758,13 +825,13 @@ class _WandbInit:
|
|
758
825
|
|
759
826
|
if service:
|
760
827
|
tel.feature.service = True
|
761
|
-
if
|
828
|
+
if settings.x_flow_control_disabled:
|
762
829
|
tel.feature.flow_control_disabled = True
|
763
|
-
if
|
830
|
+
if settings.x_flow_control_custom:
|
764
831
|
tel.feature.flow_control_custom = True
|
765
|
-
if not
|
832
|
+
if not settings.x_require_legacy_service:
|
766
833
|
tel.feature.core = True
|
767
|
-
if
|
834
|
+
if settings._shared:
|
768
835
|
wandb.termwarn(
|
769
836
|
"The `_shared` feature is experimental and may change. "
|
770
837
|
"Please contact support@wandb.com for guidance and to report any issues."
|
@@ -773,7 +840,7 @@ class _WandbInit:
|
|
773
840
|
|
774
841
|
tel.env.maybe_mp = _maybe_mp_process(backend)
|
775
842
|
|
776
|
-
if not
|
843
|
+
if not settings.label_disable:
|
777
844
|
if self.notebook:
|
778
845
|
run._label_probe_notebook(self.notebook)
|
779
846
|
else:
|
@@ -787,7 +854,7 @@ class _WandbInit:
|
|
787
854
|
run=run,
|
788
855
|
)
|
789
856
|
|
790
|
-
|
857
|
+
self._logger.info("updated telemetry")
|
791
858
|
|
792
859
|
run._set_library(self._wl)
|
793
860
|
run._set_backend(backend)
|
@@ -801,25 +868,27 @@ class _WandbInit:
|
|
801
868
|
# Using GitRepo() blocks & can be slow, depending on user's current git setup.
|
802
869
|
# We don't want to block run initialization/start request, so populate run's git
|
803
870
|
# info beforehand.
|
804
|
-
if not (
|
871
|
+
if not (settings.disable_git or settings.x_disable_machine_info):
|
805
872
|
run._populate_git_info()
|
806
873
|
|
807
874
|
run_result: pb.RunUpdateResult | None = None
|
808
875
|
|
809
|
-
if
|
876
|
+
if settings._offline:
|
810
877
|
with telemetry.context(run=run) as tel:
|
811
878
|
tel.feature.offline = True
|
812
879
|
|
813
|
-
if
|
880
|
+
if settings.resume:
|
814
881
|
wandb.termwarn(
|
815
882
|
"`resume` will be ignored since W&B syncing is set to `offline`. "
|
816
883
|
f"Starting a new run with run id {run.id}."
|
817
884
|
)
|
818
885
|
error: wandb.Error | None = None
|
819
886
|
|
820
|
-
timeout =
|
887
|
+
timeout = settings.init_timeout
|
821
888
|
|
822
|
-
|
889
|
+
self._logger.info(
|
890
|
+
f"communicating run to backend with {timeout} second timeout",
|
891
|
+
)
|
823
892
|
|
824
893
|
run_init_handle = backend.interface.deliver_run(run)
|
825
894
|
result = run_init_handle.wait(
|
@@ -846,7 +915,7 @@ class _WandbInit:
|
|
846
915
|
error = ProtobufErrorHandler.to_exception(run_result.error)
|
847
916
|
|
848
917
|
if error is not None:
|
849
|
-
|
918
|
+
self._logger.error(f"encountered error: {error}")
|
850
919
|
if not service:
|
851
920
|
# Shutdown the backend and get rid of the logger
|
852
921
|
# we don't need to do console cleanup at this point
|
@@ -864,21 +933,19 @@ class _WandbInit:
|
|
864
933
|
)
|
865
934
|
|
866
935
|
if run_result.run.resumed:
|
867
|
-
|
936
|
+
self._logger.info("run resumed")
|
868
937
|
with telemetry.context(run=run) as tel:
|
869
938
|
tel.feature.resumed = run_result.run.resumed
|
870
939
|
run._set_run_obj(run_result.run)
|
871
940
|
|
872
|
-
|
873
|
-
|
874
|
-
logger.info("starting run threads in backend")
|
941
|
+
self._logger.info("starting run threads in backend")
|
875
942
|
# initiate run (stats and metadata probing)
|
876
943
|
|
877
944
|
if service:
|
878
|
-
assert
|
945
|
+
assert settings.run_id
|
879
946
|
service.inform_start(
|
880
|
-
settings=
|
881
|
-
run_id=
|
947
|
+
settings=settings.to_proto(),
|
948
|
+
run_id=settings.run_id,
|
882
949
|
)
|
883
950
|
|
884
951
|
assert backend.interface
|
@@ -895,11 +962,11 @@ class _WandbInit:
|
|
895
962
|
|
896
963
|
run._handle_launch_artifact_overrides()
|
897
964
|
if (
|
898
|
-
|
899
|
-
and
|
900
|
-
and os.path.exists(
|
965
|
+
settings.launch
|
966
|
+
and settings.launch_config_path
|
967
|
+
and os.path.exists(settings.launch_config_path)
|
901
968
|
):
|
902
|
-
run.save(
|
969
|
+
run.save(settings.launch_config_path)
|
903
970
|
# put artifacts in run config here
|
904
971
|
# since doing so earlier will cause an error
|
905
972
|
# as the run is not upserted
|
@@ -913,7 +980,7 @@ class _WandbInit:
|
|
913
980
|
|
914
981
|
self.backend = backend
|
915
982
|
run._on_start()
|
916
|
-
|
983
|
+
self._logger.info("run started, returning control to user process")
|
917
984
|
return run
|
918
985
|
|
919
986
|
|
@@ -943,16 +1010,10 @@ def _attach(
|
|
943
1010
|
)
|
944
1011
|
wandb._assert_is_user_process() # type: ignore
|
945
1012
|
|
946
|
-
_wl =
|
947
|
-
|
948
|
-
|
949
|
-
_set_logger(_wl._get_logger())
|
950
|
-
if logger is None:
|
951
|
-
raise UsageError("logger is not initialized")
|
1013
|
+
_wl = wandb.setup()
|
1014
|
+
logger = _wl._get_logger()
|
952
1015
|
|
953
|
-
service = _wl.
|
954
|
-
if not service:
|
955
|
-
raise UsageError(f"Unable to attach to run {attach_id} (no service process)")
|
1016
|
+
service = _wl.ensure_service()
|
956
1017
|
|
957
1018
|
try:
|
958
1019
|
attach_settings = service.inform_attach(attach_id=attach_id)
|
@@ -1081,7 +1142,7 @@ def init( # noqa: C901
|
|
1081
1142
|
entity: The username or team name under which the runs will be logged.
|
1082
1143
|
The entity must already exist, so ensure you’ve created your account
|
1083
1144
|
or team in the UI before starting to log runs. If not specified, the
|
1084
|
-
run will default your
|
1145
|
+
run will default your default entity. To change the default entity,
|
1085
1146
|
go to [your settings](https://wandb.ai/settings) and update the
|
1086
1147
|
"Default location to create new projects" under "Default team".
|
1087
1148
|
project: The name of the project under which this run will be logged.
|
@@ -1285,27 +1346,36 @@ def init( # noqa: C901
|
|
1285
1346
|
if resume_from is not None:
|
1286
1347
|
init_settings.resume_from = resume_from # type: ignore
|
1287
1348
|
|
1349
|
+
wl: wandb_setup._WandbSetup | None = None
|
1350
|
+
|
1288
1351
|
try:
|
1289
|
-
|
1352
|
+
wl = wandb.setup()
|
1353
|
+
|
1354
|
+
wi = _WandbInit(wl)
|
1355
|
+
|
1356
|
+
wi.maybe_login(init_settings)
|
1357
|
+
run_settings = wi.compute_run_settings(init_settings)
|
1358
|
+
wi.set_run_id(run_settings)
|
1359
|
+
|
1290
1360
|
wi.setup(
|
1291
|
-
|
1361
|
+
settings=run_settings,
|
1292
1362
|
config=config,
|
1293
1363
|
config_exclude_keys=config_exclude_keys,
|
1294
1364
|
config_include_keys=config_include_keys,
|
1295
|
-
allow_val_change=allow_val_change,
|
1296
1365
|
monitor_gym=monitor_gym,
|
1297
1366
|
)
|
1298
|
-
|
1367
|
+
|
1368
|
+
return wi.init(run_settings)
|
1299
1369
|
|
1300
1370
|
except KeyboardInterrupt as e:
|
1301
|
-
if
|
1302
|
-
|
1371
|
+
if wl:
|
1372
|
+
wl._get_logger().warning("interrupted", exc_info=e)
|
1303
1373
|
|
1304
1374
|
raise
|
1305
1375
|
|
1306
1376
|
except Exception as e:
|
1307
|
-
if
|
1308
|
-
|
1377
|
+
if wl:
|
1378
|
+
wl._get_logger().exception("error in wandb.init()", exc_info=e)
|
1309
1379
|
|
1310
1380
|
# Need to build delay into this sentry capture because our exit hooks
|
1311
1381
|
# mess with sentry's ability to send out errors before the program ends.
|