wandb 0.16.5__py3-none-any.whl → 0.17.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- package_readme.md +95 -0
- wandb/__init__.py +2 -3
- wandb/agents/pyagent.py +0 -1
- wandb/analytics/sentry.py +2 -1
- wandb/apis/importers/internals/internal.py +0 -1
- wandb/apis/importers/internals/protocols.py +30 -56
- wandb/apis/importers/mlflow.py +13 -26
- wandb/apis/importers/wandb.py +8 -14
- wandb/apis/internal.py +0 -3
- wandb/apis/public/api.py +55 -3
- wandb/apis/public/artifacts.py +1 -0
- wandb/apis/public/files.py +1 -0
- wandb/apis/public/history.py +1 -0
- wandb/apis/public/jobs.py +17 -4
- wandb/apis/public/projects.py +1 -0
- wandb/apis/public/reports.py +1 -0
- wandb/apis/public/runs.py +15 -17
- wandb/apis/public/sweeps.py +1 -0
- wandb/apis/public/teams.py +1 -0
- wandb/apis/public/users.py +1 -0
- wandb/apis/reports/v1/_blocks.py +3 -7
- wandb/apis/reports/v2/gql.py +1 -0
- wandb/apis/reports/v2/interface.py +3 -4
- wandb/apis/reports/v2/internal.py +5 -8
- wandb/cli/cli.py +95 -22
- wandb/data_types.py +9 -6
- wandb/docker/__init__.py +1 -1
- wandb/env.py +38 -8
- wandb/errors/__init__.py +5 -0
- wandb/errors/term.py +10 -2
- wandb/filesync/step_checksum.py +1 -4
- wandb/filesync/step_prepare.py +4 -24
- wandb/filesync/step_upload.py +4 -106
- wandb/filesync/upload_job.py +0 -76
- wandb/integration/catboost/catboost.py +1 -1
- wandb/integration/fastai/__init__.py +1 -0
- wandb/integration/huggingface/resolver.py +2 -2
- wandb/integration/keras/__init__.py +1 -0
- wandb/integration/keras/callbacks/metrics_logger.py +1 -1
- wandb/integration/keras/keras.py +7 -7
- wandb/integration/langchain/wandb_tracer.py +1 -0
- wandb/integration/lightning/fabric/logger.py +1 -3
- wandb/integration/metaflow/metaflow.py +41 -6
- wandb/integration/openai/fine_tuning.py +77 -40
- wandb/integration/prodigy/prodigy.py +1 -1
- wandb/old/summary.py +1 -1
- wandb/plot/confusion_matrix.py +1 -1
- wandb/plot/pr_curve.py +2 -1
- wandb/plot/roc_curve.py +2 -1
- wandb/{plots → plot}/utils.py +13 -25
- wandb/proto/v3/wandb_internal_pb2.py +364 -332
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_internal_pb2.py +322 -316
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/proto/wandb_deprecated.py +7 -1
- wandb/proto/wandb_internal_codegen.py +3 -29
- wandb/sdk/artifacts/artifact.py +51 -20
- wandb/sdk/artifacts/artifact_download_logger.py +1 -0
- wandb/sdk/artifacts/artifact_file_cache.py +18 -4
- wandb/sdk/artifacts/artifact_instance_cache.py +1 -0
- wandb/sdk/artifacts/artifact_manifest.py +1 -0
- wandb/sdk/artifacts/artifact_manifest_entry.py +7 -3
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -0
- wandb/sdk/artifacts/artifact_saver.py +18 -27
- wandb/sdk/artifacts/artifact_state.py +1 -0
- wandb/sdk/artifacts/artifact_ttl.py +1 -0
- wandb/sdk/artifacts/exceptions.py +1 -0
- wandb/sdk/artifacts/storage_handlers/azure_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/gcs_handler.py +13 -18
- wandb/sdk/artifacts/storage_handlers/http_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/multi_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/s3_handler.py +5 -3
- wandb/sdk/artifacts/storage_handlers/tracking_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +1 -0
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +1 -0
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +3 -42
- wandb/sdk/artifacts/storage_policy.py +2 -12
- wandb/sdk/data_types/_dtypes.py +8 -8
- wandb/sdk/data_types/base_types/media.py +3 -6
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +3 -1
- wandb/sdk/data_types/image.py +1 -1
- wandb/sdk/data_types/video.py +1 -1
- wandb/sdk/integration_utils/auto_logging.py +5 -6
- wandb/sdk/integration_utils/data_logging.py +10 -6
- wandb/sdk/interface/interface.py +86 -38
- wandb/sdk/interface/interface_shared.py +7 -13
- wandb/sdk/internal/datastore.py +1 -1
- wandb/sdk/internal/file_pusher.py +2 -5
- wandb/sdk/internal/file_stream.py +5 -18
- wandb/sdk/internal/handler.py +18 -2
- wandb/sdk/internal/internal.py +0 -1
- wandb/sdk/internal/internal_api.py +1 -129
- wandb/sdk/internal/internal_util.py +0 -1
- wandb/sdk/internal/job_builder.py +159 -45
- wandb/sdk/internal/profiler.py +1 -0
- wandb/sdk/internal/progress.py +0 -28
- wandb/sdk/internal/run.py +1 -0
- wandb/sdk/internal/sender.py +1 -2
- wandb/sdk/internal/system/assets/gpu_amd.py +44 -44
- wandb/sdk/internal/system/assets/gpu_apple.py +56 -11
- wandb/sdk/internal/system/assets/interfaces.py +6 -8
- wandb/sdk/internal/system/assets/open_metrics.py +2 -2
- wandb/sdk/internal/system/assets/trainium.py +1 -3
- wandb/sdk/launch/__init__.py +9 -1
- wandb/sdk/launch/_launch.py +9 -24
- wandb/sdk/launch/_launch_add.py +1 -3
- wandb/sdk/launch/_project_spec.py +188 -241
- wandb/sdk/launch/agent/agent.py +115 -48
- wandb/sdk/launch/agent/config.py +80 -14
- wandb/sdk/launch/builder/abstract.py +69 -1
- wandb/sdk/launch/builder/build.py +156 -555
- wandb/sdk/launch/builder/context_manager.py +235 -0
- wandb/sdk/launch/builder/docker_builder.py +8 -23
- wandb/sdk/launch/builder/kaniko_builder.py +161 -159
- wandb/sdk/launch/builder/noop.py +1 -0
- wandb/sdk/launch/builder/templates/dockerfile.py +92 -0
- wandb/sdk/launch/create_job.py +68 -63
- wandb/sdk/launch/environment/abstract.py +1 -0
- wandb/sdk/launch/environment/gcp_environment.py +1 -0
- wandb/sdk/launch/environment/local_environment.py +1 -0
- wandb/sdk/launch/inputs/files.py +148 -0
- wandb/sdk/launch/inputs/internal.py +217 -0
- wandb/sdk/launch/inputs/manage.py +95 -0
- wandb/sdk/launch/loader.py +1 -0
- wandb/sdk/launch/registry/abstract.py +1 -0
- wandb/sdk/launch/registry/azure_container_registry.py +1 -0
- wandb/sdk/launch/registry/elastic_container_registry.py +1 -0
- wandb/sdk/launch/registry/google_artifact_registry.py +2 -1
- wandb/sdk/launch/registry/local_registry.py +1 -0
- wandb/sdk/launch/runner/abstract.py +1 -0
- wandb/sdk/launch/runner/kubernetes_monitor.py +4 -1
- wandb/sdk/launch/runner/kubernetes_runner.py +9 -10
- wandb/sdk/launch/runner/local_container.py +2 -3
- wandb/sdk/launch/runner/local_process.py +8 -29
- wandb/sdk/launch/runner/sagemaker_runner.py +21 -20
- wandb/sdk/launch/runner/vertex_runner.py +8 -7
- wandb/sdk/launch/sweeps/scheduler.py +7 -4
- wandb/sdk/launch/sweeps/scheduler_sweep.py +2 -1
- wandb/sdk/launch/sweeps/utils.py +3 -3
- wandb/sdk/launch/utils.py +33 -140
- wandb/sdk/lib/_settings_toposort_generated.py +1 -5
- wandb/sdk/lib/fsm.py +8 -12
- wandb/sdk/lib/gitlib.py +4 -4
- wandb/sdk/lib/import_hooks.py +1 -1
- wandb/sdk/lib/lazyloader.py +0 -1
- wandb/sdk/lib/proto_util.py +23 -2
- wandb/sdk/lib/redirect.py +19 -14
- wandb/sdk/lib/retry.py +3 -2
- wandb/sdk/lib/run_moment.py +7 -1
- wandb/sdk/lib/tracelog.py +1 -1
- wandb/sdk/service/service.py +19 -16
- wandb/sdk/verify/verify.py +2 -1
- wandb/sdk/wandb_init.py +16 -63
- wandb/sdk/wandb_manager.py +2 -2
- wandb/sdk/wandb_require.py +5 -0
- wandb/sdk/wandb_run.py +164 -90
- wandb/sdk/wandb_settings.py +2 -48
- wandb/sdk/wandb_setup.py +1 -1
- wandb/sklearn/__init__.py +1 -0
- wandb/sklearn/plot/__init__.py +1 -0
- wandb/sklearn/plot/classifier.py +11 -12
- wandb/sklearn/plot/clusterer.py +2 -1
- wandb/sklearn/plot/regressor.py +1 -0
- wandb/sklearn/plot/shared.py +1 -0
- wandb/sklearn/utils.py +1 -0
- wandb/testing/relay.py +4 -4
- wandb/trigger.py +1 -0
- wandb/util.py +67 -54
- wandb/wandb_controller.py +2 -3
- wandb/wandb_torch.py +1 -2
- {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/METADATA +67 -70
- {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/RECORD +178 -188
- {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/WHEEL +1 -2
- wandb/bin/apple_gpu_stats +0 -0
- wandb/catboost/__init__.py +0 -9
- wandb/fastai/__init__.py +0 -9
- wandb/keras/__init__.py +0 -18
- wandb/lightgbm/__init__.py +0 -9
- wandb/plots/__init__.py +0 -6
- wandb/plots/explain_text.py +0 -36
- wandb/plots/heatmap.py +0 -81
- wandb/plots/named_entity.py +0 -43
- wandb/plots/part_of_speech.py +0 -50
- wandb/plots/plot_definitions.py +0 -768
- wandb/plots/precision_recall.py +0 -121
- wandb/plots/roc.py +0 -103
- wandb/sacred/__init__.py +0 -3
- wandb/xgboost/__init__.py +0 -9
- wandb-0.16.5.dist-info/top_level.txt +0 -1
- {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info}/entry_points.txt +0 -0
- {wandb-0.16.5.dist-info → wandb-0.17.0.dist-info/licenses}/LICENSE +0 -0
wandb/sdk/wandb_init.py
CHANGED
@@ -15,7 +15,6 @@ import os
|
|
15
15
|
import platform
|
16
16
|
import sys
|
17
17
|
import tempfile
|
18
|
-
import traceback
|
19
18
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union
|
20
19
|
|
21
20
|
import wandb
|
@@ -195,12 +194,6 @@ class _WandbInit:
|
|
195
194
|
# Start with settings from wandb library singleton
|
196
195
|
settings: Settings = self._wl.settings.copy()
|
197
196
|
|
198
|
-
# when using launch, we don't want to reuse the same run id from the singleton
|
199
|
-
# since users might launch multiple runs in the same process
|
200
|
-
# TODO(kdg): allow users to control this via launch settings
|
201
|
-
if settings.launch and singleton is not None:
|
202
|
-
settings.update({"run_id": None}, source=Source.INIT)
|
203
|
-
|
204
197
|
settings_param = kwargs.pop("settings", None)
|
205
198
|
if settings_param is not None and isinstance(settings_param, (Settings, dict)):
|
206
199
|
settings.update(settings_param, source=Source.INIT)
|
@@ -561,7 +554,7 @@ class _WandbInit:
|
|
561
554
|
percent_done = handle.percent_done
|
562
555
|
self.printer.progress_update(line, percent_done=percent_done)
|
563
556
|
|
564
|
-
def init(self) -> Union[Run, RunDisabled
|
557
|
+
def init(self) -> Union[Run, RunDisabled]: # noqa: C901
|
565
558
|
if logger is None:
|
566
559
|
raise RuntimeError("Logger not initialized")
|
567
560
|
logger.info("calling init triggers")
|
@@ -660,9 +653,6 @@ class _WandbInit:
|
|
660
653
|
if self.settings.launch:
|
661
654
|
tel.feature.launch = True
|
662
655
|
|
663
|
-
if self.settings._async_upload_concurrency_limit:
|
664
|
-
tel.feature.async_uploads = True
|
665
|
-
|
666
656
|
for module_name in telemetry.list_telemetry_imports(only_imported=True):
|
667
657
|
setattr(tel.imports_init, module_name, True)
|
668
658
|
|
@@ -849,13 +839,6 @@ class _WandbInit:
|
|
849
839
|
return run
|
850
840
|
|
851
841
|
|
852
|
-
def getcaller() -> None:
|
853
|
-
if not logger:
|
854
|
-
return None
|
855
|
-
src, line, func, stack = logger.findCaller(stack_info=True)
|
856
|
-
print("Problem at:", src, line, func)
|
857
|
-
|
858
|
-
|
859
842
|
def _attach(
|
860
843
|
attach_id: Optional[str] = None,
|
861
844
|
run_id: Optional[str] = None,
|
@@ -964,7 +947,7 @@ def init(
|
|
964
947
|
id: Optional[str] = None,
|
965
948
|
fork_from: Optional[str] = None,
|
966
949
|
settings: Union[Settings, Dict[str, Any], None] = None,
|
967
|
-
) -> Union[Run, RunDisabled
|
950
|
+
) -> Union[Run, RunDisabled]:
|
968
951
|
r"""Start a new run to track and log to W&B.
|
969
952
|
|
970
953
|
In an ML training pipeline, you could add `wandb.init()`
|
@@ -1124,10 +1107,10 @@ def init(
|
|
1124
1107
|
for saving hyperparameters to compare across runs. The ID cannot
|
1125
1108
|
contain the following special characters: `/\#?%:`.
|
1126
1109
|
See [our guide to resuming runs](https://docs.wandb.com/guides/runs/resuming).
|
1127
|
-
fork_from: (str, optional) A string with the format
|
1110
|
+
fork_from: (str, optional) A string with the format {run_id}?_step={step} describing
|
1128
1111
|
a moment in a previous run to fork a new run from. Creates a new run that picks up
|
1129
1112
|
logging history from the specified run at the specified moment. The target run must
|
1130
|
-
be in the current project.
|
1113
|
+
be in the current project. Example: `fork_from="my-run-id?_step=1234"`.
|
1131
1114
|
|
1132
1115
|
Examples:
|
1133
1116
|
### Set where the run is logged
|
@@ -1170,9 +1153,6 @@ def init(
|
|
1170
1153
|
wandb._assert_is_user_process()
|
1171
1154
|
|
1172
1155
|
kwargs = dict(locals())
|
1173
|
-
error_seen = None
|
1174
|
-
except_exit = None
|
1175
|
-
run: Optional[Union[Run, RunDisabled]] = None
|
1176
1156
|
|
1177
1157
|
# convert fork_from into a version that can be passed to settings
|
1178
1158
|
if fork_from is not None and resume is not None:
|
@@ -1181,46 +1161,19 @@ def init(
|
|
1181
1161
|
try:
|
1182
1162
|
wi = _WandbInit()
|
1183
1163
|
wi.setup(kwargs)
|
1184
|
-
|
1185
|
-
|
1186
|
-
try:
|
1187
|
-
run = wi.init()
|
1188
|
-
except_exit = wi.settings._except_exit
|
1189
|
-
except (KeyboardInterrupt, Exception) as e:
|
1190
|
-
if not isinstance(e, KeyboardInterrupt):
|
1191
|
-
wandb._sentry.exception(e)
|
1192
|
-
if not (
|
1193
|
-
wandb.wandb_agent._is_running() and isinstance(e, KeyboardInterrupt)
|
1194
|
-
):
|
1195
|
-
getcaller()
|
1196
|
-
assert logger
|
1197
|
-
if wi.settings.problem == "fatal":
|
1198
|
-
raise
|
1199
|
-
if wi.settings.problem == "warn":
|
1200
|
-
pass
|
1201
|
-
# TODO(jhr): figure out how to make this RunDummy
|
1202
|
-
run = None
|
1203
|
-
except Error as e:
|
1204
|
-
if logger is not None:
|
1205
|
-
logger.exception(str(e))
|
1206
|
-
raise e
|
1164
|
+
return wi.init()
|
1165
|
+
|
1207
1166
|
except KeyboardInterrupt as e:
|
1208
|
-
|
1209
|
-
|
1210
|
-
|
1167
|
+
if logger is not None:
|
1168
|
+
logger.warning("interrupted", exc_info=e)
|
1169
|
+
|
1170
|
+
raise
|
1171
|
+
|
1211
1172
|
except Exception as e:
|
1212
|
-
|
1213
|
-
|
1214
|
-
|
1215
|
-
logger.error("error", exc_info=e)
|
1173
|
+
if logger is not None:
|
1174
|
+
logger.exception("error in wandb.init()", exc_info=e)
|
1175
|
+
|
1216
1176
|
# Need to build delay into this sentry capture because our exit hooks
|
1217
1177
|
# mess with sentry's ability to send out errors before the program ends.
|
1218
|
-
wandb._sentry.
|
1219
|
-
|
1220
|
-
finally:
|
1221
|
-
if error_seen:
|
1222
|
-
if except_exit:
|
1223
|
-
wandb.termerror("Abnormal program exit")
|
1224
|
-
os._exit(1)
|
1225
|
-
raise Error("An unexpected error occurred") from error_seen
|
1226
|
-
return run
|
1178
|
+
wandb._sentry.reraise(e)
|
1179
|
+
raise AssertionError() # unreachable
|
wandb/sdk/wandb_manager.py
CHANGED
@@ -5,7 +5,7 @@ Create a manager channel.
|
|
5
5
|
|
6
6
|
import atexit
|
7
7
|
import os
|
8
|
-
from typing import TYPE_CHECKING,
|
8
|
+
from typing import TYPE_CHECKING, Callable, Optional
|
9
9
|
|
10
10
|
import psutil
|
11
11
|
|
@@ -205,7 +205,7 @@ class _Manager:
|
|
205
205
|
svc_iface = self._get_service_interface()
|
206
206
|
svc_iface._svc_inform_start(settings=settings, run_id=run_id)
|
207
207
|
|
208
|
-
def _inform_attach(self, attach_id: str) -> Optional[
|
208
|
+
def _inform_attach(self, attach_id: str) -> Optional["wandb_settings_pb2.Settings"]:
|
209
209
|
svc_iface = self._get_service_interface()
|
210
210
|
try:
|
211
211
|
response = svc_iface._svc_inform_attach(attach_id=attach_id)
|
wandb/sdk/wandb_require.py
CHANGED
@@ -9,9 +9,11 @@ Example:
|
|
9
9
|
wandb.require("incremental-artifacts@beta")
|
10
10
|
"""
|
11
11
|
|
12
|
+
import os
|
12
13
|
from typing import Optional, Sequence, Union
|
13
14
|
|
14
15
|
import wandb
|
16
|
+
from wandb.env import _REQUIRE_CORE
|
15
17
|
from wandb.errors import UnsupportedError
|
16
18
|
from wandb.sdk import wandb_run
|
17
19
|
from wandb.sdk.lib.wburls import wburls
|
@@ -38,6 +40,9 @@ class _Requires:
|
|
38
40
|
def require_service(self) -> None:
|
39
41
|
self._require_service()
|
40
42
|
|
43
|
+
def require_core(self) -> None:
|
44
|
+
os.environ[_REQUIRE_CORE] = "true"
|
45
|
+
|
41
46
|
def apply(self) -> None:
|
42
47
|
"""Call require_* method for supported features."""
|
43
48
|
last_message: str = ""
|
wandb/sdk/wandb_run.py
CHANGED
@@ -207,6 +207,15 @@ class RunStatusChecker:
|
|
207
207
|
self._network_status_thread.start()
|
208
208
|
self._internal_messages_thread.start()
|
209
209
|
|
210
|
+
@staticmethod
|
211
|
+
def _abandon_status_check(
|
212
|
+
lock: threading.Lock,
|
213
|
+
handle: Optional[MailboxHandle],
|
214
|
+
):
|
215
|
+
with lock:
|
216
|
+
if handle:
|
217
|
+
handle.abandon()
|
218
|
+
|
210
219
|
def _loop_check_status(
|
211
220
|
self,
|
212
221
|
*,
|
@@ -265,13 +274,19 @@ class RunStatusChecker:
|
|
265
274
|
)
|
266
275
|
)
|
267
276
|
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
277
|
+
try:
|
278
|
+
self._loop_check_status(
|
279
|
+
lock=self._network_status_lock,
|
280
|
+
set_handle=lambda x: setattr(self, "_network_status_handle", x),
|
281
|
+
timeout=self._retry_polling_interval,
|
282
|
+
request=self._interface.deliver_network_status,
|
283
|
+
process=_process_network_status,
|
284
|
+
)
|
285
|
+
except BrokenPipeError:
|
286
|
+
self._abandon_status_check(
|
287
|
+
self._network_status_lock,
|
288
|
+
self._network_status_handle,
|
289
|
+
)
|
275
290
|
|
276
291
|
def check_stop_status(self) -> None:
|
277
292
|
def _process_stop_status(result: Result) -> None:
|
@@ -283,13 +298,19 @@ class RunStatusChecker:
|
|
283
298
|
thread.interrupt_main()
|
284
299
|
return
|
285
300
|
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
301
|
+
try:
|
302
|
+
self._loop_check_status(
|
303
|
+
lock=self._stop_status_lock,
|
304
|
+
set_handle=lambda x: setattr(self, "_stop_status_handle", x),
|
305
|
+
timeout=self._stop_polling_interval,
|
306
|
+
request=self._interface.deliver_stop_status,
|
307
|
+
process=_process_stop_status,
|
308
|
+
)
|
309
|
+
except BrokenPipeError:
|
310
|
+
self._abandon_status_check(
|
311
|
+
self._stop_status_lock,
|
312
|
+
self._stop_status_handle,
|
313
|
+
)
|
293
314
|
|
294
315
|
def check_internal_messages(self) -> None:
|
295
316
|
def _process_internal_messages(result: Result) -> None:
|
@@ -297,25 +318,34 @@ class RunStatusChecker:
|
|
297
318
|
for msg in internal_messages.messages.warning:
|
298
319
|
wandb.termwarn(msg)
|
299
320
|
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
321
|
+
try:
|
322
|
+
self._loop_check_status(
|
323
|
+
lock=self._internal_messages_lock,
|
324
|
+
set_handle=lambda x: setattr(self, "_internal_messages_handle", x),
|
325
|
+
timeout=1,
|
326
|
+
request=self._interface.deliver_internal_messages,
|
327
|
+
process=_process_internal_messages,
|
328
|
+
)
|
329
|
+
except BrokenPipeError:
|
330
|
+
self._abandon_status_check(
|
331
|
+
self._internal_messages_lock,
|
332
|
+
self._internal_messages_handle,
|
333
|
+
)
|
307
334
|
|
308
335
|
def stop(self) -> None:
|
309
336
|
self._join_event.set()
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
337
|
+
self._abandon_status_check(
|
338
|
+
self._stop_status_lock,
|
339
|
+
self._stop_status_handle,
|
340
|
+
)
|
341
|
+
self._abandon_status_check(
|
342
|
+
self._network_status_lock,
|
343
|
+
self._network_status_handle,
|
344
|
+
)
|
345
|
+
self._abandon_status_check(
|
346
|
+
self._internal_messages_lock,
|
347
|
+
self._internal_messages_handle,
|
348
|
+
)
|
319
349
|
|
320
350
|
def join(self) -> None:
|
321
351
|
self.stop()
|
@@ -327,8 +357,7 @@ class RunStatusChecker:
|
|
327
357
|
class _run_decorator: # noqa: N801
|
328
358
|
_is_attaching: str = ""
|
329
359
|
|
330
|
-
class Dummy:
|
331
|
-
...
|
360
|
+
class Dummy: ...
|
332
361
|
|
333
362
|
@classmethod
|
334
363
|
def _attach(cls, func: Callable) -> Callable:
|
@@ -1401,6 +1430,10 @@ class Run:
|
|
1401
1430
|
# self._printer.display(line)
|
1402
1431
|
|
1403
1432
|
def _summary_get_current_summary_callback(self) -> Dict[str, Any]:
|
1433
|
+
if self._is_finished:
|
1434
|
+
# TODO: WB-18420: fetch summary from backend and stage it before run is finished
|
1435
|
+
wandb.termwarn("Summary data not available in finished run")
|
1436
|
+
return {}
|
1404
1437
|
if not self._backend or not self._backend.interface:
|
1405
1438
|
return {}
|
1406
1439
|
handle = self._backend.interface.deliver_get_summary()
|
@@ -1793,7 +1826,7 @@ class Run:
|
|
1793
1826
|
import wandb
|
1794
1827
|
|
1795
1828
|
run = wandb.init()
|
1796
|
-
run.log({"pr": wandb.
|
1829
|
+
run.log({"pr": wandb.plot.pr_curve(y_test, y_probas, labels)})
|
1797
1830
|
```
|
1798
1831
|
|
1799
1832
|
### 3D Object
|
@@ -1854,7 +1887,7 @@ class Run:
|
|
1854
1887
|
picked up automatically.
|
1855
1888
|
|
1856
1889
|
A `base_path` may be provided to control the directory structure of
|
1857
|
-
uploaded files. It should be a prefix of `glob_str`, and the
|
1890
|
+
uploaded files. It should be a prefix of `glob_str`, and the directory
|
1858
1891
|
structure beneath it is preserved. It's best understood through
|
1859
1892
|
examples:
|
1860
1893
|
|
@@ -1866,16 +1899,19 @@ class Run:
|
|
1866
1899
|
# => Saves files in an "are/myfiles/" folder in the run.
|
1867
1900
|
|
1868
1901
|
wandb.save("/User/username/Documents/run123/*.txt")
|
1869
|
-
# => Saves files in a "run123/" folder in the run.
|
1902
|
+
# => Saves files in a "run123/" folder in the run. See note below.
|
1870
1903
|
|
1871
1904
|
wandb.save("/User/username/Documents/run123/*.txt", base_path="/User")
|
1872
1905
|
# => Saves files in a "username/Documents/run123/" folder in the run.
|
1873
1906
|
|
1874
1907
|
wandb.save("files/*/saveme.txt")
|
1875
1908
|
# => Saves each "saveme.txt" file in an appropriate subdirectory
|
1876
|
-
#
|
1909
|
+
# of "files/".
|
1877
1910
|
```
|
1878
1911
|
|
1912
|
+
Note: when given an absolute path or glob and no `base_path`, one
|
1913
|
+
directory level is preserved as in the example above.
|
1914
|
+
|
1879
1915
|
Arguments:
|
1880
1916
|
glob_str: A relative or absolute path or Unix glob.
|
1881
1917
|
base_path: A path to use to infer a directory structure; see examples.
|
@@ -1909,7 +1945,11 @@ class Run:
|
|
1909
1945
|
# Provide a better error message for a common misuse.
|
1910
1946
|
wandb.termlog(f"{glob_str} is a cloud storage url, can't save file to W&B.")
|
1911
1947
|
return []
|
1912
|
-
|
1948
|
+
# NOTE: We use PurePath instead of Path because WindowsPath doesn't
|
1949
|
+
# like asterisks and errors out in resolve(). It also makes logical
|
1950
|
+
# sense: globs aren't real paths, they're just path-like strings.
|
1951
|
+
glob_path = pathlib.PurePath(glob_str)
|
1952
|
+
resolved_glob_path = pathlib.PurePath(os.path.abspath(glob_path))
|
1913
1953
|
|
1914
1954
|
if base_path is not None:
|
1915
1955
|
base_path = pathlib.Path(base_path)
|
@@ -1923,15 +1963,14 @@ class Run:
|
|
1923
1963
|
'wandb.save("/mnt/folder/file.h5", base_path="/mnt")',
|
1924
1964
|
repeat=False,
|
1925
1965
|
)
|
1926
|
-
base_path =
|
1966
|
+
base_path = resolved_glob_path.parent.parent
|
1927
1967
|
|
1928
1968
|
if policy not in ("live", "end", "now"):
|
1929
1969
|
raise ValueError(
|
1930
1970
|
'Only "live", "end" and "now" policies are currently supported.'
|
1931
1971
|
)
|
1932
1972
|
|
1933
|
-
|
1934
|
-
resolved_base_path = base_path.resolve()
|
1973
|
+
resolved_base_path = pathlib.PurePath(os.path.abspath(base_path))
|
1935
1974
|
|
1936
1975
|
return self._save(
|
1937
1976
|
resolved_glob_path,
|
@@ -1941,8 +1980,8 @@ class Run:
|
|
1941
1980
|
|
1942
1981
|
def _save(
|
1943
1982
|
self,
|
1944
|
-
glob_path: pathlib.
|
1945
|
-
base_path: pathlib.
|
1983
|
+
glob_path: pathlib.PurePath,
|
1984
|
+
base_path: pathlib.PurePath,
|
1946
1985
|
policy: "PolicyName",
|
1947
1986
|
) -> List[str]:
|
1948
1987
|
# Can't use is_relative_to() because that's added in Python 3.9,
|
@@ -1961,55 +2000,67 @@ class Run:
|
|
1961
2000
|
with telemetry.context(run=self) as tel:
|
1962
2001
|
tel.feature.save = True
|
1963
2002
|
|
1964
|
-
#
|
1965
|
-
|
1966
|
-
|
1967
|
-
|
2003
|
+
# Files in the files directory matched by the glob, including old and
|
2004
|
+
# new ones.
|
2005
|
+
globbed_files = set(
|
2006
|
+
pathlib.Path(
|
1968
2007
|
self._settings.files_dir,
|
1969
2008
|
).glob(relative_glob_str)
|
1970
|
-
|
2009
|
+
)
|
1971
2010
|
|
1972
|
-
had_symlinked_files = len(
|
2011
|
+
had_symlinked_files = len(globbed_files) > 0
|
1973
2012
|
is_star_glob = "*" in relative_glob_str
|
1974
2013
|
|
1975
2014
|
# The base_path may itself be a glob, so we can't do
|
1976
2015
|
# base_path.glob(relative_glob_str)
|
1977
2016
|
for path_str in glob.glob(str(base_path / relative_glob_str)):
|
1978
|
-
|
2017
|
+
source_path = pathlib.Path(path_str).absolute()
|
1979
2018
|
|
1980
2019
|
# We can't use relative_to() because base_path may be a glob.
|
1981
|
-
|
2020
|
+
relative_path = pathlib.Path(*source_path.parts[len(base_path.parts) :])
|
1982
2021
|
|
1983
|
-
|
2022
|
+
target_path = pathlib.Path(self._settings.files_dir, relative_path)
|
2023
|
+
globbed_files.add(target_path)
|
1984
2024
|
|
1985
|
-
|
1986
|
-
|
2025
|
+
# If the file is already where it needs to be, don't create a symlink.
|
2026
|
+
if source_path.resolve() == target_path.resolve():
|
2027
|
+
continue
|
2028
|
+
|
2029
|
+
target_path.parent.mkdir(parents=True, exist_ok=True)
|
1987
2030
|
|
1988
2031
|
# Delete the symlink if it exists.
|
1989
2032
|
try:
|
1990
|
-
|
2033
|
+
target_path.unlink()
|
1991
2034
|
except FileNotFoundError:
|
1992
2035
|
# In Python 3.8, we would pass missing_ok=True, but as of now
|
1993
2036
|
# we support down to Python 3.7.
|
1994
2037
|
pass
|
1995
2038
|
|
1996
|
-
|
2039
|
+
target_path.symlink_to(source_path)
|
1997
2040
|
|
1998
2041
|
# Inform users that new files aren't detected automatically.
|
1999
2042
|
if not had_symlinked_files and is_star_glob:
|
2000
|
-
file_str = f"{len(
|
2001
|
-
if len(
|
2043
|
+
file_str = f"{len(globbed_files)} file"
|
2044
|
+
if len(globbed_files) > 1:
|
2002
2045
|
file_str += "s"
|
2003
2046
|
wandb.termwarn(
|
2004
2047
|
f"Symlinked {file_str} into the W&B run directory, "
|
2005
2048
|
"call wandb.save again to sync new files."
|
2006
2049
|
)
|
2007
2050
|
|
2008
|
-
files_dict: FilesDict = {
|
2051
|
+
files_dict: FilesDict = {
|
2052
|
+
"files": [
|
2053
|
+
(
|
2054
|
+
GlobStr(str(f.relative_to(self._settings.files_dir))),
|
2055
|
+
policy,
|
2056
|
+
)
|
2057
|
+
for f in globbed_files
|
2058
|
+
]
|
2059
|
+
}
|
2009
2060
|
if self._backend and self._backend.interface:
|
2010
2061
|
self._backend.interface.publish_files(files_dict)
|
2011
2062
|
|
2012
|
-
return
|
2063
|
+
return [str(f) for f in globbed_files]
|
2013
2064
|
|
2014
2065
|
@_run_decorator._attach
|
2015
2066
|
def restore(
|
@@ -2341,16 +2392,17 @@ class Run:
|
|
2341
2392
|
if self._settings._offline:
|
2342
2393
|
return
|
2343
2394
|
if self._backend and self._backend.interface:
|
2344
|
-
|
2345
|
-
|
2346
|
-
|
2347
|
-
|
2348
|
-
|
2349
|
-
|
2350
|
-
|
2351
|
-
|
2352
|
-
|
2353
|
-
|
2395
|
+
if not self._settings._disable_update_check:
|
2396
|
+
logger.info("communicating current version")
|
2397
|
+
version_handle = self._backend.interface.deliver_check_version(
|
2398
|
+
current_version=wandb.__version__
|
2399
|
+
)
|
2400
|
+
version_result = version_handle.wait(timeout=30)
|
2401
|
+
if not version_result:
|
2402
|
+
version_handle.abandon()
|
2403
|
+
else:
|
2404
|
+
self._check_version = version_result.response.check_version_response
|
2405
|
+
logger.info("got version response %s", self._check_version)
|
2354
2406
|
|
2355
2407
|
def _on_start(self) -> None:
|
2356
2408
|
# would like to move _set_global to _on_ready to unify _on_start and _on_attach
|
@@ -2425,6 +2477,8 @@ class Run:
|
|
2425
2477
|
self._telemetry_obj_active = True
|
2426
2478
|
self._telemetry_flush()
|
2427
2479
|
|
2480
|
+
self._detect_and_apply_job_inputs()
|
2481
|
+
|
2428
2482
|
# object is about to be returned to the user, don't let them modify it
|
2429
2483
|
self._freeze()
|
2430
2484
|
|
@@ -2432,6 +2486,12 @@ class Run:
|
|
2432
2486
|
if os.path.exists(self._settings.resume_fname):
|
2433
2487
|
os.remove(self._settings.resume_fname)
|
2434
2488
|
|
2489
|
+
def _detect_and_apply_job_inputs(self) -> None:
|
2490
|
+
"""If the user has staged launch inputs, apply them to the run."""
|
2491
|
+
from wandb.sdk.launch.inputs.internal import StagedLaunchInputs
|
2492
|
+
|
2493
|
+
StagedLaunchInputs().apply(self)
|
2494
|
+
|
2435
2495
|
def _make_job_source_reqs(self) -> Tuple[List[str], Dict[str, Any], Dict[str, Any]]:
|
2436
2496
|
from wandb.util import working_set
|
2437
2497
|
|
@@ -2690,12 +2750,23 @@ class Run:
|
|
2690
2750
|
if i not in valid:
|
2691
2751
|
raise wandb.Error(f"Unhandled define_metric() arg: summary op: {i}")
|
2692
2752
|
summary_ops.append(i)
|
2753
|
+
with telemetry.context(run=self) as tel:
|
2754
|
+
tel.feature.metric_summary = True
|
2693
2755
|
goal_cleaned: Optional[str] = None
|
2694
2756
|
if goal is not None:
|
2695
2757
|
goal_cleaned = goal[:3].lower()
|
2696
2758
|
valid_goal = {"min", "max"}
|
2697
2759
|
if goal_cleaned not in valid_goal:
|
2698
2760
|
raise wandb.Error(f"Unhandled define_metric() arg: goal: {goal}")
|
2761
|
+
with telemetry.context(run=self) as tel:
|
2762
|
+
tel.feature.metric_goal = True
|
2763
|
+
if hidden:
|
2764
|
+
with telemetry.context(run=self) as tel:
|
2765
|
+
tel.feature.metric_hidden = True
|
2766
|
+
if step_sync:
|
2767
|
+
with telemetry.context(run=self) as tel:
|
2768
|
+
tel.feature.metric_step_sync = True
|
2769
|
+
|
2699
2770
|
m = wandb_metric.Metric(
|
2700
2771
|
name=name,
|
2701
2772
|
step_metric=step_metric,
|
@@ -3158,13 +3229,13 @@ class Run:
|
|
3158
3229
|
)
|
3159
3230
|
if entity and artifact._source_entity and entity != artifact._source_entity:
|
3160
3231
|
raise ValueError(
|
3161
|
-
f"Artifact {artifact.name} is owned by entity
|
3162
|
-
f"moved to '{
|
3232
|
+
f"Artifact {artifact.name} is owned by entity "
|
3233
|
+
f"'{artifact._source_entity}'; it can't be moved to '{entity}'"
|
3163
3234
|
)
|
3164
3235
|
if project and artifact._source_project and project != artifact._source_project:
|
3165
3236
|
raise ValueError(
|
3166
|
-
f"Artifact {artifact.name} exists in project
|
3167
|
-
f"moved to '{
|
3237
|
+
f"Artifact {artifact.name} exists in project "
|
3238
|
+
f"'{artifact._source_project}'; it can't be moved to '{project}'"
|
3168
3239
|
)
|
3169
3240
|
|
3170
3241
|
def _prepare_artifact(
|
@@ -3291,8 +3362,8 @@ class Run:
|
|
3291
3362
|
path: (str) path to downloaded model artifact file(s).
|
3292
3363
|
"""
|
3293
3364
|
artifact = self.use_artifact(artifact_or_name=name)
|
3294
|
-
assert
|
3295
|
-
artifact.type.lower()
|
3365
|
+
assert (
|
3366
|
+
"model" in str(artifact.type.lower())
|
3296
3367
|
), "You can only use this method for 'model' artifacts. For an artifact to be a 'model' artifact, its type property must contain the substring 'model'."
|
3297
3368
|
path = artifact.download()
|
3298
3369
|
|
@@ -3384,8 +3455,8 @@ class Run:
|
|
3384
3455
|
public_api = self._public_api()
|
3385
3456
|
try:
|
3386
3457
|
artifact = public_api.artifact(name=f"{name}:latest")
|
3387
|
-
assert
|
3388
|
-
artifact.type.lower()
|
3458
|
+
assert (
|
3459
|
+
"model" in str(artifact.type.lower())
|
3389
3460
|
), "You can only use this method for 'model' artifacts. For an artifact to be a 'model' artifact, its type property must contain the substring 'model'."
|
3390
3461
|
artifact = self._log_artifact(
|
3391
3462
|
artifact_or_path=path, name=name, type=artifact.type
|
@@ -3585,7 +3656,7 @@ class Run:
|
|
3585
3656
|
if settings._offline or settings.silent:
|
3586
3657
|
return
|
3587
3658
|
|
3588
|
-
|
3659
|
+
run_url = settings.run_url
|
3589
3660
|
project_url = settings.project_url
|
3590
3661
|
sweep_url = settings.sweep_url
|
3591
3662
|
|
@@ -3596,7 +3667,7 @@ class Run:
|
|
3596
3667
|
|
3597
3668
|
if printer._html:
|
3598
3669
|
if not wandb.jupyter.maybe_display():
|
3599
|
-
run_line = f"<strong>{printer.link(
|
3670
|
+
run_line = f"<strong>{printer.link(run_url, run_name)}</strong>"
|
3600
3671
|
project_line, sweep_line = "", ""
|
3601
3672
|
|
3602
3673
|
# TODO(settings): make settings the source of truth
|
@@ -3628,7 +3699,7 @@ class Run:
|
|
3628
3699
|
f'{printer.emoji("broom")} View sweep at {printer.link(sweep_url)}'
|
3629
3700
|
)
|
3630
3701
|
printer.display(
|
3631
|
-
f'{printer.emoji("rocket")} View run at {printer.link(
|
3702
|
+
f'{printer.emoji("rocket")} View run at {printer.link(run_url)}',
|
3632
3703
|
)
|
3633
3704
|
|
3634
3705
|
# TODO(settings) use `wandb_settings` (if self.settings.anonymous == "true":)
|
@@ -3643,7 +3714,7 @@ class Run:
|
|
3643
3714
|
# FOOTER
|
3644
3715
|
# ------------------------------------------------------------------------------
|
3645
3716
|
# Note: All the footer methods are static methods since we want to share the printing logic
|
3646
|
-
# with the service execution path that doesn't have
|
3717
|
+
# with the service execution path that doesn't have access to the run instance
|
3647
3718
|
@staticmethod
|
3648
3719
|
def _footer(
|
3649
3720
|
sampled_history: Optional["SampledHistoryResponse"] = None,
|
@@ -3871,10 +3942,13 @@ class Run:
|
|
3871
3942
|
else:
|
3872
3943
|
info = []
|
3873
3944
|
if settings.run_name and settings.run_url:
|
3874
|
-
|
3875
|
-
|
3876
|
-
|
3877
|
-
|
3945
|
+
info.append(
|
3946
|
+
f"{printer.emoji('rocket')} View run {printer.name(settings.run_name)} at: {printer.link(settings.run_url)}"
|
3947
|
+
)
|
3948
|
+
if settings.project_url:
|
3949
|
+
info.append(
|
3950
|
+
f"{printer.emoji('star')} View project at: {printer.link(settings.project_url)}"
|
3951
|
+
)
|
3878
3952
|
if poll_exit_response and poll_exit_response.file_counts:
|
3879
3953
|
logger.info("logging synced files")
|
3880
3954
|
file_counts = poll_exit_response.file_counts
|
@@ -3943,11 +4017,11 @@ class Run:
|
|
3943
4017
|
|
3944
4018
|
# Render summary if available
|
3945
4019
|
if summary:
|
3946
|
-
final_summary = {
|
3947
|
-
|
3948
|
-
|
3949
|
-
|
3950
|
-
|
4020
|
+
final_summary = {}
|
4021
|
+
for item in summary.item:
|
4022
|
+
if item.key.startswith("_") or len(item.nested_key) > 0:
|
4023
|
+
continue
|
4024
|
+
final_summary[item.key] = json.loads(item.value_json)
|
3951
4025
|
|
3952
4026
|
logger.info("rendering summary")
|
3953
4027
|
summary_rows = []
|