wandb 0.17.6__py3-none-any.whl → 0.17.8__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- package_readme.md +47 -53
- wandb/__init__.py +10 -19
- wandb/__init__.pyi +964 -0
- wandb/agents/pyagent.py +1 -2
- wandb/bin/nvidia_gpu_stats +0 -0
- wandb/cli/cli.py +21 -0
- wandb/data_types.py +4 -3
- wandb/env.py +13 -0
- wandb/integration/keras/__init__.py +2 -5
- wandb/integration/keras/callbacks/metrics_logger.py +10 -4
- wandb/integration/keras/callbacks/model_checkpoint.py +0 -5
- wandb/integration/keras/keras.py +11 -0
- wandb/integration/kfp/wandb_logging.py +1 -1
- wandb/integration/lightning/fabric/logger.py +1 -1
- wandb/integration/openai/fine_tuning.py +13 -5
- wandb/integration/ultralytics/pose_utils.py +0 -1
- wandb/proto/v3/wandb_internal_pb2.py +24 -24
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_telemetry_pb2.py +12 -12
- wandb/proto/v4/wandb_internal_pb2.py +24 -24
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_telemetry_pb2.py +12 -12
- wandb/proto/v5/wandb_internal_pb2.py +24 -24
- wandb/proto/v5/wandb_settings_pb2.py +2 -2
- wandb/proto/v5/wandb_telemetry_pb2.py +12 -12
- wandb/proto/wandb_deprecated.py +2 -0
- wandb/sdk/artifacts/artifact.py +22 -26
- wandb/sdk/artifacts/artifact_manifest_entry.py +10 -2
- wandb/sdk/artifacts/storage_handlers/gcs_handler.py +31 -0
- wandb/sdk/data_types/_dtypes.py +5 -5
- wandb/sdk/data_types/base_types/media.py +3 -1
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +3 -1
- wandb/sdk/data_types/helper_types/image_mask.py +3 -1
- wandb/sdk/data_types/image.py +3 -1
- wandb/sdk/data_types/saved_model.py +3 -1
- wandb/sdk/data_types/video.py +2 -2
- wandb/sdk/interface/interface.py +17 -16
- wandb/sdk/interface/interface_shared.py +6 -9
- wandb/sdk/internal/datastore.py +1 -1
- wandb/sdk/internal/handler.py +5 -3
- wandb/sdk/internal/internal.py +1 -1
- wandb/sdk/internal/job_builder.py +5 -2
- wandb/sdk/internal/tb_watcher.py +2 -2
- wandb/sdk/internal/update.py +2 -2
- wandb/sdk/launch/builder/kaniko_builder.py +13 -5
- wandb/sdk/launch/create_job.py +2 -0
- wandb/sdk/lib/_settings_toposort_generated.py +1 -0
- wandb/sdk/lib/apikey.py +1 -1
- wandb/sdk/service/service.py +7 -2
- wandb/sdk/service/streams.py +2 -4
- wandb/sdk/wandb_config.py +4 -1
- wandb/sdk/wandb_init.py +59 -8
- wandb/sdk/wandb_manager.py +0 -3
- wandb/sdk/wandb_require.py +22 -1
- wandb/sdk/wandb_run.py +137 -92
- wandb/sdk/wandb_settings.py +26 -2
- wandb/sdk/wandb_setup.py +69 -3
- wandb/sdk/wandb_sweep.py +5 -2
- wandb/testing/relay.py +7 -1
- {wandb-0.17.6.dist-info → wandb-0.17.8.dist-info}/METADATA +48 -54
- {wandb-0.17.6.dist-info → wandb-0.17.8.dist-info}/RECORD +64 -63
- {wandb-0.17.6.dist-info → wandb-0.17.8.dist-info}/WHEEL +0 -0
- {wandb-0.17.6.dist-info → wandb-0.17.8.dist-info}/entry_points.txt +0 -0
- {wandb-0.17.6.dist-info → wandb-0.17.8.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/wandb_run.py
CHANGED
@@ -42,6 +42,7 @@ from wandb._globals import _datatypes_set_callback
|
|
42
42
|
from wandb.apis import internal, public
|
43
43
|
from wandb.apis.internal import Api
|
44
44
|
from wandb.apis.public import Api as PublicApi
|
45
|
+
from wandb.errors import CommError
|
45
46
|
from wandb.proto.wandb_internal_pb2 import (
|
46
47
|
MetricRecord,
|
47
48
|
PollExitResponse,
|
@@ -69,7 +70,7 @@ from wandb.viz import CustomChart, Visualize, custom_chart
|
|
69
70
|
|
70
71
|
from . import wandb_config, wandb_metric, wandb_summary
|
71
72
|
from .data_types._dtypes import TypeRegistry
|
72
|
-
from .interface.interface import GlobStr, InterfaceBase
|
73
|
+
from .interface.interface import FilesDict, GlobStr, InterfaceBase, PolicyName
|
73
74
|
from .interface.summary_record import SummaryRecord
|
74
75
|
from .lib import (
|
75
76
|
config_util,
|
@@ -89,6 +90,7 @@ from .lib.printer import get_printer
|
|
89
90
|
from .lib.proto_util import message_to_dict
|
90
91
|
from .lib.reporting import Reporter
|
91
92
|
from .lib.wburls import wburls
|
93
|
+
from .wandb_alerts import AlertLevel
|
92
94
|
from .wandb_settings import Settings
|
93
95
|
from .wandb_setup import _WandbSetup
|
94
96
|
|
@@ -108,9 +110,7 @@ if TYPE_CHECKING:
|
|
108
110
|
SampledHistoryResponse,
|
109
111
|
)
|
110
112
|
|
111
|
-
from .interface.interface import FilesDict, PolicyName
|
112
113
|
from .lib.printer import PrinterJupyter, PrinterTerm
|
113
|
-
from .wandb_alerts import AlertLevel
|
114
114
|
|
115
115
|
class GitSourceDict(TypedDict):
|
116
116
|
remote: str
|
@@ -171,10 +171,12 @@ class RunStatusChecker:
|
|
171
171
|
interface: InterfaceBase,
|
172
172
|
stop_polling_interval: int = 15,
|
173
173
|
retry_polling_interval: int = 5,
|
174
|
+
internal_messages_polling_interval: int = 10,
|
174
175
|
) -> None:
|
175
176
|
self._interface = interface
|
176
177
|
self._stop_polling_interval = stop_polling_interval
|
177
178
|
self._retry_polling_interval = retry_polling_interval
|
179
|
+
self._internal_messages_polling_interval = internal_messages_polling_interval
|
178
180
|
|
179
181
|
self._join_event = threading.Event()
|
180
182
|
|
@@ -295,7 +297,7 @@ class RunStatusChecker:
|
|
295
297
|
if stop_status.run_should_stop:
|
296
298
|
# TODO(frz): This check is required
|
297
299
|
# until WB-3606 is resolved on server side.
|
298
|
-
if not wandb.agents.pyagent.is_running():
|
300
|
+
if not wandb.agents.pyagent.is_running(): # type: ignore
|
299
301
|
thread.interrupt_main()
|
300
302
|
return
|
301
303
|
|
@@ -323,7 +325,7 @@ class RunStatusChecker:
|
|
323
325
|
self._loop_check_status(
|
324
326
|
lock=self._internal_messages_lock,
|
325
327
|
set_handle=lambda x: setattr(self, "_internal_messages_handle", x),
|
326
|
-
timeout=
|
328
|
+
timeout=self._internal_messages_polling_interval,
|
327
329
|
request=self._interface.deliver_internal_messages,
|
328
330
|
process=_process_internal_messages,
|
329
331
|
)
|
@@ -621,7 +623,7 @@ class Run:
|
|
621
623
|
)
|
622
624
|
self.summary._set_update_callback(self._summary_update_callback)
|
623
625
|
self._step = 0
|
624
|
-
self._torch_history: Optional[wandb.wandb_torch.TorchHistory] = None
|
626
|
+
self._torch_history: Optional[wandb.wandb_torch.TorchHistory] = None # type: ignore
|
625
627
|
|
626
628
|
# todo: eventually would be nice to make this configurable using self._settings._start_time
|
627
629
|
# need to test (jhr): if you set start time to 2 days ago and run a test for 15 minutes,
|
@@ -796,11 +798,15 @@ class Run:
|
|
796
798
|
self._unique_launch_artifact_sequence_names[sequence_name] = item
|
797
799
|
|
798
800
|
def _telemetry_callback(self, telem_obj: telemetry.TelemetryRecord) -> None:
|
801
|
+
if not hasattr(self, "_telemetry_obj"):
|
802
|
+
return
|
799
803
|
self._telemetry_obj.MergeFrom(telem_obj)
|
800
804
|
self._telemetry_obj_dirty = True
|
801
805
|
self._telemetry_flush()
|
802
806
|
|
803
807
|
def _telemetry_flush(self) -> None:
|
808
|
+
if not hasattr(self, "_telemetry_obj"):
|
809
|
+
return
|
804
810
|
if not self._telemetry_obj_active:
|
805
811
|
return
|
806
812
|
if not self._telemetry_obj_dirty:
|
@@ -920,9 +926,9 @@ class Run:
|
|
920
926
|
self.__dict__.update(state)
|
921
927
|
|
922
928
|
@property
|
923
|
-
def _torch(self) -> "wandb.wandb_torch.TorchHistory":
|
929
|
+
def _torch(self) -> "wandb.wandb_torch.TorchHistory": # type: ignore
|
924
930
|
if self._torch_history is None:
|
925
|
-
self._torch_history = wandb.wandb_torch.TorchHistory()
|
931
|
+
self._torch_history = wandb.wandb_torch.TorchHistory() # type: ignore
|
926
932
|
return self._torch_history
|
927
933
|
|
928
934
|
@property
|
@@ -1089,13 +1095,14 @@ class Run:
|
|
1089
1095
|
@_run_decorator._attach
|
1090
1096
|
def mode(self) -> str:
|
1091
1097
|
"""For compatibility with `0.9.x` and earlier, deprecate eventually."""
|
1092
|
-
|
1093
|
-
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1098
|
+
if hasattr(self, "_telemetry_obj"):
|
1099
|
+
deprecate.deprecate(
|
1100
|
+
field_name=deprecate.Deprecated.run__mode,
|
1101
|
+
warning_message=(
|
1102
|
+
"The mode property of wandb.run is deprecated "
|
1103
|
+
"and will be removed in a future release."
|
1104
|
+
),
|
1105
|
+
)
|
1099
1106
|
return "dryrun" if self._settings._offline else "run"
|
1100
1107
|
|
1101
1108
|
@property
|
@@ -1675,15 +1682,15 @@ class Run:
|
|
1675
1682
|
commit: Optional[bool] = None,
|
1676
1683
|
sync: Optional[bool] = None,
|
1677
1684
|
) -> None:
|
1678
|
-
"""
|
1685
|
+
"""Upload run data.
|
1679
1686
|
|
1680
|
-
Use `
|
1687
|
+
Use `log` to log data from runs, such as scalars, images, video,
|
1681
1688
|
histograms, plots, and tables.
|
1682
1689
|
|
1683
1690
|
See our [guides to logging](https://docs.wandb.ai/guides/track/log) for
|
1684
1691
|
live examples, code snippets, best practices, and more.
|
1685
1692
|
|
1686
|
-
The most basic usage is `
|
1693
|
+
The most basic usage is `run.log({"train-loss": 0.5, "accuracy": 0.9})`.
|
1687
1694
|
This will save the loss and accuracy to the run's history and update
|
1688
1695
|
the summary values for these metrics.
|
1689
1696
|
|
@@ -1692,48 +1699,91 @@ class Run:
|
|
1692
1699
|
of the W&B app, or export data to visualize and explore locally, e.g. in
|
1693
1700
|
Jupyter notebooks, with [our API](https://docs.wandb.ai/guides/track/public-api-guide).
|
1694
1701
|
|
1695
|
-
In the UI, summary values show up in the run table to compare single values across runs.
|
1696
|
-
Summary values can also be set directly with `wandb.run.summary["key"] = value`.
|
1697
|
-
|
1698
1702
|
Logged values don't have to be scalars. Logging any wandb object is supported.
|
1699
|
-
For example `
|
1703
|
+
For example `run.log({"example": wandb.Image("myimage.jpg")})` will log an
|
1700
1704
|
example image which will be displayed nicely in the W&B UI.
|
1701
1705
|
See the [reference documentation](https://docs.wandb.com/ref/python/data-types)
|
1702
1706
|
for all of the different supported types or check out our
|
1703
1707
|
[guides to logging](https://docs.wandb.ai/guides/track/log) for examples,
|
1704
1708
|
from 3D molecular structures and segmentation masks to PR curves and histograms.
|
1705
|
-
`wandb.Table`
|
1709
|
+
You can use `wandb.Table` to log structured data. See our
|
1706
1710
|
[guide to logging tables](https://docs.wandb.ai/guides/data-vis/log-tables)
|
1707
1711
|
for details.
|
1708
1712
|
|
1709
|
-
|
1710
|
-
|
1711
|
-
|
1712
|
-
|
1713
|
+
The W&B UI organizes metrics with a forward slash (`/`) in their name
|
1714
|
+
into sections named using the text before the final slash. For example,
|
1715
|
+
the following results in two sections named "train" and "validate":
|
1716
|
+
|
1717
|
+
```
|
1718
|
+
run.log({
|
1719
|
+
"train/accuracy": 0.9,
|
1720
|
+
"train/loss": 30,
|
1721
|
+
"validate/accuracy": 0.8,
|
1722
|
+
"validate/loss": 20,
|
1723
|
+
})
|
1724
|
+
```
|
1713
1725
|
|
1714
|
-
|
1715
|
-
|
1716
|
-
If it's inconvenient to log related metrics together
|
1717
|
-
calling `wandb.log({"train-loss": 0.5}, commit=False)` and then
|
1718
|
-
`wandb.log({"accuracy": 0.9})` is equivalent to calling
|
1719
|
-
`wandb.log({"train-loss": 0.5, "accuracy": 0.9})`.
|
1726
|
+
Only one level of nesting is supported; `run.log({"a/b/c": 1})`
|
1727
|
+
produces a section named "a/b".
|
1720
1728
|
|
1721
|
-
`
|
1722
|
-
|
1723
|
-
|
1729
|
+
`run.log` is not intended to be called more than a few times per second.
|
1730
|
+
For optimal performance, limit your logging to once every N iterations,
|
1731
|
+
or collect data over multiple iterations and log it in a single step.
|
1732
|
+
|
1733
|
+
### The W&B step
|
1734
|
+
|
1735
|
+
With basic usage, each call to `log` creates a new "step".
|
1736
|
+
The step must always increase, and it is not possible to log
|
1737
|
+
to a previous step.
|
1738
|
+
|
1739
|
+
Note that you can use any metric as the X axis in charts.
|
1740
|
+
In many cases, it is better to treat the W&B step like
|
1741
|
+
you'd treat a timestamp rather than a training step.
|
1742
|
+
|
1743
|
+
```
|
1744
|
+
# Example: log an "epoch" metric for use as an X axis.
|
1745
|
+
run.log({"epoch": 40, "train-loss": 0.5})
|
1746
|
+
```
|
1747
|
+
|
1748
|
+
See also [define_metric](https://docs.wandb.ai/ref/python/run#define_metric).
|
1749
|
+
|
1750
|
+
It is possible to use multiple `log` invocations to log to
|
1751
|
+
the same step with the `step` and `commit` parameters.
|
1752
|
+
The following are all equivalent:
|
1753
|
+
|
1754
|
+
```
|
1755
|
+
# Normal usage:
|
1756
|
+
run.log({"train-loss": 0.5, "accuracy": 0.8})
|
1757
|
+
run.log({"train-loss": 0.4, "accuracy": 0.9})
|
1758
|
+
|
1759
|
+
# Implicit step without auto-incrementing:
|
1760
|
+
run.log({"train-loss": 0.5}, commit=False)
|
1761
|
+
run.log({"accuracy": 0.8})
|
1762
|
+
run.log({"train-loss": 0.4}, commit=False)
|
1763
|
+
run.log({"accuracy": 0.9})
|
1764
|
+
|
1765
|
+
# Explicit step:
|
1766
|
+
run.log({"train-loss": 0.5}, step=current_step)
|
1767
|
+
run.log({"accuracy": 0.8}, step=current_step)
|
1768
|
+
current_step += 1
|
1769
|
+
run.log({"train-loss": 0.4}, step=current_step)
|
1770
|
+
run.log({"accuracy": 0.9}, step=current_step)
|
1771
|
+
```
|
1724
1772
|
|
1725
1773
|
Arguments:
|
1726
|
-
data:
|
1727
|
-
|
1728
|
-
|
1729
|
-
|
1730
|
-
|
1731
|
-
|
1732
|
-
|
1733
|
-
|
1734
|
-
|
1735
|
-
|
1736
|
-
|
1774
|
+
data: A `dict` with `str` keys and values that are serializable
|
1775
|
+
Python objects including: `int`, `float` and `string`;
|
1776
|
+
any of the `wandb.data_types`; lists, tuples and NumPy arrays
|
1777
|
+
of serializable Python objects; other `dict`s of this
|
1778
|
+
structure.
|
1779
|
+
step: The step number to log. If `None`, then an implicit
|
1780
|
+
auto-incrementing step is used. See the notes in
|
1781
|
+
the description.
|
1782
|
+
commit: If true, finalize and upload the step. If false, then
|
1783
|
+
accumulate data for the step. See the notes in the description.
|
1784
|
+
If `step` is `None`, then the default is `commit=True`;
|
1785
|
+
otherwise, the default is `commit=False`.
|
1786
|
+
sync: This argument is deprecated and does nothing.
|
1737
1787
|
|
1738
1788
|
Examples:
|
1739
1789
|
For more and more detailed examples, see
|
@@ -1885,7 +1935,7 @@ class Run:
|
|
1885
1935
|
self,
|
1886
1936
|
glob_str: Optional[Union[str, os.PathLike]] = None,
|
1887
1937
|
base_path: Optional[Union[str, os.PathLike]] = None,
|
1888
|
-
policy:
|
1938
|
+
policy: PolicyName = "live",
|
1889
1939
|
) -> Union[bool, List[str]]:
|
1890
1940
|
"""Sync one or more files to W&B.
|
1891
1941
|
|
@@ -2158,12 +2208,13 @@ class Run:
|
|
2158
2208
|
@_run_decorator._attach
|
2159
2209
|
def join(self, exit_code: Optional[int] = None) -> None:
|
2160
2210
|
"""Deprecated alias for `finish()` - use finish instead."""
|
2161
|
-
|
2162
|
-
|
2163
|
-
|
2164
|
-
|
2165
|
-
|
2166
|
-
|
2211
|
+
if hasattr(self, "_telemetry_obj"):
|
2212
|
+
deprecate.deprecate(
|
2213
|
+
field_name=deprecate.Deprecated.run__join,
|
2214
|
+
warning_message=(
|
2215
|
+
"wandb.run.join() is deprecated, please use wandb.run.finish()."
|
2216
|
+
),
|
2217
|
+
)
|
2167
2218
|
self._finish(exit_code=exit_code)
|
2168
2219
|
|
2169
2220
|
@_run_decorator._noop_on_finish()
|
@@ -2368,11 +2419,7 @@ class Run:
|
|
2368
2419
|
return
|
2369
2420
|
self._atexit_cleanup_called = True
|
2370
2421
|
|
2371
|
-
exit_code = (
|
2372
|
-
exit_code #
|
2373
|
-
or (self._hooks and self._hooks.exit_code)
|
2374
|
-
or 0
|
2375
|
-
)
|
2422
|
+
exit_code = exit_code or (self._hooks and self._hooks.exit_code) or 0
|
2376
2423
|
self._exit_code = exit_code
|
2377
2424
|
logger.info(f"got exitcode: {exit_code}")
|
2378
2425
|
|
@@ -2389,7 +2436,7 @@ class Run:
|
|
2389
2436
|
self._on_finish()
|
2390
2437
|
|
2391
2438
|
except KeyboardInterrupt:
|
2392
|
-
if not wandb.wandb_agent._is_running():
|
2439
|
+
if not wandb.wandb_agent._is_running(): # type: ignore
|
2393
2440
|
wandb.termerror("Control-C detected -- Run data was not synced")
|
2394
2441
|
raise
|
2395
2442
|
|
@@ -2433,18 +2480,6 @@ class Run:
|
|
2433
2480
|
def _on_init(self) -> None:
|
2434
2481
|
if self._settings._offline:
|
2435
2482
|
return
|
2436
|
-
if self._backend and self._backend.interface:
|
2437
|
-
if not self._settings._disable_update_check:
|
2438
|
-
logger.info("communicating current version")
|
2439
|
-
version_handle = self._backend.interface.deliver_check_version(
|
2440
|
-
current_version=wandb.__version__
|
2441
|
-
)
|
2442
|
-
version_result = version_handle.wait(timeout=30)
|
2443
|
-
if not version_result:
|
2444
|
-
version_handle.abandon()
|
2445
|
-
else:
|
2446
|
-
self._check_version = version_result.response.check_version_response
|
2447
|
-
logger.info("got version response %s", self._check_version)
|
2448
2483
|
|
2449
2484
|
def _on_start(self) -> None:
|
2450
2485
|
# would like to move _set_global to _on_ready to unify _on_start and _on_attach
|
@@ -2453,9 +2488,7 @@ class Run:
|
|
2453
2488
|
# TODO(jupyter) However _header calls _header_run_info that uses wandb.jupyter that uses
|
2454
2489
|
# `wandb.run` and hence breaks
|
2455
2490
|
self._set_globals()
|
2456
|
-
self._header(
|
2457
|
-
self._check_version, settings=self._settings, printer=self._printer
|
2458
|
-
)
|
2491
|
+
self._header(settings=self._settings, printer=self._printer)
|
2459
2492
|
|
2460
2493
|
if self._settings.save_code and self._settings.code_dir is not None:
|
2461
2494
|
self.log_code(self._settings.code_dir)
|
@@ -2640,6 +2673,18 @@ class Run:
|
|
2640
2673
|
|
2641
2674
|
assert self._backend and self._backend.interface
|
2642
2675
|
|
2676
|
+
if not self._settings._disable_update_check:
|
2677
|
+
logger.info("communicating current version")
|
2678
|
+
version_handle = self._backend.interface.deliver_check_version(
|
2679
|
+
current_version=wandb.__version__
|
2680
|
+
)
|
2681
|
+
version_result = version_handle.wait(timeout=10)
|
2682
|
+
if not version_result:
|
2683
|
+
version_handle.abandon()
|
2684
|
+
else:
|
2685
|
+
self._check_version = version_result.response.check_version_response
|
2686
|
+
logger.info("got version response %s", self._check_version)
|
2687
|
+
|
2643
2688
|
# get the server info before starting the defer state machine as
|
2644
2689
|
# it will stop communication with the server
|
2645
2690
|
server_info_handle = self._backend.interface.deliver_request_server_info()
|
@@ -2839,12 +2884,12 @@ class Run:
|
|
2839
2884
|
idx=None,
|
2840
2885
|
log_graph=False,
|
2841
2886
|
) -> None:
|
2842
|
-
wandb.watch(models, criterion, log, log_freq, idx, log_graph)
|
2887
|
+
wandb.watch(models, criterion, log, log_freq, idx, log_graph) # type: ignore
|
2843
2888
|
|
2844
2889
|
# TODO(jhr): annotate this
|
2845
2890
|
@_run_decorator._attach
|
2846
2891
|
def unwatch(self, models=None) -> None: # type: ignore
|
2847
|
-
wandb.unwatch(models=models)
|
2892
|
+
wandb.unwatch(models=models) # type: ignore
|
2848
2893
|
|
2849
2894
|
# TODO(kdg): remove all artifact swapping logic
|
2850
2895
|
def _swap_artifact_name(self, artifact_name: str, use_as: Optional[str]) -> str:
|
@@ -3514,7 +3559,7 @@ class Run:
|
|
3514
3559
|
artifact = self._log_artifact(
|
3515
3560
|
artifact_or_path=path, name=name, type=artifact.type
|
3516
3561
|
)
|
3517
|
-
except (ValueError,
|
3562
|
+
except (ValueError, CommError):
|
3518
3563
|
artifact = self._log_artifact(
|
3519
3564
|
artifact_or_path=path, name=name, type="model"
|
3520
3565
|
)
|
@@ -3534,13 +3579,13 @@ class Run:
|
|
3534
3579
|
Arguments:
|
3535
3580
|
title: (str) The title of the alert, must be less than 64 characters long.
|
3536
3581
|
text: (str) The text body of the alert.
|
3537
|
-
level: (str or
|
3582
|
+
level: (str or AlertLevel, optional) The alert level to use, either: `INFO`, `WARN`, or `ERROR`.
|
3538
3583
|
wait_duration: (int, float, or timedelta, optional) The time to wait (in seconds) before sending another
|
3539
3584
|
alert with this title.
|
3540
3585
|
"""
|
3541
|
-
level = level or
|
3542
|
-
level_str: str = level.value if isinstance(level,
|
3543
|
-
if level_str not in {lev.value for lev in
|
3586
|
+
level = level or AlertLevel.INFO
|
3587
|
+
level_str: str = level.value if isinstance(level, AlertLevel) else level
|
3588
|
+
if level_str not in {lev.value for lev in AlertLevel}:
|
3544
3589
|
raise ValueError("level must be one of 'INFO', 'WARN', or 'ERROR'")
|
3545
3590
|
|
3546
3591
|
wait_duration = wait_duration or timedelta(minutes=1)
|
@@ -3633,14 +3678,10 @@ class Run:
|
|
3633
3678
|
# with the service execution path that doesn't have access to the run instance
|
3634
3679
|
@staticmethod
|
3635
3680
|
def _header(
|
3636
|
-
check_version: Optional["CheckVersionResponse"] = None,
|
3637
3681
|
*,
|
3638
3682
|
settings: "Settings",
|
3639
3683
|
printer: Union["PrinterTerm", "PrinterJupyter"],
|
3640
3684
|
) -> None:
|
3641
|
-
Run._header_version_check_info(
|
3642
|
-
check_version, settings=settings, printer=printer
|
3643
|
-
)
|
3644
3685
|
Run._header_wandb_version_info(settings=settings, printer=printer)
|
3645
3686
|
Run._header_sync_info(settings=settings, printer=printer)
|
3646
3687
|
Run._header_run_info(settings=settings, printer=printer)
|
@@ -3661,7 +3702,9 @@ class Run:
|
|
3661
3702
|
printer.display(check_version.yank_message, level="warn")
|
3662
3703
|
|
3663
3704
|
printer.display(
|
3664
|
-
check_version.upgrade_message,
|
3705
|
+
check_version.upgrade_message,
|
3706
|
+
off=not check_version.upgrade_message,
|
3707
|
+
level="warn",
|
3665
3708
|
)
|
3666
3709
|
|
3667
3710
|
@staticmethod
|
@@ -3723,12 +3766,12 @@ class Run:
|
|
3723
3766
|
return
|
3724
3767
|
|
3725
3768
|
if printer._html:
|
3726
|
-
if not wandb.jupyter.maybe_display():
|
3769
|
+
if not wandb.jupyter.maybe_display(): # type: ignore
|
3727
3770
|
run_line = f"<strong>{printer.link(run_url, run_name)}</strong>"
|
3728
3771
|
project_line, sweep_line = "", ""
|
3729
3772
|
|
3730
3773
|
# TODO(settings): make settings the source of truth
|
3731
|
-
if not wandb.jupyter.quiet():
|
3774
|
+
if not wandb.jupyter.quiet(): # type: ignore
|
3732
3775
|
doc_html = printer.link(wburls.get("doc_run"), "docs")
|
3733
3776
|
|
3734
3777
|
project_html = printer.link(project_url, "Weights & Biases")
|
@@ -4177,9 +4220,11 @@ class Run:
|
|
4177
4220
|
printer.display(check_version.yank_message, level="warn")
|
4178
4221
|
|
4179
4222
|
# only display upgrade message if packages are bad
|
4180
|
-
|
4181
|
-
|
4182
|
-
|
4223
|
+
if check_version.upgrade_message:
|
4224
|
+
printer.display(
|
4225
|
+
check_version.upgrade_message,
|
4226
|
+
level="warn",
|
4227
|
+
)
|
4183
4228
|
|
4184
4229
|
@staticmethod
|
4185
4230
|
def _footer_notify_wandb_core(
|
wandb/sdk/wandb_settings.py
CHANGED
@@ -356,6 +356,7 @@ class SettingsData:
|
|
356
356
|
_python: str
|
357
357
|
_runqueue_item_id: str
|
358
358
|
_require_core: bool
|
359
|
+
_require_legacy_service: bool
|
359
360
|
_save_requirements: bool
|
360
361
|
_service_transport: str
|
361
362
|
_service_wait: float
|
@@ -650,7 +651,7 @@ class Settings(SettingsData):
|
|
650
651
|
},
|
651
652
|
_disable_service={
|
652
653
|
"value": False,
|
653
|
-
"preprocessor":
|
654
|
+
"preprocessor": self._process_disable_service,
|
654
655
|
"is_policy": True,
|
655
656
|
},
|
656
657
|
_disable_setproctitle={"value": False, "preprocessor": _str_as_bool},
|
@@ -718,6 +719,7 @@ class Settings(SettingsData):
|
|
718
719
|
"preprocessor": _str_as_json,
|
719
720
|
},
|
720
721
|
_require_core={"value": False, "preprocessor": _str_as_bool},
|
722
|
+
_require_legacy_service={"value": False, "preprocessor": _str_as_bool},
|
721
723
|
_save_requirements={"value": True, "preprocessor": _str_as_bool},
|
722
724
|
_service_wait={
|
723
725
|
"value": 30,
|
@@ -1172,6 +1174,16 @@ class Settings(SettingsData):
|
|
1172
1174
|
|
1173
1175
|
return True
|
1174
1176
|
|
1177
|
+
@staticmethod
|
1178
|
+
def _process_disable_service(value: Union[str, bool]) -> bool:
|
1179
|
+
value = _str_as_bool(value)
|
1180
|
+
if value:
|
1181
|
+
wandb.termwarn(
|
1182
|
+
"Disabling the wandb service is deprecated as of version 0.18.0 and will be removed in version 0.19.0.",
|
1183
|
+
repeat=False,
|
1184
|
+
)
|
1185
|
+
return value
|
1186
|
+
|
1175
1187
|
@staticmethod
|
1176
1188
|
def _validate__service_wait(value: float) -> bool:
|
1177
1189
|
if value <= 0:
|
@@ -1715,7 +1727,7 @@ class Settings(SettingsData):
|
|
1715
1727
|
|
1716
1728
|
# Attempt to get notebook information if not already set by the user
|
1717
1729
|
if self._jupyter and (self.notebook_name is None or self.notebook_name == ""):
|
1718
|
-
meta = wandb.jupyter.notebook_metadata(self.silent)
|
1730
|
+
meta = wandb.jupyter.notebook_metadata(self.silent) # type: ignore
|
1719
1731
|
settings["_jupyter_path"] = meta.get("path")
|
1720
1732
|
settings["_jupyter_name"] = meta.get("name")
|
1721
1733
|
settings["_jupyter_root"] = meta.get("root")
|
@@ -1875,9 +1887,21 @@ class Settings(SettingsData):
|
|
1875
1887
|
|
1876
1888
|
# update settings
|
1877
1889
|
self.update(init_settings, source=Source.INIT)
|
1890
|
+
self._handle_fork_logic()
|
1878
1891
|
self._handle_rewind_logic()
|
1879
1892
|
self._handle_resume_logic()
|
1880
1893
|
|
1894
|
+
def _handle_fork_logic(self) -> None:
|
1895
|
+
if self.fork_from is None:
|
1896
|
+
return
|
1897
|
+
|
1898
|
+
if self.run_id is not None and (self.fork_from.run == self.run_id):
|
1899
|
+
raise ValueError(
|
1900
|
+
"Provided `run_id` is the same as the run to `fork_from`. "
|
1901
|
+
"Please provide a different `run_id` or remove the `run_id` argument. "
|
1902
|
+
"If you want to rewind the current run, please use `resume_from` instead."
|
1903
|
+
)
|
1904
|
+
|
1881
1905
|
def _handle_rewind_logic(self) -> None:
|
1882
1906
|
if self.resume_from is None:
|
1883
1907
|
return
|
wandb/sdk/wandb_setup.py
CHANGED
@@ -18,6 +18,7 @@ import threading
|
|
18
18
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
19
19
|
|
20
20
|
import wandb
|
21
|
+
from wandb.sdk.lib import import_hooks
|
21
22
|
|
22
23
|
from . import wandb_manager, wandb_settings
|
23
24
|
from .lib import config_util, server, tracelog
|
@@ -268,6 +269,8 @@ class _WandbSetup__WandbSetup: # noqa: N801
|
|
268
269
|
self._config = config_dict
|
269
270
|
|
270
271
|
def _teardown(self, exit_code: Optional[int] = None) -> None:
|
272
|
+
import_hooks.unregister_all_post_import_hooks()
|
273
|
+
|
271
274
|
if not self._manager:
|
272
275
|
return
|
273
276
|
|
@@ -319,14 +322,77 @@ def _setup(
|
|
319
322
|
return wl
|
320
323
|
|
321
324
|
|
322
|
-
def setup(
|
323
|
-
|
324
|
-
|
325
|
+
def setup(settings: Optional[Settings] = None) -> Optional["_WandbSetup"]:
|
326
|
+
"""Prepares W&B for use in the current process and its children.
|
327
|
+
|
328
|
+
You can usually ignore this as it is implicitly called by `wandb.init()`.
|
329
|
+
|
330
|
+
When using wandb in multiple processes, calling `wandb.setup()`
|
331
|
+
in the parent process before starting child processes may improve
|
332
|
+
performance and resource utilization.
|
333
|
+
|
334
|
+
Note that `wandb.setup()` modifies `os.environ`, and it is important
|
335
|
+
that child processes inherit the modified environment variables.
|
336
|
+
|
337
|
+
See also `wandb.teardown()`.
|
338
|
+
|
339
|
+
Args:
|
340
|
+
settings (Optional[Union[Dict[str, Any], wandb.Settings]]): Configuration settings
|
341
|
+
to apply globally. These can be overridden by subsequent `wandb.init()` calls.
|
342
|
+
|
343
|
+
Example:
|
344
|
+
```python
|
345
|
+
import multiprocessing
|
346
|
+
|
347
|
+
import wandb
|
348
|
+
|
349
|
+
|
350
|
+
def run_experiment(params):
|
351
|
+
with wandb.init(config=params):
|
352
|
+
# Run experiment
|
353
|
+
pass
|
354
|
+
|
355
|
+
|
356
|
+
if __name__ == "__main__":
|
357
|
+
# Start backend and set global config
|
358
|
+
wandb.setup(settings={"project": "my_project"})
|
359
|
+
|
360
|
+
# Define experiment parameters
|
361
|
+
experiment_params = [
|
362
|
+
{"learning_rate": 0.01, "epochs": 10},
|
363
|
+
{"learning_rate": 0.001, "epochs": 20},
|
364
|
+
]
|
365
|
+
|
366
|
+
# Start multiple processes, each running a separate experiment
|
367
|
+
processes = []
|
368
|
+
for params in experiment_params:
|
369
|
+
p = multiprocessing.Process(target=run_experiment, args=(params,))
|
370
|
+
p.start()
|
371
|
+
processes.append(p)
|
372
|
+
|
373
|
+
# Wait for all processes to complete
|
374
|
+
for p in processes:
|
375
|
+
p.join()
|
376
|
+
|
377
|
+
# Optional: Explicitly shut down the backend
|
378
|
+
wandb.teardown()
|
379
|
+
```
|
380
|
+
"""
|
325
381
|
ret = _setup(settings=settings)
|
326
382
|
return ret
|
327
383
|
|
328
384
|
|
329
385
|
def teardown(exit_code: Optional[int] = None) -> None:
|
386
|
+
"""Waits for wandb to finish and frees resources.
|
387
|
+
|
388
|
+
Completes any runs that were not explicitly finished
|
389
|
+
using `run.finish()` and waits for all data to be uploaded.
|
390
|
+
|
391
|
+
It is recommended to call this at the end of a session
|
392
|
+
that used `wandb.setup()`. It is invoked automatically
|
393
|
+
in an `atexit` hook, but this is not reliable in certain setups
|
394
|
+
such as when using Python's `multiprocessing` module.
|
395
|
+
"""
|
330
396
|
setup_instance = _WandbSetup._instance
|
331
397
|
_WandbSetup._instance = None
|
332
398
|
|
wandb/sdk/wandb_sweep.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
import urllib.parse
|
2
|
-
from typing import Callable, Dict, List, Optional, Union
|
2
|
+
from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union
|
3
3
|
|
4
4
|
import wandb
|
5
5
|
from wandb import env
|
@@ -8,6 +8,9 @@ from wandb.sdk.launch.sweeps.utils import handle_sweep_config_violations
|
|
8
8
|
|
9
9
|
from . import wandb_login
|
10
10
|
|
11
|
+
if TYPE_CHECKING:
|
12
|
+
from wandb.wandb_controller import _WandbController
|
13
|
+
|
11
14
|
|
12
15
|
def _get_sweep_url(api, sweep_id):
|
13
16
|
"""Return sweep url if we can figure it out."""
|
@@ -93,7 +96,7 @@ def controller(
|
|
93
96
|
sweep_id_or_config: Optional[Union[str, Dict]] = None,
|
94
97
|
entity: Optional[str] = None,
|
95
98
|
project: Optional[str] = None,
|
96
|
-
):
|
99
|
+
) -> "_WandbController":
|
97
100
|
"""Public sweep controller constructor.
|
98
101
|
|
99
102
|
Usage:
|