wandb 0.16.4__py3-none-any.whl → 0.16.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wandb/__init__.py +2 -2
- wandb/agents/pyagent.py +1 -1
- wandb/apis/public/api.py +6 -6
- wandb/apis/reports/v2/interface.py +4 -8
- wandb/apis/reports/v2/internal.py +12 -45
- wandb/cli/cli.py +29 -5
- wandb/integration/openai/fine_tuning.py +74 -37
- wandb/integration/ultralytics/callback.py +0 -1
- wandb/proto/v3/wandb_internal_pb2.py +332 -312
- wandb/proto/v3/wandb_settings_pb2.py +13 -3
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_internal_pb2.py +316 -312
- wandb/proto/v4/wandb_settings_pb2.py +5 -3
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/sdk/artifacts/artifact.py +92 -26
- wandb/sdk/artifacts/artifact_manifest_entry.py +6 -1
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -0
- wandb/sdk/artifacts/artifact_saver.py +16 -36
- wandb/sdk/artifacts/storage_handler.py +2 -1
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +13 -5
- wandb/sdk/interface/interface.py +60 -15
- wandb/sdk/interface/interface_shared.py +13 -7
- wandb/sdk/internal/file_stream.py +19 -0
- wandb/sdk/internal/handler.py +1 -4
- wandb/sdk/internal/internal_api.py +2 -0
- wandb/sdk/internal/job_builder.py +45 -17
- wandb/sdk/internal/sender.py +53 -28
- wandb/sdk/internal/settings_static.py +9 -0
- wandb/sdk/internal/system/system_info.py +4 -1
- wandb/sdk/launch/_launch.py +5 -0
- wandb/sdk/launch/_project_spec.py +5 -20
- wandb/sdk/launch/agent/agent.py +80 -37
- wandb/sdk/launch/agent/config.py +8 -0
- wandb/sdk/launch/builder/kaniko_builder.py +149 -134
- wandb/sdk/launch/create_job.py +44 -48
- wandb/sdk/launch/runner/kubernetes_monitor.py +3 -1
- wandb/sdk/launch/runner/kubernetes_runner.py +20 -2
- wandb/sdk/launch/sweeps/scheduler.py +3 -1
- wandb/sdk/launch/utils.py +23 -5
- wandb/sdk/lib/__init__.py +2 -5
- wandb/sdk/lib/_settings_toposort_generated.py +2 -0
- wandb/sdk/lib/filesystem.py +11 -1
- wandb/sdk/lib/run_moment.py +78 -0
- wandb/sdk/service/streams.py +1 -6
- wandb/sdk/wandb_init.py +12 -7
- wandb/sdk/wandb_login.py +43 -26
- wandb/sdk/wandb_run.py +179 -94
- wandb/sdk/wandb_settings.py +55 -16
- wandb/testing/relay.py +5 -6
- {wandb-0.16.4.dist-info → wandb-0.16.6.dist-info}/METADATA +1 -1
- {wandb-0.16.4.dist-info → wandb-0.16.6.dist-info}/RECORD +55 -54
- {wandb-0.16.4.dist-info → wandb-0.16.6.dist-info}/WHEEL +1 -1
- {wandb-0.16.4.dist-info → wandb-0.16.6.dist-info}/LICENSE +0 -0
- {wandb-0.16.4.dist-info → wandb-0.16.6.dist-info}/entry_points.txt +0 -0
- {wandb-0.16.4.dist-info → wandb-0.16.6.dist-info}/top_level.txt +0 -0
wandb/sdk/wandb_run.py
CHANGED
@@ -6,6 +6,7 @@ import json
|
|
6
6
|
import logging
|
7
7
|
import numbers
|
8
8
|
import os
|
9
|
+
import pathlib
|
9
10
|
import re
|
10
11
|
import sys
|
11
12
|
import threading
|
@@ -42,7 +43,6 @@ from wandb.apis import internal, public
|
|
42
43
|
from wandb.apis.internal import Api
|
43
44
|
from wandb.apis.public import Api as PublicApi
|
44
45
|
from wandb.proto.wandb_internal_pb2 import (
|
45
|
-
JobInfoResponse,
|
46
46
|
MetricRecord,
|
47
47
|
PollExitResponse,
|
48
48
|
Result,
|
@@ -531,7 +531,6 @@ class Run:
|
|
531
531
|
_check_version: Optional["CheckVersionResponse"]
|
532
532
|
_sampled_history: Optional["SampledHistoryResponse"]
|
533
533
|
_final_summary: Optional["GetSummaryResponse"]
|
534
|
-
_job_info: Optional["JobInfoResponse"]
|
535
534
|
_poll_exit_handle: Optional[MailboxHandle]
|
536
535
|
_poll_exit_response: Optional[PollExitResponse]
|
537
536
|
_server_info_response: Optional[ServerInfoResponse]
|
@@ -642,7 +641,6 @@ class Run:
|
|
642
641
|
self._server_info_response = None
|
643
642
|
self._internal_messages_response = None
|
644
643
|
self._poll_exit_handle = None
|
645
|
-
self._job_info = None
|
646
644
|
|
647
645
|
# Initialize telemetry object
|
648
646
|
self._telemetry_obj = telemetry.TelemetryRecord()
|
@@ -673,6 +671,12 @@ class Run:
|
|
673
671
|
os.path.join("code", self._settings.program_relpath)
|
674
672
|
)
|
675
673
|
|
674
|
+
if self._settings.fork_from is not None:
|
675
|
+
config[wandb_key]["branch_point"] = {
|
676
|
+
"run_id": self._settings.fork_from.run,
|
677
|
+
"step": self._settings.fork_from.value,
|
678
|
+
}
|
679
|
+
|
676
680
|
self._config._update(config, ignore_locked=True)
|
677
681
|
|
678
682
|
if sweep_config:
|
@@ -734,7 +738,7 @@ class Run:
|
|
734
738
|
and self._settings.launch_config_path
|
735
739
|
and os.path.exists(self._settings.launch_config_path)
|
736
740
|
):
|
737
|
-
self.
|
741
|
+
self.save(self._settings.launch_config_path)
|
738
742
|
with open(self._settings.launch_config_path) as fp:
|
739
743
|
launch_config = json.loads(fp.read())
|
740
744
|
if launch_config.get("overrides", {}).get("artifacts") is not None:
|
@@ -1385,6 +1389,8 @@ class Run:
|
|
1385
1389
|
|
1386
1390
|
@_run_decorator._noop_on_finish()
|
1387
1391
|
def _summary_update_callback(self, summary_record: SummaryRecord) -> None:
|
1392
|
+
with telemetry.context(run=self) as tel:
|
1393
|
+
tel.feature.set_summary = True
|
1388
1394
|
if self._backend and self._backend.interface:
|
1389
1395
|
self._backend.interface.publish_summary(summary_record)
|
1390
1396
|
|
@@ -1811,6 +1817,10 @@ class Run:
|
|
1811
1817
|
ValueError: if invalid data is passed
|
1812
1818
|
|
1813
1819
|
"""
|
1820
|
+
if step is not None:
|
1821
|
+
with telemetry.context(run=self) as tel:
|
1822
|
+
tel.feature.set_step_log = True
|
1823
|
+
|
1814
1824
|
if sync is not None:
|
1815
1825
|
deprecate.deprecate(
|
1816
1826
|
field_name=deprecate.Deprecated.run__log_sync,
|
@@ -1831,20 +1841,53 @@ class Run:
|
|
1831
1841
|
@_run_decorator._attach
|
1832
1842
|
def save(
|
1833
1843
|
self,
|
1834
|
-
glob_str: Optional[str] = None,
|
1835
|
-
base_path: Optional[str] = None,
|
1844
|
+
glob_str: Optional[Union[str, os.PathLike]] = None,
|
1845
|
+
base_path: Optional[Union[str, os.PathLike]] = None,
|
1836
1846
|
policy: "PolicyName" = "live",
|
1837
1847
|
) -> Union[bool, List[str]]:
|
1838
|
-
"""
|
1848
|
+
"""Sync one or more files to W&B.
|
1849
|
+
|
1850
|
+
Relative paths are relative to the current working directory.
|
1851
|
+
|
1852
|
+
A Unix glob, such as "myfiles/*", is expanded at the time `save` is
|
1853
|
+
called regardless of the `policy`. In particular, new files are not
|
1854
|
+
picked up automatically.
|
1855
|
+
|
1856
|
+
A `base_path` may be provided to control the directory structure of
|
1857
|
+
uploaded files. It should be a prefix of `glob_str`, and the directory
|
1858
|
+
structure beneath it is preserved. It's best understood through
|
1859
|
+
examples:
|
1860
|
+
|
1861
|
+
```
|
1862
|
+
wandb.save("these/are/myfiles/*")
|
1863
|
+
# => Saves files in a "these/are/myfiles/" folder in the run.
|
1864
|
+
|
1865
|
+
wandb.save("these/are/myfiles/*", base_path="these")
|
1866
|
+
# => Saves files in an "are/myfiles/" folder in the run.
|
1867
|
+
|
1868
|
+
wandb.save("/User/username/Documents/run123/*.txt")
|
1869
|
+
# => Saves files in a "run123/" folder in the run.
|
1870
|
+
|
1871
|
+
wandb.save("/User/username/Documents/run123/*.txt", base_path="/User")
|
1872
|
+
# => Saves files in a "username/Documents/run123/" folder in the run.
|
1873
|
+
|
1874
|
+
wandb.save("files/*/saveme.txt")
|
1875
|
+
# => Saves each "saveme.txt" file in an appropriate subdirectory
|
1876
|
+
# of "files/".
|
1877
|
+
```
|
1839
1878
|
|
1840
1879
|
Arguments:
|
1841
|
-
glob_str:
|
1842
|
-
|
1843
|
-
|
1844
|
-
|
1845
|
-
|
1846
|
-
|
1847
|
-
|
1880
|
+
glob_str: A relative or absolute path or Unix glob.
|
1881
|
+
base_path: A path to use to infer a directory structure; see examples.
|
1882
|
+
policy: One of `live`, `now`, or `end`.
|
1883
|
+
* live: upload the file as it changes, overwriting the previous version
|
1884
|
+
* now: upload the file once now
|
1885
|
+
* end: upload file when the run ends
|
1886
|
+
|
1887
|
+
Returns:
|
1888
|
+
Paths to the symlinks created for the matched files.
|
1889
|
+
|
1890
|
+
For historical reasons, this may return a boolean in legacy code.
|
1848
1891
|
"""
|
1849
1892
|
if glob_str is None:
|
1850
1893
|
# noop for historical reasons, run.save() may be called in legacy code
|
@@ -1857,77 +1900,128 @@ class Run:
|
|
1857
1900
|
)
|
1858
1901
|
return True
|
1859
1902
|
|
1860
|
-
|
1903
|
+
if isinstance(glob_str, bytes):
|
1904
|
+
# Preserved for backward compatibility: allow bytes inputs.
|
1905
|
+
glob_str = glob_str.decode("utf-8")
|
1906
|
+
if isinstance(glob_str, str) and (
|
1907
|
+
glob_str.startswith("gs://") or glob_str.startswith("s3://")
|
1908
|
+
):
|
1909
|
+
# Provide a better error message for a common misuse.
|
1910
|
+
wandb.termlog(f"{glob_str} is a cloud storage url, can't save file to W&B.")
|
1911
|
+
return []
|
1912
|
+
glob_path = pathlib.Path(glob_str)
|
1913
|
+
|
1914
|
+
if base_path is not None:
|
1915
|
+
base_path = pathlib.Path(base_path)
|
1916
|
+
elif not glob_path.is_absolute():
|
1917
|
+
base_path = pathlib.Path(".")
|
1918
|
+
else:
|
1919
|
+
# Absolute glob paths with no base path get special handling.
|
1920
|
+
wandb.termwarn(
|
1921
|
+
"Saving files without folders. If you want to preserve "
|
1922
|
+
"subdirectories pass base_path to wandb.save, i.e. "
|
1923
|
+
'wandb.save("/mnt/folder/file.h5", base_path="/mnt")',
|
1924
|
+
repeat=False,
|
1925
|
+
)
|
1926
|
+
base_path = glob_path.resolve().parent.parent
|
1861
1927
|
|
1862
|
-
def _save(
|
1863
|
-
self,
|
1864
|
-
glob_str: Optional[str] = None,
|
1865
|
-
base_path: Optional[str] = None,
|
1866
|
-
policy: "PolicyName" = "live",
|
1867
|
-
) -> Union[bool, List[str]]:
|
1868
1928
|
if policy not in ("live", "end", "now"):
|
1869
1929
|
raise ValueError(
|
1870
|
-
'Only "live" "end" and "now" policies are currently supported.'
|
1930
|
+
'Only "live", "end" and "now" policies are currently supported.'
|
1871
1931
|
)
|
1872
|
-
if isinstance(glob_str, bytes):
|
1873
|
-
glob_str = glob_str.decode("utf-8")
|
1874
|
-
if not isinstance(glob_str, str):
|
1875
|
-
raise ValueError("Must call wandb.save(glob_str) with glob_str a str")
|
1876
1932
|
|
1877
|
-
|
1878
|
-
|
1879
|
-
|
1880
|
-
|
1881
|
-
|
1882
|
-
|
1883
|
-
|
1884
|
-
|
1885
|
-
|
1886
|
-
|
1887
|
-
|
1888
|
-
|
1889
|
-
|
1890
|
-
|
1933
|
+
resolved_glob_path = glob_path.resolve()
|
1934
|
+
resolved_base_path = base_path.resolve()
|
1935
|
+
|
1936
|
+
return self._save(
|
1937
|
+
resolved_glob_path,
|
1938
|
+
resolved_base_path,
|
1939
|
+
policy,
|
1940
|
+
)
|
1941
|
+
|
1942
|
+
def _save(
|
1943
|
+
self,
|
1944
|
+
glob_path: pathlib.Path,
|
1945
|
+
base_path: pathlib.Path,
|
1946
|
+
policy: "PolicyName",
|
1947
|
+
) -> List[str]:
|
1948
|
+
# Can't use is_relative_to() because that's added in Python 3.9,
|
1949
|
+
# but we support down to Python 3.7.
|
1950
|
+
if not str(glob_path).startswith(str(base_path)):
|
1951
|
+
raise ValueError("Glob may not walk above the base path")
|
1952
|
+
|
1953
|
+
if glob_path == base_path:
|
1954
|
+
raise ValueError("Glob cannot be the same as the base path")
|
1955
|
+
|
1956
|
+
relative_glob = glob_path.relative_to(base_path)
|
1957
|
+
if relative_glob.parts[0] == "*":
|
1958
|
+
raise ValueError("Glob may not start with '*' relative to the base path")
|
1959
|
+
relative_glob_str = GlobStr(str(relative_glob))
|
1891
1960
|
|
1892
1961
|
with telemetry.context(run=self) as tel:
|
1893
1962
|
tel.feature.save = True
|
1894
1963
|
|
1895
|
-
|
1896
|
-
|
1897
|
-
|
1898
|
-
|
1899
|
-
|
1900
|
-
|
1901
|
-
|
1902
|
-
|
1903
|
-
|
1904
|
-
|
1905
|
-
|
1906
|
-
|
1907
|
-
|
1908
|
-
|
1909
|
-
|
1910
|
-
|
1911
|
-
|
1912
|
-
|
1913
|
-
|
1914
|
-
|
1915
|
-
|
1916
|
-
|
1917
|
-
|
1918
|
-
if
|
1964
|
+
# Files in the files directory matched by the glob, including old and
|
1965
|
+
# new ones.
|
1966
|
+
globbed_files = set(
|
1967
|
+
pathlib.Path(
|
1968
|
+
self._settings.files_dir,
|
1969
|
+
).glob(relative_glob_str)
|
1970
|
+
)
|
1971
|
+
|
1972
|
+
had_symlinked_files = len(globbed_files) > 0
|
1973
|
+
is_star_glob = "*" in relative_glob_str
|
1974
|
+
|
1975
|
+
# The base_path may itself be a glob, so we can't do
|
1976
|
+
# base_path.glob(relative_glob_str)
|
1977
|
+
for path_str in glob.glob(str(base_path / relative_glob_str)):
|
1978
|
+
source_path = pathlib.Path(path_str).absolute()
|
1979
|
+
|
1980
|
+
# We can't use relative_to() because base_path may be a glob.
|
1981
|
+
relative_path = pathlib.Path(*source_path.parts[len(base_path.parts) :])
|
1982
|
+
|
1983
|
+
target_path = pathlib.Path(self._settings.files_dir, relative_path)
|
1984
|
+
globbed_files.add(target_path)
|
1985
|
+
|
1986
|
+
# If the file is already where it needs to be, don't create a symlink.
|
1987
|
+
if source_path.resolve() == target_path.resolve():
|
1988
|
+
continue
|
1989
|
+
|
1990
|
+
target_path.parent.mkdir(parents=True, exist_ok=True)
|
1991
|
+
|
1992
|
+
# Delete the symlink if it exists.
|
1993
|
+
try:
|
1994
|
+
target_path.unlink()
|
1995
|
+
except FileNotFoundError:
|
1996
|
+
# In Python 3.8, we would pass missing_ok=True, but as of now
|
1997
|
+
# we support down to Python 3.7.
|
1998
|
+
pass
|
1999
|
+
|
2000
|
+
target_path.symlink_to(source_path)
|
2001
|
+
|
2002
|
+
# Inform users that new files aren't detected automatically.
|
2003
|
+
if not had_symlinked_files and is_star_glob:
|
2004
|
+
file_str = f"{len(globbed_files)} file"
|
2005
|
+
if len(globbed_files) > 1:
|
1919
2006
|
file_str += "s"
|
1920
2007
|
wandb.termwarn(
|
2008
|
+
f"Symlinked {file_str} into the W&B run directory, "
|
2009
|
+
"call wandb.save again to sync new files."
|
2010
|
+
)
|
2011
|
+
|
2012
|
+
files_dict: FilesDict = {
|
2013
|
+
"files": [
|
1921
2014
|
(
|
1922
|
-
|
1923
|
-
|
2015
|
+
GlobStr(str(f.relative_to(self._settings.files_dir))),
|
2016
|
+
policy,
|
1924
2017
|
)
|
1925
|
-
|
1926
|
-
|
1927
|
-
|
2018
|
+
for f in globbed_files
|
2019
|
+
]
|
2020
|
+
}
|
1928
2021
|
if self._backend and self._backend.interface:
|
1929
2022
|
self._backend.interface.publish_files(files_dict)
|
1930
|
-
|
2023
|
+
|
2024
|
+
return [str(f) for f in globbed_files]
|
1931
2025
|
|
1932
2026
|
@_run_decorator._attach
|
1933
2027
|
def restore(
|
@@ -2259,16 +2353,17 @@ class Run:
|
|
2259
2353
|
if self._settings._offline:
|
2260
2354
|
return
|
2261
2355
|
if self._backend and self._backend.interface:
|
2262
|
-
|
2263
|
-
|
2264
|
-
|
2265
|
-
|
2266
|
-
|
2267
|
-
|
2268
|
-
|
2269
|
-
|
2270
|
-
|
2271
|
-
|
2356
|
+
if not self._settings._disable_update_check:
|
2357
|
+
logger.info("communicating current version")
|
2358
|
+
version_handle = self._backend.interface.deliver_check_version(
|
2359
|
+
current_version=wandb.__version__
|
2360
|
+
)
|
2361
|
+
version_result = version_handle.wait(timeout=30)
|
2362
|
+
if not version_result:
|
2363
|
+
version_handle.abandon()
|
2364
|
+
else:
|
2365
|
+
self._check_version = version_result.response.check_version_response
|
2366
|
+
logger.info("got version response %s", self._check_version)
|
2272
2367
|
|
2273
2368
|
def _on_start(self) -> None:
|
2274
2369
|
# would like to move _set_global to _on_ready to unify _on_start and _on_attach
|
@@ -2485,7 +2580,6 @@ class Run:
|
|
2485
2580
|
sampled_history_handle = (
|
2486
2581
|
self._backend.interface.deliver_request_sampled_history()
|
2487
2582
|
)
|
2488
|
-
job_info_handle = self._backend.interface.deliver_request_job_info()
|
2489
2583
|
|
2490
2584
|
result = server_info_handle.wait(timeout=-1)
|
2491
2585
|
assert result
|
@@ -2499,10 +2593,6 @@ class Run:
|
|
2499
2593
|
assert result
|
2500
2594
|
self._final_summary = result.response.get_summary_response
|
2501
2595
|
|
2502
|
-
result = job_info_handle.wait(timeout=-1)
|
2503
|
-
assert result
|
2504
|
-
self._job_info = result.response.job_info_response
|
2505
|
-
|
2506
2596
|
if self._backend:
|
2507
2597
|
self._backend.cleanup()
|
2508
2598
|
|
@@ -2525,7 +2615,6 @@ class Run:
|
|
2525
2615
|
server_info_response=self._server_info_response,
|
2526
2616
|
check_version_response=self._check_version,
|
2527
2617
|
internal_messages_response=self._internal_messages_response,
|
2528
|
-
job_info=self._job_info,
|
2529
2618
|
reporter=self._reporter,
|
2530
2619
|
quiet=self._quiet,
|
2531
2620
|
settings=self._settings,
|
@@ -3576,7 +3665,6 @@ class Run:
|
|
3576
3665
|
server_info_response: Optional[ServerInfoResponse] = None,
|
3577
3666
|
check_version_response: Optional["CheckVersionResponse"] = None,
|
3578
3667
|
internal_messages_response: Optional["InternalMessagesResponse"] = None,
|
3579
|
-
job_info: Optional["JobInfoResponse"] = None,
|
3580
3668
|
reporter: Optional[Reporter] = None,
|
3581
3669
|
quiet: Optional[bool] = None,
|
3582
3670
|
*,
|
@@ -3593,7 +3681,6 @@ class Run:
|
|
3593
3681
|
|
3594
3682
|
Run._footer_sync_info(
|
3595
3683
|
poll_exit_response=poll_exit_response,
|
3596
|
-
job_info=job_info,
|
3597
3684
|
quiet=quiet,
|
3598
3685
|
settings=settings,
|
3599
3686
|
printer=printer,
|
@@ -3778,7 +3865,6 @@ class Run:
|
|
3778
3865
|
@staticmethod
|
3779
3866
|
def _footer_sync_info(
|
3780
3867
|
poll_exit_response: Optional[PollExitResponse] = None,
|
3781
|
-
job_info: Optional["JobInfoResponse"] = None,
|
3782
3868
|
quiet: Optional[bool] = None,
|
3783
3869
|
*,
|
3784
3870
|
settings: "Settings",
|
@@ -3798,13 +3884,12 @@ class Run:
|
|
3798
3884
|
else:
|
3799
3885
|
info = []
|
3800
3886
|
if settings.run_name and settings.run_url:
|
3801
|
-
info
|
3887
|
+
info.append(
|
3802
3888
|
f"{printer.emoji('rocket')} View run {printer.name(settings.run_name)} at: {printer.link(settings.run_url)}"
|
3803
|
-
|
3804
|
-
if
|
3805
|
-
link = f"{settings.project_url}/jobs/{job_info.sequenceId}/version_details/{job_info.version}"
|
3889
|
+
)
|
3890
|
+
if settings.project_url:
|
3806
3891
|
info.append(
|
3807
|
-
f"{printer.emoji('
|
3892
|
+
f"{printer.emoji('star')} View project at: {printer.link(settings.project_url)}"
|
3808
3893
|
)
|
3809
3894
|
if poll_exit_response and poll_exit_response.file_counts:
|
3810
3895
|
logger.info("logging synced files")
|
wandb/sdk/wandb_settings.py
CHANGED
@@ -45,6 +45,7 @@ from wandb.proto import wandb_settings_pb2
|
|
45
45
|
from wandb.sdk.internal.system.env_probe_helpers import is_aws_lambda
|
46
46
|
from wandb.sdk.lib import filesystem
|
47
47
|
from wandb.sdk.lib._settings_toposort_generated import SETTINGS_TOPOLOGICALLY_SORTED
|
48
|
+
from wandb.sdk.lib.run_moment import RunMoment
|
48
49
|
from wandb.sdk.wandb_setup import _EarlyLogger
|
49
50
|
|
50
51
|
from .lib import apikey
|
@@ -160,6 +161,14 @@ def _get_program() -> Optional[str]:
|
|
160
161
|
return None
|
161
162
|
|
162
163
|
|
164
|
+
def _runmoment_preprocessor(val: Any) -> Optional[RunMoment]:
|
165
|
+
if isinstance(val, RunMoment) or val is None:
|
166
|
+
return val
|
167
|
+
elif isinstance(val, str):
|
168
|
+
return RunMoment.from_uri(val)
|
169
|
+
raise UsageError(f"Could not parse value {val} as a RunMoment.")
|
170
|
+
|
171
|
+
|
163
172
|
def _get_program_relpath(
|
164
173
|
program: str, root: Optional[str] = None, _logger: Optional[_EarlyLogger] = None
|
165
174
|
) -> Optional[str]:
|
@@ -297,10 +306,11 @@ class SettingsData:
|
|
297
306
|
_cuda: str
|
298
307
|
_disable_meta: bool # Do not collect system metadata
|
299
308
|
_disable_service: (
|
300
|
-
bool
|
301
|
-
)
|
309
|
+
bool # Disable wandb-service, spin up internal process the old way
|
310
|
+
)
|
302
311
|
_disable_setproctitle: bool # Do not use setproctitle on internal process
|
303
312
|
_disable_stats: bool # Do not collect system metrics
|
313
|
+
_disable_update_check: bool # Disable version check
|
304
314
|
_disable_viewer: bool # Prevent early viewer query
|
305
315
|
_disable_machine_info: bool # Disable automatic machine info collection
|
306
316
|
_except_exit: bool
|
@@ -355,20 +365,18 @@ class SettingsData:
|
|
355
365
|
_stats_sample_rate_seconds: float
|
356
366
|
_stats_samples_to_average: int
|
357
367
|
_stats_join_assets: (
|
358
|
-
bool
|
359
|
-
)
|
368
|
+
bool # join metrics from different assets before sending to backend
|
369
|
+
)
|
360
370
|
_stats_neuron_monitor_config_path: (
|
361
|
-
str
|
362
|
-
)
|
371
|
+
str # path to place config file for neuron-monitor (AWS Trainium)
|
372
|
+
)
|
363
373
|
_stats_open_metrics_endpoints: Mapping[str, str] # open metrics endpoint names/urls
|
364
374
|
# open metrics filters in one of the two formats:
|
365
375
|
# - {"metric regex pattern, including endpoint name as prefix": {"label": "label value regex pattern"}}
|
366
376
|
# - ("metric regex pattern 1", "metric regex pattern 2", ...)
|
367
377
|
_stats_open_metrics_filters: Union[Sequence[str], Mapping[str, Mapping[str, str]]]
|
368
378
|
_stats_disk_paths: Sequence[str] # paths to monitor disk usage
|
369
|
-
_stats_buffer_size:
|
370
|
-
int
|
371
|
-
) # number of consolidated samples to buffer before flushing, available in run obj
|
379
|
+
_stats_buffer_size: int # number of consolidated samples to buffer before flushing, available in run obj
|
372
380
|
_tmp_code_dir: str
|
373
381
|
_tracelog: str
|
374
382
|
_unsaved_keys: Sequence[str]
|
@@ -393,6 +401,7 @@ class SettingsData:
|
|
393
401
|
entity: str
|
394
402
|
files_dir: str
|
395
403
|
force: bool
|
404
|
+
fork_from: Optional[RunMoment]
|
396
405
|
git_commit: str
|
397
406
|
git_remote: str
|
398
407
|
git_remote_url: str
|
@@ -648,6 +657,7 @@ class Settings(SettingsData):
|
|
648
657
|
"preprocessor": _str_as_bool,
|
649
658
|
"hook": lambda x: self._disable_machine_info or x,
|
650
659
|
},
|
660
|
+
_disable_update_check={"preprocessor": _str_as_bool},
|
651
661
|
_disable_viewer={"preprocessor": _str_as_bool},
|
652
662
|
_extra_http_headers={"preprocessor": _str_as_json},
|
653
663
|
# Retry filestream requests for 2 hours before dropping chunk (how do we recover?)
|
@@ -805,6 +815,10 @@ class Settings(SettingsData):
|
|
805
815
|
),
|
806
816
|
},
|
807
817
|
force={"preprocessor": _str_as_bool},
|
818
|
+
fork_from={
|
819
|
+
"value": None,
|
820
|
+
"preprocessor": _runmoment_preprocessor,
|
821
|
+
},
|
808
822
|
git_remote={"value": "origin"},
|
809
823
|
heartbeat_seconds={"value": 30},
|
810
824
|
ignore_globs={
|
@@ -1575,6 +1589,14 @@ class Settings(SettingsData):
|
|
1575
1589
|
for key, value in v.items():
|
1576
1590
|
# we only support dicts with string values for now
|
1577
1591
|
mapping.value[key] = value
|
1592
|
+
elif isinstance(v, RunMoment):
|
1593
|
+
getattr(settings, k).CopyFrom(
|
1594
|
+
wandb_settings_pb2.RunMoment(
|
1595
|
+
run=v.run,
|
1596
|
+
value=v.value,
|
1597
|
+
metric=v.metric,
|
1598
|
+
)
|
1599
|
+
)
|
1578
1600
|
elif v is None:
|
1579
1601
|
# None is the default value for all settings, so we don't need to set it,
|
1580
1602
|
# i.e. None means that the value was not set.
|
@@ -1897,16 +1919,33 @@ class Settings(SettingsData):
|
|
1897
1919
|
f.write(json.dumps({"run_id": self.run_id}))
|
1898
1920
|
|
1899
1921
|
def _apply_login(
|
1900
|
-
self,
|
1922
|
+
self,
|
1923
|
+
login_settings: Dict[str, Any],
|
1924
|
+
_logger: Optional[_EarlyLogger] = None,
|
1901
1925
|
) -> None:
|
1902
|
-
|
1926
|
+
key_map = {
|
1927
|
+
"key": "api_key",
|
1928
|
+
"host": "base_url",
|
1929
|
+
"timeout": "login_timeout",
|
1930
|
+
}
|
1931
|
+
|
1932
|
+
# Rename keys and keep only the non-None values.
|
1933
|
+
#
|
1934
|
+
# The input keys are parameters to wandb.login(), but we use different
|
1935
|
+
# names for some of them in Settings.
|
1903
1936
|
login_settings = {
|
1904
|
-
|
1937
|
+
key_map.get(key, key): value
|
1938
|
+
for key, value in login_settings.items()
|
1939
|
+
if value is not None
|
1905
1940
|
}
|
1906
|
-
|
1907
|
-
|
1908
|
-
|
1909
|
-
|
1941
|
+
|
1942
|
+
if _logger:
|
1943
|
+
_logger.info(f"Applying login settings: {_redact_dict(login_settings)}")
|
1944
|
+
|
1945
|
+
self.update(
|
1946
|
+
login_settings,
|
1947
|
+
source=Source.LOGIN,
|
1948
|
+
)
|
1910
1949
|
|
1911
1950
|
def _apply_run_start(self, run_start_settings: Dict[str, Any]) -> None:
|
1912
1951
|
# This dictionary maps from the "run message dict" to relevant fields in settings
|
wandb/testing/relay.py
CHANGED
@@ -486,11 +486,12 @@ class QueryResolver:
|
|
486
486
|
response_data: Dict[str, Any],
|
487
487
|
**kwargs: Any,
|
488
488
|
) -> Optional[Dict[str, Any]]:
|
489
|
+
results = []
|
489
490
|
for resolver in self.resolvers:
|
490
491
|
result = resolver.get("resolver")(request_data, response_data, **kwargs)
|
491
492
|
if result is not None:
|
492
|
-
|
493
|
-
return
|
493
|
+
results.append(result)
|
494
|
+
return results
|
494
495
|
|
495
496
|
|
496
497
|
class TokenizedCircularPattern:
|
@@ -768,14 +769,13 @@ class RelayServer:
|
|
768
769
|
response_data,
|
769
770
|
**kwargs,
|
770
771
|
)
|
772
|
+
for entry in snooped_context:
|
773
|
+
self.context.upsert(entry)
|
771
774
|
except Exception as e:
|
772
775
|
print("Failed to resolve context: ", e)
|
773
776
|
traceback.print_exc()
|
774
777
|
snooped_context = None
|
775
778
|
|
776
|
-
if snooped_context is not None:
|
777
|
-
self.context.upsert(snooped_context)
|
778
|
-
|
779
779
|
return None
|
780
780
|
|
781
781
|
def graphql(self) -> Mapping[str, str]:
|
@@ -815,7 +815,6 @@ class RelayServer:
|
|
815
815
|
print(relayed_response)
|
816
816
|
print(relayed_response.status_code, relayed_response.json())
|
817
817
|
print("*****************")
|
818
|
-
|
819
818
|
self.snoop_context(request, relayed_response, timer.elapsed, path=path)
|
820
819
|
|
821
820
|
return relayed_response.json()
|