wandb 0.16.4__py3-none-any.whl → 0.16.6__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- wandb/__init__.py +2 -2
- wandb/agents/pyagent.py +1 -1
- wandb/apis/public/api.py +6 -6
- wandb/apis/reports/v2/interface.py +4 -8
- wandb/apis/reports/v2/internal.py +12 -45
- wandb/cli/cli.py +29 -5
- wandb/integration/openai/fine_tuning.py +74 -37
- wandb/integration/ultralytics/callback.py +0 -1
- wandb/proto/v3/wandb_internal_pb2.py +332 -312
- wandb/proto/v3/wandb_settings_pb2.py +13 -3
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_internal_pb2.py +316 -312
- wandb/proto/v4/wandb_settings_pb2.py +5 -3
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/sdk/artifacts/artifact.py +92 -26
- wandb/sdk/artifacts/artifact_manifest_entry.py +6 -1
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -0
- wandb/sdk/artifacts/artifact_saver.py +16 -36
- wandb/sdk/artifacts/storage_handler.py +2 -1
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +13 -5
- wandb/sdk/interface/interface.py +60 -15
- wandb/sdk/interface/interface_shared.py +13 -7
- wandb/sdk/internal/file_stream.py +19 -0
- wandb/sdk/internal/handler.py +1 -4
- wandb/sdk/internal/internal_api.py +2 -0
- wandb/sdk/internal/job_builder.py +45 -17
- wandb/sdk/internal/sender.py +53 -28
- wandb/sdk/internal/settings_static.py +9 -0
- wandb/sdk/internal/system/system_info.py +4 -1
- wandb/sdk/launch/_launch.py +5 -0
- wandb/sdk/launch/_project_spec.py +5 -20
- wandb/sdk/launch/agent/agent.py +80 -37
- wandb/sdk/launch/agent/config.py +8 -0
- wandb/sdk/launch/builder/kaniko_builder.py +149 -134
- wandb/sdk/launch/create_job.py +44 -48
- wandb/sdk/launch/runner/kubernetes_monitor.py +3 -1
- wandb/sdk/launch/runner/kubernetes_runner.py +20 -2
- wandb/sdk/launch/sweeps/scheduler.py +3 -1
- wandb/sdk/launch/utils.py +23 -5
- wandb/sdk/lib/__init__.py +2 -5
- wandb/sdk/lib/_settings_toposort_generated.py +2 -0
- wandb/sdk/lib/filesystem.py +11 -1
- wandb/sdk/lib/run_moment.py +78 -0
- wandb/sdk/service/streams.py +1 -6
- wandb/sdk/wandb_init.py +12 -7
- wandb/sdk/wandb_login.py +43 -26
- wandb/sdk/wandb_run.py +179 -94
- wandb/sdk/wandb_settings.py +55 -16
- wandb/testing/relay.py +5 -6
- {wandb-0.16.4.dist-info → wandb-0.16.6.dist-info}/METADATA +1 -1
- {wandb-0.16.4.dist-info → wandb-0.16.6.dist-info}/RECORD +55 -54
- {wandb-0.16.4.dist-info → wandb-0.16.6.dist-info}/WHEEL +1 -1
- {wandb-0.16.4.dist-info → wandb-0.16.6.dist-info}/LICENSE +0 -0
- {wandb-0.16.4.dist-info → wandb-0.16.6.dist-info}/entry_points.txt +0 -0
- {wandb-0.16.4.dist-info → wandb-0.16.6.dist-info}/top_level.txt +0 -0
wandb/sdk/wandb_run.py
CHANGED
@@ -6,6 +6,7 @@ import json
|
|
6
6
|
import logging
|
7
7
|
import numbers
|
8
8
|
import os
|
9
|
+
import pathlib
|
9
10
|
import re
|
10
11
|
import sys
|
11
12
|
import threading
|
@@ -42,7 +43,6 @@ from wandb.apis import internal, public
|
|
42
43
|
from wandb.apis.internal import Api
|
43
44
|
from wandb.apis.public import Api as PublicApi
|
44
45
|
from wandb.proto.wandb_internal_pb2 import (
|
45
|
-
JobInfoResponse,
|
46
46
|
MetricRecord,
|
47
47
|
PollExitResponse,
|
48
48
|
Result,
|
@@ -531,7 +531,6 @@ class Run:
|
|
531
531
|
_check_version: Optional["CheckVersionResponse"]
|
532
532
|
_sampled_history: Optional["SampledHistoryResponse"]
|
533
533
|
_final_summary: Optional["GetSummaryResponse"]
|
534
|
-
_job_info: Optional["JobInfoResponse"]
|
535
534
|
_poll_exit_handle: Optional[MailboxHandle]
|
536
535
|
_poll_exit_response: Optional[PollExitResponse]
|
537
536
|
_server_info_response: Optional[ServerInfoResponse]
|
@@ -642,7 +641,6 @@ class Run:
|
|
642
641
|
self._server_info_response = None
|
643
642
|
self._internal_messages_response = None
|
644
643
|
self._poll_exit_handle = None
|
645
|
-
self._job_info = None
|
646
644
|
|
647
645
|
# Initialize telemetry object
|
648
646
|
self._telemetry_obj = telemetry.TelemetryRecord()
|
@@ -673,6 +671,12 @@ class Run:
|
|
673
671
|
os.path.join("code", self._settings.program_relpath)
|
674
672
|
)
|
675
673
|
|
674
|
+
if self._settings.fork_from is not None:
|
675
|
+
config[wandb_key]["branch_point"] = {
|
676
|
+
"run_id": self._settings.fork_from.run,
|
677
|
+
"step": self._settings.fork_from.value,
|
678
|
+
}
|
679
|
+
|
676
680
|
self._config._update(config, ignore_locked=True)
|
677
681
|
|
678
682
|
if sweep_config:
|
@@ -734,7 +738,7 @@ class Run:
|
|
734
738
|
and self._settings.launch_config_path
|
735
739
|
and os.path.exists(self._settings.launch_config_path)
|
736
740
|
):
|
737
|
-
self.
|
741
|
+
self.save(self._settings.launch_config_path)
|
738
742
|
with open(self._settings.launch_config_path) as fp:
|
739
743
|
launch_config = json.loads(fp.read())
|
740
744
|
if launch_config.get("overrides", {}).get("artifacts") is not None:
|
@@ -1385,6 +1389,8 @@ class Run:
|
|
1385
1389
|
|
1386
1390
|
@_run_decorator._noop_on_finish()
|
1387
1391
|
def _summary_update_callback(self, summary_record: SummaryRecord) -> None:
|
1392
|
+
with telemetry.context(run=self) as tel:
|
1393
|
+
tel.feature.set_summary = True
|
1388
1394
|
if self._backend and self._backend.interface:
|
1389
1395
|
self._backend.interface.publish_summary(summary_record)
|
1390
1396
|
|
@@ -1811,6 +1817,10 @@ class Run:
|
|
1811
1817
|
ValueError: if invalid data is passed
|
1812
1818
|
|
1813
1819
|
"""
|
1820
|
+
if step is not None:
|
1821
|
+
with telemetry.context(run=self) as tel:
|
1822
|
+
tel.feature.set_step_log = True
|
1823
|
+
|
1814
1824
|
if sync is not None:
|
1815
1825
|
deprecate.deprecate(
|
1816
1826
|
field_name=deprecate.Deprecated.run__log_sync,
|
@@ -1831,20 +1841,53 @@ class Run:
|
|
1831
1841
|
@_run_decorator._attach
|
1832
1842
|
def save(
|
1833
1843
|
self,
|
1834
|
-
glob_str: Optional[str] = None,
|
1835
|
-
base_path: Optional[str] = None,
|
1844
|
+
glob_str: Optional[Union[str, os.PathLike]] = None,
|
1845
|
+
base_path: Optional[Union[str, os.PathLike]] = None,
|
1836
1846
|
policy: "PolicyName" = "live",
|
1837
1847
|
) -> Union[bool, List[str]]:
|
1838
|
-
"""
|
1848
|
+
"""Sync one or more files to W&B.
|
1849
|
+
|
1850
|
+
Relative paths are relative to the current working directory.
|
1851
|
+
|
1852
|
+
A Unix glob, such as "myfiles/*", is expanded at the time `save` is
|
1853
|
+
called regardless of the `policy`. In particular, new files are not
|
1854
|
+
picked up automatically.
|
1855
|
+
|
1856
|
+
A `base_path` may be provided to control the directory structure of
|
1857
|
+
uploaded files. It should be a prefix of `glob_str`, and the directory
|
1858
|
+
structure beneath it is preserved. It's best understood through
|
1859
|
+
examples:
|
1860
|
+
|
1861
|
+
```
|
1862
|
+
wandb.save("these/are/myfiles/*")
|
1863
|
+
# => Saves files in a "these/are/myfiles/" folder in the run.
|
1864
|
+
|
1865
|
+
wandb.save("these/are/myfiles/*", base_path="these")
|
1866
|
+
# => Saves files in an "are/myfiles/" folder in the run.
|
1867
|
+
|
1868
|
+
wandb.save("/User/username/Documents/run123/*.txt")
|
1869
|
+
# => Saves files in a "run123/" folder in the run.
|
1870
|
+
|
1871
|
+
wandb.save("/User/username/Documents/run123/*.txt", base_path="/User")
|
1872
|
+
# => Saves files in a "username/Documents/run123/" folder in the run.
|
1873
|
+
|
1874
|
+
wandb.save("files/*/saveme.txt")
|
1875
|
+
# => Saves each "saveme.txt" file in an appropriate subdirectory
|
1876
|
+
# of "files/".
|
1877
|
+
```
|
1839
1878
|
|
1840
1879
|
Arguments:
|
1841
|
-
glob_str:
|
1842
|
-
|
1843
|
-
|
1844
|
-
|
1845
|
-
|
1846
|
-
|
1847
|
-
|
1880
|
+
glob_str: A relative or absolute path or Unix glob.
|
1881
|
+
base_path: A path to use to infer a directory structure; see examples.
|
1882
|
+
policy: One of `live`, `now`, or `end`.
|
1883
|
+
* live: upload the file as it changes, overwriting the previous version
|
1884
|
+
* now: upload the file once now
|
1885
|
+
* end: upload file when the run ends
|
1886
|
+
|
1887
|
+
Returns:
|
1888
|
+
Paths to the symlinks created for the matched files.
|
1889
|
+
|
1890
|
+
For historical reasons, this may return a boolean in legacy code.
|
1848
1891
|
"""
|
1849
1892
|
if glob_str is None:
|
1850
1893
|
# noop for historical reasons, run.save() may be called in legacy code
|
@@ -1857,77 +1900,128 @@ class Run:
|
|
1857
1900
|
)
|
1858
1901
|
return True
|
1859
1902
|
|
1860
|
-
|
1903
|
+
if isinstance(glob_str, bytes):
|
1904
|
+
# Preserved for backward compatibility: allow bytes inputs.
|
1905
|
+
glob_str = glob_str.decode("utf-8")
|
1906
|
+
if isinstance(glob_str, str) and (
|
1907
|
+
glob_str.startswith("gs://") or glob_str.startswith("s3://")
|
1908
|
+
):
|
1909
|
+
# Provide a better error message for a common misuse.
|
1910
|
+
wandb.termlog(f"{glob_str} is a cloud storage url, can't save file to W&B.")
|
1911
|
+
return []
|
1912
|
+
glob_path = pathlib.Path(glob_str)
|
1913
|
+
|
1914
|
+
if base_path is not None:
|
1915
|
+
base_path = pathlib.Path(base_path)
|
1916
|
+
elif not glob_path.is_absolute():
|
1917
|
+
base_path = pathlib.Path(".")
|
1918
|
+
else:
|
1919
|
+
# Absolute glob paths with no base path get special handling.
|
1920
|
+
wandb.termwarn(
|
1921
|
+
"Saving files without folders. If you want to preserve "
|
1922
|
+
"subdirectories pass base_path to wandb.save, i.e. "
|
1923
|
+
'wandb.save("/mnt/folder/file.h5", base_path="/mnt")',
|
1924
|
+
repeat=False,
|
1925
|
+
)
|
1926
|
+
base_path = glob_path.resolve().parent.parent
|
1861
1927
|
|
1862
|
-
def _save(
|
1863
|
-
self,
|
1864
|
-
glob_str: Optional[str] = None,
|
1865
|
-
base_path: Optional[str] = None,
|
1866
|
-
policy: "PolicyName" = "live",
|
1867
|
-
) -> Union[bool, List[str]]:
|
1868
1928
|
if policy not in ("live", "end", "now"):
|
1869
1929
|
raise ValueError(
|
1870
|
-
'Only "live" "end" and "now" policies are currently supported.'
|
1930
|
+
'Only "live", "end" and "now" policies are currently supported.'
|
1871
1931
|
)
|
1872
|
-
if isinstance(glob_str, bytes):
|
1873
|
-
glob_str = glob_str.decode("utf-8")
|
1874
|
-
if not isinstance(glob_str, str):
|
1875
|
-
raise ValueError("Must call wandb.save(glob_str) with glob_str a str")
|
1876
1932
|
|
1877
|
-
|
1878
|
-
|
1879
|
-
|
1880
|
-
|
1881
|
-
|
1882
|
-
|
1883
|
-
|
1884
|
-
|
1885
|
-
|
1886
|
-
|
1887
|
-
|
1888
|
-
|
1889
|
-
|
1890
|
-
|
1933
|
+
resolved_glob_path = glob_path.resolve()
|
1934
|
+
resolved_base_path = base_path.resolve()
|
1935
|
+
|
1936
|
+
return self._save(
|
1937
|
+
resolved_glob_path,
|
1938
|
+
resolved_base_path,
|
1939
|
+
policy,
|
1940
|
+
)
|
1941
|
+
|
1942
|
+
def _save(
|
1943
|
+
self,
|
1944
|
+
glob_path: pathlib.Path,
|
1945
|
+
base_path: pathlib.Path,
|
1946
|
+
policy: "PolicyName",
|
1947
|
+
) -> List[str]:
|
1948
|
+
# Can't use is_relative_to() because that's added in Python 3.9,
|
1949
|
+
# but we support down to Python 3.7.
|
1950
|
+
if not str(glob_path).startswith(str(base_path)):
|
1951
|
+
raise ValueError("Glob may not walk above the base path")
|
1952
|
+
|
1953
|
+
if glob_path == base_path:
|
1954
|
+
raise ValueError("Glob cannot be the same as the base path")
|
1955
|
+
|
1956
|
+
relative_glob = glob_path.relative_to(base_path)
|
1957
|
+
if relative_glob.parts[0] == "*":
|
1958
|
+
raise ValueError("Glob may not start with '*' relative to the base path")
|
1959
|
+
relative_glob_str = GlobStr(str(relative_glob))
|
1891
1960
|
|
1892
1961
|
with telemetry.context(run=self) as tel:
|
1893
1962
|
tel.feature.save = True
|
1894
1963
|
|
1895
|
-
|
1896
|
-
|
1897
|
-
|
1898
|
-
|
1899
|
-
|
1900
|
-
|
1901
|
-
|
1902
|
-
|
1903
|
-
|
1904
|
-
|
1905
|
-
|
1906
|
-
|
1907
|
-
|
1908
|
-
|
1909
|
-
|
1910
|
-
|
1911
|
-
|
1912
|
-
|
1913
|
-
|
1914
|
-
|
1915
|
-
|
1916
|
-
|
1917
|
-
|
1918
|
-
if
|
1964
|
+
# Files in the files directory matched by the glob, including old and
|
1965
|
+
# new ones.
|
1966
|
+
globbed_files = set(
|
1967
|
+
pathlib.Path(
|
1968
|
+
self._settings.files_dir,
|
1969
|
+
).glob(relative_glob_str)
|
1970
|
+
)
|
1971
|
+
|
1972
|
+
had_symlinked_files = len(globbed_files) > 0
|
1973
|
+
is_star_glob = "*" in relative_glob_str
|
1974
|
+
|
1975
|
+
# The base_path may itself be a glob, so we can't do
|
1976
|
+
# base_path.glob(relative_glob_str)
|
1977
|
+
for path_str in glob.glob(str(base_path / relative_glob_str)):
|
1978
|
+
source_path = pathlib.Path(path_str).absolute()
|
1979
|
+
|
1980
|
+
# We can't use relative_to() because base_path may be a glob.
|
1981
|
+
relative_path = pathlib.Path(*source_path.parts[len(base_path.parts) :])
|
1982
|
+
|
1983
|
+
target_path = pathlib.Path(self._settings.files_dir, relative_path)
|
1984
|
+
globbed_files.add(target_path)
|
1985
|
+
|
1986
|
+
# If the file is already where it needs to be, don't create a symlink.
|
1987
|
+
if source_path.resolve() == target_path.resolve():
|
1988
|
+
continue
|
1989
|
+
|
1990
|
+
target_path.parent.mkdir(parents=True, exist_ok=True)
|
1991
|
+
|
1992
|
+
# Delete the symlink if it exists.
|
1993
|
+
try:
|
1994
|
+
target_path.unlink()
|
1995
|
+
except FileNotFoundError:
|
1996
|
+
# In Python 3.8, we would pass missing_ok=True, but as of now
|
1997
|
+
# we support down to Python 3.7.
|
1998
|
+
pass
|
1999
|
+
|
2000
|
+
target_path.symlink_to(source_path)
|
2001
|
+
|
2002
|
+
# Inform users that new files aren't detected automatically.
|
2003
|
+
if not had_symlinked_files and is_star_glob:
|
2004
|
+
file_str = f"{len(globbed_files)} file"
|
2005
|
+
if len(globbed_files) > 1:
|
1919
2006
|
file_str += "s"
|
1920
2007
|
wandb.termwarn(
|
2008
|
+
f"Symlinked {file_str} into the W&B run directory, "
|
2009
|
+
"call wandb.save again to sync new files."
|
2010
|
+
)
|
2011
|
+
|
2012
|
+
files_dict: FilesDict = {
|
2013
|
+
"files": [
|
1921
2014
|
(
|
1922
|
-
|
1923
|
-
|
2015
|
+
GlobStr(str(f.relative_to(self._settings.files_dir))),
|
2016
|
+
policy,
|
1924
2017
|
)
|
1925
|
-
|
1926
|
-
|
1927
|
-
|
2018
|
+
for f in globbed_files
|
2019
|
+
]
|
2020
|
+
}
|
1928
2021
|
if self._backend and self._backend.interface:
|
1929
2022
|
self._backend.interface.publish_files(files_dict)
|
1930
|
-
|
2023
|
+
|
2024
|
+
return [str(f) for f in globbed_files]
|
1931
2025
|
|
1932
2026
|
@_run_decorator._attach
|
1933
2027
|
def restore(
|
@@ -2259,16 +2353,17 @@ class Run:
|
|
2259
2353
|
if self._settings._offline:
|
2260
2354
|
return
|
2261
2355
|
if self._backend and self._backend.interface:
|
2262
|
-
|
2263
|
-
|
2264
|
-
|
2265
|
-
|
2266
|
-
|
2267
|
-
|
2268
|
-
|
2269
|
-
|
2270
|
-
|
2271
|
-
|
2356
|
+
if not self._settings._disable_update_check:
|
2357
|
+
logger.info("communicating current version")
|
2358
|
+
version_handle = self._backend.interface.deliver_check_version(
|
2359
|
+
current_version=wandb.__version__
|
2360
|
+
)
|
2361
|
+
version_result = version_handle.wait(timeout=30)
|
2362
|
+
if not version_result:
|
2363
|
+
version_handle.abandon()
|
2364
|
+
else:
|
2365
|
+
self._check_version = version_result.response.check_version_response
|
2366
|
+
logger.info("got version response %s", self._check_version)
|
2272
2367
|
|
2273
2368
|
def _on_start(self) -> None:
|
2274
2369
|
# would like to move _set_global to _on_ready to unify _on_start and _on_attach
|
@@ -2485,7 +2580,6 @@ class Run:
|
|
2485
2580
|
sampled_history_handle = (
|
2486
2581
|
self._backend.interface.deliver_request_sampled_history()
|
2487
2582
|
)
|
2488
|
-
job_info_handle = self._backend.interface.deliver_request_job_info()
|
2489
2583
|
|
2490
2584
|
result = server_info_handle.wait(timeout=-1)
|
2491
2585
|
assert result
|
@@ -2499,10 +2593,6 @@ class Run:
|
|
2499
2593
|
assert result
|
2500
2594
|
self._final_summary = result.response.get_summary_response
|
2501
2595
|
|
2502
|
-
result = job_info_handle.wait(timeout=-1)
|
2503
|
-
assert result
|
2504
|
-
self._job_info = result.response.job_info_response
|
2505
|
-
|
2506
2596
|
if self._backend:
|
2507
2597
|
self._backend.cleanup()
|
2508
2598
|
|
@@ -2525,7 +2615,6 @@ class Run:
|
|
2525
2615
|
server_info_response=self._server_info_response,
|
2526
2616
|
check_version_response=self._check_version,
|
2527
2617
|
internal_messages_response=self._internal_messages_response,
|
2528
|
-
job_info=self._job_info,
|
2529
2618
|
reporter=self._reporter,
|
2530
2619
|
quiet=self._quiet,
|
2531
2620
|
settings=self._settings,
|
@@ -3576,7 +3665,6 @@ class Run:
|
|
3576
3665
|
server_info_response: Optional[ServerInfoResponse] = None,
|
3577
3666
|
check_version_response: Optional["CheckVersionResponse"] = None,
|
3578
3667
|
internal_messages_response: Optional["InternalMessagesResponse"] = None,
|
3579
|
-
job_info: Optional["JobInfoResponse"] = None,
|
3580
3668
|
reporter: Optional[Reporter] = None,
|
3581
3669
|
quiet: Optional[bool] = None,
|
3582
3670
|
*,
|
@@ -3593,7 +3681,6 @@ class Run:
|
|
3593
3681
|
|
3594
3682
|
Run._footer_sync_info(
|
3595
3683
|
poll_exit_response=poll_exit_response,
|
3596
|
-
job_info=job_info,
|
3597
3684
|
quiet=quiet,
|
3598
3685
|
settings=settings,
|
3599
3686
|
printer=printer,
|
@@ -3778,7 +3865,6 @@ class Run:
|
|
3778
3865
|
@staticmethod
|
3779
3866
|
def _footer_sync_info(
|
3780
3867
|
poll_exit_response: Optional[PollExitResponse] = None,
|
3781
|
-
job_info: Optional["JobInfoResponse"] = None,
|
3782
3868
|
quiet: Optional[bool] = None,
|
3783
3869
|
*,
|
3784
3870
|
settings: "Settings",
|
@@ -3798,13 +3884,12 @@ class Run:
|
|
3798
3884
|
else:
|
3799
3885
|
info = []
|
3800
3886
|
if settings.run_name and settings.run_url:
|
3801
|
-
info
|
3887
|
+
info.append(
|
3802
3888
|
f"{printer.emoji('rocket')} View run {printer.name(settings.run_name)} at: {printer.link(settings.run_url)}"
|
3803
|
-
|
3804
|
-
if
|
3805
|
-
link = f"{settings.project_url}/jobs/{job_info.sequenceId}/version_details/{job_info.version}"
|
3889
|
+
)
|
3890
|
+
if settings.project_url:
|
3806
3891
|
info.append(
|
3807
|
-
f"{printer.emoji('
|
3892
|
+
f"{printer.emoji('star')} View project at: {printer.link(settings.project_url)}"
|
3808
3893
|
)
|
3809
3894
|
if poll_exit_response and poll_exit_response.file_counts:
|
3810
3895
|
logger.info("logging synced files")
|
wandb/sdk/wandb_settings.py
CHANGED
@@ -45,6 +45,7 @@ from wandb.proto import wandb_settings_pb2
|
|
45
45
|
from wandb.sdk.internal.system.env_probe_helpers import is_aws_lambda
|
46
46
|
from wandb.sdk.lib import filesystem
|
47
47
|
from wandb.sdk.lib._settings_toposort_generated import SETTINGS_TOPOLOGICALLY_SORTED
|
48
|
+
from wandb.sdk.lib.run_moment import RunMoment
|
48
49
|
from wandb.sdk.wandb_setup import _EarlyLogger
|
49
50
|
|
50
51
|
from .lib import apikey
|
@@ -160,6 +161,14 @@ def _get_program() -> Optional[str]:
|
|
160
161
|
return None
|
161
162
|
|
162
163
|
|
164
|
+
def _runmoment_preprocessor(val: Any) -> Optional[RunMoment]:
|
165
|
+
if isinstance(val, RunMoment) or val is None:
|
166
|
+
return val
|
167
|
+
elif isinstance(val, str):
|
168
|
+
return RunMoment.from_uri(val)
|
169
|
+
raise UsageError(f"Could not parse value {val} as a RunMoment.")
|
170
|
+
|
171
|
+
|
163
172
|
def _get_program_relpath(
|
164
173
|
program: str, root: Optional[str] = None, _logger: Optional[_EarlyLogger] = None
|
165
174
|
) -> Optional[str]:
|
@@ -297,10 +306,11 @@ class SettingsData:
|
|
297
306
|
_cuda: str
|
298
307
|
_disable_meta: bool # Do not collect system metadata
|
299
308
|
_disable_service: (
|
300
|
-
bool
|
301
|
-
)
|
309
|
+
bool # Disable wandb-service, spin up internal process the old way
|
310
|
+
)
|
302
311
|
_disable_setproctitle: bool # Do not use setproctitle on internal process
|
303
312
|
_disable_stats: bool # Do not collect system metrics
|
313
|
+
_disable_update_check: bool # Disable version check
|
304
314
|
_disable_viewer: bool # Prevent early viewer query
|
305
315
|
_disable_machine_info: bool # Disable automatic machine info collection
|
306
316
|
_except_exit: bool
|
@@ -355,20 +365,18 @@ class SettingsData:
|
|
355
365
|
_stats_sample_rate_seconds: float
|
356
366
|
_stats_samples_to_average: int
|
357
367
|
_stats_join_assets: (
|
358
|
-
bool
|
359
|
-
)
|
368
|
+
bool # join metrics from different assets before sending to backend
|
369
|
+
)
|
360
370
|
_stats_neuron_monitor_config_path: (
|
361
|
-
str
|
362
|
-
)
|
371
|
+
str # path to place config file for neuron-monitor (AWS Trainium)
|
372
|
+
)
|
363
373
|
_stats_open_metrics_endpoints: Mapping[str, str] # open metrics endpoint names/urls
|
364
374
|
# open metrics filters in one of the two formats:
|
365
375
|
# - {"metric regex pattern, including endpoint name as prefix": {"label": "label value regex pattern"}}
|
366
376
|
# - ("metric regex pattern 1", "metric regex pattern 2", ...)
|
367
377
|
_stats_open_metrics_filters: Union[Sequence[str], Mapping[str, Mapping[str, str]]]
|
368
378
|
_stats_disk_paths: Sequence[str] # paths to monitor disk usage
|
369
|
-
_stats_buffer_size:
|
370
|
-
int
|
371
|
-
) # number of consolidated samples to buffer before flushing, available in run obj
|
379
|
+
_stats_buffer_size: int # number of consolidated samples to buffer before flushing, available in run obj
|
372
380
|
_tmp_code_dir: str
|
373
381
|
_tracelog: str
|
374
382
|
_unsaved_keys: Sequence[str]
|
@@ -393,6 +401,7 @@ class SettingsData:
|
|
393
401
|
entity: str
|
394
402
|
files_dir: str
|
395
403
|
force: bool
|
404
|
+
fork_from: Optional[RunMoment]
|
396
405
|
git_commit: str
|
397
406
|
git_remote: str
|
398
407
|
git_remote_url: str
|
@@ -648,6 +657,7 @@ class Settings(SettingsData):
|
|
648
657
|
"preprocessor": _str_as_bool,
|
649
658
|
"hook": lambda x: self._disable_machine_info or x,
|
650
659
|
},
|
660
|
+
_disable_update_check={"preprocessor": _str_as_bool},
|
651
661
|
_disable_viewer={"preprocessor": _str_as_bool},
|
652
662
|
_extra_http_headers={"preprocessor": _str_as_json},
|
653
663
|
# Retry filestream requests for 2 hours before dropping chunk (how do we recover?)
|
@@ -805,6 +815,10 @@ class Settings(SettingsData):
|
|
805
815
|
),
|
806
816
|
},
|
807
817
|
force={"preprocessor": _str_as_bool},
|
818
|
+
fork_from={
|
819
|
+
"value": None,
|
820
|
+
"preprocessor": _runmoment_preprocessor,
|
821
|
+
},
|
808
822
|
git_remote={"value": "origin"},
|
809
823
|
heartbeat_seconds={"value": 30},
|
810
824
|
ignore_globs={
|
@@ -1575,6 +1589,14 @@ class Settings(SettingsData):
|
|
1575
1589
|
for key, value in v.items():
|
1576
1590
|
# we only support dicts with string values for now
|
1577
1591
|
mapping.value[key] = value
|
1592
|
+
elif isinstance(v, RunMoment):
|
1593
|
+
getattr(settings, k).CopyFrom(
|
1594
|
+
wandb_settings_pb2.RunMoment(
|
1595
|
+
run=v.run,
|
1596
|
+
value=v.value,
|
1597
|
+
metric=v.metric,
|
1598
|
+
)
|
1599
|
+
)
|
1578
1600
|
elif v is None:
|
1579
1601
|
# None is the default value for all settings, so we don't need to set it,
|
1580
1602
|
# i.e. None means that the value was not set.
|
@@ -1897,16 +1919,33 @@ class Settings(SettingsData):
|
|
1897
1919
|
f.write(json.dumps({"run_id": self.run_id}))
|
1898
1920
|
|
1899
1921
|
def _apply_login(
|
1900
|
-
self,
|
1922
|
+
self,
|
1923
|
+
login_settings: Dict[str, Any],
|
1924
|
+
_logger: Optional[_EarlyLogger] = None,
|
1901
1925
|
) -> None:
|
1902
|
-
|
1926
|
+
key_map = {
|
1927
|
+
"key": "api_key",
|
1928
|
+
"host": "base_url",
|
1929
|
+
"timeout": "login_timeout",
|
1930
|
+
}
|
1931
|
+
|
1932
|
+
# Rename keys and keep only the non-None values.
|
1933
|
+
#
|
1934
|
+
# The input keys are parameters to wandb.login(), but we use different
|
1935
|
+
# names for some of them in Settings.
|
1903
1936
|
login_settings = {
|
1904
|
-
|
1937
|
+
key_map.get(key, key): value
|
1938
|
+
for key, value in login_settings.items()
|
1939
|
+
if value is not None
|
1905
1940
|
}
|
1906
|
-
|
1907
|
-
|
1908
|
-
|
1909
|
-
|
1941
|
+
|
1942
|
+
if _logger:
|
1943
|
+
_logger.info(f"Applying login settings: {_redact_dict(login_settings)}")
|
1944
|
+
|
1945
|
+
self.update(
|
1946
|
+
login_settings,
|
1947
|
+
source=Source.LOGIN,
|
1948
|
+
)
|
1910
1949
|
|
1911
1950
|
def _apply_run_start(self, run_start_settings: Dict[str, Any]) -> None:
|
1912
1951
|
# This dictionary maps from the "run message dict" to relevant fields in settings
|
wandb/testing/relay.py
CHANGED
@@ -486,11 +486,12 @@ class QueryResolver:
|
|
486
486
|
response_data: Dict[str, Any],
|
487
487
|
**kwargs: Any,
|
488
488
|
) -> Optional[Dict[str, Any]]:
|
489
|
+
results = []
|
489
490
|
for resolver in self.resolvers:
|
490
491
|
result = resolver.get("resolver")(request_data, response_data, **kwargs)
|
491
492
|
if result is not None:
|
492
|
-
|
493
|
-
return
|
493
|
+
results.append(result)
|
494
|
+
return results
|
494
495
|
|
495
496
|
|
496
497
|
class TokenizedCircularPattern:
|
@@ -768,14 +769,13 @@ class RelayServer:
|
|
768
769
|
response_data,
|
769
770
|
**kwargs,
|
770
771
|
)
|
772
|
+
for entry in snooped_context:
|
773
|
+
self.context.upsert(entry)
|
771
774
|
except Exception as e:
|
772
775
|
print("Failed to resolve context: ", e)
|
773
776
|
traceback.print_exc()
|
774
777
|
snooped_context = None
|
775
778
|
|
776
|
-
if snooped_context is not None:
|
777
|
-
self.context.upsert(snooped_context)
|
778
|
-
|
779
779
|
return None
|
780
780
|
|
781
781
|
def graphql(self) -> Mapping[str, str]:
|
@@ -815,7 +815,6 @@ class RelayServer:
|
|
815
815
|
print(relayed_response)
|
816
816
|
print(relayed_response.status_code, relayed_response.json())
|
817
817
|
print("*****************")
|
818
|
-
|
819
818
|
self.snoop_context(request, relayed_response, timer.elapsed, path=path)
|
820
819
|
|
821
820
|
return relayed_response.json()
|