wandb 0.18.7__py3-none-win32.whl → 0.19.0rc1__py3-none-win32.whl
Sign up to get free protection for your applications and to get access to all the features.
- package_readme.md +8 -0
- wandb/__init__.py +5 -7
- wandb/__init__.pyi +51 -30
- wandb/analytics/sentry.py +4 -10
- wandb/apis/importers/internals/internal.py +6 -6
- wandb/apis/importers/internals/protocols.py +11 -7
- wandb/apis/public/jobs.py +1 -7
- wandb/apis/public/reports.py +6 -17
- wandb/apis/public/runs.py +12 -10
- wandb/bin/gpu_stats.exe +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/cli.py +9 -45
- wandb/env.py +3 -5
- wandb/errors/links.py +1 -1
- wandb/errors/term.py +1 -6
- wandb/filesync/dir_watcher.py +3 -3
- wandb/filesync/step_upload.py +2 -5
- wandb/integration/fastai/__init__.py +1 -6
- wandb/integration/gym/__init__.py +1 -7
- wandb/integration/keras/callbacks/metrics_logger.py +1 -8
- wandb/integration/keras/callbacks/model_checkpoint.py +1 -8
- wandb/integration/keras/keras.py +3 -5
- wandb/integration/lightgbm/__init__.py +1 -1
- wandb/integration/sb3/sb3.py +1 -7
- wandb/integration/sklearn/utils.py +1 -1
- wandb/integration/tensorboard/log.py +1 -2
- wandb/integration/torch/wandb_torch.py +1 -1
- wandb/integration/ultralytics/bbox_utils.py +9 -2
- wandb/jupyter.py +4 -4
- wandb/proto/v3/wandb_internal_pb2.py +31 -31
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_internal_pb2.py +31 -31
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v5/wandb_internal_pb2.py +31 -31
- wandb/proto/v5/wandb_settings_pb2.py +2 -2
- wandb/proto/wandb_deprecated.py +1 -11
- wandb/proto/wandb_generate_deprecated.py +3 -7
- wandb/sdk/artifacts/artifact.py +2 -10
- wandb/sdk/artifacts/artifact_file_cache.py +2 -5
- wandb/sdk/artifacts/artifact_saver.py +2 -6
- wandb/sdk/artifacts/storage_handlers/gcs_handler.py +2 -4
- wandb/sdk/artifacts/storage_handlers/local_file_handler.py +2 -4
- wandb/sdk/artifacts/storage_handlers/s3_handler.py +2 -4
- wandb/sdk/backend/backend.py +1 -1
- wandb/sdk/data_types/histogram.py +1 -3
- wandb/sdk/data_types/object_3d.py +2 -6
- wandb/sdk/data_types/table.py +1 -1
- wandb/sdk/data_types/utils.py +1 -2
- wandb/sdk/data_types/video.py +15 -4
- wandb/sdk/integration_utils/auto_logging.py +1 -8
- wandb/sdk/interface/interface.py +12 -5
- wandb/sdk/interface/interface_shared.py +9 -0
- wandb/sdk/internal/file_stream.py +1 -4
- wandb/sdk/internal/flow_control.py +1 -1
- wandb/sdk/internal/handler.py +7 -2
- wandb/sdk/internal/internal.py +3 -3
- wandb/sdk/internal/internal_api.py +3 -10
- wandb/sdk/internal/job_builder.py +20 -12
- wandb/sdk/internal/progress.py +1 -5
- wandb/sdk/internal/sender.py +9 -13
- wandb/sdk/internal/settings_static.py +4 -10
- wandb/sdk/internal/system/assets/cpu.py +2 -2
- wandb/sdk/internal/system/assets/disk.py +3 -3
- wandb/sdk/internal/system/assets/gpu.py +7 -7
- wandb/sdk/internal/system/assets/gpu_amd.py +1 -7
- wandb/sdk/internal/system/assets/interfaces.py +11 -13
- wandb/sdk/internal/system/assets/ipu.py +1 -1
- wandb/sdk/internal/system/assets/memory.py +2 -2
- wandb/sdk/internal/system/assets/open_metrics.py +2 -8
- wandb/sdk/internal/system/assets/trainium.py +3 -9
- wandb/sdk/internal/system/system_info.py +14 -13
- wandb/sdk/internal/system/system_monitor.py +5 -12
- wandb/sdk/internal/tb_watcher.py +1 -1
- wandb/sdk/internal/writer.py +1 -1
- wandb/sdk/launch/agent/run_queue_item_file_saver.py +1 -7
- wandb/sdk/launch/create_job.py +2 -3
- wandb/sdk/launch/runner/abstract.py +1 -6
- wandb/sdk/launch/runner/kubernetes_monitor.py +2 -4
- wandb/sdk/lib/apikey.py +2 -6
- wandb/sdk/lib/fsm.py +12 -6
- wandb/sdk/lib/ipython.py +1 -6
- wandb/sdk/lib/module.py +0 -3
- wandb/sdk/lib/progress.py +2 -3
- wandb/sdk/lib/run_moment.py +1 -7
- wandb/sdk/lib/server.py +10 -24
- wandb/sdk/service/server.py +1 -1
- wandb/sdk/service/service.py +5 -5
- wandb/sdk/wandb_init.py +215 -166
- wandb/sdk/wandb_login.py +17 -27
- wandb/sdk/wandb_run.py +90 -116
- wandb/sdk/wandb_settings.py +978 -1778
- wandb/sdk/wandb_setup.py +86 -89
- wandb/sdk/wandb_watch.py +1 -1
- wandb/sync/sync.py +1 -2
- wandb/util.py +6 -39
- wandb/wandb_controller.py +10 -12
- {wandb-0.18.7.dist-info → wandb-0.19.0rc1.dist-info}/METADATA +14 -4
- {wandb-0.18.7.dist-info → wandb-0.19.0rc1.dist-info}/RECORD +101 -106
- {wandb-0.18.7.dist-info → wandb-0.19.0rc1.dist-info}/WHEEL +1 -1
- wandb/integration/magic.py +0 -556
- wandb/magic.py +0 -3
- wandb/sdk/lib/_settings_toposort_generate.py +0 -159
- wandb/sdk/lib/_settings_toposort_generated.py +0 -251
- wandb/sdk/lib/reporting.py +0 -99
- {wandb-0.18.7.dist-info → wandb-0.19.0rc1.dist-info}/entry_points.txt +0 -0
- {wandb-0.18.7.dist-info → wandb-0.19.0rc1.dist-info}/licenses/LICENSE +0 -0
@@ -5,7 +5,18 @@ import logging
|
|
5
5
|
import os
|
6
6
|
import re
|
7
7
|
import sys
|
8
|
-
from typing import
|
8
|
+
from typing import (
|
9
|
+
TYPE_CHECKING,
|
10
|
+
Any,
|
11
|
+
Callable,
|
12
|
+
Dict,
|
13
|
+
List,
|
14
|
+
Literal,
|
15
|
+
Optional,
|
16
|
+
Tuple,
|
17
|
+
TypedDict,
|
18
|
+
Union,
|
19
|
+
)
|
9
20
|
|
10
21
|
import wandb
|
11
22
|
from wandb.sdk.artifacts.artifact import Artifact
|
@@ -16,11 +27,6 @@ from wandb.util import make_artifact_name_safe
|
|
16
27
|
|
17
28
|
from .settings_static import SettingsStatic
|
18
29
|
|
19
|
-
if sys.version_info >= (3, 8):
|
20
|
-
from typing import Literal, TypedDict
|
21
|
-
else:
|
22
|
-
from typing_extensions import Literal, TypedDict
|
23
|
-
|
24
30
|
_logger = logging.getLogger(__name__)
|
25
31
|
|
26
32
|
if TYPE_CHECKING:
|
@@ -152,7 +158,7 @@ class JobBuilder:
|
|
152
158
|
self._logged_code_artifact = None
|
153
159
|
self._job_seq_id = None
|
154
160
|
self._job_version_alias = None
|
155
|
-
self._disable = settings.disable_job_creation
|
161
|
+
self._disable = settings.disable_job_creation or settings.x_disable_machine_info
|
156
162
|
self._partial_source_id = None
|
157
163
|
self._aliases = []
|
158
164
|
self._source_type: Optional[Literal["repo", "artifact", "image"]] = (
|
@@ -228,16 +234,16 @@ class JobBuilder:
|
|
228
234
|
):
|
229
235
|
return None, None
|
230
236
|
|
231
|
-
if root is None or self._settings.
|
237
|
+
if root is None or self._settings.x_jupyter_root is None:
|
232
238
|
_logger.info("target path does not exist, exiting")
|
233
239
|
return None, None
|
234
|
-
assert self._settings.
|
240
|
+
assert self._settings.x_jupyter_root is not None
|
235
241
|
# git notebooks set the root to the git root,
|
236
242
|
# jupyter_root contains the path where the jupyter notebook was started
|
237
243
|
# program_relpath contains the path from jupyter_root to the file
|
238
244
|
# full program path here is actually the relpath from the program to the git root
|
239
245
|
full_program_path = os.path.join(
|
240
|
-
os.path.relpath(str(self._settings.
|
246
|
+
os.path.relpath(str(self._settings.x_jupyter_root), root),
|
241
247
|
program_relpath,
|
242
248
|
)
|
243
249
|
full_program_path = os.path.normpath(full_program_path)
|
@@ -476,7 +482,8 @@ class JobBuilder:
|
|
476
482
|
# can't build a job without a python version
|
477
483
|
if runtime is None:
|
478
484
|
self._log_if_verbose(
|
479
|
-
"No python version found in metadata, not creating job artifact.
|
485
|
+
"No python version found in metadata, not creating job artifact. "
|
486
|
+
"See https://docs.wandb.ai/guides/launch/create-job",
|
480
487
|
"warn",
|
481
488
|
)
|
482
489
|
return None
|
@@ -505,7 +512,8 @@ class JobBuilder:
|
|
505
512
|
program_relpath = self._get_program_relpath(source_type, metadata)
|
506
513
|
if not self._partial and source_type != "image" and not program_relpath:
|
507
514
|
self._log_if_verbose(
|
508
|
-
"No program path found, not creating job artifact.
|
515
|
+
"No program path found, not creating job artifact. "
|
516
|
+
"See https://docs.wandb.ai/guides/launch/create-job",
|
509
517
|
"warn",
|
510
518
|
)
|
511
519
|
return None
|
wandb/sdk/internal/progress.py
CHANGED
@@ -1,16 +1,12 @@
|
|
1
1
|
"""progress."""
|
2
2
|
|
3
3
|
import os
|
4
|
-
import sys
|
5
4
|
from typing import IO, TYPE_CHECKING, Optional
|
6
5
|
|
7
6
|
from wandb.errors import CommError
|
8
7
|
|
9
8
|
if TYPE_CHECKING:
|
10
|
-
|
11
|
-
from typing import Protocol
|
12
|
-
else:
|
13
|
-
from typing_extensions import Protocol
|
9
|
+
from typing import Protocol
|
14
10
|
|
15
11
|
class ProgressFn(Protocol):
|
16
12
|
def __call__(self, new_bytes: int, total_bytes: int) -> None:
|
wandb/sdk/internal/sender.py
CHANGED
@@ -6,7 +6,6 @@ import json
|
|
6
6
|
import logging
|
7
7
|
import os
|
8
8
|
import queue
|
9
|
-
import sys
|
10
9
|
import threading
|
11
10
|
import time
|
12
11
|
import traceback
|
@@ -19,6 +18,7 @@ from typing import (
|
|
19
18
|
Dict,
|
20
19
|
Generator,
|
21
20
|
List,
|
21
|
+
Literal,
|
22
22
|
Optional,
|
23
23
|
Tuple,
|
24
24
|
Type,
|
@@ -57,11 +57,6 @@ from wandb.sdk.lib import (
|
|
57
57
|
from wandb.sdk.lib.mailbox import ContextCancelledError
|
58
58
|
from wandb.sdk.lib.proto_util import message_to_dict
|
59
59
|
|
60
|
-
if sys.version_info >= (3, 8):
|
61
|
-
from typing import Literal
|
62
|
-
else:
|
63
|
-
from typing_extensions import Literal
|
64
|
-
|
65
60
|
if TYPE_CHECKING:
|
66
61
|
from wandb.proto.wandb_internal_pb2 import (
|
67
62
|
ArtifactManifest,
|
@@ -326,18 +321,19 @@ class SendManager:
|
|
326
321
|
) -> "SendManager":
|
327
322
|
"""Set up a standalone SendManager.
|
328
323
|
|
329
|
-
|
324
|
+
Exclusively used in `sync.py`.
|
330
325
|
"""
|
326
|
+
print(root_dir)
|
331
327
|
files_dir = os.path.join(root_dir, "files")
|
332
328
|
settings = wandb.Settings(
|
333
|
-
|
329
|
+
x_files_dir=files_dir,
|
334
330
|
root_dir=root_dir,
|
335
331
|
# _start_time=0,
|
336
332
|
resume=resume,
|
337
333
|
# ignore_globs=(),
|
338
|
-
|
334
|
+
x_sync=True,
|
339
335
|
disable_job_creation=False,
|
340
|
-
|
336
|
+
x_file_stream_timeout_seconds=0,
|
341
337
|
)
|
342
338
|
record_q: Queue[Record] = queue.Queue()
|
343
339
|
result_q: Queue[Result] = queue.Queue()
|
@@ -440,7 +436,7 @@ class SendManager:
|
|
440
436
|
# state machine
|
441
437
|
# - skipping the exit record in `wandb sync` mode so that
|
442
438
|
# it is always executed as the last record
|
443
|
-
if not self._settings._offline and not self._settings.
|
439
|
+
if not self._settings._offline and not self._settings.x_sync:
|
444
440
|
assert record_num == self._send_record_num + 1
|
445
441
|
self._send_record_num = record_num
|
446
442
|
|
@@ -914,7 +910,7 @@ class SendManager:
|
|
914
910
|
# update telemetry
|
915
911
|
if run.telemetry:
|
916
912
|
self._telemetry_obj.MergeFrom(run.telemetry)
|
917
|
-
if self._settings.
|
913
|
+
if self._settings.x_sync:
|
918
914
|
self._telemetry_obj.feature.sync = True
|
919
915
|
|
920
916
|
# build config dict
|
@@ -1124,7 +1120,7 @@ class SendManager:
|
|
1124
1120
|
self._api,
|
1125
1121
|
self._run.run_id,
|
1126
1122
|
self._run.start_time.ToMicroseconds() / 1e6,
|
1127
|
-
timeout=self._settings.
|
1123
|
+
timeout=self._settings.x_file_stream_timeout_seconds or 0,
|
1128
1124
|
settings=self._api_settings,
|
1129
1125
|
)
|
1130
1126
|
# Ensure the streaming polices have the proper offsets
|
@@ -1,12 +1,11 @@
|
|
1
|
-
from
|
2
|
-
from typing import Any, Iterable, Sequence, Tuple
|
1
|
+
from typing import Any, Iterable
|
3
2
|
|
4
3
|
from wandb.proto import wandb_settings_pb2
|
5
4
|
from wandb.sdk.lib import RunMoment
|
6
|
-
from wandb.sdk.wandb_settings import
|
5
|
+
from wandb.sdk.wandb_settings import Settings
|
7
6
|
|
8
7
|
|
9
|
-
class SettingsStatic(
|
8
|
+
class SettingsStatic(Settings):
|
10
9
|
"""A readonly object that wraps a protobuf Settings message.
|
11
10
|
|
12
11
|
Implements the mapping protocol, so you can access settings as
|
@@ -19,8 +18,7 @@ class SettingsStatic(SettingsData):
|
|
19
18
|
|
20
19
|
def _from_proto(self, proto: wandb_settings_pb2.Settings) -> None:
|
21
20
|
forks_specified: list[str] = []
|
22
|
-
for
|
23
|
-
key = field.name
|
21
|
+
for key in Settings.model_fields: # type: ignore [attr-defined]
|
24
22
|
value: Any = None
|
25
23
|
if key == "_stats_open_metrics_filters":
|
26
24
|
# todo: it's an underscored field, refactor into
|
@@ -52,10 +50,6 @@ class SettingsStatic(SettingsData):
|
|
52
50
|
else:
|
53
51
|
if proto.HasField(key): # type: ignore [arg-type]
|
54
52
|
value = getattr(proto, key).value
|
55
|
-
if field.type == Sequence[str]:
|
56
|
-
value = list(value)
|
57
|
-
elif field.type == Tuple[str]:
|
58
|
-
value = tuple(value)
|
59
53
|
else:
|
60
54
|
value = None
|
61
55
|
object.__setattr__(self, key, value)
|
@@ -117,9 +117,9 @@ class CPU:
|
|
117
117
|
) -> None:
|
118
118
|
self.name: str = self.__class__.__name__.lower()
|
119
119
|
self.metrics: List[Metric] = [
|
120
|
-
ProcessCpuPercent(settings.
|
120
|
+
ProcessCpuPercent(settings.x_stats_pid),
|
121
121
|
CpuPercent(),
|
122
|
-
ProcessCpuThreads(settings.
|
122
|
+
ProcessCpuThreads(settings.x_stats_pid),
|
123
123
|
]
|
124
124
|
self.metrics_monitor: MetricsMonitor = MetricsMonitor(
|
125
125
|
self.name,
|
@@ -167,8 +167,8 @@ class Disk:
|
|
167
167
|
self.name = self.__class__.__name__.lower()
|
168
168
|
self.settings = settings
|
169
169
|
self.metrics: List[Metric] = [
|
170
|
-
DiskUsagePercent(list(settings.
|
171
|
-
DiskUsage(list(settings.
|
170
|
+
DiskUsagePercent(list(settings.x_stats_disk_paths or ["/"])),
|
171
|
+
DiskUsage(list(settings.x_stats_disk_paths or ["/"])),
|
172
172
|
DiskIn(),
|
173
173
|
DiskOut(),
|
174
174
|
]
|
@@ -186,7 +186,7 @@ class Disk:
|
|
186
186
|
return psutil is not None
|
187
187
|
|
188
188
|
def probe(self) -> dict:
|
189
|
-
disk_paths = list(self.settings.
|
189
|
+
disk_paths = list(self.settings.x_stats_disk_paths or ["/"])
|
190
190
|
disk_metrics = {}
|
191
191
|
for disk_path in disk_paths:
|
192
192
|
try:
|
@@ -354,13 +354,13 @@ class GPU:
|
|
354
354
|
) -> None:
|
355
355
|
self.name = self.__class__.__name__.lower()
|
356
356
|
self.metrics: List[Metric] = [
|
357
|
-
GPUMemoryAllocated(settings.
|
358
|
-
GPUMemoryAllocatedBytes(settings.
|
359
|
-
GPUMemoryUtilization(settings.
|
360
|
-
GPUUtilization(settings.
|
361
|
-
GPUTemperature(settings.
|
362
|
-
GPUPowerUsageWatts(settings.
|
363
|
-
GPUPowerUsagePercent(settings.
|
357
|
+
GPUMemoryAllocated(settings.x_stats_pid),
|
358
|
+
GPUMemoryAllocatedBytes(settings.x_stats_pid),
|
359
|
+
GPUMemoryUtilization(settings.x_stats_pid),
|
360
|
+
GPUUtilization(settings.x_stats_pid),
|
361
|
+
GPUTemperature(settings.x_stats_pid),
|
362
|
+
GPUPowerUsageWatts(settings.x_stats_pid),
|
363
|
+
GPUPowerUsagePercent(settings.x_stats_pid),
|
364
364
|
]
|
365
365
|
self.metrics_monitor = MetricsMonitor(
|
366
366
|
self.name,
|
@@ -2,15 +2,9 @@ import json
|
|
2
2
|
import logging
|
3
3
|
import shutil
|
4
4
|
import subprocess
|
5
|
-
import sys
|
6
5
|
import threading
|
7
6
|
from collections import deque
|
8
|
-
from typing import TYPE_CHECKING, Any, Dict, List, Union
|
9
|
-
|
10
|
-
if sys.version_info >= (3, 8):
|
11
|
-
from typing import Final, Literal
|
12
|
-
else:
|
13
|
-
from typing_extensions import Final, Literal
|
7
|
+
from typing import TYPE_CHECKING, Any, Dict, Final, List, Literal, Union
|
14
8
|
|
15
9
|
from wandb.sdk.lib import telemetry
|
16
10
|
|
@@ -1,13 +1,15 @@
|
|
1
1
|
import datetime
|
2
2
|
import logging
|
3
|
-
import sys
|
4
3
|
import threading
|
5
|
-
from typing import
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
4
|
+
from typing import (
|
5
|
+
TYPE_CHECKING,
|
6
|
+
Any,
|
7
|
+
List,
|
8
|
+
Optional,
|
9
|
+
Protocol,
|
10
|
+
TypeVar,
|
11
|
+
runtime_checkable,
|
12
|
+
)
|
11
13
|
|
12
14
|
if TYPE_CHECKING:
|
13
15
|
from typing import Deque
|
@@ -118,14 +120,10 @@ class MetricsMonitor:
|
|
118
120
|
self.sampling_interval: float = float(
|
119
121
|
max(
|
120
122
|
0.1,
|
121
|
-
settings.
|
123
|
+
settings.x_stats_sampling_interval,
|
122
124
|
)
|
123
125
|
) # seconds
|
124
|
-
|
125
|
-
# before publishing; defaults to 15; valid range: [1:30]
|
126
|
-
self.samples_to_aggregate: int = min(
|
127
|
-
30, max(1, settings._stats_samples_to_average)
|
128
|
-
)
|
126
|
+
self.samples_to_aggregate = 1
|
129
127
|
|
130
128
|
def monitor(self) -> None:
|
131
129
|
"""Poll the Asset metrics."""
|
@@ -134,8 +134,8 @@ class Memory:
|
|
134
134
|
self.metrics: List[Metric] = [
|
135
135
|
MemoryAvailable(),
|
136
136
|
MemoryPercent(),
|
137
|
-
ProcessMemoryRSS(settings.
|
138
|
-
ProcessMemoryPercent(settings.
|
137
|
+
ProcessMemoryRSS(settings.x_stats_pid),
|
138
|
+
ProcessMemoryPercent(settings.x_stats_pid),
|
139
139
|
]
|
140
140
|
self.metrics_monitor = MetricsMonitor(
|
141
141
|
self.name,
|
@@ -1,16 +1,10 @@
|
|
1
1
|
import logging
|
2
2
|
import re
|
3
|
-
import sys
|
4
3
|
import threading
|
5
4
|
from collections import defaultdict, deque
|
6
5
|
from functools import lru_cache
|
7
6
|
from types import ModuleType
|
8
|
-
from typing import TYPE_CHECKING, Dict, List, Mapping, Sequence, Tuple, Union
|
9
|
-
|
10
|
-
if sys.version_info >= (3, 8):
|
11
|
-
from typing import Final
|
12
|
-
else:
|
13
|
-
from typing_extensions import Final
|
7
|
+
from typing import TYPE_CHECKING, Dict, Final, List, Mapping, Sequence, Tuple, Union
|
14
8
|
|
15
9
|
import requests
|
16
10
|
import requests.adapters
|
@@ -235,7 +229,7 @@ class OpenMetrics:
|
|
235
229
|
self.shutdown_event = shutdown_event
|
236
230
|
|
237
231
|
self.metrics: List[Metric] = [
|
238
|
-
OpenMetricsMetric(name, url, settings.
|
232
|
+
OpenMetricsMetric(name, url, settings.x_stats_open_metrics_filters)
|
239
233
|
]
|
240
234
|
|
241
235
|
self.metrics_monitor: MetricsMonitor = MetricsMonitor(
|
@@ -6,17 +6,11 @@ import os
|
|
6
6
|
import pathlib
|
7
7
|
import shutil
|
8
8
|
import subprocess
|
9
|
-
import sys
|
10
9
|
import tempfile
|
11
10
|
import threading
|
12
11
|
import time
|
13
12
|
from collections import deque
|
14
|
-
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
|
15
|
-
|
16
|
-
if sys.version_info >= (3, 8):
|
17
|
-
from typing import Final
|
18
|
-
else:
|
19
|
-
from typing_extensions import Final
|
13
|
+
from typing import TYPE_CHECKING, Any, Dict, Final, List, Optional, Tuple, Union
|
20
14
|
|
21
15
|
from wandb.sdk.lib import telemetry
|
22
16
|
|
@@ -315,8 +309,8 @@ class Trainium:
|
|
315
309
|
self.name = self.__class__.__name__.lower()
|
316
310
|
self.metrics: List[Metric] = [
|
317
311
|
NeuronCoreStats(
|
318
|
-
settings.
|
319
|
-
settings.
|
312
|
+
settings.x_stats_pid,
|
313
|
+
settings.x_stats_neuron_monitor_config_path,
|
320
314
|
),
|
321
315
|
]
|
322
316
|
self.metrics_monitor = MetricsMonitor(
|
@@ -126,7 +126,7 @@ class SystemInfo:
|
|
126
126
|
logger.debug("Saving git patches done")
|
127
127
|
|
128
128
|
def _probe_git(self, data: Dict[str, Any]) -> Dict[str, Any]:
|
129
|
-
if self.settings.disable_git:
|
129
|
+
if self.settings.disable_git or self.settings.x_disable_machine_info:
|
130
130
|
return data
|
131
131
|
|
132
132
|
# in case of manually passing the git repo info, `enabled` would be False,
|
@@ -155,13 +155,14 @@ class SystemInfo:
|
|
155
155
|
data["os"] = self.settings._os
|
156
156
|
data["python"] = self.settings._python
|
157
157
|
data["heartbeatAt"] = datetime.datetime.utcnow().isoformat()
|
158
|
-
data["startedAt"] =
|
159
|
-
self.settings.
|
160
|
-
|
158
|
+
data["startedAt"] = (
|
159
|
+
datetime.datetime.utcfromtimestamp(self.settings.x_start_time).isoformat()
|
160
|
+
if self.settings.x_start_time
|
161
|
+
else None
|
162
|
+
)
|
161
163
|
|
162
164
|
data["docker"] = self.settings.docker
|
163
165
|
|
164
|
-
data["cuda"] = self.settings._cuda
|
165
166
|
data["args"] = tuple(self.settings._args or ())
|
166
167
|
data["state"] = "running"
|
167
168
|
|
@@ -170,22 +171,22 @@ class SystemInfo:
|
|
170
171
|
# Used during artifact-job creation, always points to the relpath
|
171
172
|
# of code execution, even when in a git repo
|
172
173
|
data["codePathLocal"] = self.settings._code_path_local
|
173
|
-
if not self.settings.disable_code:
|
174
|
+
if not (self.settings.disable_code or self.settings.x_disable_machine_info):
|
174
175
|
if self.settings.program_relpath:
|
175
176
|
data["codePath"] = self.settings.program_relpath
|
176
177
|
elif self.settings._jupyter:
|
177
178
|
if self.settings.notebook_name:
|
178
179
|
data["program"] = self.settings.notebook_name
|
179
|
-
elif self.settings.
|
180
|
-
if self.settings.
|
181
|
-
unescaped = unquote(self.settings.
|
180
|
+
elif self.settings.x_jupyter_path:
|
181
|
+
if self.settings.x_jupyter_path.startswith("fileId="):
|
182
|
+
unescaped = unquote(self.settings.x_jupyter_path)
|
182
183
|
data["colab"] = (
|
183
184
|
"https://colab.research.google.com/notebook#" + unescaped
|
184
185
|
)
|
185
|
-
data["program"] = self.settings.
|
186
|
+
data["program"] = self.settings.x_jupyter_name
|
186
187
|
else:
|
187
|
-
data["program"] = self.settings.
|
188
|
-
data["root"] = self.settings.
|
188
|
+
data["program"] = self.settings.x_jupyter_path
|
189
|
+
data["root"] = self.settings.x_jupyter_root
|
189
190
|
# get the git repo info
|
190
191
|
data = self._probe_git(data)
|
191
192
|
|
@@ -225,7 +226,7 @@ class SystemInfo:
|
|
225
226
|
|
226
227
|
def publish(self, system_info: dict) -> None:
|
227
228
|
# save pip, conda, code patches to disk
|
228
|
-
if self.settings.
|
229
|
+
if self.settings.x_save_requirements:
|
229
230
|
self._save_conda()
|
230
231
|
if self.settings.save_code:
|
231
232
|
self._save_code()
|
@@ -60,18 +60,11 @@ class SystemMonitor:
|
|
60
60
|
|
61
61
|
# compute the global publishing interval if _stats_join_assets is requested
|
62
62
|
sampling_interval: float = float(
|
63
|
-
max(
|
64
|
-
0.1,
|
65
|
-
self.settings._stats_sample_rate_seconds,
|
66
|
-
)
|
63
|
+
max(0.1, self.settings.x_stats_sampling_interval)
|
67
64
|
) # seconds
|
68
|
-
|
69
|
-
# before publishing; defaults to 15; valid range: [1:30]
|
70
|
-
samples_to_aggregate: int = min(
|
71
|
-
30, max(1, self.settings._stats_samples_to_average)
|
72
|
-
)
|
65
|
+
samples_to_aggregate: int = 1
|
73
66
|
self.publishing_interval: float = sampling_interval * samples_to_aggregate
|
74
|
-
self.join_assets: bool =
|
67
|
+
self.join_assets: bool = False
|
75
68
|
|
76
69
|
self.backend_interface = interface
|
77
70
|
self.asset_interface: Optional[AssetInterface] = (
|
@@ -90,7 +83,7 @@ class SystemMonitor:
|
|
90
83
|
)
|
91
84
|
|
92
85
|
self.buffer: Dict[str, Deque[Tuple[float, float]]] = defaultdict(
|
93
|
-
lambda: deque([], maxlen=self.settings.
|
86
|
+
lambda: deque([], maxlen=self.settings.x_stats_buffer_size)
|
94
87
|
)
|
95
88
|
|
96
89
|
def _get_assets(self) -> List["Asset"]:
|
@@ -104,7 +97,7 @@ class SystemMonitor:
|
|
104
97
|
]
|
105
98
|
|
106
99
|
def _get_open_metrics_assets(self) -> List["Asset"]:
|
107
|
-
open_metrics_endpoints = self.settings.
|
100
|
+
open_metrics_endpoints = self.settings.x_stats_open_metrics_endpoints
|
108
101
|
if not open_metrics_endpoints:
|
109
102
|
return []
|
110
103
|
|
wandb/sdk/internal/tb_watcher.py
CHANGED
@@ -231,7 +231,7 @@ class TBDirWatcher:
|
|
231
231
|
return is_tfevents_file_created_by(path, None, None)
|
232
232
|
else:
|
233
233
|
return is_tfevents_file_created_by(
|
234
|
-
path, self._hostname, self._tbwatcher._settings.
|
234
|
+
path, self._hostname, self._tbwatcher._settings.x_start_time
|
235
235
|
)
|
236
236
|
|
237
237
|
def _loader(
|
wandb/sdk/internal/writer.py
CHANGED
@@ -63,7 +63,7 @@ class WriteManager:
|
|
63
63
|
self._telemetry_obj = tpb.TelemetryRecord()
|
64
64
|
self._telemetry_overflow = False
|
65
65
|
self._use_flow_control = not (
|
66
|
-
self._settings.
|
66
|
+
self._settings.x_flow_control_disabled or self._settings._offline
|
67
67
|
)
|
68
68
|
|
69
69
|
def open(self) -> None:
|
@@ -1,16 +1,10 @@
|
|
1
1
|
"""Implementation of the run queue item file saver class."""
|
2
2
|
|
3
3
|
import os
|
4
|
-
import
|
5
|
-
from typing import List, Optional
|
4
|
+
from typing import List, Literal, Optional
|
6
5
|
|
7
6
|
import wandb
|
8
7
|
|
9
|
-
if sys.version_info >= (3, 8):
|
10
|
-
from typing import Literal
|
11
|
-
else:
|
12
|
-
from typing_extensions import Literal
|
13
|
-
|
14
8
|
FileSubtypes = Literal["warning", "error"]
|
15
9
|
|
16
10
|
|
wandb/sdk/launch/create_job.py
CHANGED
@@ -168,7 +168,7 @@ def _create_job(
|
|
168
168
|
return None, "", []
|
169
169
|
|
170
170
|
job_builder = _configure_job_builder_for_partial(tempdir.name, job_source=job_type)
|
171
|
-
job_builder._settings.
|
171
|
+
job_builder._settings.job_name = name
|
172
172
|
if job_type == "code":
|
173
173
|
assert entrypoint is not None
|
174
174
|
job_name = _make_code_artifact(
|
@@ -404,8 +404,7 @@ def _configure_job_builder_for_partial(tmpdir: str, job_source: str) -> JobBuild
|
|
404
404
|
if job_source == "code":
|
405
405
|
job_source = "artifact"
|
406
406
|
|
407
|
-
settings = wandb.Settings()
|
408
|
-
settings.update({"files_dir": tmpdir, "job_source": job_source})
|
407
|
+
settings = wandb.Settings(x_files_dir=tmpdir, job_source=job_source)
|
409
408
|
job_builder = JobBuilder(
|
410
409
|
settings=settings, # type: ignore
|
411
410
|
verbose=True,
|
@@ -9,7 +9,7 @@ import os
|
|
9
9
|
import subprocess
|
10
10
|
import sys
|
11
11
|
from abc import ABC, abstractmethod
|
12
|
-
from typing import Any, Dict, List, Optional, Union
|
12
|
+
from typing import Any, Dict, List, Literal, Optional, Union
|
13
13
|
|
14
14
|
from dockerpycreds.utils import find_executable # type: ignore
|
15
15
|
|
@@ -22,11 +22,6 @@ from .._project_spec import LaunchProject
|
|
22
22
|
_logger = logging.getLogger(__name__)
|
23
23
|
|
24
24
|
|
25
|
-
if sys.version_info >= (3, 8):
|
26
|
-
from typing import Literal
|
27
|
-
else:
|
28
|
-
from typing_extensions import Literal
|
29
|
-
|
30
25
|
State = Literal[
|
31
26
|
"unknown",
|
32
27
|
"starting",
|
@@ -2,7 +2,6 @@
|
|
2
2
|
|
3
3
|
import asyncio
|
4
4
|
import logging
|
5
|
-
import sys
|
6
5
|
import traceback
|
7
6
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
8
7
|
|
@@ -74,8 +73,7 @@ _logger = logging.getLogger(__name__)
|
|
74
73
|
def create_named_task(name: str, coro: Any, *args: Any, **kwargs: Any) -> asyncio.Task:
|
75
74
|
"""Create a named task."""
|
76
75
|
task = asyncio.create_task(coro(*args, **kwargs))
|
77
|
-
|
78
|
-
task.set_name(name)
|
76
|
+
task.set_name(name)
|
79
77
|
task.add_done_callback(_log_err_task_callback)
|
80
78
|
return task
|
81
79
|
|
@@ -87,7 +85,7 @@ def _log_err_task_callback(task: asyncio.Task) -> None:
|
|
87
85
|
if isinstance(exec, asyncio.CancelledError):
|
88
86
|
wandb.termlog(f"Task {task.get_name()} was cancelled")
|
89
87
|
return
|
90
|
-
name =
|
88
|
+
name = task.get_name()
|
91
89
|
wandb.termerror(f"Exception in task {name}")
|
92
90
|
tb = exec.__traceback__
|
93
91
|
tb_str = "".join(traceback.format_tb(tb))
|
wandb/sdk/lib/apikey.py
CHANGED
@@ -6,14 +6,10 @@ import stat
|
|
6
6
|
import sys
|
7
7
|
import textwrap
|
8
8
|
from functools import partial
|
9
|
-
from typing import TYPE_CHECKING, Callable, Dict, Optional, Union
|
10
|
-
from urllib.parse import urlparse
|
11
9
|
|
12
10
|
# import Literal
|
13
|
-
|
14
|
-
|
15
|
-
else:
|
16
|
-
from typing_extensions import Literal
|
11
|
+
from typing import TYPE_CHECKING, Callable, Dict, Literal, Optional, Union
|
12
|
+
from urllib.parse import urlparse
|
17
13
|
|
18
14
|
import click
|
19
15
|
from requests.utils import NETRC_FILES, get_netrc_auth
|
wandb/sdk/lib/fsm.py
CHANGED
@@ -31,12 +31,18 @@ Usage:
|
|
31
31
|
import sys
|
32
32
|
from abc import abstractmethod
|
33
33
|
from dataclasses import dataclass
|
34
|
-
from typing import
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
34
|
+
from typing import (
|
35
|
+
Callable,
|
36
|
+
Dict,
|
37
|
+
Generic,
|
38
|
+
Optional,
|
39
|
+
Protocol,
|
40
|
+
Sequence,
|
41
|
+
Type,
|
42
|
+
TypeVar,
|
43
|
+
Union,
|
44
|
+
runtime_checkable,
|
45
|
+
)
|
40
46
|
|
41
47
|
if sys.version_info >= (3, 10):
|
42
48
|
from typing import TypeAlias
|