wandb 0.15.9__py3-none-any.whl → 0.15.11__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- wandb/__init__.py +5 -1
- wandb/apis/public.py +137 -17
- wandb/apis/reports/_panels.py +1 -1
- wandb/apis/reports/blocks.py +1 -0
- wandb/apis/reports/report.py +27 -5
- wandb/cli/cli.py +52 -41
- wandb/docker/__init__.py +17 -0
- wandb/docker/auth.py +1 -1
- wandb/env.py +24 -4
- wandb/filesync/step_checksum.py +3 -3
- wandb/integration/openai/openai.py +3 -0
- wandb/integration/ultralytics/__init__.py +9 -0
- wandb/integration/ultralytics/bbox_utils.py +196 -0
- wandb/integration/ultralytics/callback.py +458 -0
- wandb/integration/ultralytics/classification_utils.py +66 -0
- wandb/integration/ultralytics/mask_utils.py +141 -0
- wandb/integration/ultralytics/pose_utils.py +92 -0
- wandb/integration/xgboost/xgboost.py +3 -3
- wandb/integration/yolov8/__init__.py +0 -7
- wandb/integration/yolov8/yolov8.py +22 -3
- wandb/old/settings.py +7 -0
- wandb/plot/line_series.py +0 -1
- wandb/proto/v3/wandb_internal_pb2.py +353 -300
- wandb/proto/v3/wandb_server_pb2.py +37 -41
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_telemetry_pb2.py +16 -16
- wandb/proto/v4/wandb_internal_pb2.py +272 -260
- wandb/proto/v4/wandb_server_pb2.py +37 -40
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_telemetry_pb2.py +16 -16
- wandb/proto/wandb_internal_codegen.py +7 -31
- wandb/sdk/artifacts/artifact.py +321 -189
- wandb/sdk/artifacts/artifact_cache.py +14 -0
- wandb/sdk/artifacts/artifact_manifest.py +5 -4
- wandb/sdk/artifacts/artifact_manifest_entry.py +37 -9
- wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +1 -9
- wandb/sdk/artifacts/artifact_saver.py +13 -50
- wandb/sdk/artifacts/artifact_ttl.py +6 -0
- wandb/sdk/artifacts/artifacts_cache.py +119 -93
- wandb/sdk/artifacts/staging.py +25 -0
- wandb/sdk/artifacts/storage_handlers/s3_handler.py +12 -7
- wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +2 -3
- wandb/sdk/artifacts/storage_policies/__init__.py +4 -0
- wandb/sdk/artifacts/storage_policies/register.py +1 -0
- wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +4 -3
- wandb/sdk/artifacts/storage_policy.py +4 -2
- wandb/sdk/backend/backend.py +0 -16
- wandb/sdk/data_types/image.py +3 -1
- wandb/sdk/integration_utils/auto_logging.py +38 -13
- wandb/sdk/interface/interface.py +16 -135
- wandb/sdk/interface/interface_shared.py +9 -147
- wandb/sdk/interface/interface_sock.py +0 -26
- wandb/sdk/internal/file_pusher.py +20 -3
- wandb/sdk/internal/file_stream.py +3 -1
- wandb/sdk/internal/handler.py +53 -70
- wandb/sdk/internal/internal_api.py +220 -130
- wandb/sdk/internal/job_builder.py +41 -37
- wandb/sdk/internal/sender.py +7 -25
- wandb/sdk/internal/system/assets/disk.py +144 -11
- wandb/sdk/internal/system/system_info.py +6 -2
- wandb/sdk/launch/__init__.py +5 -0
- wandb/sdk/launch/{launch.py → _launch.py} +53 -54
- wandb/sdk/launch/{launch_add.py → _launch_add.py} +34 -31
- wandb/sdk/launch/_project_spec.py +13 -2
- wandb/sdk/launch/agent/agent.py +103 -59
- wandb/sdk/launch/agent/run_queue_item_file_saver.py +6 -4
- wandb/sdk/launch/builder/build.py +19 -1
- wandb/sdk/launch/builder/docker_builder.py +5 -1
- wandb/sdk/launch/builder/kaniko_builder.py +5 -1
- wandb/sdk/launch/create_job.py +20 -5
- wandb/sdk/launch/loader.py +14 -5
- wandb/sdk/launch/runner/abstract.py +0 -2
- wandb/sdk/launch/runner/kubernetes_monitor.py +329 -0
- wandb/sdk/launch/runner/kubernetes_runner.py +66 -209
- wandb/sdk/launch/runner/local_container.py +5 -2
- wandb/sdk/launch/runner/local_process.py +4 -1
- wandb/sdk/launch/sweeps/scheduler.py +43 -25
- wandb/sdk/launch/sweeps/utils.py +5 -3
- wandb/sdk/launch/utils.py +3 -1
- wandb/sdk/lib/_settings_toposort_generate.py +3 -9
- wandb/sdk/lib/_settings_toposort_generated.py +27 -3
- wandb/sdk/lib/_wburls_generated.py +1 -0
- wandb/sdk/lib/filenames.py +27 -6
- wandb/sdk/lib/filesystem.py +181 -7
- wandb/sdk/lib/fsm.py +5 -3
- wandb/sdk/lib/gql_request.py +3 -0
- wandb/sdk/lib/ipython.py +7 -0
- wandb/sdk/lib/wburls.py +1 -0
- wandb/sdk/service/port_file.py +2 -15
- wandb/sdk/service/server.py +7 -55
- wandb/sdk/service/service.py +56 -26
- wandb/sdk/service/service_base.py +1 -1
- wandb/sdk/service/streams.py +11 -5
- wandb/sdk/verify/verify.py +2 -2
- wandb/sdk/wandb_init.py +8 -2
- wandb/sdk/wandb_manager.py +4 -14
- wandb/sdk/wandb_run.py +143 -53
- wandb/sdk/wandb_settings.py +148 -35
- wandb/testing/relay.py +85 -38
- wandb/util.py +87 -4
- wandb/wandb_torch.py +24 -38
- {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/METADATA +48 -23
- {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/RECORD +107 -103
- {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/WHEEL +1 -1
- wandb/proto/v3/wandb_server_pb2_grpc.py +0 -1422
- wandb/proto/v4/wandb_server_pb2_grpc.py +0 -1422
- wandb/proto/wandb_server_pb2_grpc.py +0 -8
- wandb/sdk/artifacts/storage_policies/s3_bucket_policy.py +0 -61
- wandb/sdk/interface/interface_grpc.py +0 -460
- wandb/sdk/service/server_grpc.py +0 -444
- wandb/sdk/service/service_grpc.py +0 -73
- {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/LICENSE +0 -0
- {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/entry_points.txt +0 -0
- {wandb-0.15.9.dist-info → wandb-0.15.11.dist-info}/top_level.txt +0 -0
@@ -3,6 +3,7 @@ import logging
|
|
3
3
|
import os
|
4
4
|
import queue
|
5
5
|
import tempfile
|
6
|
+
import threading
|
6
7
|
import time
|
7
8
|
from typing import TYPE_CHECKING, Optional, Tuple
|
8
9
|
|
@@ -12,8 +13,8 @@ from wandb.filesync import stats, step_checksum, step_upload
|
|
12
13
|
from wandb.sdk.lib.paths import LogicalPath
|
13
14
|
|
14
15
|
if TYPE_CHECKING:
|
15
|
-
from wandb.sdk.artifacts import artifact_saver
|
16
16
|
from wandb.sdk.artifacts.artifact_manifest import ArtifactManifest
|
17
|
+
from wandb.sdk.artifacts.artifact_saver import SaveFn, SaveFnAsync
|
17
18
|
from wandb.sdk.internal import file_stream, internal_api
|
18
19
|
from wandb.sdk.internal.settings_static import SettingsStatic
|
19
20
|
|
@@ -72,6 +73,21 @@ class FilePusher:
|
|
72
73
|
)
|
73
74
|
self._step_upload.start()
|
74
75
|
|
76
|
+
self._stats_thread_stop = threading.Event()
|
77
|
+
if os.environ.get("WANDB_DEBUG"):
|
78
|
+
# debug thread to monitor and report file pusher stats
|
79
|
+
self._stats_thread = threading.Thread(
|
80
|
+
target=self._file_pusher_stats,
|
81
|
+
daemon=True,
|
82
|
+
name="FPStatsThread",
|
83
|
+
)
|
84
|
+
self._stats_thread.start()
|
85
|
+
|
86
|
+
def _file_pusher_stats(self) -> None:
|
87
|
+
while not self._stats_thread_stop.is_set():
|
88
|
+
logger.info(f"FilePusher stats: {self._stats._stats}")
|
89
|
+
time.sleep(1)
|
90
|
+
|
75
91
|
def get_status(self) -> Tuple[bool, stats.Summary]:
|
76
92
|
running = self.is_alive()
|
77
93
|
summary = self._stats.summary()
|
@@ -134,8 +150,8 @@ class FilePusher:
|
|
134
150
|
self,
|
135
151
|
manifest: "ArtifactManifest",
|
136
152
|
artifact_id: str,
|
137
|
-
save_fn: "
|
138
|
-
save_fn_async: "
|
153
|
+
save_fn: "SaveFn",
|
154
|
+
save_fn_async: "SaveFnAsync",
|
139
155
|
) -> None:
|
140
156
|
event = step_checksum.RequestStoreManifestFiles(
|
141
157
|
manifest, artifact_id, save_fn, save_fn_async
|
@@ -158,6 +174,7 @@ class FilePusher:
|
|
158
174
|
def finish(self, callback: Optional[step_upload.OnRequestFinishFn] = None):
|
159
175
|
logger.info("shutting down file pusher")
|
160
176
|
self._incoming_queue.put(step_checksum.RequestFinish(callback))
|
177
|
+
self._stats_thread_stop.set()
|
161
178
|
|
162
179
|
def join(self) -> None:
|
163
180
|
# NOTE: must have called finish before join
|
@@ -335,11 +335,13 @@ class FileStreamApi:
|
|
335
335
|
self._run_id = run_id
|
336
336
|
self._start_time = start_time
|
337
337
|
self._client = requests.Session()
|
338
|
+
timeout = timeout or 0
|
338
339
|
if timeout > 0:
|
339
340
|
self._client.post = functools.partial(self._client.post, timeout=timeout) # type: ignore[method-assign]
|
340
341
|
self._client.auth = api.client.transport.session.auth
|
341
342
|
self._client.headers.update(api.client.transport.headers or {})
|
342
343
|
self._client.cookies.update(api.client.transport.cookies or {}) # type: ignore[no-untyped-call]
|
344
|
+
self._client.proxies.update(api.client.transport.session.proxies or {})
|
343
345
|
self._file_policies: Dict[str, DefaultFilePolicy] = {}
|
344
346
|
self._dropped_chunks: int = 0
|
345
347
|
self._queue: queue.Queue = queue.Queue()
|
@@ -499,7 +501,7 @@ class FileStreamApi:
|
|
499
501
|
wandb.termerror(
|
500
502
|
"Dropped streaming file chunk (see wandb/debug-internal.log)"
|
501
503
|
)
|
502
|
-
|
504
|
+
logger.exception("dropped chunk %s" % response)
|
503
505
|
self._dropped_chunks += 1
|
504
506
|
else:
|
505
507
|
parsed: Optional[dict] = None
|
wandb/sdk/internal/handler.py
CHANGED
@@ -23,6 +23,7 @@ from typing import (
|
|
23
23
|
|
24
24
|
from wandb.proto.wandb_internal_pb2 import (
|
25
25
|
HistoryRecord,
|
26
|
+
InternalMessages,
|
26
27
|
MetricRecord,
|
27
28
|
Record,
|
28
29
|
Result,
|
@@ -34,13 +35,13 @@ from wandb.proto.wandb_internal_pb2 import (
|
|
34
35
|
)
|
35
36
|
|
36
37
|
from ..interface.interface_queue import InterfaceQueue
|
37
|
-
from ..lib import handler_util, proto_util, tracelog
|
38
|
+
from ..lib import handler_util, proto_util, tracelog, wburls
|
38
39
|
from . import context, sample, tb_watcher
|
39
40
|
from .settings_static import SettingsStatic
|
40
41
|
from .system.system_monitor import SystemMonitor
|
41
42
|
|
42
43
|
if TYPE_CHECKING:
|
43
|
-
from wandb.proto.wandb_internal_pb2 import
|
44
|
+
from wandb.proto.wandb_internal_pb2 import MetricSummary
|
44
45
|
|
45
46
|
|
46
47
|
SummaryDict = Dict[str, Any]
|
@@ -80,7 +81,6 @@ class HandleManager:
|
|
80
81
|
_metric_copy: Dict[Tuple[str, ...], Any]
|
81
82
|
_track_time: Optional[float]
|
82
83
|
_accumulate_time: float
|
83
|
-
_artifact_xid_done: Dict[str, "ArtifactDoneRequest"]
|
84
84
|
_run_start_time: Optional[float]
|
85
85
|
_context_keeper: context.ContextKeeper
|
86
86
|
|
@@ -119,9 +119,9 @@ class HandleManager:
|
|
119
119
|
self._metric_globs = defaultdict(MetricRecord)
|
120
120
|
self._metric_track = dict()
|
121
121
|
self._metric_copy = dict()
|
122
|
+
self._internal_messages = InternalMessages()
|
122
123
|
|
123
|
-
|
124
|
-
self._artifact_xid_done = dict()
|
124
|
+
self._dropped_history = False
|
125
125
|
|
126
126
|
def __len__(self) -> int:
|
127
127
|
return self._record_q.qsize()
|
@@ -235,7 +235,7 @@ class HandleManager:
|
|
235
235
|
record = Record(summary=summary)
|
236
236
|
self._dispatch_record(record)
|
237
237
|
elif not self._settings._offline:
|
238
|
-
# Send this summary update as a request since we
|
238
|
+
# Send this summary update as a request since we aren't persisting every update
|
239
239
|
summary_record = SummaryRecordRequest(summary=summary)
|
240
240
|
request_record = self._interface._make_request(
|
241
241
|
summary_record=summary_record
|
@@ -266,7 +266,7 @@ class HandleManager:
|
|
266
266
|
if s.none:
|
267
267
|
return False
|
268
268
|
if s.copy:
|
269
|
-
# non
|
269
|
+
# non-key list copy already done in _update_summary
|
270
270
|
if len(kl) > 1:
|
271
271
|
_dict_nested_set(self._consolidated_summary, kl, v)
|
272
272
|
return True
|
@@ -290,7 +290,7 @@ class HandleManager:
|
|
290
290
|
if best_key:
|
291
291
|
_dict_nested_set(self._consolidated_summary, best_key, v)
|
292
292
|
updated = True
|
293
|
-
# defaulting to minimize if goal is not
|
293
|
+
# defaulting to minimize if goal is not specified
|
294
294
|
if s.min or best_key and not goal_max:
|
295
295
|
min_key = tuple(kl + ["min"])
|
296
296
|
old_min = self._metric_track.get(min_key)
|
@@ -358,7 +358,7 @@ class HandleManager:
|
|
358
358
|
) -> bool:
|
359
359
|
metric_key = ".".join([k.replace(".", "\\.") for k in kl])
|
360
360
|
d = self._metric_defines.get(metric_key, d)
|
361
|
-
# if the dict has _type key,
|
361
|
+
# if the dict has _type key, it's a wandb table object
|
362
362
|
if isinstance(v, dict) and not handler_util.metric_is_wandb_dict(v):
|
363
363
|
updated = False
|
364
364
|
for nk, nv in v.items():
|
@@ -374,7 +374,7 @@ class HandleManager:
|
|
374
374
|
return updated
|
375
375
|
|
376
376
|
def _update_summary_media_objects(self, v: Dict[str, Any]) -> Dict[str, Any]:
|
377
|
-
# For now, non
|
377
|
+
# For now, non-recursive - just top level
|
378
378
|
for nk, nv in v.items():
|
379
379
|
if (
|
380
380
|
isinstance(nv, dict)
|
@@ -387,17 +387,17 @@ class HandleManager:
|
|
387
387
|
v[nk] = nv
|
388
388
|
return v
|
389
389
|
|
390
|
-
def _update_summary(self, history_dict: Dict[str, Any]) ->
|
390
|
+
def _update_summary(self, history_dict: Dict[str, Any]) -> List[str]:
|
391
391
|
# keep old behavior fast path if no define metrics have been used
|
392
392
|
if not self._metric_defines:
|
393
393
|
history_dict = self._update_summary_media_objects(history_dict)
|
394
394
|
self._consolidated_summary.update(history_dict)
|
395
|
-
return
|
396
|
-
|
395
|
+
return list(history_dict.keys())
|
396
|
+
updated_keys = []
|
397
397
|
for k, v in history_dict.items():
|
398
398
|
if self._update_summary_list(kl=[k], v=v):
|
399
|
-
|
400
|
-
return
|
399
|
+
updated_keys.append(k)
|
400
|
+
return updated_keys
|
401
401
|
|
402
402
|
def _history_assign_step(
|
403
403
|
self,
|
@@ -511,24 +511,28 @@ class HandleManager:
|
|
511
511
|
self._history_update(record.history, history_dict)
|
512
512
|
self._dispatch_record(record)
|
513
513
|
self._save_history(record.history)
|
514
|
-
|
515
|
-
|
516
|
-
|
514
|
+
# update summary from history
|
515
|
+
updated_keys = self._update_summary(history_dict)
|
516
|
+
if updated_keys:
|
517
|
+
updated_items = {k: self._consolidated_summary[k] for k in updated_keys}
|
518
|
+
self._save_summary(updated_items)
|
517
519
|
|
518
520
|
def _flush_partial_history(
|
519
521
|
self,
|
520
522
|
step: Optional[int] = None,
|
521
523
|
) -> None:
|
522
|
-
if self._partial_history:
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
524
|
+
if not self._partial_history:
|
525
|
+
return
|
526
|
+
|
527
|
+
history = HistoryRecord()
|
528
|
+
for k, v in self._partial_history.items():
|
529
|
+
item = history.item.add()
|
530
|
+
item.key = k
|
531
|
+
item.value_json = json.dumps(v)
|
532
|
+
if step is not None:
|
533
|
+
history.step.num = step
|
534
|
+
self.handle_history(Record(history=history))
|
535
|
+
self._partial_history = {}
|
532
536
|
|
533
537
|
def handle_request_sender_mark_report(self, record: Record) -> None:
|
534
538
|
self._dispatch_record(record, always_send=True)
|
@@ -550,9 +554,18 @@ class HandleManager:
|
|
550
554
|
history_dict = proto_util.dict_from_proto_list(partial_history.item)
|
551
555
|
if step is not None:
|
552
556
|
if step < self._step:
|
553
|
-
|
554
|
-
|
557
|
+
if not self._dropped_history:
|
558
|
+
message = (
|
559
|
+
"Step only supports monotonically increasing values, use define_metric to set a custom x "
|
560
|
+
f"axis. For details see: {wburls.wburls.get('wandb_define_metric')}"
|
561
|
+
)
|
562
|
+
self._internal_messages.warning.append(message)
|
563
|
+
self._dropped_history = True
|
564
|
+
message = (
|
565
|
+
f"(User provided step: {step} is less than current step: {self._step}. "
|
566
|
+
f"Dropping entry: {history_dict})."
|
555
567
|
)
|
568
|
+
self._internal_messages.warning.append(message)
|
556
569
|
return
|
557
570
|
elif step > self._step:
|
558
571
|
self._flush_partial_history()
|
@@ -639,44 +652,6 @@ class HandleManager:
|
|
639
652
|
def handle_request_log_artifact(self, record: Record) -> None:
|
640
653
|
self._dispatch_record(record)
|
641
654
|
|
642
|
-
def handle_request_artifact_send(self, record: Record) -> None:
|
643
|
-
assert record.control.req_resp
|
644
|
-
result = proto_util._result_from_record(record)
|
645
|
-
|
646
|
-
self._dispatch_record(record)
|
647
|
-
|
648
|
-
# send response immediately, the request will be polled for result
|
649
|
-
xid = record.uuid
|
650
|
-
result.response.artifact_send_response.xid = xid
|
651
|
-
self._respond_result(result)
|
652
|
-
|
653
|
-
def handle_request_artifact_poll(self, record: Record) -> None:
|
654
|
-
assert record.control.req_resp
|
655
|
-
xid = record.request.artifact_poll.xid
|
656
|
-
assert xid
|
657
|
-
|
658
|
-
result = proto_util._result_from_record(record)
|
659
|
-
done_req = self._artifact_xid_done.get(xid)
|
660
|
-
if done_req:
|
661
|
-
result.response.artifact_poll_response.artifact_id = done_req.artifact_id
|
662
|
-
result.response.artifact_poll_response.error_message = (
|
663
|
-
done_req.error_message
|
664
|
-
)
|
665
|
-
result.response.artifact_poll_response.ready = True
|
666
|
-
self._respond_result(result)
|
667
|
-
|
668
|
-
def handle_request_artifact_done(self, record: Record) -> None:
|
669
|
-
assert not record.control.req_resp
|
670
|
-
done_req = record.request.artifact_done
|
671
|
-
xid = done_req.xid
|
672
|
-
assert xid
|
673
|
-
|
674
|
-
self._artifact_xid_done[xid] = done_req
|
675
|
-
|
676
|
-
# def handle_request_artifact_release(self, record: Record) -> None:
|
677
|
-
# assert record.control.req_resp
|
678
|
-
# # TODO: implement release protocol to clean up _artifact_xid_done dict
|
679
|
-
|
680
655
|
def handle_telemetry(self, record: Record) -> None:
|
681
656
|
self._dispatch_record(record)
|
682
657
|
|
@@ -716,7 +691,7 @@ class HandleManager:
|
|
716
691
|
|
717
692
|
def handle_request_resume(self, record: Record) -> None:
|
718
693
|
if self._system_monitor is not None:
|
719
|
-
logger.info("starting system metrics thread
|
694
|
+
logger.info("starting system metrics thread")
|
720
695
|
self._system_monitor.start()
|
721
696
|
|
722
697
|
if self._track_time is not None:
|
@@ -725,7 +700,7 @@ class HandleManager:
|
|
725
700
|
|
726
701
|
def handle_request_pause(self, record: Record) -> None:
|
727
702
|
if self._system_monitor is not None:
|
728
|
-
logger.info("stopping system metrics thread
|
703
|
+
logger.info("stopping system metrics thread")
|
729
704
|
self._system_monitor.finish()
|
730
705
|
if self._track_time is not None:
|
731
706
|
self._accumulate_time += time.time() - self._track_time
|
@@ -740,6 +715,14 @@ class HandleManager:
|
|
740
715
|
def handle_request_network_status(self, record: Record) -> None:
|
741
716
|
self._dispatch_record(record)
|
742
717
|
|
718
|
+
def handle_request_internal_messages(self, record: Record) -> None:
|
719
|
+
result = proto_util._result_from_record(record)
|
720
|
+
result.response.internal_messages_response.messages.CopyFrom(
|
721
|
+
self._internal_messages
|
722
|
+
)
|
723
|
+
self._internal_messages.Clear()
|
724
|
+
self._respond_result(result)
|
725
|
+
|
743
726
|
def handle_request_status(self, record: Record) -> None:
|
744
727
|
# TODO(mempressure): do something better?
|
745
728
|
assert record.control.req_resp
|