wandb 0.19.6rc4__py3-none-win_amd64.whl → 0.19.8__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wandb/__init__.py +1 -1
- wandb/__init__.pyi +56 -6
- wandb/apis/public/_generated/__init__.py +21 -0
- wandb/apis/public/_generated/base.py +128 -0
- wandb/apis/public/_generated/enums.py +4 -0
- wandb/apis/public/_generated/input_types.py +4 -0
- wandb/apis/public/_generated/operations.py +15 -0
- wandb/apis/public/_generated/server_features_query.py +27 -0
- wandb/apis/public/_generated/typing_compat.py +14 -0
- wandb/apis/public/api.py +192 -6
- wandb/apis/public/artifacts.py +13 -45
- wandb/apis/public/registries.py +573 -0
- wandb/apis/public/utils.py +36 -0
- wandb/bin/gpu_stats.exe +0 -0
- wandb/bin/wandb-core +0 -0
- wandb/cli/cli.py +11 -20
- wandb/data_types.py +1 -1
- wandb/env.py +10 -0
- wandb/filesync/dir_watcher.py +2 -1
- wandb/proto/v3/wandb_internal_pb2.py +243 -222
- wandb/proto/v3/wandb_server_pb2.py +4 -4
- wandb/proto/v3/wandb_settings_pb2.py +2 -2
- wandb/proto/v3/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v4/wandb_internal_pb2.py +226 -222
- wandb/proto/v4/wandb_server_pb2.py +4 -4
- wandb/proto/v4/wandb_settings_pb2.py +2 -2
- wandb/proto/v4/wandb_telemetry_pb2.py +10 -10
- wandb/proto/v5/wandb_internal_pb2.py +226 -222
- wandb/proto/v5/wandb_server_pb2.py +4 -4
- wandb/proto/v5/wandb_settings_pb2.py +2 -2
- wandb/proto/v5/wandb_telemetry_pb2.py +10 -10
- wandb/sdk/artifacts/_graphql_fragments.py +126 -0
- wandb/sdk/artifacts/artifact.py +51 -95
- wandb/sdk/backend/backend.py +17 -6
- wandb/sdk/data_types/helper_types/bounding_boxes_2d.py +14 -6
- wandb/sdk/data_types/helper_types/image_mask.py +12 -6
- wandb/sdk/data_types/saved_model.py +35 -46
- wandb/sdk/data_types/video.py +7 -16
- wandb/sdk/interface/interface.py +87 -49
- wandb/sdk/interface/interface_queue.py +5 -15
- wandb/sdk/interface/interface_relay.py +7 -22
- wandb/sdk/interface/interface_shared.py +65 -136
- wandb/sdk/interface/interface_sock.py +3 -21
- wandb/sdk/interface/router.py +42 -68
- wandb/sdk/interface/router_queue.py +13 -11
- wandb/sdk/interface/router_relay.py +26 -13
- wandb/sdk/interface/router_sock.py +12 -16
- wandb/sdk/internal/handler.py +4 -3
- wandb/sdk/internal/internal_api.py +12 -1
- wandb/sdk/internal/sender.py +3 -19
- wandb/sdk/lib/apikey.py +87 -26
- wandb/sdk/lib/asyncio_compat.py +210 -0
- wandb/sdk/lib/console_capture.py +172 -0
- wandb/sdk/lib/progress.py +78 -16
- wandb/sdk/lib/redirect.py +102 -76
- wandb/sdk/lib/service_connection.py +37 -17
- wandb/sdk/lib/sock_client.py +6 -56
- wandb/sdk/mailbox/__init__.py +23 -0
- wandb/sdk/mailbox/mailbox.py +135 -0
- wandb/sdk/mailbox/mailbox_handle.py +127 -0
- wandb/sdk/mailbox/response_handle.py +167 -0
- wandb/sdk/mailbox/wait_with_progress.py +135 -0
- wandb/sdk/service/server_sock.py +9 -3
- wandb/sdk/service/streams.py +75 -78
- wandb/sdk/verify/verify.py +54 -2
- wandb/sdk/wandb_init.py +72 -75
- wandb/sdk/wandb_login.py +7 -4
- wandb/sdk/wandb_metadata.py +65 -34
- wandb/sdk/wandb_require.py +14 -8
- wandb/sdk/wandb_run.py +90 -97
- wandb/sdk/wandb_settings.py +10 -4
- wandb/sdk/wandb_setup.py +19 -8
- wandb/sdk/wandb_sync.py +2 -10
- wandb/util.py +3 -1
- {wandb-0.19.6rc4.dist-info → wandb-0.19.8.dist-info}/METADATA +2 -2
- {wandb-0.19.6rc4.dist-info → wandb-0.19.8.dist-info}/RECORD +79 -66
- wandb/sdk/interface/message_future.py +0 -27
- wandb/sdk/interface/message_future_poll.py +0 -50
- wandb/sdk/lib/mailbox.py +0 -442
- {wandb-0.19.6rc4.dist-info → wandb-0.19.8.dist-info}/WHEEL +0 -0
- {wandb-0.19.6rc4.dist-info → wandb-0.19.8.dist-info}/entry_points.txt +0 -0
- {wandb-0.19.6rc4.dist-info → wandb-0.19.8.dist-info}/licenses/LICENSE +0 -0
@@ -5,64 +5,23 @@ See interface.py for how interface classes relate to each other.
|
|
5
5
|
"""
|
6
6
|
|
7
7
|
import logging
|
8
|
-
import time
|
9
8
|
from abc import abstractmethod
|
10
|
-
from multiprocessing.process import BaseProcess
|
11
9
|
from typing import Any, Optional, cast
|
12
10
|
|
13
|
-
import wandb
|
14
11
|
from wandb.proto import wandb_internal_pb2 as pb
|
15
12
|
from wandb.proto import wandb_telemetry_pb2 as tpb
|
13
|
+
from wandb.sdk.mailbox import Mailbox, MailboxHandle
|
16
14
|
from wandb.util import json_dumps_safer, json_friendly
|
17
15
|
|
18
|
-
from ..lib.mailbox import Mailbox, MailboxHandle
|
19
16
|
from .interface import InterfaceBase
|
20
|
-
from .message_future import MessageFuture
|
21
|
-
from .router import MessageRouter
|
22
17
|
|
23
18
|
logger = logging.getLogger("wandb")
|
24
19
|
|
25
20
|
|
26
21
|
class InterfaceShared(InterfaceBase):
|
27
|
-
|
28
|
-
_process_check: bool
|
29
|
-
_router: Optional[MessageRouter]
|
30
|
-
_mailbox: Optional[Mailbox]
|
31
|
-
_transport_success_timestamp: float
|
32
|
-
_transport_failed: bool
|
33
|
-
|
34
|
-
def __init__(
|
35
|
-
self,
|
36
|
-
process: Optional[BaseProcess] = None,
|
37
|
-
process_check: bool = True,
|
38
|
-
mailbox: Optional[Any] = None,
|
39
|
-
) -> None:
|
22
|
+
def __init__(self, mailbox: Optional[Mailbox] = None) -> None:
|
40
23
|
super().__init__()
|
41
|
-
self._transport_success_timestamp = time.monotonic()
|
42
|
-
self._transport_failed = False
|
43
|
-
self._process = process
|
44
|
-
self._router = None
|
45
|
-
self._process_check = process_check
|
46
24
|
self._mailbox = mailbox
|
47
|
-
self._init_router()
|
48
|
-
|
49
|
-
@abstractmethod
|
50
|
-
def _init_router(self) -> None:
|
51
|
-
raise NotImplementedError
|
52
|
-
|
53
|
-
@property
|
54
|
-
def transport_failed(self) -> bool:
|
55
|
-
return self._transport_failed
|
56
|
-
|
57
|
-
@property
|
58
|
-
def transport_success_timestamp(self) -> float:
|
59
|
-
return self._transport_success_timestamp
|
60
|
-
|
61
|
-
def _transport_mark_failed(self) -> None:
|
62
|
-
self._transport_failed = True
|
63
|
-
|
64
|
-
def _transport_mark_success(self) -> None:
|
65
|
-
self._transport_success_timestamp = time.monotonic()
|
66
25
|
|
67
26
|
def _publish_output(self, outdata: pb.OutputRecord) -> None:
|
68
27
|
rec = pb.Record()
|
@@ -100,7 +59,9 @@ class InterfaceShared(InterfaceBase):
|
|
100
59
|
rec = self._make_record(telemetry=telem)
|
101
60
|
self._publish(rec)
|
102
61
|
|
103
|
-
def _publish_job_input(
|
62
|
+
def _publish_job_input(
|
63
|
+
self, job_input: pb.JobInputRequest
|
64
|
+
) -> MailboxHandle[pb.Result]:
|
104
65
|
record = self._make_request(job_input=job_input)
|
105
66
|
return self._deliver_record(record)
|
106
67
|
|
@@ -114,15 +75,8 @@ class InterfaceShared(InterfaceBase):
|
|
114
75
|
item.value_json = json_dumps_safer(json_friendly(v)[0])
|
115
76
|
return stats
|
116
77
|
|
117
|
-
def _make_login(self, api_key: Optional[str] = None) -> pb.LoginRequest:
|
118
|
-
login = pb.LoginRequest()
|
119
|
-
if api_key:
|
120
|
-
login.api_key = api_key
|
121
|
-
return login
|
122
|
-
|
123
78
|
def _make_request( # noqa: C901
|
124
79
|
self,
|
125
|
-
login: Optional[pb.LoginRequest] = None,
|
126
80
|
get_summary: Optional[pb.GetSummaryRequest] = None,
|
127
81
|
pause: Optional[pb.PauseRequest] = None,
|
128
82
|
resume: Optional[pb.ResumeRequest] = None,
|
@@ -130,6 +84,7 @@ class InterfaceShared(InterfaceBase):
|
|
130
84
|
stop_status: Optional[pb.StopStatusRequest] = None,
|
131
85
|
internal_messages: Optional[pb.InternalMessagesRequest] = None,
|
132
86
|
network_status: Optional[pb.NetworkStatusRequest] = None,
|
87
|
+
operation_stats: Optional[pb.OperationStatsRequest] = None,
|
133
88
|
poll_exit: Optional[pb.PollExitRequest] = None,
|
134
89
|
partial_history: Optional[pb.PartialHistoryRequest] = None,
|
135
90
|
sampled_history: Optional[pb.SampledHistoryRequest] = None,
|
@@ -158,9 +113,7 @@ class InterfaceShared(InterfaceBase):
|
|
158
113
|
metadata: Optional[pb.MetadataRequest] = None,
|
159
114
|
) -> pb.Record:
|
160
115
|
request = pb.Request()
|
161
|
-
if
|
162
|
-
request.login.CopyFrom(login)
|
163
|
-
elif get_summary:
|
116
|
+
if get_summary:
|
164
117
|
request.get_summary.CopyFrom(get_summary)
|
165
118
|
elif pause:
|
166
119
|
request.pause.CopyFrom(pause)
|
@@ -174,6 +127,8 @@ class InterfaceShared(InterfaceBase):
|
|
174
127
|
request.internal_messages.CopyFrom(internal_messages)
|
175
128
|
elif network_status:
|
176
129
|
request.network_status.CopyFrom(network_status)
|
130
|
+
elif operation_stats:
|
131
|
+
request.operations.CopyFrom(operation_stats)
|
177
132
|
elif poll_exit:
|
178
133
|
request.poll_exit.CopyFrom(poll_exit)
|
179
134
|
elif partial_history:
|
@@ -307,35 +262,6 @@ class InterfaceShared(InterfaceBase):
|
|
307
262
|
def _publish(self, record: pb.Record, local: Optional[bool] = None) -> None:
|
308
263
|
raise NotImplementedError
|
309
264
|
|
310
|
-
def _communicate(
|
311
|
-
self, rec: pb.Record, timeout: Optional[int] = 30, local: Optional[bool] = None
|
312
|
-
) -> Optional[pb.Result]:
|
313
|
-
return self._communicate_async(rec, local=local).get(timeout=timeout)
|
314
|
-
|
315
|
-
def _communicate_async(
|
316
|
-
self, rec: pb.Record, local: Optional[bool] = None
|
317
|
-
) -> MessageFuture:
|
318
|
-
assert self._router
|
319
|
-
if self._process_check and self._process and not self._process.is_alive():
|
320
|
-
raise Exception("The wandb backend process has shutdown")
|
321
|
-
future = self._router.send_and_receive(rec, local=local)
|
322
|
-
return future
|
323
|
-
|
324
|
-
def communicate_login(
|
325
|
-
self, api_key: Optional[str] = None, timeout: Optional[int] = 15
|
326
|
-
) -> pb.LoginResponse:
|
327
|
-
login = self._make_login(api_key)
|
328
|
-
rec = self._make_request(login=login)
|
329
|
-
result = self._communicate(rec, timeout=timeout)
|
330
|
-
if result is None:
|
331
|
-
# TODO: friendlier error message here
|
332
|
-
raise wandb.Error(
|
333
|
-
"Couldn't communicate with backend after {} seconds".format(timeout)
|
334
|
-
)
|
335
|
-
login_response = result.response.login_response
|
336
|
-
assert login_response
|
337
|
-
return login_response
|
338
|
-
|
339
265
|
def _publish_defer(self, state: "pb.DeferRequest.DeferState.V") -> None:
|
340
266
|
defer = pb.DeferRequest(state=state)
|
341
267
|
rec = self._make_request(defer=defer)
|
@@ -358,11 +284,6 @@ class InterfaceShared(InterfaceBase):
|
|
358
284
|
rec = self._make_record(final=final)
|
359
285
|
self._publish(rec)
|
360
286
|
|
361
|
-
def publish_login(self, api_key: Optional[str] = None) -> None:
|
362
|
-
login = self._make_login(api_key)
|
363
|
-
rec = self._make_request(login=login)
|
364
|
-
self._publish(rec)
|
365
|
-
|
366
287
|
def _publish_pause(self, pause: pb.PauseRequest) -> None:
|
367
288
|
rec = self._make_request(pause=pause)
|
368
289
|
self._publish(rec)
|
@@ -410,19 +331,22 @@ class InterfaceShared(InterfaceBase):
|
|
410
331
|
rec = self._make_record(use_artifact=use_artifact)
|
411
332
|
self._publish(rec)
|
412
333
|
|
413
|
-
def
|
334
|
+
def _deliver_artifact(
|
335
|
+
self,
|
336
|
+
log_artifact: pb.LogArtifactRequest,
|
337
|
+
) -> MailboxHandle[pb.Result]:
|
414
338
|
rec = self._make_request(log_artifact=log_artifact)
|
415
|
-
return self.
|
339
|
+
return self._deliver_record(rec)
|
416
340
|
|
417
341
|
def _deliver_download_artifact(
|
418
342
|
self, download_artifact: pb.DownloadArtifactRequest
|
419
|
-
) -> MailboxHandle:
|
343
|
+
) -> MailboxHandle[pb.Result]:
|
420
344
|
rec = self._make_request(download_artifact=download_artifact)
|
421
345
|
return self._deliver_record(rec)
|
422
346
|
|
423
347
|
def _deliver_link_artifact(
|
424
348
|
self, link_artifact: pb.LinkArtifactRequest
|
425
|
-
) -> MailboxHandle:
|
349
|
+
) -> MailboxHandle[pb.Result]:
|
426
350
|
rec = self._make_request(link_artifact=link_artifact)
|
427
351
|
return self._deliver_record(rec)
|
428
352
|
|
@@ -437,7 +361,7 @@ class InterfaceShared(InterfaceBase):
|
|
437
361
|
def _deliver_status(
|
438
362
|
self,
|
439
363
|
status: pb.StatusRequest,
|
440
|
-
) -> MailboxHandle:
|
364
|
+
) -> MailboxHandle[pb.Result]:
|
441
365
|
req = self._make_request(status=status)
|
442
366
|
return self._deliver_record(req)
|
443
367
|
|
@@ -449,114 +373,119 @@ class InterfaceShared(InterfaceBase):
|
|
449
373
|
record = self._make_request(keepalive=keepalive)
|
450
374
|
self._publish(record)
|
451
375
|
|
452
|
-
def
|
453
|
-
# shutdown
|
376
|
+
def _deliver_shutdown(self) -> MailboxHandle[pb.Result]:
|
454
377
|
request = pb.Request(shutdown=pb.ShutdownRequest())
|
455
378
|
record = self._make_record(request=request)
|
456
|
-
|
379
|
+
return self._deliver_record(record)
|
457
380
|
|
458
381
|
def _get_mailbox(self) -> Mailbox:
|
459
382
|
mailbox = self._mailbox
|
460
383
|
assert mailbox
|
461
384
|
return mailbox
|
462
385
|
|
463
|
-
def _deliver_record(self, record: pb.Record) -> MailboxHandle:
|
386
|
+
def _deliver_record(self, record: pb.Record) -> MailboxHandle[pb.Result]:
|
464
387
|
mailbox = self._get_mailbox()
|
465
|
-
handle = mailbox._deliver_record(record, interface=self)
|
466
|
-
return handle
|
467
388
|
|
468
|
-
|
389
|
+
handle = mailbox.require_response(record)
|
390
|
+
self._publish(record)
|
391
|
+
|
392
|
+
return handle.map(lambda resp: resp.result_communicate)
|
393
|
+
|
394
|
+
def _deliver_run(self, run: pb.RunRecord) -> MailboxHandle[pb.Result]:
|
469
395
|
record = self._make_record(run=run)
|
470
396
|
return self._deliver_record(record)
|
471
397
|
|
472
|
-
def _deliver_finish_sync(
|
398
|
+
def _deliver_finish_sync(
|
399
|
+
self,
|
400
|
+
sync_finish: pb.SyncFinishRequest,
|
401
|
+
) -> MailboxHandle[pb.Result]:
|
473
402
|
record = self._make_request(sync_finish=sync_finish)
|
474
403
|
return self._deliver_record(record)
|
475
404
|
|
476
|
-
def _deliver_run_start(
|
405
|
+
def _deliver_run_start(
|
406
|
+
self,
|
407
|
+
run_start: pb.RunStartRequest,
|
408
|
+
) -> MailboxHandle[pb.Result]:
|
477
409
|
record = self._make_request(run_start=run_start)
|
478
410
|
return self._deliver_record(record)
|
479
411
|
|
480
|
-
def _deliver_get_summary(
|
412
|
+
def _deliver_get_summary(
|
413
|
+
self,
|
414
|
+
get_summary: pb.GetSummaryRequest,
|
415
|
+
) -> MailboxHandle[pb.Result]:
|
481
416
|
record = self._make_request(get_summary=get_summary)
|
482
417
|
return self._deliver_record(record)
|
483
418
|
|
484
419
|
def _deliver_get_system_metrics(
|
485
420
|
self, get_system_metrics: pb.GetSystemMetricsRequest
|
486
|
-
) -> MailboxHandle:
|
421
|
+
) -> MailboxHandle[pb.Result]:
|
487
422
|
record = self._make_request(get_system_metrics=get_system_metrics)
|
488
423
|
return self._deliver_record(record)
|
489
424
|
|
490
425
|
def _deliver_get_system_metadata(
|
491
426
|
self, get_system_metadata: pb.GetSystemMetadataRequest
|
492
|
-
) -> MailboxHandle:
|
427
|
+
) -> MailboxHandle[pb.Result]:
|
493
428
|
record = self._make_request(get_system_metadata=get_system_metadata)
|
494
429
|
return self._deliver_record(record)
|
495
430
|
|
496
|
-
def _deliver_exit(
|
431
|
+
def _deliver_exit(
|
432
|
+
self,
|
433
|
+
exit_data: pb.RunExitRecord,
|
434
|
+
) -> MailboxHandle[pb.Result]:
|
497
435
|
record = self._make_record(exit=exit_data)
|
498
436
|
return self._deliver_record(record)
|
499
437
|
|
500
|
-
def
|
438
|
+
def deliver_operation_stats(self):
|
439
|
+
record = self._make_request(operation_stats=pb.OperationStatsRequest())
|
440
|
+
return self._deliver_record(record)
|
441
|
+
|
442
|
+
def _deliver_poll_exit(
|
443
|
+
self,
|
444
|
+
poll_exit: pb.PollExitRequest,
|
445
|
+
) -> MailboxHandle[pb.Result]:
|
501
446
|
record = self._make_request(poll_exit=poll_exit)
|
502
447
|
return self._deliver_record(record)
|
503
448
|
|
504
449
|
def _deliver_finish_without_exit(
|
505
450
|
self, run_finish_without_exit: pb.RunFinishWithoutExitRequest
|
506
|
-
) -> MailboxHandle:
|
451
|
+
) -> MailboxHandle[pb.Result]:
|
507
452
|
record = self._make_request(run_finish_without_exit=run_finish_without_exit)
|
508
453
|
return self._deliver_record(record)
|
509
454
|
|
510
|
-
def _deliver_stop_status(
|
455
|
+
def _deliver_stop_status(
|
456
|
+
self,
|
457
|
+
stop_status: pb.StopStatusRequest,
|
458
|
+
) -> MailboxHandle[pb.Result]:
|
511
459
|
record = self._make_request(stop_status=stop_status)
|
512
460
|
return self._deliver_record(record)
|
513
461
|
|
514
|
-
def _deliver_attach(
|
462
|
+
def _deliver_attach(
|
463
|
+
self,
|
464
|
+
attach: pb.AttachRequest,
|
465
|
+
) -> MailboxHandle[pb.Result]:
|
515
466
|
record = self._make_request(attach=attach)
|
516
467
|
return self._deliver_record(record)
|
517
468
|
|
518
469
|
def _deliver_network_status(
|
519
470
|
self, network_status: pb.NetworkStatusRequest
|
520
|
-
) -> MailboxHandle:
|
471
|
+
) -> MailboxHandle[pb.Result]:
|
521
472
|
record = self._make_request(network_status=network_status)
|
522
473
|
return self._deliver_record(record)
|
523
474
|
|
524
475
|
def _deliver_internal_messages(
|
525
476
|
self, internal_message: pb.InternalMessagesRequest
|
526
|
-
) -> MailboxHandle:
|
477
|
+
) -> MailboxHandle[pb.Result]:
|
527
478
|
record = self._make_request(internal_messages=internal_message)
|
528
479
|
return self._deliver_record(record)
|
529
480
|
|
530
481
|
def _deliver_request_sampled_history(
|
531
482
|
self, sampled_history: pb.SampledHistoryRequest
|
532
|
-
) -> MailboxHandle:
|
483
|
+
) -> MailboxHandle[pb.Result]:
|
533
484
|
record = self._make_request(sampled_history=sampled_history)
|
534
485
|
return self._deliver_record(record)
|
535
486
|
|
536
487
|
def _deliver_request_run_status(
|
537
488
|
self, run_status: pb.RunStatusRequest
|
538
|
-
) -> MailboxHandle:
|
489
|
+
) -> MailboxHandle[pb.Result]:
|
539
490
|
record = self._make_request(run_status=run_status)
|
540
491
|
return self._deliver_record(record)
|
541
|
-
|
542
|
-
def _transport_keepalive_failed(self, keepalive_interval: int = 5) -> bool:
|
543
|
-
if self._transport_failed:
|
544
|
-
return True
|
545
|
-
|
546
|
-
now = time.monotonic()
|
547
|
-
if now < self._transport_success_timestamp + keepalive_interval:
|
548
|
-
return False
|
549
|
-
|
550
|
-
try:
|
551
|
-
self.publish_keepalive()
|
552
|
-
except Exception:
|
553
|
-
self._transport_mark_failed()
|
554
|
-
else:
|
555
|
-
self._transport_mark_success()
|
556
|
-
return self._transport_failed
|
557
|
-
|
558
|
-
def join(self) -> None:
|
559
|
-
super().join()
|
560
|
-
|
561
|
-
if self._router:
|
562
|
-
self._router.join()
|
@@ -7,11 +7,10 @@ See interface.py for how interface classes relate to each other.
|
|
7
7
|
import logging
|
8
8
|
from typing import TYPE_CHECKING, Any, Optional
|
9
9
|
|
10
|
-
from
|
10
|
+
from wandb.sdk.mailbox import Mailbox
|
11
|
+
|
11
12
|
from ..lib.sock_client import SockClient
|
12
13
|
from .interface_shared import InterfaceShared
|
13
|
-
from .message_future import MessageFuture
|
14
|
-
from .router_sock import MessageSockRouter
|
15
14
|
|
16
15
|
if TYPE_CHECKING:
|
17
16
|
from wandb.proto import wandb_internal_pb2 as pb
|
@@ -21,23 +20,16 @@ logger = logging.getLogger("wandb")
|
|
21
20
|
|
22
21
|
|
23
22
|
class InterfaceSock(InterfaceShared):
|
24
|
-
_mailbox: Mailbox
|
25
|
-
|
26
23
|
def __init__(
|
27
24
|
self,
|
28
25
|
sock_client: SockClient,
|
29
26
|
mailbox: Mailbox,
|
30
27
|
stream_id: str,
|
31
28
|
) -> None:
|
32
|
-
# _sock_client is used when abstract method _init_router() is called by constructor
|
33
|
-
self._sock_client = sock_client
|
34
29
|
super().__init__(mailbox=mailbox)
|
35
|
-
self.
|
30
|
+
self._sock_client = sock_client
|
36
31
|
self._stream_id = stream_id
|
37
32
|
|
38
|
-
def _init_router(self) -> None:
|
39
|
-
self._router = MessageSockRouter(self._sock_client, mailbox=self._mailbox)
|
40
|
-
|
41
33
|
def _assign(self, record: Any) -> None:
|
42
34
|
assert self._stream_id
|
43
35
|
record._info.stream_id = self._stream_id
|
@@ -45,13 +37,3 @@ class InterfaceSock(InterfaceShared):
|
|
45
37
|
def _publish(self, record: "pb.Record", local: Optional[bool] = None) -> None:
|
46
38
|
self._assign(record)
|
47
39
|
self._sock_client.send_record_publish(record)
|
48
|
-
|
49
|
-
def _communicate_async(
|
50
|
-
self, rec: "pb.Record", local: Optional[bool] = None
|
51
|
-
) -> MessageFuture:
|
52
|
-
self._assign(rec)
|
53
|
-
assert self._router
|
54
|
-
if self._process_check and self._process and not self._process.is_alive():
|
55
|
-
raise Exception("The wandb backend process has shutdown")
|
56
|
-
future = self._router.send_and_receive(rec, local=local)
|
57
|
-
return future
|
wandb/sdk/interface/router.py
CHANGED
@@ -4,20 +4,20 @@ Router to manage responses.
|
|
4
4
|
|
5
5
|
"""
|
6
6
|
|
7
|
+
from __future__ import annotations
|
8
|
+
|
7
9
|
import logging
|
8
10
|
import threading
|
9
|
-
import uuid
|
10
11
|
from abc import abstractmethod
|
11
|
-
from typing import TYPE_CHECKING
|
12
|
+
from typing import TYPE_CHECKING
|
12
13
|
|
13
|
-
from
|
14
|
-
from .
|
14
|
+
from wandb.proto import wandb_internal_pb2 as pb
|
15
|
+
from wandb.proto import wandb_server_pb2 as spb
|
16
|
+
from wandb.sdk import mailbox
|
15
17
|
|
16
18
|
if TYPE_CHECKING:
|
17
19
|
from queue import Queue
|
18
20
|
|
19
|
-
from wandb.proto import wandb_internal_pb2 as pb
|
20
|
-
|
21
21
|
|
22
22
|
logger = logging.getLogger("wandb")
|
23
23
|
|
@@ -26,26 +26,13 @@ class MessageRouterClosedError(Exception):
|
|
26
26
|
"""Router has been closed."""
|
27
27
|
|
28
28
|
|
29
|
-
class MessageFutureObject(MessageFuture):
|
30
|
-
def __init__(self) -> None:
|
31
|
-
super().__init__()
|
32
|
-
|
33
|
-
def get(self, timeout: Optional[int] = None) -> Optional["pb.Result"]:
|
34
|
-
is_set = self._object_ready.wait(timeout)
|
35
|
-
if is_set and self._object:
|
36
|
-
return self._object
|
37
|
-
return None
|
38
|
-
|
39
|
-
|
40
29
|
class MessageRouter:
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
_mailbox: Optional[mailbox.Mailbox]
|
30
|
+
_request_queue: Queue[pb.Record]
|
31
|
+
_response_queue: Queue[pb.Result]
|
32
|
+
_mailbox: mailbox.Mailbox | None
|
45
33
|
|
46
|
-
def __init__(self, mailbox:
|
34
|
+
def __init__(self, mailbox: mailbox.Mailbox | None = None) -> None:
|
47
35
|
self._mailbox = mailbox
|
48
|
-
self._pending_reqs = {}
|
49
36
|
self._lock = threading.Lock()
|
50
37
|
|
51
38
|
self._join_event = threading.Event()
|
@@ -55,61 +42,48 @@ class MessageRouter:
|
|
55
42
|
self._thread.start()
|
56
43
|
|
57
44
|
@abstractmethod
|
58
|
-
def _read_message(self) ->
|
45
|
+
def _read_message(self) -> pb.Result | spb.ServerResponse | None:
|
59
46
|
raise NotImplementedError
|
60
47
|
|
61
48
|
@abstractmethod
|
62
|
-
def _send_message(self, record:
|
49
|
+
def _send_message(self, record: pb.Record) -> None:
|
63
50
|
raise NotImplementedError
|
64
51
|
|
65
52
|
def message_loop(self) -> None:
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
if local:
|
86
|
-
rec.control.local = local
|
87
|
-
rec.uuid = uuid.uuid4().hex
|
88
|
-
future = MessageFutureObject()
|
89
|
-
with self._lock:
|
90
|
-
self._pending_reqs[rec.uuid] = future
|
91
|
-
|
92
|
-
self._send_message(rec)
|
93
|
-
|
94
|
-
return future
|
53
|
+
try:
|
54
|
+
while not self._join_event.is_set():
|
55
|
+
try:
|
56
|
+
msg = self._read_message()
|
57
|
+
except EOFError:
|
58
|
+
# On abnormal shutdown the queue will be destroyed underneath
|
59
|
+
# resulting in EOFError. message_loop needs to exit..
|
60
|
+
logger.warning("EOFError seen in message_loop")
|
61
|
+
break
|
62
|
+
except MessageRouterClosedError as e:
|
63
|
+
logger.warning("message_loop has been closed", exc_info=e)
|
64
|
+
break
|
65
|
+
if not msg:
|
66
|
+
continue
|
67
|
+
self._handle_msg_rcv(msg)
|
68
|
+
|
69
|
+
finally:
|
70
|
+
if self._mailbox:
|
71
|
+
self._mailbox.close()
|
95
72
|
|
96
73
|
def join(self) -> None:
|
97
74
|
self._join_event.set()
|
98
75
|
self._thread.join()
|
99
76
|
|
100
|
-
def _handle_msg_rcv(self, msg:
|
101
|
-
|
102
|
-
if self._mailbox and msg.control.mailbox_slot:
|
103
|
-
self._mailbox.deliver(msg)
|
77
|
+
def _handle_msg_rcv(self, msg: pb.Result | spb.ServerResponse) -> None:
|
78
|
+
if not self._mailbox:
|
104
79
|
return
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
logger.warning(
|
112
|
-
"No listener found for msg with uuid %s (%s)", msg.uuid, msg
|
80
|
+
|
81
|
+
if isinstance(msg, pb.Result) and msg.control.mailbox_slot:
|
82
|
+
self._mailbox.deliver(
|
83
|
+
spb.ServerResponse(
|
84
|
+
request_id=msg.control.mailbox_slot,
|
85
|
+
result_communicate=msg,
|
113
86
|
)
|
114
|
-
|
115
|
-
|
87
|
+
)
|
88
|
+
elif isinstance(msg, spb.ServerResponse) and msg.request_id:
|
89
|
+
self._mailbox.deliver(msg)
|
@@ -4,38 +4,40 @@ Router to manage responses from a queue.
|
|
4
4
|
|
5
5
|
"""
|
6
6
|
|
7
|
+
from __future__ import annotations
|
8
|
+
|
7
9
|
import queue
|
8
|
-
from typing import TYPE_CHECKING
|
10
|
+
from typing import TYPE_CHECKING
|
11
|
+
|
12
|
+
from wandb.proto import wandb_internal_pb2 as pb
|
13
|
+
from wandb.sdk.mailbox import Mailbox
|
9
14
|
|
10
|
-
from ..lib.mailbox import Mailbox
|
11
15
|
from .router import MessageRouter
|
12
16
|
|
13
17
|
if TYPE_CHECKING:
|
14
18
|
from queue import Queue
|
15
19
|
|
16
|
-
from wandb.proto import wandb_internal_pb2 as pb
|
17
|
-
|
18
20
|
|
19
21
|
class MessageQueueRouter(MessageRouter):
|
20
|
-
_request_queue:
|
21
|
-
_response_queue:
|
22
|
+
_request_queue: Queue[pb.Record]
|
23
|
+
_response_queue: Queue[pb.Result]
|
22
24
|
|
23
25
|
def __init__(
|
24
26
|
self,
|
25
|
-
request_queue:
|
26
|
-
response_queue:
|
27
|
-
mailbox:
|
27
|
+
request_queue: Queue[pb.Record],
|
28
|
+
response_queue: Queue[pb.Result],
|
29
|
+
mailbox: Mailbox | None = None,
|
28
30
|
) -> None:
|
29
31
|
self._request_queue = request_queue
|
30
32
|
self._response_queue = response_queue
|
31
33
|
super().__init__(mailbox=mailbox)
|
32
34
|
|
33
|
-
def _read_message(self) ->
|
35
|
+
def _read_message(self) -> pb.Result | None:
|
34
36
|
try:
|
35
37
|
msg = self._response_queue.get(timeout=1)
|
36
38
|
except queue.Empty:
|
37
39
|
return None
|
38
40
|
return msg
|
39
41
|
|
40
|
-
def _send_message(self, record:
|
42
|
+
def _send_message(self, record: pb.Record) -> None:
|
41
43
|
self._request_queue.put(record)
|
@@ -4,34 +4,47 @@ Router to manage responses from a queue with relay.
|
|
4
4
|
|
5
5
|
"""
|
6
6
|
|
7
|
+
from __future__ import annotations
|
8
|
+
|
7
9
|
from typing import TYPE_CHECKING
|
8
10
|
|
9
|
-
from
|
11
|
+
from wandb.proto import wandb_internal_pb2 as pb
|
12
|
+
from wandb.proto import wandb_server_pb2 as spb
|
13
|
+
from wandb.sdk.mailbox import Mailbox
|
14
|
+
|
10
15
|
from .router_queue import MessageQueueRouter
|
11
16
|
|
12
17
|
if TYPE_CHECKING:
|
13
18
|
from queue import Queue
|
14
19
|
|
15
|
-
from wandb.proto import wandb_internal_pb2 as pb
|
16
|
-
|
17
20
|
|
18
21
|
class MessageRelayRouter(MessageQueueRouter):
|
19
|
-
_relay_queue:
|
22
|
+
_relay_queue: Queue[pb.Result]
|
20
23
|
|
21
24
|
def __init__(
|
22
25
|
self,
|
23
|
-
request_queue:
|
24
|
-
response_queue:
|
25
|
-
relay_queue:
|
26
|
+
request_queue: Queue[pb.Record],
|
27
|
+
response_queue: Queue[pb.Result],
|
28
|
+
relay_queue: Queue[pb.Result],
|
26
29
|
mailbox: Mailbox,
|
27
30
|
) -> None:
|
28
31
|
self._relay_queue = relay_queue
|
29
32
|
super().__init__(
|
30
|
-
request_queue=request_queue,
|
33
|
+
request_queue=request_queue,
|
34
|
+
response_queue=response_queue,
|
35
|
+
mailbox=mailbox,
|
31
36
|
)
|
32
37
|
|
33
|
-
def _handle_msg_rcv(self, msg:
|
34
|
-
if msg.
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
+
def _handle_msg_rcv(self, msg: pb.Result | spb.ServerResponse) -> None:
|
39
|
+
if isinstance(msg, pb.Result):
|
40
|
+
relay_msg = msg
|
41
|
+
else:
|
42
|
+
relay_msg = msg.result_communicate
|
43
|
+
|
44
|
+
# This is legacy-service logic for returning responses to the client.
|
45
|
+
# A different thread reads the "relay queue" and writes responses on
|
46
|
+
# the socket.
|
47
|
+
if relay_msg.control.relay_id:
|
48
|
+
self._relay_queue.put(relay_msg)
|
49
|
+
else:
|
50
|
+
super()._handle_msg_rcv(msg)
|