torchmonarch-nightly 2025.6.17__cp310-cp310-manylinux2014_x86_64.whl → 2025.6.18__cp310-cp310-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monarch/_rust_bindings.so +0 -0
- monarch/actor_mesh.py +53 -95
- monarch/monarch_controller +0 -0
- monarch/tools/cli.py +1 -1
- monarch/tools/network.py +62 -0
- tests/error_test_binary.py +31 -1
- tests/test_actor_error.py +31 -1
- tests/test_python_actors.py +57 -7
- {torchmonarch_nightly-2025.6.17.dist-info → torchmonarch_nightly-2025.6.18.dist-info}/METADATA +1 -1
- {torchmonarch_nightly-2025.6.17.dist-info → torchmonarch_nightly-2025.6.18.dist-info}/RECORD +14 -13
- {torchmonarch_nightly-2025.6.17.dist-info → torchmonarch_nightly-2025.6.18.dist-info}/WHEEL +0 -0
- {torchmonarch_nightly-2025.6.17.dist-info → torchmonarch_nightly-2025.6.18.dist-info}/entry_points.txt +0 -0
- {torchmonarch_nightly-2025.6.17.dist-info → torchmonarch_nightly-2025.6.18.dist-info}/licenses/LICENSE +0 -0
- {torchmonarch_nightly-2025.6.17.dist-info → torchmonarch_nightly-2025.6.18.dist-info}/top_level.txt +0 -0
monarch/_rust_bindings.so
CHANGED
Binary file
|
monarch/actor_mesh.py
CHANGED
@@ -6,7 +6,6 @@
|
|
6
6
|
|
7
7
|
# pyre-unsafe
|
8
8
|
|
9
|
-
import asyncio
|
10
9
|
import collections
|
11
10
|
import contextvars
|
12
11
|
import functools
|
@@ -27,9 +26,7 @@ from typing import (
|
|
27
26
|
Callable,
|
28
27
|
cast,
|
29
28
|
Concatenate,
|
30
|
-
Coroutine,
|
31
29
|
Dict,
|
32
|
-
Generator,
|
33
30
|
Generic,
|
34
31
|
Iterable,
|
35
32
|
List,
|
@@ -99,39 +96,6 @@ _context: contextvars.ContextVar[MonarchContext] = contextvars.ContextVar(
|
|
99
96
|
)
|
100
97
|
|
101
98
|
|
102
|
-
# this was implemented in python 3.12 as an argument to task
|
103
|
-
# but I have to backport to 3.10/3.11.
|
104
|
-
def create_eager_task(coro: Awaitable[None]) -> asyncio.Future:
|
105
|
-
iter = coro.__await__()
|
106
|
-
try:
|
107
|
-
first_yield = next(iter)
|
108
|
-
return asyncio.create_task(RestOfCoroutine(first_yield, iter).run())
|
109
|
-
except StopIteration as e:
|
110
|
-
t = asyncio.Future()
|
111
|
-
t.set_result(e.value)
|
112
|
-
return t
|
113
|
-
|
114
|
-
|
115
|
-
class RestOfCoroutine(Generic[T1, T2]):
|
116
|
-
def __init__(self, first_yield: T1, iter: Generator[T2, None, T2]) -> None:
|
117
|
-
self.first_yield: T1 | None = first_yield
|
118
|
-
self.iter: Generator[T2, None, T2] = iter
|
119
|
-
|
120
|
-
def __await__(self) -> Generator[T1, None, T1] | Generator[T2, None, T2]:
|
121
|
-
first_yield = self.first_yield
|
122
|
-
assert first_yield is not None
|
123
|
-
yield first_yield
|
124
|
-
self.first_yield = None
|
125
|
-
while True:
|
126
|
-
try:
|
127
|
-
yield next(self.iter)
|
128
|
-
except StopIteration as e:
|
129
|
-
return e.value
|
130
|
-
|
131
|
-
async def run(self) -> T1 | T2:
|
132
|
-
return await self
|
133
|
-
|
134
|
-
|
135
99
|
T = TypeVar("T")
|
136
100
|
P = ParamSpec("P")
|
137
101
|
R = TypeVar("R")
|
@@ -285,7 +249,18 @@ class Endpoint(Generic[P, R]):
|
|
285
249
|
async def process() -> ValueMesh[R]:
|
286
250
|
results: List[R] = [None] * len(self._actor_mesh) # pyre-fixme[9]
|
287
251
|
for _ in range(len(self._actor_mesh)):
|
288
|
-
rank, value = await r.recv()
|
252
|
+
rank, value = await r.recv()
|
253
|
+
results[rank] = value
|
254
|
+
call_shape = Shape(
|
255
|
+
self._actor_mesh._shape.labels,
|
256
|
+
NDSlice.new_row_major(self._actor_mesh._shape.ndslice.sizes),
|
257
|
+
)
|
258
|
+
return ValueMesh(call_shape, results)
|
259
|
+
|
260
|
+
def process_blocking() -> ValueMesh[R]:
|
261
|
+
results: List[R] = [None] * len(self._actor_mesh) # pyre-fixme[9]
|
262
|
+
for _ in range(len(self._actor_mesh)):
|
263
|
+
rank, value = r.recv().get()
|
289
264
|
results[rank] = value
|
290
265
|
call_shape = Shape(
|
291
266
|
self._actor_mesh._shape.labels,
|
@@ -293,7 +268,7 @@ class Endpoint(Generic[P, R]):
|
|
293
268
|
)
|
294
269
|
return ValueMesh(call_shape, results)
|
295
270
|
|
296
|
-
return Future(process)
|
271
|
+
return Future(process, process_blocking)
|
297
272
|
|
298
273
|
async def stream(self, *args: P.args, **kwargs: P.kwargs) -> AsyncGenerator[R, R]:
|
299
274
|
"""
|
@@ -362,6 +337,9 @@ class ValueMesh(MeshTrait, Generic[R]):
|
|
362
337
|
def __len__(self) -> int:
|
363
338
|
return len(self._shape)
|
364
339
|
|
340
|
+
def __repr__(self) -> str:
|
341
|
+
return f"ValueMesh({self._shape})"
|
342
|
+
|
365
343
|
@property
|
366
344
|
def _ndslice(self) -> NDSlice:
|
367
345
|
return self._shape.ndslice
|
@@ -485,24 +463,36 @@ singleton_shape = Shape([], NDSlice(offset=0, sizes=[], strides=[]))
|
|
485
463
|
|
486
464
|
|
487
465
|
class _Actor:
|
466
|
+
"""
|
467
|
+
This is the message handling implementation of a Python actor.
|
468
|
+
|
469
|
+
The layering goes:
|
470
|
+
Rust `PythonActor` -> `_Actor` -> user-provided `Actor` instance
|
471
|
+
|
472
|
+
Messages are received from the Rust backend, and forwarded to the `handle`
|
473
|
+
methods on this class.
|
474
|
+
|
475
|
+
This class wraps the actual `Actor` instance provided by the user, and
|
476
|
+
routes messages to it, managing argument serialization/deserialization and
|
477
|
+
error handling.
|
478
|
+
"""
|
479
|
+
|
488
480
|
def __init__(self) -> None:
|
489
481
|
self.instance: object | None = None
|
490
|
-
self.active_requests: asyncio.Queue[asyncio.Future[object]] = asyncio.Queue()
|
491
|
-
self.complete_task: asyncio.Task | None = None
|
492
482
|
|
493
|
-
def handle(
|
483
|
+
async def handle(
|
494
484
|
self, mailbox: Mailbox, message: PythonMessage, panic_flag: PanicFlag
|
495
|
-
) ->
|
496
|
-
return self.handle_cast(mailbox, 0, singleton_shape, message, panic_flag)
|
485
|
+
) -> None:
|
486
|
+
return await self.handle_cast(mailbox, 0, singleton_shape, message, panic_flag)
|
497
487
|
|
498
|
-
def handle_cast(
|
488
|
+
async def handle_cast(
|
499
489
|
self,
|
500
490
|
mailbox: Mailbox,
|
501
491
|
rank: int,
|
502
492
|
shape: Shape,
|
503
493
|
message: PythonMessage,
|
504
494
|
panic_flag: PanicFlag,
|
505
|
-
) ->
|
495
|
+
) -> None:
|
506
496
|
port = (
|
507
497
|
Port(message.response_port, mailbox, rank)
|
508
498
|
if message.response_port
|
@@ -515,26 +505,21 @@ class _Actor:
|
|
515
505
|
_context.set(ctx)
|
516
506
|
|
517
507
|
args, kwargs = _unpickle(message.message, mailbox)
|
508
|
+
|
518
509
|
if message.method == "__init__":
|
519
510
|
Class, *args = args
|
520
511
|
self.instance = Class(*args, **kwargs)
|
521
512
|
return None
|
522
|
-
else:
|
523
|
-
the_method = getattr(self.instance, message.method)._method
|
524
513
|
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
)
|
529
|
-
result = the_method(self.instance, *args, **kwargs)
|
530
|
-
exit_span()
|
531
|
-
if port is not None:
|
532
|
-
port.send("result", result)
|
533
|
-
return None
|
514
|
+
the_method = getattr(self.instance, message.method)._method
|
515
|
+
|
516
|
+
if inspect.iscoroutinefunction(the_method):
|
534
517
|
|
535
518
|
async def instrumented():
|
536
519
|
enter_span(
|
537
|
-
the_method.__module__,
|
520
|
+
the_method.__module__,
|
521
|
+
message.method,
|
522
|
+
str(ctx.mailbox.actor_id),
|
538
523
|
)
|
539
524
|
try:
|
540
525
|
result = await the_method(self.instance, *args, **kwargs)
|
@@ -547,39 +532,14 @@ class _Actor:
|
|
547
532
|
exit_span()
|
548
533
|
return result
|
549
534
|
|
550
|
-
|
551
|
-
ctx,
|
552
|
-
self.run_task(port, instrumented(), panic_flag),
|
553
|
-
)
|
554
|
-
except Exception as e:
|
555
|
-
traceback.print_exc()
|
556
|
-
s = ActorError(e)
|
557
|
-
|
558
|
-
# The exception is delivered to exactly one of:
|
559
|
-
# (1) our caller, (2) our supervisor
|
560
|
-
if port is not None:
|
561
|
-
port.send("exception", s)
|
535
|
+
result = await instrumented()
|
562
536
|
else:
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
coroutine: Awaitable[None],
|
569
|
-
) -> None:
|
570
|
-
_context.set(ctx)
|
571
|
-
if self.complete_task is None:
|
572
|
-
self.complete_task = asyncio.create_task(self._complete())
|
573
|
-
await self.active_requests.put(create_eager_task(coroutine))
|
537
|
+
enter_span(
|
538
|
+
the_method.__module__, message.method, str(ctx.mailbox.actor_id)
|
539
|
+
)
|
540
|
+
result = the_method(self.instance, *args, **kwargs)
|
541
|
+
exit_span()
|
574
542
|
|
575
|
-
async def run_task(
|
576
|
-
self,
|
577
|
-
port: Port | None,
|
578
|
-
coroutine: Awaitable[Any],
|
579
|
-
panic_flag: PanicFlag,
|
580
|
-
) -> None:
|
581
|
-
try:
|
582
|
-
result = await coroutine
|
583
543
|
if port is not None:
|
584
544
|
port.send("result", result)
|
585
545
|
except Exception as e:
|
@@ -603,11 +563,6 @@ class _Actor:
|
|
603
563
|
pass
|
604
564
|
raise
|
605
565
|
|
606
|
-
async def _complete(self) -> None:
|
607
|
-
while True:
|
608
|
-
task = await self.active_requests.get()
|
609
|
-
await task
|
610
|
-
|
611
566
|
|
612
567
|
def _is_mailbox(x: object) -> bool:
|
613
568
|
return isinstance(x, Mailbox)
|
@@ -648,8 +603,8 @@ class Actor(MeshTrait):
|
|
648
603
|
"actor implementations are not meshes, but we can't convince the typechecker of it..."
|
649
604
|
)
|
650
605
|
|
651
|
-
@endpoint
|
652
|
-
|
606
|
+
@endpoint # pyre-ignore
|
607
|
+
def _set_debug_client(self, client: "DebugClient") -> None:
|
653
608
|
point = MonarchContext.get().point
|
654
609
|
# For some reason, using a lambda instead of functools.partial
|
655
610
|
# confuses the pdb wrapper implementation.
|
@@ -750,6 +705,9 @@ class ActorMeshRef(MeshTrait):
|
|
750
705
|
self._mailbox,
|
751
706
|
)
|
752
707
|
|
708
|
+
def __repr__(self) -> str:
|
709
|
+
return f"ActorMeshRef(class={self._class}, shape={self._actor_mesh_ref._shape})"
|
710
|
+
|
753
711
|
|
754
712
|
class ActorError(Exception):
|
755
713
|
"""
|
monarch/monarch_controller
CHANGED
Binary file
|
monarch/tools/cli.py
CHANGED
monarch/tools/network.py
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the BSD-style license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
# pyre-strict
|
8
|
+
import logging
|
9
|
+
import socket
|
10
|
+
from typing import Optional
|
11
|
+
|
12
|
+
logger: logging.Logger = logging.getLogger(__name__)
|
13
|
+
|
14
|
+
|
15
|
+
def get_ip_addr(hostname: str) -> str:
|
16
|
+
"""Resolves and returns the ip address of the given hostname.
|
17
|
+
|
18
|
+
This function will return an ipv6 address if one that can bind
|
19
|
+
a `SOCK_STREAM` (TCP) socket is found. Otherwise it will fall back
|
20
|
+
to resolving an ipv4 `SOCK_STREAM` address.
|
21
|
+
|
22
|
+
Raises a `RuntimeError` if neither an ipv6 nor an ipv4 address can be resolved from hostname.
|
23
|
+
"""
|
24
|
+
|
25
|
+
def get_sockaddr(family: socket.AddressFamily) -> Optional[str]:
|
26
|
+
try:
|
27
|
+
# patternlint-disable-next-line python-dns-deps (only used for oss)
|
28
|
+
addrs = socket.getaddrinfo(
|
29
|
+
hostname, port=None, family=family, type=socket.SOCK_STREAM
|
30
|
+
) # tcp
|
31
|
+
if addrs:
|
32
|
+
# socket.getaddrinfo returns a list of 5-tuple addr infos
|
33
|
+
_, _, _, _, sockaddr = addrs[0] # use the first address
|
34
|
+
|
35
|
+
# sockaddr is a 2-tuple (ipv4) or a 4-tuple (ipv6) where the first element is the ip addr
|
36
|
+
ipaddr = str(sockaddr[0])
|
37
|
+
|
38
|
+
logger.info(
|
39
|
+
"Resolved %s address: `%s` for host: `%s`",
|
40
|
+
family.name,
|
41
|
+
ipaddr,
|
42
|
+
hostname,
|
43
|
+
)
|
44
|
+
return str(ipaddr)
|
45
|
+
else:
|
46
|
+
return None
|
47
|
+
except socket.gaierror as e:
|
48
|
+
logger.info(
|
49
|
+
"No %s address that can bind TCP sockets for host: %s. %s",
|
50
|
+
family.name,
|
51
|
+
hostname,
|
52
|
+
e,
|
53
|
+
)
|
54
|
+
return None
|
55
|
+
|
56
|
+
ipaddr = get_sockaddr(socket.AF_INET6) or get_sockaddr(socket.AF_INET)
|
57
|
+
if not ipaddr:
|
58
|
+
raise RuntimeError(
|
59
|
+
f"Unable to resolve `{hostname}` to ipv6 or ipv4 address that can bind TCP socket."
|
60
|
+
" Check the network configuration on the host."
|
61
|
+
)
|
62
|
+
return ipaddr
|
tests/error_test_binary.py
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
6
6
|
|
7
|
+
import asyncio
|
7
8
|
import ctypes
|
8
9
|
import sys
|
9
10
|
|
@@ -11,7 +12,7 @@ import click
|
|
11
12
|
|
12
13
|
from monarch._rust_bindings.monarch_extension.panic import panicking_function
|
13
14
|
|
14
|
-
from monarch.actor_mesh import Actor, endpoint
|
15
|
+
from monarch.actor_mesh import Actor, endpoint, send
|
15
16
|
from monarch.proc_mesh import proc_mesh
|
16
17
|
|
17
18
|
|
@@ -35,6 +36,12 @@ class ErrorActor(Actor):
|
|
35
36
|
"""Endpoint that calls a Rust function that panics."""
|
36
37
|
panicking_function()
|
37
38
|
|
39
|
+
@endpoint
|
40
|
+
async def await_then_error(self) -> None:
|
41
|
+
await asyncio.sleep(0.1)
|
42
|
+
await asyncio.sleep(0.1)
|
43
|
+
raise RuntimeError("oh noez")
|
44
|
+
|
38
45
|
|
39
46
|
class ErrorActorSync(Actor):
|
40
47
|
"""An actor that has endpoints cause segfaults."""
|
@@ -146,5 +153,28 @@ def error_bootstrap():
|
|
146
153
|
proc_mesh(gpus=4, env={"MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING": "1"}).get()
|
147
154
|
|
148
155
|
|
156
|
+
async def _error_unmonitored():
|
157
|
+
print("I actually ran")
|
158
|
+
sys.stdout.flush()
|
159
|
+
|
160
|
+
proc = await proc_mesh(gpus=1)
|
161
|
+
actor = await proc.spawn("error_actor", ErrorActor)
|
162
|
+
|
163
|
+
# fire and forget
|
164
|
+
send(actor.await_then_error, (), {}, None, "all")
|
165
|
+
|
166
|
+
# Wait. Eventually a supervision event will get propagated and the process
|
167
|
+
# will exit.
|
168
|
+
#
|
169
|
+
# If an event is not delivered, the test will time out before this sleep
|
170
|
+
# finishes.
|
171
|
+
await asyncio.sleep(300)
|
172
|
+
|
173
|
+
|
174
|
+
@main.command("error-unmonitored")
|
175
|
+
def error_unmonitored():
|
176
|
+
asyncio.run(_error_unmonitored())
|
177
|
+
|
178
|
+
|
149
179
|
if __name__ == "__main__":
|
150
180
|
main()
|
tests/test_actor_error.py
CHANGED
@@ -4,11 +4,12 @@
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
6
6
|
|
7
|
+
import asyncio
|
7
8
|
import importlib.resources
|
8
9
|
import subprocess
|
9
10
|
|
10
11
|
import pytest
|
11
|
-
from monarch.actor_mesh import Actor, ActorError, endpoint
|
12
|
+
from monarch.actor_mesh import Actor, ActorError, endpoint, send
|
12
13
|
|
13
14
|
from monarch.proc_mesh import proc_mesh
|
14
15
|
|
@@ -128,6 +129,7 @@ def test_actor_supervision(num_procs, sync_endpoint, sync_test_impl, endpoint_na
|
|
128
129
|
f"--endpoint-name={endpoint_name}",
|
129
130
|
]
|
130
131
|
try:
|
132
|
+
print("running cmd", " ".join(cmd))
|
131
133
|
process = subprocess.run(cmd, capture_output=True, timeout=180)
|
132
134
|
except subprocess.TimeoutExpired as e:
|
133
135
|
print("timeout expired")
|
@@ -157,6 +159,7 @@ def test_proc_mesh_bootstrap_error():
|
|
157
159
|
"error-bootstrap",
|
158
160
|
]
|
159
161
|
try:
|
162
|
+
print("running cmd", " ".join(cmd))
|
160
163
|
process = subprocess.run(cmd, capture_output=True, timeout=180)
|
161
164
|
except subprocess.TimeoutExpired as e:
|
162
165
|
print("timeout expired")
|
@@ -208,3 +211,30 @@ async def test_broken_pickle_class(raise_on_getstate, raise_on_setstate, num_pro
|
|
208
211
|
await exception_actor.print_value.call_one(broken_obj)
|
209
212
|
else:
|
210
213
|
await exception_actor.print_value.call(broken_obj)
|
214
|
+
|
215
|
+
|
216
|
+
# oss_skip: importlib not pulling resource correctly in git CI, needs to be revisited
|
217
|
+
@pytest.mark.oss_skip
|
218
|
+
async def test_exception_after_wait_unmonitored():
|
219
|
+
# Run the test in a subprocess
|
220
|
+
test_bin = importlib.resources.files("monarch.python.tests").joinpath("test_bin")
|
221
|
+
cmd = [
|
222
|
+
str(test_bin),
|
223
|
+
"error-unmonitored",
|
224
|
+
]
|
225
|
+
try:
|
226
|
+
print("running cmd", " ".join(cmd))
|
227
|
+
process = subprocess.run(cmd, capture_output=True, timeout=180)
|
228
|
+
except subprocess.TimeoutExpired as e:
|
229
|
+
print("timeout expired")
|
230
|
+
if e.stdout is not None:
|
231
|
+
print(e.stdout.decode())
|
232
|
+
if e.stderr is not None:
|
233
|
+
print(e.stderr.decode())
|
234
|
+
raise
|
235
|
+
|
236
|
+
# Assert that the subprocess actually ran and exited with a non-zero code
|
237
|
+
assert "I actually ran" in process.stdout.decode()
|
238
|
+
assert (
|
239
|
+
process.returncode != 0
|
240
|
+
), f"Expected non-zero exit code, got {process.returncode}"
|
tests/test_python_actors.py
CHANGED
@@ -584,16 +584,40 @@ async def test_actor_tls() -> None:
|
|
584
584
|
pm = await proc_mesh(gpus=1)
|
585
585
|
am = await pm.spawn("tls", TLSActor)
|
586
586
|
await am.increment.call_one()
|
587
|
-
|
588
|
-
|
589
|
-
|
587
|
+
await am.increment_async.call_one()
|
588
|
+
await am.increment.call_one()
|
589
|
+
await am.increment_async.call_one()
|
590
|
+
|
591
|
+
assert 4 == await am.get.call_one()
|
592
|
+
assert 4 == await am.get_async.call_one()
|
593
|
+
|
594
|
+
|
595
|
+
class TLSActorFullSync(Actor):
|
596
|
+
"""An actor that manages thread-local state."""
|
597
|
+
|
598
|
+
def __init__(self):
|
599
|
+
self.local = threading.local()
|
600
|
+
self.local.value = 0
|
601
|
+
|
602
|
+
@endpoint
|
603
|
+
def increment(self):
|
604
|
+
self.local.value += 1
|
605
|
+
|
606
|
+
@endpoint
|
607
|
+
def get(self):
|
608
|
+
return self.local.value
|
590
609
|
|
591
|
-
|
610
|
+
|
611
|
+
async def test_actor_tls_full_sync() -> None:
|
612
|
+
"""Test that thread-local state is respected."""
|
613
|
+
pm = await proc_mesh(gpus=1)
|
614
|
+
am = await pm.spawn("tls", TLSActorFullSync)
|
615
|
+
await am.increment.call_one()
|
616
|
+
await am.increment.call_one()
|
617
|
+
await am.increment.call_one()
|
592
618
|
await am.increment.call_one()
|
593
|
-
# await am.increment_async.call_one()
|
594
619
|
|
595
|
-
assert
|
596
|
-
# assert 4 == await am.get_async.call_one()
|
620
|
+
assert 4 == await am.get.call_one()
|
597
621
|
|
598
622
|
|
599
623
|
@two_gpu
|
@@ -611,3 +635,29 @@ def test_proc_mesh_tensor_engine() -> None:
|
|
611
635
|
assert a == 0
|
612
636
|
assert b == 10
|
613
637
|
assert c == 100
|
638
|
+
|
639
|
+
|
640
|
+
class AsyncActor(Actor):
|
641
|
+
def __init__(self):
|
642
|
+
self.should_exit = False
|
643
|
+
|
644
|
+
@endpoint
|
645
|
+
async def sleep(self) -> None:
|
646
|
+
while True and not self.should_exit:
|
647
|
+
await asyncio.sleep(1)
|
648
|
+
|
649
|
+
@endpoint
|
650
|
+
async def no_more(self) -> None:
|
651
|
+
self.should_exit = True
|
652
|
+
|
653
|
+
|
654
|
+
@pytest.mark.timeout(15)
|
655
|
+
async def test_async_concurrency():
|
656
|
+
"""Test that async endpoints will be processed concurrently."""
|
657
|
+
pm = await proc_mesh(gpus=1)
|
658
|
+
am = await pm.spawn("async", AsyncActor)
|
659
|
+
fut = am.sleep.call()
|
660
|
+
# This call should go through and exit the sleep loop, as long as we are
|
661
|
+
# actually concurrently processing messages.
|
662
|
+
await am.no_more.call()
|
663
|
+
await fut
|
{torchmonarch_nightly-2025.6.17.dist-info → torchmonarch_nightly-2025.6.18.dist-info}/RECORD
RENAMED
@@ -1,7 +1,7 @@
|
|
1
1
|
monarch/__init__.py,sha256=iUvWHc0-7Q2tovRoRxOIiA3TsefMXCbWl-jEfQ2djew,6897
|
2
|
-
monarch/_rust_bindings.so,sha256=
|
2
|
+
monarch/_rust_bindings.so,sha256=RlkNuWQ74oxTOEfmaVFsgESTEdMP84vug1sRY4xya60,40803008
|
3
3
|
monarch/_testing.py,sha256=jOIOG6jcZBzvEvG_DwSnwCkaMVXvSun6sJAG6nXemww,7859
|
4
|
-
monarch/actor_mesh.py,sha256=
|
4
|
+
monarch/actor_mesh.py,sha256=8hjIy0TSby33xfVXp_xZnqlPkxy3l6IGqEyPOhVtjvU,24197
|
5
5
|
monarch/allocator.py,sha256=ylvYTf31o-PT385cYJPhi17uNbC4yl_RAraqD0fVe4g,4112
|
6
6
|
monarch/bootstrap_main.py,sha256=RCUQhJk07yMFiKp6HzQuqZFUpkgsT9kVEyimiwjn6_E,1827
|
7
7
|
monarch/cached_remote_function.py,sha256=kYdB6r4OHx_T_uX4q3tCNcp1t2DJwF8tPTIahUiT2pU,8785
|
@@ -11,7 +11,7 @@ monarch/future.py,sha256=lcdFEe7m1shYPPuvZ1RkS6JUIChEKGBWe3v7x_nu4Hg,731
|
|
11
11
|
monarch/gradient_generator.py,sha256=Rl3dmXGceTdCc1mYBg2JciR88ywGPnW7TVkL86KwqEA,6366
|
12
12
|
monarch/memory.py,sha256=ol86dBhFAJqg78iF25-BuK0wuwj1onR8FIioZ_B0gjw,1377
|
13
13
|
monarch/mesh_controller.py,sha256=am1QP7dvn0OH1z9ADSKm41APs1HY_dHcBAhOVP-QDmE,10427
|
14
|
-
monarch/monarch_controller,sha256=
|
14
|
+
monarch/monarch_controller,sha256=HucZG4CSJhkVpbHElarAp2LUz1xW5bMNnAR3TNjWKks,20335344
|
15
15
|
monarch/notebook.py,sha256=zu9MKDFKf1-rCM2TqFSRJjMBeiWuKcJSyUFLvoZRQzs,25949
|
16
16
|
monarch/opaque_module.py,sha256=oajOu_WD1hD4hxE8HDdO-tvWY7KDHWd7VaAhJEa5L2I,10446
|
17
17
|
monarch/opaque_object.py,sha256=IVpll4pyuKZMo_EnPh4s0qnx8RlAcJrJ1yoLX6E75wQ,2782
|
@@ -106,9 +106,10 @@ monarch/timer/example_spmd.py,sha256=p8i3_tO1AmpwSkZryiSjgkh7qaEZ6QXp2Fy1qtPpECA
|
|
106
106
|
monarch/timer/execution_timer.py,sha256=1YsrLIZirdohKOeFAU2H4UcONhQXHuctJbYcoX8I6gY,6985
|
107
107
|
monarch/timer/execution_timer_test.py,sha256=CSxTv44fFZQURJlCBmYvysQI1aS_zEGZs_uxl9SOHak,4486
|
108
108
|
monarch/tools/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
109
|
-
monarch/tools/cli.py,sha256=
|
109
|
+
monarch/tools/cli.py,sha256=EIdarsfuFX0WqRCe29_5GNKWJBhxx0lABalw3zPSagw,4977
|
110
110
|
monarch/tools/commands.py,sha256=BfmXndJmU_cZP4cMPlknkxGca1NjqYd8_ReDePWksXw,6908
|
111
111
|
monarch/tools/mesh_spec.py,sha256=JLykhgy1dClXiNbH1Qsl2fX5MbqplQAhl8LGoragvbo,3702
|
112
|
+
monarch/tools/network.py,sha256=bRj-jOs5qDqnM3BcE9MSXCLS01hiMN4YSWfKZ_d7bc4,2182
|
112
113
|
monarch/tools/components/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
113
114
|
monarch/tools/components/hyperactor.py,sha256=Ryi1X07VLcaQVlpc4af65JNBbZtOb9IAlKxSKMZ1AW4,2120
|
114
115
|
monarch/tools/config/__init__.py,sha256=OPSflEmJB2zxAaRVzzWSWXV5M5vlknLgpulGdW1ze5U,510
|
@@ -131,9 +132,9 @@ monarch_supervisor/python_executable.py,sha256=WfCiK3wdAvm9Jxx5jgjGF991NgGc9-oHU
|
|
131
132
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
132
133
|
tests/dispatch_bench.py,sha256=sU_m-8KAjQgYTsxI5khV664NdgLLutidni69Rtowk98,3933
|
133
134
|
tests/dispatch_bench_helper.py,sha256=1ORgAMrRgjAjmmWeCHLLQd_bda9mJk0rS2ucEbRu28s,633
|
134
|
-
tests/error_test_binary.py,sha256=
|
135
|
+
tests/error_test_binary.py,sha256=BRj13wAROsUWx4jcxc07HYN2n-xyBNhnnRAhjqah-A0,5582
|
135
136
|
tests/sleep_binary.py,sha256=XfLYaAfwm9xgzM-svs8fhAeFhwYIg6SyVEnx4e6wbUw,1009
|
136
|
-
tests/test_actor_error.py,sha256
|
137
|
+
tests/test_actor_error.py,sha256=-0UJCEpyzsBh-RdbGhDiG1-sRtu7bJPQWmtjUD0ad48,8526
|
137
138
|
tests/test_alloc.py,sha256=D6DdQbtOZEvvnnc7LV-WyWFMk0Xb77eblH6Oz90zJTA,745
|
138
139
|
tests/test_allocator.py,sha256=P11sQ95ADjzC_-CfPs3CEP80nP8sn7wW8vVPsmpSVoM,8164
|
139
140
|
tests/test_coalescing.py,sha256=JZ4YgQNlWWs7N-Z8KCCXQPANcuyyXEKjeHIXYbPnQhk,15606
|
@@ -144,7 +145,7 @@ tests/test_future.py,sha256=cXzaNi2YDwVyjR541ScXmgktX1YFsKzbl8wep0DMVbk,3032
|
|
144
145
|
tests/test_grad_generator.py,sha256=p4Pm4kMEeGldt2jUVAkGKCB0mLccKI28pltH6OTGbQA,3412
|
145
146
|
tests/test_mock_cuda.py,sha256=5hisElxeLJ5MHw3KM9gwxBiXiMaG-Rm382u3AsQcDOI,3068
|
146
147
|
tests/test_pdb_actor.py,sha256=5KJhuhcZDPWMdjC6eAtDdwnz1W7jNFXvIrMSFaCWaPw,3858
|
147
|
-
tests/test_python_actors.py,sha256=
|
148
|
+
tests/test_python_actors.py,sha256=ls0x_ie4i9KLuouecfxG_fHHZSZc2g_mQSAPJg70pgw,18949
|
148
149
|
tests/test_remote_functions.py,sha256=5nxYB8dfA9NT9f9Od9O3htgQtPbiRNiXZ1Kgtn75sOQ,50056
|
149
150
|
tests/test_rust_backend.py,sha256=94S3R995ZkyIhEiBsM5flcjf5X7bscEAHBtInbTRFe8,7776
|
150
151
|
tests/test_signal_safe_block_on.py,sha256=bmal0XgzJowZXJV6T1Blow5a-vZluYWusCThLMGxyTE,3336
|
@@ -154,9 +155,9 @@ tests/simulator/test_profiling.py,sha256=TGYCfzTLdkpIwnOuO6KApprmrgPIRQe60KRX3wk
|
|
154
155
|
tests/simulator/test_simulator.py,sha256=LO8lA0ssY-OGEBL5ipEu74f97Y765TEwfUOv-DtIptM,14568
|
155
156
|
tests/simulator/test_task.py,sha256=ipqBDuDAysuo1xOB9S5psaFvwe6VATD43IovCTSs0t4,2327
|
156
157
|
tests/simulator/test_worker.py,sha256=QrWWIJ3HDgDLkBPRc2mwYPlOQoXQcj1qRfc0WUfKkFY,3507
|
157
|
-
torchmonarch_nightly-2025.6.
|
158
|
-
torchmonarch_nightly-2025.6.
|
159
|
-
torchmonarch_nightly-2025.6.
|
160
|
-
torchmonarch_nightly-2025.6.
|
161
|
-
torchmonarch_nightly-2025.6.
|
162
|
-
torchmonarch_nightly-2025.6.
|
158
|
+
torchmonarch_nightly-2025.6.18.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
|
159
|
+
torchmonarch_nightly-2025.6.18.dist-info/METADATA,sha256=lPDac3GQrS5MmEp41wt6YCWHIluJzBgFfPY37x0cKJM,2772
|
160
|
+
torchmonarch_nightly-2025.6.18.dist-info/WHEEL,sha256=_wZSFk0d90K9wOBp8Q-UGxshyiJ987JoPiyUBNC6VLk,104
|
161
|
+
torchmonarch_nightly-2025.6.18.dist-info/entry_points.txt,sha256=sqfQ16oZqjEvttUI-uj9BBXIIE6jt05bYFSmy-2hyXI,106
|
162
|
+
torchmonarch_nightly-2025.6.18.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
|
163
|
+
torchmonarch_nightly-2025.6.18.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
{torchmonarch_nightly-2025.6.17.dist-info → torchmonarch_nightly-2025.6.18.dist-info}/top_level.txt
RENAMED
File without changes
|