torchmonarch-nightly 2025.6.19__cp310-cp310-manylinux2014_x86_64.whl → 2025.6.20__cp310-cp310-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monarch/_rust_bindings.so +0 -0
- monarch/common/client.py +35 -3
- monarch/common/remote.py +14 -21
- monarch/future.py +59 -8
- tests/test_python_actors.py +98 -0
- {torchmonarch_nightly-2025.6.19.dist-info → torchmonarch_nightly-2025.6.20.dist-info}/METADATA +1 -1
- {torchmonarch_nightly-2025.6.19.dist-info → torchmonarch_nightly-2025.6.20.dist-info}/RECORD +11 -11
- {torchmonarch_nightly-2025.6.19.dist-info → torchmonarch_nightly-2025.6.20.dist-info}/WHEEL +0 -0
- {torchmonarch_nightly-2025.6.19.dist-info → torchmonarch_nightly-2025.6.20.dist-info}/entry_points.txt +0 -0
- {torchmonarch_nightly-2025.6.19.dist-info → torchmonarch_nightly-2025.6.20.dist-info}/licenses/LICENSE +0 -0
- {torchmonarch_nightly-2025.6.19.dist-info → torchmonarch_nightly-2025.6.20.dist-info}/top_level.txt +0 -0
monarch/_rust_bindings.so
CHANGED
Binary file
|
monarch/common/client.py
CHANGED
@@ -41,6 +41,8 @@ from monarch.common import messages
|
|
41
41
|
from monarch.common.borrows import Borrow, StorageAliases
|
42
42
|
from monarch.common.controller_api import LogMessage, MessageResult, TController
|
43
43
|
from monarch.common.device_mesh import DeviceMesh
|
44
|
+
|
45
|
+
from monarch.common.future import Future
|
44
46
|
from monarch.common.invocation import DeviceException, RemoteException, Seq
|
45
47
|
from monarch.common.recording import flatten_messages, Recording
|
46
48
|
|
@@ -52,9 +54,6 @@ from monarch.common.tree import tree_map
|
|
52
54
|
|
53
55
|
from . import _coalescing
|
54
56
|
|
55
|
-
if TYPE_CHECKING:
|
56
|
-
from monarch.common.future import Future
|
57
|
-
|
58
57
|
|
59
58
|
logger = logging.getLogger(__name__)
|
60
59
|
|
@@ -447,6 +446,39 @@ class Client:
|
|
447
446
|
def mesh_state(self) -> WorldState:
|
448
447
|
return self.inner.worker_world_state()
|
449
448
|
|
449
|
+
def fetch(
|
450
|
+
self,
|
451
|
+
mesh: "DeviceMesh",
|
452
|
+
stream: "StreamRef",
|
453
|
+
shard,
|
454
|
+
preprocess_message,
|
455
|
+
args,
|
456
|
+
kwargs,
|
457
|
+
defs: Tuple["Tensor", ...],
|
458
|
+
uses: Tuple["Tensor", ...],
|
459
|
+
) -> "Future":
|
460
|
+
fut = Future(self)
|
461
|
+
ident = self.new_node(defs, uses, fut)
|
462
|
+
process = mesh._process(shard)
|
463
|
+
self.send(
|
464
|
+
process,
|
465
|
+
messages.SendValue(
|
466
|
+
ident,
|
467
|
+
None,
|
468
|
+
defs,
|
469
|
+
preprocess_message,
|
470
|
+
args,
|
471
|
+
kwargs,
|
472
|
+
stream,
|
473
|
+
),
|
474
|
+
)
|
475
|
+
# we have to ask for status updates
|
476
|
+
# from workers to be sure they have finished
|
477
|
+
# enough work to count this future as finished,
|
478
|
+
# and all potential errors have been reported
|
479
|
+
self._request_status()
|
480
|
+
return fut
|
481
|
+
|
450
482
|
|
451
483
|
def tree_map_refs(first_ref: int, tree):
|
452
484
|
def translate_id(ref: int) -> int:
|
monarch/common/remote.py
CHANGED
@@ -21,6 +21,7 @@ from typing import (
|
|
21
21
|
overload,
|
22
22
|
Protocol,
|
23
23
|
Tuple,
|
24
|
+
TYPE_CHECKING,
|
24
25
|
TypeVar,
|
25
26
|
)
|
26
27
|
|
@@ -30,6 +31,9 @@ import torch
|
|
30
31
|
|
31
32
|
from monarch.common import _coalescing, device_mesh, messages, stream
|
32
33
|
|
34
|
+
if TYPE_CHECKING:
|
35
|
+
from monarch.common.client import Client
|
36
|
+
|
33
37
|
from monarch.common.device_mesh import RemoteProcessGroup
|
34
38
|
from monarch.common.fake import fake_call
|
35
39
|
|
@@ -173,30 +177,19 @@ def _call_on_shard_and_fetch(
|
|
173
177
|
propagator, rfunction, args, kwargs, ambient_mesh, stream._active
|
174
178
|
)
|
175
179
|
|
176
|
-
client = mesh.client
|
180
|
+
client: "Client" = mesh.client
|
177
181
|
if _coalescing.is_active(client):
|
178
182
|
raise NotImplementedError("NYI: fetching results during a coalescing block")
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
preprocess_message,
|
189
|
-
args,
|
190
|
-
kwargs,
|
191
|
-
stream._active._to_ref(client),
|
192
|
-
),
|
183
|
+
return client.fetch(
|
184
|
+
mesh,
|
185
|
+
stream._active._to_ref(client),
|
186
|
+
shard,
|
187
|
+
preprocess_message,
|
188
|
+
args,
|
189
|
+
kwargs,
|
190
|
+
mutates,
|
191
|
+
dtensors,
|
193
192
|
)
|
194
|
-
# we have to ask for status updates
|
195
|
-
# from workers to be sure they have finished
|
196
|
-
# enough work to count this future as finished,
|
197
|
-
# and all potential errors have been reported
|
198
|
-
client._request_status()
|
199
|
-
return fut
|
200
193
|
|
201
194
|
|
202
195
|
@remote
|
monarch/future.py
CHANGED
@@ -5,21 +5,72 @@
|
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
6
6
|
|
7
7
|
import asyncio
|
8
|
-
from
|
8
|
+
from functools import partial
|
9
|
+
from typing import Generator, Generic, Optional, TypeVar
|
9
10
|
|
10
11
|
R = TypeVar("R")
|
11
12
|
|
12
13
|
|
14
|
+
def _incomplete(impl, self):
|
15
|
+
try:
|
16
|
+
return self._set_result(impl())
|
17
|
+
except Exception as e:
|
18
|
+
self._set_exception(e)
|
19
|
+
raise
|
20
|
+
|
21
|
+
|
22
|
+
async def _aincomplete(impl, self):
|
23
|
+
try:
|
24
|
+
return self._set_result(await impl())
|
25
|
+
except Exception as e:
|
26
|
+
self._set_exception(e)
|
27
|
+
raise
|
28
|
+
|
29
|
+
|
13
30
|
# TODO: consolidate with monarch.common.future
|
14
31
|
class ActorFuture(Generic[R]):
|
15
32
|
def __init__(self, impl, blocking_impl=None):
|
16
|
-
|
17
|
-
|
33
|
+
if blocking_impl is None:
|
34
|
+
blocking_impl = partial(asyncio.run, impl())
|
35
|
+
self._get = partial(_incomplete, blocking_impl)
|
36
|
+
self._aget = partial(_aincomplete, impl)
|
18
37
|
|
19
|
-
def get(self) -> R:
|
20
|
-
if
|
21
|
-
return self.
|
22
|
-
return
|
38
|
+
def get(self, timeout: Optional[float] = None) -> R:
|
39
|
+
if timeout is not None:
|
40
|
+
return asyncio.run(asyncio.wait_for(self._aget(self), timeout))
|
41
|
+
return self._get(self)
|
23
42
|
|
24
43
|
def __await__(self) -> Generator[R, None, R]:
|
25
|
-
return self.
|
44
|
+
return self._aget(self).__await__()
|
45
|
+
|
46
|
+
def _set_result(self, result):
|
47
|
+
def f(self):
|
48
|
+
return result
|
49
|
+
|
50
|
+
async def af(self):
|
51
|
+
return result
|
52
|
+
|
53
|
+
self._get, self._aget = f, af
|
54
|
+
return result
|
55
|
+
|
56
|
+
def _set_exception(self, e):
|
57
|
+
def f(self):
|
58
|
+
raise e
|
59
|
+
|
60
|
+
async def af(self):
|
61
|
+
raise e
|
62
|
+
|
63
|
+
self._get, self._aget = f, af
|
64
|
+
|
65
|
+
# compatibility with old tensor engine Future objects
|
66
|
+
# hopefully we do not need done(), add_callback because
|
67
|
+
# they are harder to implement right.
|
68
|
+
def result(self, timeout: Optional[float] = None) -> R:
|
69
|
+
return self.get(timeout)
|
70
|
+
|
71
|
+
def exception(self, timeout: Optional[float] = None):
|
72
|
+
try:
|
73
|
+
self.get(timeout)
|
74
|
+
return None
|
75
|
+
except Exception as e:
|
76
|
+
return e
|
tests/test_python_actors.py
CHANGED
@@ -29,6 +29,7 @@ from monarch.actor_mesh import (
|
|
29
29
|
MonarchContext,
|
30
30
|
)
|
31
31
|
from monarch.debugger import init_debugging
|
32
|
+
from monarch.future import ActorFuture
|
32
33
|
|
33
34
|
from monarch.mesh_controller import spawn_tensor_engine
|
34
35
|
|
@@ -672,3 +673,100 @@ async def test_async_concurrency():
|
|
672
673
|
# actually concurrently processing messages.
|
673
674
|
await am.no_more.call()
|
674
675
|
await fut
|
676
|
+
|
677
|
+
|
678
|
+
async def awaitit(f):
|
679
|
+
return await f
|
680
|
+
|
681
|
+
|
682
|
+
def test_actor_future():
|
683
|
+
v = 0
|
684
|
+
|
685
|
+
async def incr():
|
686
|
+
nonlocal v
|
687
|
+
v += 1
|
688
|
+
return v
|
689
|
+
|
690
|
+
# can use async implementation from sync
|
691
|
+
# if no non-blocking is provided
|
692
|
+
f = ActorFuture(incr)
|
693
|
+
assert f.get() == 1
|
694
|
+
assert v == 1
|
695
|
+
assert f.get() == 1
|
696
|
+
assert asyncio.run(awaitit(f)) == 1
|
697
|
+
|
698
|
+
f = ActorFuture(incr)
|
699
|
+
assert asyncio.run(awaitit(f)) == 2
|
700
|
+
assert f.get() == 2
|
701
|
+
|
702
|
+
def incr2():
|
703
|
+
nonlocal v
|
704
|
+
v += 2
|
705
|
+
return v
|
706
|
+
|
707
|
+
# Use non-blocking optimization if provided
|
708
|
+
f = ActorFuture(incr, incr2)
|
709
|
+
assert f.get() == 4
|
710
|
+
assert asyncio.run(awaitit(f)) == 4
|
711
|
+
|
712
|
+
async def nope():
|
713
|
+
nonlocal v
|
714
|
+
v += 1
|
715
|
+
raise ValueError("nope")
|
716
|
+
|
717
|
+
f = ActorFuture(nope)
|
718
|
+
|
719
|
+
with pytest.raises(ValueError):
|
720
|
+
f.get()
|
721
|
+
|
722
|
+
assert v == 5
|
723
|
+
|
724
|
+
with pytest.raises(ValueError):
|
725
|
+
f.get()
|
726
|
+
|
727
|
+
assert v == 5
|
728
|
+
|
729
|
+
with pytest.raises(ValueError):
|
730
|
+
asyncio.run(awaitit(f))
|
731
|
+
|
732
|
+
assert v == 5
|
733
|
+
|
734
|
+
def nope():
|
735
|
+
nonlocal v
|
736
|
+
v += 1
|
737
|
+
raise ValueError("nope")
|
738
|
+
|
739
|
+
f = ActorFuture(incr, nope)
|
740
|
+
|
741
|
+
with pytest.raises(ValueError):
|
742
|
+
f.get()
|
743
|
+
|
744
|
+
assert v == 6
|
745
|
+
|
746
|
+
with pytest.raises(ValueError):
|
747
|
+
f.result()
|
748
|
+
|
749
|
+
assert f.exception() is not None
|
750
|
+
|
751
|
+
assert v == 6
|
752
|
+
|
753
|
+
with pytest.raises(ValueError):
|
754
|
+
asyncio.run(awaitit(f))
|
755
|
+
|
756
|
+
assert v == 6
|
757
|
+
|
758
|
+
async def seven():
|
759
|
+
return 7
|
760
|
+
|
761
|
+
f = ActorFuture(seven)
|
762
|
+
|
763
|
+
assert 7 == f.get(timeout=0.001)
|
764
|
+
|
765
|
+
async def neverfinish():
|
766
|
+
f = asyncio.Future()
|
767
|
+
await f
|
768
|
+
|
769
|
+
f = ActorFuture(neverfinish)
|
770
|
+
|
771
|
+
with pytest.raises(asyncio.exceptions.TimeoutError):
|
772
|
+
f.get(timeout=0.1)
|
{torchmonarch_nightly-2025.6.19.dist-info → torchmonarch_nightly-2025.6.20.dist-info}/RECORD
RENAMED
@@ -1,5 +1,5 @@
|
|
1
1
|
monarch/__init__.py,sha256=iUvWHc0-7Q2tovRoRxOIiA3TsefMXCbWl-jEfQ2djew,6897
|
2
|
-
monarch/_rust_bindings.so,sha256=
|
2
|
+
monarch/_rust_bindings.so,sha256=sDdg6RjptgNPmFnFiDAgv36k_Or_Kz47aYaZ2M5EAao,41088032
|
3
3
|
monarch/_testing.py,sha256=jOIOG6jcZBzvEvG_DwSnwCkaMVXvSun6sJAG6nXemww,7859
|
4
4
|
monarch/actor_mesh.py,sha256=m6QapbZHqYujXya28jW1II2wkBUV_nKGvxmWPSW9lsQ,24327
|
5
5
|
monarch/allocator.py,sha256=UEaVLntH4xQ8Lr84TbgcXusvuK8FhSMJmav-omztUbw,4473
|
@@ -7,7 +7,7 @@ monarch/bootstrap_main.py,sha256=RCUQhJk07yMFiKp6HzQuqZFUpkgsT9kVEyimiwjn6_E,182
|
|
7
7
|
monarch/cached_remote_function.py,sha256=kYdB6r4OHx_T_uX4q3tCNcp1t2DJwF8tPTIahUiT2pU,8785
|
8
8
|
monarch/debugger.py,sha256=AdlvOG3X-9Pw9c1DLQYEy4vjEfh0ZtwtsNJEFLFzN8o,13312
|
9
9
|
monarch/fetch.py,sha256=61jxo7sx4QNUTkc0_rF5NaJROen4tKbAaiIjrXWLOvg,1705
|
10
|
-
monarch/future.py,sha256=
|
10
|
+
monarch/future.py,sha256=g1VYJl8ReBBS6VbikwWilnFqEr5qJDiSKid92AnWFV4,2058
|
11
11
|
monarch/gradient_generator.py,sha256=Rl3dmXGceTdCc1mYBg2JciR88ywGPnW7TVkL86KwqEA,6366
|
12
12
|
monarch/memory.py,sha256=ol86dBhFAJqg78iF25-BuK0wuwj1onR8FIioZ_B0gjw,1377
|
13
13
|
monarch/mesh_controller.py,sha256=am1QP7dvn0OH1z9ADSKm41APs1HY_dHcBAhOVP-QDmE,10427
|
@@ -46,7 +46,7 @@ monarch/common/_device_utils.py,sha256=gBpl23wMjppVAEzzj8U9HyX-B7Bs2_3ftiMAkzUS4
|
|
46
46
|
monarch/common/_tensor_to_table.py,sha256=yRjCNwvtl188Z1Dwkx3ZU-Bh2mwYnQ0Lnue2RAztwvc,5753
|
47
47
|
monarch/common/base_tensor.py,sha256=ujRzR6lWaeCdPv2JX0vCR-VsCWn-3SHaJIkZH1Sw9FQ,1159
|
48
48
|
monarch/common/borrows.py,sha256=7KR62xoUat1T6FyADsdHsxVAVIJDvfJWUnPO-xx277U,5307
|
49
|
-
monarch/common/client.py,sha256=
|
49
|
+
monarch/common/client.py,sha256=abYQqi-yFzG0ERvh3gMC5UgiWSezmM20kbxzalKpnf4,25806
|
50
50
|
monarch/common/constants.py,sha256=ohvsVYMpfeWopv3KXDAeHWDFLukwc-OY37VRxpKNBE8,300
|
51
51
|
monarch/common/context_manager.py,sha256=GOeyaFbyCqvQmkJ0oI7q6IxRd8_0mVyYKZRccI8iaug,1067
|
52
52
|
monarch/common/controller_api.py,sha256=djGkK5aSd-V6pBkr3uBCXbfJv3OKf2o2VbBXJgFF2WI,3202
|
@@ -65,7 +65,7 @@ monarch/common/pipe.py,sha256=9pTf8--3yOv4HpnJEhgcmc_JM6Az4uL1y72TSQA55dw,5013
|
|
65
65
|
monarch/common/process_group.py,sha256=FbJ_AJRZYFkvQ68L2naRq64J_aNuAKe5kO0MWdn_x74,1662
|
66
66
|
monarch/common/recording.py,sha256=hoI9VY_FyW_xVx-jmfsKydqX5vW2GulwcDWsBdUVOm8,4637
|
67
67
|
monarch/common/reference.py,sha256=O26lkzEeVwj0S1xEy-OLqdHVnACmmlbQCUmXRrW4n1Q,938
|
68
|
-
monarch/common/remote.py,sha256=
|
68
|
+
monarch/common/remote.py,sha256=vklFYJvuaPpS8kAyFmRz-T-brfHvcZ1lPTC_-7DIwqM,8908
|
69
69
|
monarch/common/selection.py,sha256=lpWFbZs3ArYy29e-53eoAVAjQFksf1RvZz9NvM0CUW4,308
|
70
70
|
monarch/common/shape.py,sha256=B-7DI768ZhT8ECUNCJcI7DfCB7iDFGFH0r-HmXaAfcM,8296
|
71
71
|
monarch/common/stream.py,sha256=_ejoxafHtdD10lLzznRCXKwrkZ_ZH9k_VTgiA5yfBrI,3583
|
@@ -145,7 +145,7 @@ tests/test_future.py,sha256=cXzaNi2YDwVyjR541ScXmgktX1YFsKzbl8wep0DMVbk,3032
|
|
145
145
|
tests/test_grad_generator.py,sha256=p4Pm4kMEeGldt2jUVAkGKCB0mLccKI28pltH6OTGbQA,3412
|
146
146
|
tests/test_mock_cuda.py,sha256=5hisElxeLJ5MHw3KM9gwxBiXiMaG-Rm382u3AsQcDOI,3068
|
147
147
|
tests/test_pdb_actor.py,sha256=5KJhuhcZDPWMdjC6eAtDdwnz1W7jNFXvIrMSFaCWaPw,3858
|
148
|
-
tests/test_python_actors.py,sha256=
|
148
|
+
tests/test_python_actors.py,sha256=du0AiGiKtVHOLkDUKu6gV75eYf_NoHDKV6utKzrplz4,21010
|
149
149
|
tests/test_remote_functions.py,sha256=5nxYB8dfA9NT9f9Od9O3htgQtPbiRNiXZ1Kgtn75sOQ,50056
|
150
150
|
tests/test_rust_backend.py,sha256=94S3R995ZkyIhEiBsM5flcjf5X7bscEAHBtInbTRFe8,7776
|
151
151
|
tests/test_signal_safe_block_on.py,sha256=bmal0XgzJowZXJV6T1Blow5a-vZluYWusCThLMGxyTE,3336
|
@@ -155,9 +155,9 @@ tests/simulator/test_profiling.py,sha256=TGYCfzTLdkpIwnOuO6KApprmrgPIRQe60KRX3wk
|
|
155
155
|
tests/simulator/test_simulator.py,sha256=LO8lA0ssY-OGEBL5ipEu74f97Y765TEwfUOv-DtIptM,14568
|
156
156
|
tests/simulator/test_task.py,sha256=ipqBDuDAysuo1xOB9S5psaFvwe6VATD43IovCTSs0t4,2327
|
157
157
|
tests/simulator/test_worker.py,sha256=QrWWIJ3HDgDLkBPRc2mwYPlOQoXQcj1qRfc0WUfKkFY,3507
|
158
|
-
torchmonarch_nightly-2025.6.
|
159
|
-
torchmonarch_nightly-2025.6.
|
160
|
-
torchmonarch_nightly-2025.6.
|
161
|
-
torchmonarch_nightly-2025.6.
|
162
|
-
torchmonarch_nightly-2025.6.
|
163
|
-
torchmonarch_nightly-2025.6.
|
158
|
+
torchmonarch_nightly-2025.6.20.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
|
159
|
+
torchmonarch_nightly-2025.6.20.dist-info/METADATA,sha256=QKiDH01IYFpa492TDs5WzWeDRbjMKmpSAc3V9NpQ5YM,2772
|
160
|
+
torchmonarch_nightly-2025.6.20.dist-info/WHEEL,sha256=_wZSFk0d90K9wOBp8Q-UGxshyiJ987JoPiyUBNC6VLk,104
|
161
|
+
torchmonarch_nightly-2025.6.20.dist-info/entry_points.txt,sha256=sqfQ16oZqjEvttUI-uj9BBXIIE6jt05bYFSmy-2hyXI,106
|
162
|
+
torchmonarch_nightly-2025.6.20.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
|
163
|
+
torchmonarch_nightly-2025.6.20.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
{torchmonarch_nightly-2025.6.19.dist-info → torchmonarch_nightly-2025.6.20.dist-info}/top_level.txt
RENAMED
File without changes
|