torchmonarch-nightly 2025.6.19__cp310-cp310-manylinux2014_x86_64.whl → 2025.6.20__cp310-cp310-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
monarch/_rust_bindings.so CHANGED
Binary file
monarch/common/client.py CHANGED
@@ -41,6 +41,8 @@ from monarch.common import messages
41
41
  from monarch.common.borrows import Borrow, StorageAliases
42
42
  from monarch.common.controller_api import LogMessage, MessageResult, TController
43
43
  from monarch.common.device_mesh import DeviceMesh
44
+
45
+ from monarch.common.future import Future
44
46
  from monarch.common.invocation import DeviceException, RemoteException, Seq
45
47
  from monarch.common.recording import flatten_messages, Recording
46
48
 
@@ -52,9 +54,6 @@ from monarch.common.tree import tree_map
52
54
 
53
55
  from . import _coalescing
54
56
 
55
- if TYPE_CHECKING:
56
- from monarch.common.future import Future
57
-
58
57
 
59
58
  logger = logging.getLogger(__name__)
60
59
 
@@ -447,6 +446,39 @@ class Client:
447
446
  def mesh_state(self) -> WorldState:
448
447
  return self.inner.worker_world_state()
449
448
 
449
+ def fetch(
450
+ self,
451
+ mesh: "DeviceMesh",
452
+ stream: "StreamRef",
453
+ shard,
454
+ preprocess_message,
455
+ args,
456
+ kwargs,
457
+ defs: Tuple["Tensor", ...],
458
+ uses: Tuple["Tensor", ...],
459
+ ) -> "Future":
460
+ fut = Future(self)
461
+ ident = self.new_node(defs, uses, fut)
462
+ process = mesh._process(shard)
463
+ self.send(
464
+ process,
465
+ messages.SendValue(
466
+ ident,
467
+ None,
468
+ defs,
469
+ preprocess_message,
470
+ args,
471
+ kwargs,
472
+ stream,
473
+ ),
474
+ )
475
+ # we have to ask for status updates
476
+ # from workers to be sure they have finished
477
+ # enough work to count this future as finished,
478
+ # and all potential errors have been reported
479
+ self._request_status()
480
+ return fut
481
+
450
482
 
451
483
  def tree_map_refs(first_ref: int, tree):
452
484
  def translate_id(ref: int) -> int:
monarch/common/remote.py CHANGED
@@ -21,6 +21,7 @@ from typing import (
21
21
  overload,
22
22
  Protocol,
23
23
  Tuple,
24
+ TYPE_CHECKING,
24
25
  TypeVar,
25
26
  )
26
27
 
@@ -30,6 +31,9 @@ import torch
30
31
 
31
32
  from monarch.common import _coalescing, device_mesh, messages, stream
32
33
 
34
+ if TYPE_CHECKING:
35
+ from monarch.common.client import Client
36
+
33
37
  from monarch.common.device_mesh import RemoteProcessGroup
34
38
  from monarch.common.fake import fake_call
35
39
 
@@ -173,30 +177,19 @@ def _call_on_shard_and_fetch(
173
177
  propagator, rfunction, args, kwargs, ambient_mesh, stream._active
174
178
  )
175
179
 
176
- client = mesh.client
180
+ client: "Client" = mesh.client
177
181
  if _coalescing.is_active(client):
178
182
  raise NotImplementedError("NYI: fetching results during a coalescing block")
179
- fut = Future(client)
180
- ident = client.new_node(mutates, dtensors, fut)
181
- process = mesh._process(shard)
182
- client.send(
183
- process,
184
- messages.SendValue(
185
- ident,
186
- None,
187
- mutates,
188
- preprocess_message,
189
- args,
190
- kwargs,
191
- stream._active._to_ref(client),
192
- ),
183
+ return client.fetch(
184
+ mesh,
185
+ stream._active._to_ref(client),
186
+ shard,
187
+ preprocess_message,
188
+ args,
189
+ kwargs,
190
+ mutates,
191
+ dtensors,
193
192
  )
194
- # we have to ask for status updates
195
- # from workers to be sure they have finished
196
- # enough work to count this future as finished,
197
- # and all potential errors have been reported
198
- client._request_status()
199
- return fut
200
193
 
201
194
 
202
195
  @remote
monarch/future.py CHANGED
@@ -5,21 +5,72 @@
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
7
  import asyncio
8
- from typing import Generator, Generic, TypeVar
8
+ from functools import partial
9
+ from typing import Generator, Generic, Optional, TypeVar
9
10
 
10
11
  R = TypeVar("R")
11
12
 
12
13
 
14
+ def _incomplete(impl, self):
15
+ try:
16
+ return self._set_result(impl())
17
+ except Exception as e:
18
+ self._set_exception(e)
19
+ raise
20
+
21
+
22
+ async def _aincomplete(impl, self):
23
+ try:
24
+ return self._set_result(await impl())
25
+ except Exception as e:
26
+ self._set_exception(e)
27
+ raise
28
+
29
+
13
30
  # TODO: consolidate with monarch.common.future
14
31
  class ActorFuture(Generic[R]):
15
32
  def __init__(self, impl, blocking_impl=None):
16
- self._impl = impl
17
- self._blocking_impl = blocking_impl
33
+ if blocking_impl is None:
34
+ blocking_impl = partial(asyncio.run, impl())
35
+ self._get = partial(_incomplete, blocking_impl)
36
+ self._aget = partial(_aincomplete, impl)
18
37
 
19
- def get(self) -> R:
20
- if self._blocking_impl is not None:
21
- return self._blocking_impl()
22
- return asyncio.run(self._impl())
38
+ def get(self, timeout: Optional[float] = None) -> R:
39
+ if timeout is not None:
40
+ return asyncio.run(asyncio.wait_for(self._aget(self), timeout))
41
+ return self._get(self)
23
42
 
24
43
  def __await__(self) -> Generator[R, None, R]:
25
- return self._impl().__await__()
44
+ return self._aget(self).__await__()
45
+
46
+ def _set_result(self, result):
47
+ def f(self):
48
+ return result
49
+
50
+ async def af(self):
51
+ return result
52
+
53
+ self._get, self._aget = f, af
54
+ return result
55
+
56
+ def _set_exception(self, e):
57
+ def f(self):
58
+ raise e
59
+
60
+ async def af(self):
61
+ raise e
62
+
63
+ self._get, self._aget = f, af
64
+
65
+ # compatibility with old tensor engine Future objects
66
+ # hopefully we do not need done(), add_callback because
67
+ # they are harder to implement right.
68
+ def result(self, timeout: Optional[float] = None) -> R:
69
+ return self.get(timeout)
70
+
71
+ def exception(self, timeout: Optional[float] = None):
72
+ try:
73
+ self.get(timeout)
74
+ return None
75
+ except Exception as e:
76
+ return e
tests/test_python_actors.py CHANGED
@@ -29,6 +29,7 @@ from monarch.actor_mesh import (
29
29
  MonarchContext,
30
30
  )
31
31
  from monarch.debugger import init_debugging
32
+ from monarch.future import ActorFuture
32
33
 
33
34
  from monarch.mesh_controller import spawn_tensor_engine
34
35
 
@@ -672,3 +673,100 @@ async def test_async_concurrency():
672
673
  # actually concurrently processing messages.
673
674
  await am.no_more.call()
674
675
  await fut
676
+
677
+
678
+ async def awaitit(f):
679
+ return await f
680
+
681
+
682
+ def test_actor_future():
683
+ v = 0
684
+
685
+ async def incr():
686
+ nonlocal v
687
+ v += 1
688
+ return v
689
+
690
+ # can use async implementation from sync
691
+ # if no non-blocking is provided
692
+ f = ActorFuture(incr)
693
+ assert f.get() == 1
694
+ assert v == 1
695
+ assert f.get() == 1
696
+ assert asyncio.run(awaitit(f)) == 1
697
+
698
+ f = ActorFuture(incr)
699
+ assert asyncio.run(awaitit(f)) == 2
700
+ assert f.get() == 2
701
+
702
+ def incr2():
703
+ nonlocal v
704
+ v += 2
705
+ return v
706
+
707
+ # Use non-blocking optimization if provided
708
+ f = ActorFuture(incr, incr2)
709
+ assert f.get() == 4
710
+ assert asyncio.run(awaitit(f)) == 4
711
+
712
+ async def nope():
713
+ nonlocal v
714
+ v += 1
715
+ raise ValueError("nope")
716
+
717
+ f = ActorFuture(nope)
718
+
719
+ with pytest.raises(ValueError):
720
+ f.get()
721
+
722
+ assert v == 5
723
+
724
+ with pytest.raises(ValueError):
725
+ f.get()
726
+
727
+ assert v == 5
728
+
729
+ with pytest.raises(ValueError):
730
+ asyncio.run(awaitit(f))
731
+
732
+ assert v == 5
733
+
734
+ def nope():
735
+ nonlocal v
736
+ v += 1
737
+ raise ValueError("nope")
738
+
739
+ f = ActorFuture(incr, nope)
740
+
741
+ with pytest.raises(ValueError):
742
+ f.get()
743
+
744
+ assert v == 6
745
+
746
+ with pytest.raises(ValueError):
747
+ f.result()
748
+
749
+ assert f.exception() is not None
750
+
751
+ assert v == 6
752
+
753
+ with pytest.raises(ValueError):
754
+ asyncio.run(awaitit(f))
755
+
756
+ assert v == 6
757
+
758
+ async def seven():
759
+ return 7
760
+
761
+ f = ActorFuture(seven)
762
+
763
+ assert 7 == f.get(timeout=0.001)
764
+
765
+ async def neverfinish():
766
+ f = asyncio.Future()
767
+ await f
768
+
769
+ f = ActorFuture(neverfinish)
770
+
771
+ with pytest.raises(asyncio.exceptions.TimeoutError):
772
+ f.get(timeout=0.1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: torchmonarch-nightly
3
- Version: 2025.6.19
3
+ Version: 2025.6.20
4
4
  Summary: Monarch: Single controller library
5
5
  Author: Meta
6
6
  Author-email: oncall+monarch@xmail.facebook.com
@@ -1,5 +1,5 @@
1
1
  monarch/__init__.py,sha256=iUvWHc0-7Q2tovRoRxOIiA3TsefMXCbWl-jEfQ2djew,6897
2
- monarch/_rust_bindings.so,sha256=EUkkinIuX45ihfDu4ot656fOd0CxaepnmaZdUv0cOMY,41044112
2
+ monarch/_rust_bindings.so,sha256=sDdg6RjptgNPmFnFiDAgv36k_Or_Kz47aYaZ2M5EAao,41088032
3
3
  monarch/_testing.py,sha256=jOIOG6jcZBzvEvG_DwSnwCkaMVXvSun6sJAG6nXemww,7859
4
4
  monarch/actor_mesh.py,sha256=m6QapbZHqYujXya28jW1II2wkBUV_nKGvxmWPSW9lsQ,24327
5
5
  monarch/allocator.py,sha256=UEaVLntH4xQ8Lr84TbgcXusvuK8FhSMJmav-omztUbw,4473
@@ -7,7 +7,7 @@ monarch/bootstrap_main.py,sha256=RCUQhJk07yMFiKp6HzQuqZFUpkgsT9kVEyimiwjn6_E,182
7
7
  monarch/cached_remote_function.py,sha256=kYdB6r4OHx_T_uX4q3tCNcp1t2DJwF8tPTIahUiT2pU,8785
8
8
  monarch/debugger.py,sha256=AdlvOG3X-9Pw9c1DLQYEy4vjEfh0ZtwtsNJEFLFzN8o,13312
9
9
  monarch/fetch.py,sha256=61jxo7sx4QNUTkc0_rF5NaJROen4tKbAaiIjrXWLOvg,1705
10
- monarch/future.py,sha256=lcdFEe7m1shYPPuvZ1RkS6JUIChEKGBWe3v7x_nu4Hg,731
10
+ monarch/future.py,sha256=g1VYJl8ReBBS6VbikwWilnFqEr5qJDiSKid92AnWFV4,2058
11
11
  monarch/gradient_generator.py,sha256=Rl3dmXGceTdCc1mYBg2JciR88ywGPnW7TVkL86KwqEA,6366
12
12
  monarch/memory.py,sha256=ol86dBhFAJqg78iF25-BuK0wuwj1onR8FIioZ_B0gjw,1377
13
13
  monarch/mesh_controller.py,sha256=am1QP7dvn0OH1z9ADSKm41APs1HY_dHcBAhOVP-QDmE,10427
@@ -46,7 +46,7 @@ monarch/common/_device_utils.py,sha256=gBpl23wMjppVAEzzj8U9HyX-B7Bs2_3ftiMAkzUS4
46
46
  monarch/common/_tensor_to_table.py,sha256=yRjCNwvtl188Z1Dwkx3ZU-Bh2mwYnQ0Lnue2RAztwvc,5753
47
47
  monarch/common/base_tensor.py,sha256=ujRzR6lWaeCdPv2JX0vCR-VsCWn-3SHaJIkZH1Sw9FQ,1159
48
48
  monarch/common/borrows.py,sha256=7KR62xoUat1T6FyADsdHsxVAVIJDvfJWUnPO-xx277U,5307
49
- monarch/common/client.py,sha256=axo37s_z17nYQGOZG5fi_0zUEJ_8qw7INjs-Kw2vaVo,24937
49
+ monarch/common/client.py,sha256=abYQqi-yFzG0ERvh3gMC5UgiWSezmM20kbxzalKpnf4,25806
50
50
  monarch/common/constants.py,sha256=ohvsVYMpfeWopv3KXDAeHWDFLukwc-OY37VRxpKNBE8,300
51
51
  monarch/common/context_manager.py,sha256=GOeyaFbyCqvQmkJ0oI7q6IxRd8_0mVyYKZRccI8iaug,1067
52
52
  monarch/common/controller_api.py,sha256=djGkK5aSd-V6pBkr3uBCXbfJv3OKf2o2VbBXJgFF2WI,3202
@@ -65,7 +65,7 @@ monarch/common/pipe.py,sha256=9pTf8--3yOv4HpnJEhgcmc_JM6Az4uL1y72TSQA55dw,5013
65
65
  monarch/common/process_group.py,sha256=FbJ_AJRZYFkvQ68L2naRq64J_aNuAKe5kO0MWdn_x74,1662
66
66
  monarch/common/recording.py,sha256=hoI9VY_FyW_xVx-jmfsKydqX5vW2GulwcDWsBdUVOm8,4637
67
67
  monarch/common/reference.py,sha256=O26lkzEeVwj0S1xEy-OLqdHVnACmmlbQCUmXRrW4n1Q,938
68
- monarch/common/remote.py,sha256=qZWXkShX20l07TseQSpVECh2yXZaVKYUvQXkeEM-zvY,9220
68
+ monarch/common/remote.py,sha256=vklFYJvuaPpS8kAyFmRz-T-brfHvcZ1lPTC_-7DIwqM,8908
69
69
  monarch/common/selection.py,sha256=lpWFbZs3ArYy29e-53eoAVAjQFksf1RvZz9NvM0CUW4,308
70
70
  monarch/common/shape.py,sha256=B-7DI768ZhT8ECUNCJcI7DfCB7iDFGFH0r-HmXaAfcM,8296
71
71
  monarch/common/stream.py,sha256=_ejoxafHtdD10lLzznRCXKwrkZ_ZH9k_VTgiA5yfBrI,3583
@@ -145,7 +145,7 @@ tests/test_future.py,sha256=cXzaNi2YDwVyjR541ScXmgktX1YFsKzbl8wep0DMVbk,3032
145
145
  tests/test_grad_generator.py,sha256=p4Pm4kMEeGldt2jUVAkGKCB0mLccKI28pltH6OTGbQA,3412
146
146
  tests/test_mock_cuda.py,sha256=5hisElxeLJ5MHw3KM9gwxBiXiMaG-Rm382u3AsQcDOI,3068
147
147
  tests/test_pdb_actor.py,sha256=5KJhuhcZDPWMdjC6eAtDdwnz1W7jNFXvIrMSFaCWaPw,3858
148
- tests/test_python_actors.py,sha256=3ru2JsPQmaO7ppVX3-ls7JcvIeOgEmWWUsYKZCuBXPg,19256
148
+ tests/test_python_actors.py,sha256=du0AiGiKtVHOLkDUKu6gV75eYf_NoHDKV6utKzrplz4,21010
149
149
  tests/test_remote_functions.py,sha256=5nxYB8dfA9NT9f9Od9O3htgQtPbiRNiXZ1Kgtn75sOQ,50056
150
150
  tests/test_rust_backend.py,sha256=94S3R995ZkyIhEiBsM5flcjf5X7bscEAHBtInbTRFe8,7776
151
151
  tests/test_signal_safe_block_on.py,sha256=bmal0XgzJowZXJV6T1Blow5a-vZluYWusCThLMGxyTE,3336
@@ -155,9 +155,9 @@ tests/simulator/test_profiling.py,sha256=TGYCfzTLdkpIwnOuO6KApprmrgPIRQe60KRX3wk
155
155
  tests/simulator/test_simulator.py,sha256=LO8lA0ssY-OGEBL5ipEu74f97Y765TEwfUOv-DtIptM,14568
156
156
  tests/simulator/test_task.py,sha256=ipqBDuDAysuo1xOB9S5psaFvwe6VATD43IovCTSs0t4,2327
157
157
  tests/simulator/test_worker.py,sha256=QrWWIJ3HDgDLkBPRc2mwYPlOQoXQcj1qRfc0WUfKkFY,3507
158
- torchmonarch_nightly-2025.6.19.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
159
- torchmonarch_nightly-2025.6.19.dist-info/METADATA,sha256=2XYBEhTb9iSTFKhAGmq2Bg_AXwjQvcPj6CQmG4bBiLE,2772
160
- torchmonarch_nightly-2025.6.19.dist-info/WHEEL,sha256=_wZSFk0d90K9wOBp8Q-UGxshyiJ987JoPiyUBNC6VLk,104
161
- torchmonarch_nightly-2025.6.19.dist-info/entry_points.txt,sha256=sqfQ16oZqjEvttUI-uj9BBXIIE6jt05bYFSmy-2hyXI,106
162
- torchmonarch_nightly-2025.6.19.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
163
- torchmonarch_nightly-2025.6.19.dist-info/RECORD,,
158
+ torchmonarch_nightly-2025.6.20.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
159
+ torchmonarch_nightly-2025.6.20.dist-info/METADATA,sha256=QKiDH01IYFpa492TDs5WzWeDRbjMKmpSAc3V9NpQ5YM,2772
160
+ torchmonarch_nightly-2025.6.20.dist-info/WHEEL,sha256=_wZSFk0d90K9wOBp8Q-UGxshyiJ987JoPiyUBNC6VLk,104
161
+ torchmonarch_nightly-2025.6.20.dist-info/entry_points.txt,sha256=sqfQ16oZqjEvttUI-uj9BBXIIE6jt05bYFSmy-2hyXI,106
162
+ torchmonarch_nightly-2025.6.20.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
163
+ torchmonarch_nightly-2025.6.20.dist-info/RECORD,,