torchmonarch-nightly 2025.6.18__cp310-cp310-manylinux2014_x86_64.whl → 2025.6.20__cp310-cp310-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
monarch/_rust_bindings.so CHANGED
Binary file
monarch/actor_mesh.py CHANGED
@@ -48,8 +48,9 @@ from monarch._rust_bindings.monarch_hyperactor.actor_mesh import PythonActorMesh
48
48
  from monarch._rust_bindings.monarch_hyperactor.mailbox import (
49
49
  Mailbox,
50
50
  OncePortReceiver,
51
- PortId,
51
+ OncePortRef,
52
52
  PortReceiver as HyPortReceiver,
53
+ PortRef,
53
54
  )
54
55
  from monarch._rust_bindings.monarch_hyperactor.proc import ActorId
55
56
  from monarch._rust_bindings.monarch_hyperactor.shape import Point as HyPoint, Shape
@@ -227,6 +228,8 @@ class Endpoint(Generic[P, R]):
227
228
 
228
229
  Load balanced RPC-style entrypoint for request/response messaging.
229
230
  """
231
+ p: Port[R]
232
+ r: PortReceiver[R]
230
233
  p, r = port(self, once=True)
231
234
  # pyre-ignore
232
235
  send(self, args, kwargs, port=p, selection="choose")
@@ -365,7 +368,7 @@ def send(
365
368
  message = PythonMessage(
366
369
  endpoint._name,
367
370
  _pickle((args, kwargs)),
368
- None if port is None else port._port,
371
+ None if port is None else port._port_ref,
369
372
  None,
370
373
  )
371
374
  endpoint._actor_mesh.cast(message, selection)
@@ -389,14 +392,16 @@ def endpoint(
389
392
 
390
393
 
391
394
  class Port(Generic[R]):
392
- def __init__(self, port: PortId, mailbox: Mailbox, rank: Optional[int]) -> None:
393
- self._port = port
395
+ def __init__(
396
+ self, port_ref: PortRef | OncePortRef, mailbox: Mailbox, rank: Optional[int]
397
+ ) -> None:
398
+ self._port_ref = port_ref
394
399
  self._mailbox = mailbox
395
400
  self._rank = rank
396
401
 
397
402
  def send(self, method: str, obj: R) -> None:
398
- self._mailbox.post(
399
- self._port,
403
+ self._port_ref.send(
404
+ self._mailbox,
400
405
  PythonMessage(method, _pickle(obj), None, self._rank),
401
406
  )
402
407
 
@@ -410,8 +415,8 @@ def port(
410
415
  handle, receiver = (
411
416
  endpoint._mailbox.open_once_port() if once else endpoint._mailbox.open_port()
412
417
  )
413
- port_id: PortId = handle.bind()
414
- return Port(port_id, endpoint._mailbox, rank=None), PortReceiver(
418
+ port_ref: PortRef | OncePortRef = handle.bind()
419
+ return Port(port_ref, endpoint._mailbox, rank=None), PortReceiver(
415
420
  endpoint._mailbox, receiver
416
421
  )
417
422
 
monarch/allocator.py CHANGED
@@ -74,7 +74,7 @@ class RemoteAllocInitializer(abc.ABC):
74
74
  """
75
75
 
76
76
  @abc.abstractmethod
77
- async def initialize_alloc(self) -> list[str]:
77
+ async def initialize_alloc(self, match_labels: dict[str, str]) -> list[str]:
78
78
  """
79
79
  Return the addresses of the servers that should be used to allocate processes
80
80
  for the proc mesh. The addresses should be running hyperactor's RemoteProcessAllocator.
@@ -88,6 +88,10 @@ class RemoteAllocInitializer(abc.ABC):
88
88
  in the future this method can be called multiple times and should return the current set of
89
89
  addresses that are eligible to handle allocation requests.
90
90
 
91
+ Arguments:
92
+ - `match_labels`: The match labels specified in `AllocSpec.AllocConstraints`. Initializer implementations
93
+ can read specific labels for matching a set of hosts that will service `allocate()` requests.
94
+
91
95
  """
92
96
  ...
93
97
 
@@ -102,7 +106,8 @@ class StaticRemoteAllocInitializer(RemoteAllocInitializer):
102
106
  super().__init__()
103
107
  self.addrs: list[str] = list(addrs)
104
108
 
105
- async def initialize_alloc(self) -> list[str]:
109
+ async def initialize_alloc(self, match_labels: dict[str, str]) -> list[str]:
110
+ _ = match_labels # Suppress unused variable warning
106
111
  return list(self.addrs)
107
112
 
108
113
 
monarch/common/client.py CHANGED
@@ -41,6 +41,8 @@ from monarch.common import messages
41
41
  from monarch.common.borrows import Borrow, StorageAliases
42
42
  from monarch.common.controller_api import LogMessage, MessageResult, TController
43
43
  from monarch.common.device_mesh import DeviceMesh
44
+
45
+ from monarch.common.future import Future
44
46
  from monarch.common.invocation import DeviceException, RemoteException, Seq
45
47
  from monarch.common.recording import flatten_messages, Recording
46
48
 
@@ -52,9 +54,6 @@ from monarch.common.tree import tree_map
52
54
 
53
55
  from . import _coalescing
54
56
 
55
- if TYPE_CHECKING:
56
- from monarch.common.future import Future
57
-
58
57
 
59
58
  logger = logging.getLogger(__name__)
60
59
 
@@ -447,6 +446,39 @@ class Client:
447
446
  def mesh_state(self) -> WorldState:
448
447
  return self.inner.worker_world_state()
449
448
 
449
+ def fetch(
450
+ self,
451
+ mesh: "DeviceMesh",
452
+ stream: "StreamRef",
453
+ shard,
454
+ preprocess_message,
455
+ args,
456
+ kwargs,
457
+ defs: Tuple["Tensor", ...],
458
+ uses: Tuple["Tensor", ...],
459
+ ) -> "Future":
460
+ fut = Future(self)
461
+ ident = self.new_node(defs, uses, fut)
462
+ process = mesh._process(shard)
463
+ self.send(
464
+ process,
465
+ messages.SendValue(
466
+ ident,
467
+ None,
468
+ defs,
469
+ preprocess_message,
470
+ args,
471
+ kwargs,
472
+ stream,
473
+ ),
474
+ )
475
+ # we have to ask for status updates
476
+ # from workers to be sure they have finished
477
+ # enough work to count this future as finished,
478
+ # and all potential errors have been reported
479
+ self._request_status()
480
+ return fut
481
+
450
482
 
451
483
  def tree_map_refs(first_ref: int, tree):
452
484
  def translate_id(ref: int) -> int:
@@ -25,7 +25,6 @@ from monarch._rust_bindings.monarch_extension import tensor_worker
25
25
  from monarch.common.function import ResolvableFromCloudpickle, ResolvableFunction
26
26
  from monarch.common.invocation import DeviceException, RemoteException
27
27
  from monarch.common.reference import Referenceable
28
- from monarch.common.stream import StreamRef
29
28
  from monarch.common.tree import flattener
30
29
  from pyre_extensions import none_throws
31
30
 
@@ -33,6 +32,8 @@ from .shape import NDSlice
33
32
  from .tensor_factory import TensorFactory
34
33
 
35
34
  if TYPE_CHECKING:
35
+ from monarch.common.stream import StreamRef
36
+
36
37
  from .device_mesh import DeviceMesh, RemoteProcessGroup
37
38
  from .pipe import Pipe
38
39
  from .recording import Recording
@@ -98,7 +99,7 @@ class CreateDeviceMesh(NamedTuple):
98
99
 
99
100
 
100
101
  class CreateStream(NamedTuple):
101
- result: StreamRef
102
+ result: "StreamRef"
102
103
  default: bool
103
104
 
104
105
  def to_rust_message(self) -> tensor_worker.WorkerMessage:
@@ -132,7 +133,7 @@ class CallFunction(NamedTuple):
132
133
  function: ResolvableFunction
133
134
  args: Tuple[object, ...]
134
135
  kwargs: Dict[str, object]
135
- stream: StreamRef
136
+ stream: "StreamRef"
136
137
  device_mesh: DeviceMesh
137
138
  remote_process_groups: List[RemoteProcessGroup]
138
139
 
@@ -199,7 +200,7 @@ class RecordingFormal(NamedTuple):
199
200
  class RecordingResult(NamedTuple):
200
201
  input: Tensor | tensor_worker.Ref
201
202
  output_index: int
202
- stream: StreamRef
203
+ stream: "StreamRef"
203
204
 
204
205
  def to_rust_message(self) -> tensor_worker.WorkerMessage:
205
206
  return tensor_worker.RecordingResult(
monarch/common/remote.py CHANGED
@@ -21,6 +21,7 @@ from typing import (
21
21
  overload,
22
22
  Protocol,
23
23
  Tuple,
24
+ TYPE_CHECKING,
24
25
  TypeVar,
25
26
  )
26
27
 
@@ -30,6 +31,9 @@ import torch
30
31
 
31
32
  from monarch.common import _coalescing, device_mesh, messages, stream
32
33
 
34
+ if TYPE_CHECKING:
35
+ from monarch.common.client import Client
36
+
33
37
  from monarch.common.device_mesh import RemoteProcessGroup
34
38
  from monarch.common.fake import fake_call
35
39
 
@@ -173,30 +177,19 @@ def _call_on_shard_and_fetch(
173
177
  propagator, rfunction, args, kwargs, ambient_mesh, stream._active
174
178
  )
175
179
 
176
- client = mesh.client
180
+ client: "Client" = mesh.client
177
181
  if _coalescing.is_active(client):
178
182
  raise NotImplementedError("NYI: fetching results during a coalescing block")
179
- fut = Future(client)
180
- ident = client.new_node(mutates, dtensors, fut)
181
- process = mesh._process(shard)
182
- client.send(
183
- process,
184
- messages.SendValue(
185
- ident,
186
- None,
187
- mutates,
188
- preprocess_message,
189
- args,
190
- kwargs,
191
- stream._active._to_ref(client),
192
- ),
183
+ return client.fetch(
184
+ mesh,
185
+ stream._active._to_ref(client),
186
+ shard,
187
+ preprocess_message,
188
+ args,
189
+ kwargs,
190
+ mutates,
191
+ dtensors,
193
192
  )
194
- # we have to ask for status updates
195
- # from workers to be sure they have finished
196
- # enough work to count this future as finished,
197
- # and all potential errors have been reported
198
- client._request_status()
199
- return fut
200
193
 
201
194
 
202
195
  @remote
monarch/future.py CHANGED
@@ -5,21 +5,72 @@
5
5
  # LICENSE file in the root directory of this source tree.
6
6
 
7
7
  import asyncio
8
- from typing import Generator, Generic, TypeVar
8
+ from functools import partial
9
+ from typing import Generator, Generic, Optional, TypeVar
9
10
 
10
11
  R = TypeVar("R")
11
12
 
12
13
 
14
+ def _incomplete(impl, self):
15
+ try:
16
+ return self._set_result(impl())
17
+ except Exception as e:
18
+ self._set_exception(e)
19
+ raise
20
+
21
+
22
+ async def _aincomplete(impl, self):
23
+ try:
24
+ return self._set_result(await impl())
25
+ except Exception as e:
26
+ self._set_exception(e)
27
+ raise
28
+
29
+
13
30
  # TODO: consolidate with monarch.common.future
14
31
  class ActorFuture(Generic[R]):
15
32
  def __init__(self, impl, blocking_impl=None):
16
- self._impl = impl
17
- self._blocking_impl = blocking_impl
33
+ if blocking_impl is None:
34
+ blocking_impl = partial(asyncio.run, impl())
35
+ self._get = partial(_incomplete, blocking_impl)
36
+ self._aget = partial(_aincomplete, impl)
18
37
 
19
- def get(self) -> R:
20
- if self._blocking_impl is not None:
21
- return self._blocking_impl()
22
- return asyncio.run(self._impl())
38
+ def get(self, timeout: Optional[float] = None) -> R:
39
+ if timeout is not None:
40
+ return asyncio.run(asyncio.wait_for(self._aget(self), timeout))
41
+ return self._get(self)
23
42
 
24
43
  def __await__(self) -> Generator[R, None, R]:
25
- return self._impl().__await__()
44
+ return self._aget(self).__await__()
45
+
46
+ def _set_result(self, result):
47
+ def f(self):
48
+ return result
49
+
50
+ async def af(self):
51
+ return result
52
+
53
+ self._get, self._aget = f, af
54
+ return result
55
+
56
+ def _set_exception(self, e):
57
+ def f(self):
58
+ raise e
59
+
60
+ async def af(self):
61
+ raise e
62
+
63
+ self._get, self._aget = f, af
64
+
65
+ # compatibility with old tensor engine Future objects
66
+ # hopefully we do not need done(), add_callback because
67
+ # they are harder to implement right.
68
+ def result(self, timeout: Optional[float] = None) -> R:
69
+ return self.get(timeout)
70
+
71
+ def exception(self, timeout: Optional[float] = None):
72
+ try:
73
+ self.get(timeout)
74
+ return None
75
+ except Exception as e:
76
+ return e
Binary file
monarch/tools/commands.py CHANGED
@@ -9,7 +9,10 @@
9
9
  import argparse
10
10
  import functools
11
11
  import inspect
12
+ import logging
12
13
  import os
14
+ import time
15
+ from datetime import timedelta
13
16
  from typing import Any, Callable, Mapping, Optional, Union
14
17
 
15
18
  from monarch.tools.config import ( # @manual=//monarch/python/monarch/tools/config/meta:defaults
@@ -18,12 +21,13 @@ from monarch.tools.config import ( # @manual=//monarch/python/monarch/tools/con
18
21
  )
19
22
 
20
23
  from monarch.tools.mesh_spec import mesh_spec_from_metadata, ServerSpec
21
-
22
24
  from torchx.runner import Runner
23
- from torchx.specs import AppDef, AppDryRunInfo, CfgVal
25
+ from torchx.specs import AppDef, AppDryRunInfo, AppState, CfgVal
24
26
  from torchx.specs.builders import parse_args
25
27
  from torchx.util.types import decode, decode_optional
26
28
 
29
+ logger: logging.Logger = logging.getLogger(__name__)
30
+
27
31
 
28
32
  def torchx_runner() -> Runner:
29
33
  # namespace is currently unused so make it empty str
@@ -165,15 +169,73 @@ def info(server_handle: str) -> Optional[ServerSpec]:
165
169
  if appdef is None:
166
170
  return None
167
171
 
172
+ # host status grouped by mesh (role) names
173
+ replica_status = {r.role: r.replicas for r in status.roles}
174
+
168
175
  mesh_specs = []
169
176
  for role in appdef.roles:
170
177
  spec = mesh_spec_from_metadata(appdef, role.name)
171
178
  assert spec is not None, "cannot be 'None' since we iterate over appdef's roles"
179
+
180
+ # null-guard since some schedulers do not fill replica_status
181
+ if host_status := replica_status.get(role.name):
182
+ spec.hostnames = [h.hostname for h in host_status]
183
+
172
184
  mesh_specs.append(spec)
173
185
 
174
186
  return ServerSpec(name=appdef.name, state=status.state, meshes=mesh_specs)
175
187
 
176
188
 
189
+ _5_SECONDS = timedelta(seconds=5)
190
+
191
+
192
+ async def server_ready(
193
+ server_handle: str, check_interval: timedelta = _5_SECONDS
194
+ ) -> Optional[ServerSpec]:
195
+ """Waits until the server's job is in RUNNING state, then returns the server spec.
196
+ Returns `None` if the server does not exist.
197
+
198
+ NOTE: Certain fields such as `hostnames` are only filled (and valid) when the server is RUNNING.
199
+
200
+ Usage:
201
+
202
+ .. code-block:: python
203
+
204
+ server_info = await server_ready("slurm:///123")
205
+ if not server_info:
206
+ print(f"Job does not exist")
207
+ else:
208
+ if server_info.is_running:
209
+ for mesh in server_info.meshes:
210
+ connect_to(mesh.hostnames)
211
+ else:
212
+ print(f"Job in {server_info.state} state. Hostnames are not available")
213
+
214
+ """
215
+
216
+ while True:
217
+ server_spec = info(server_handle)
218
+
219
+ if not server_spec: # server not found
220
+ return None
221
+
222
+ if server_spec.state <= AppState.PENDING: # UNSUBMITTED or SUBMITTED or PENDING
223
+ # NOTE: TorchX currently does not have async APIs so need to loop-on-interval
224
+ # TODO maybe inverse exponential backoff instead of constant interval?
225
+ check_interval_seconds = check_interval.total_seconds()
226
+ logger.info(
227
+ "waiting for %s to be %s (current: %s), will check again in %g seconds...",
228
+ server_handle,
229
+ AppState.RUNNING,
230
+ server_spec.state,
231
+ check_interval_seconds,
232
+ )
233
+ time.sleep(check_interval_seconds)
234
+ continue
235
+ else:
236
+ return server_spec
237
+
238
+
177
239
  def kill(server_handle: str) -> None:
178
240
  with torchx_runner() as runner:
179
241
  runner.cancel(server_handle)
@@ -6,7 +6,7 @@
6
6
 
7
7
  # pyre-strict
8
8
  import string
9
- from dataclasses import dataclass
9
+ from dataclasses import dataclass, field
10
10
  from typing import Any, Optional
11
11
 
12
12
  from torchx import specs
@@ -29,6 +29,7 @@ class MeshSpec:
29
29
  host_type: str
30
30
  gpus: int
31
31
  port: int = DEFAULT_REMOTE_ALLOCATOR_PORT
32
+ hostnames: list[str] = field(default_factory=list)
32
33
 
33
34
 
34
35
  def _tag(mesh_name: str, tag_template: str) -> str:
@@ -84,6 +85,10 @@ class ServerSpec:
84
85
  state: specs.AppState
85
86
  meshes: list[MeshSpec]
86
87
 
88
+ @property
89
+ def is_running(self) -> bool:
90
+ return self.state == specs.AppState.RUNNING
91
+
87
92
  def get_mesh_spec(self, mesh_name: str) -> MeshSpec:
88
93
  for mesh_spec in self.meshes:
89
94
  if mesh_spec.name == mesh_name:
@@ -115,6 +120,7 @@ class ServerSpec:
115
120
  "host_type": mesh.host_type,
116
121
  "hosts": mesh.num_hosts,
117
122
  "gpus": mesh.gpus,
123
+ "hostnames": mesh.hostnames,
118
124
  }
119
125
  for mesh in self.meshes
120
126
  },
tests/test_allocator.py CHANGED
@@ -116,8 +116,8 @@ class TestRemoteAllocator(unittest.IsolatedAsyncioTestCase):
116
116
  used to test that the state of the initializer is preserved across calls to allocate()
117
117
  """
118
118
 
119
- async def initialize_alloc(self) -> list[str]:
120
- alloc = await super().initialize_alloc()
119
+ async def initialize_alloc(self, match_labels: dict[str, str]) -> list[str]:
120
+ alloc = await super().initialize_alloc(match_labels)
121
121
  self.addrs.pop(-1)
122
122
  return alloc
123
123
 
@@ -142,7 +142,8 @@ class TestRemoteAllocator(unittest.IsolatedAsyncioTestCase):
142
142
  class EmptyAllocInitializer(StaticRemoteAllocInitializer):
143
143
  """test initializer that returns an empty list of addresses"""
144
144
 
145
- async def initialize_alloc(self) -> list[str]:
145
+ async def initialize_alloc(self, match_labels: dict[str, str]) -> list[str]:
146
+ _ = match_labels # Suppress unused variable warning
146
147
  return []
147
148
 
148
149
  empty_initializer = EmptyAllocInitializer()
@@ -9,6 +9,7 @@ import operator
9
9
  import os
10
10
  import re
11
11
  import threading
12
+ import time
12
13
  from types import ModuleType
13
14
  from unittest.mock import AsyncMock, patch
14
15
 
@@ -28,6 +29,7 @@ from monarch.actor_mesh import (
28
29
  MonarchContext,
29
30
  )
30
31
  from monarch.debugger import init_debugging
32
+ from monarch.future import ActorFuture
31
33
 
32
34
  from monarch.mesh_controller import spawn_tensor_engine
33
35
 
@@ -391,6 +393,16 @@ def test_rust_binding_modules_correct() -> None:
391
393
  check(bindings, "monarch._rust_bindings")
392
394
 
393
395
 
396
+ def test_proc_mesh_liveness() -> None:
397
+ mesh = proc_mesh(gpus=2).get()
398
+ counter = mesh.spawn("counter", Counter, 1).get()
399
+ del mesh
400
+ # Give some time for the mesh to have been shut down.
401
+ # (It only would if there were a bug.)
402
+ time.sleep(0.5)
403
+ counter.value.call().get()
404
+
405
+
394
406
  two_gpu = pytest.mark.skipif(
395
407
  torch.cuda.device_count() < 2,
396
408
  reason="Not enough GPUs, this test requires at least 2 GPUs",
@@ -661,3 +673,100 @@ async def test_async_concurrency():
661
673
  # actually concurrently processing messages.
662
674
  await am.no_more.call()
663
675
  await fut
676
+
677
+
678
+ async def awaitit(f):
679
+ return await f
680
+
681
+
682
+ def test_actor_future():
683
+ v = 0
684
+
685
+ async def incr():
686
+ nonlocal v
687
+ v += 1
688
+ return v
689
+
690
+ # can use async implementation from sync
691
+ # if no non-blocking is provided
692
+ f = ActorFuture(incr)
693
+ assert f.get() == 1
694
+ assert v == 1
695
+ assert f.get() == 1
696
+ assert asyncio.run(awaitit(f)) == 1
697
+
698
+ f = ActorFuture(incr)
699
+ assert asyncio.run(awaitit(f)) == 2
700
+ assert f.get() == 2
701
+
702
+ def incr2():
703
+ nonlocal v
704
+ v += 2
705
+ return v
706
+
707
+ # Use the blocking implementation as an optimization if provided
708
+ f = ActorFuture(incr, incr2)
709
+ assert f.get() == 4
710
+ assert asyncio.run(awaitit(f)) == 4
711
+
712
+ async def nope():
713
+ nonlocal v
714
+ v += 1
715
+ raise ValueError("nope")
716
+
717
+ f = ActorFuture(nope)
718
+
719
+ with pytest.raises(ValueError):
720
+ f.get()
721
+
722
+ assert v == 5
723
+
724
+ with pytest.raises(ValueError):
725
+ f.get()
726
+
727
+ assert v == 5
728
+
729
+ with pytest.raises(ValueError):
730
+ asyncio.run(awaitit(f))
731
+
732
+ assert v == 5
733
+
734
+ def nope():
735
+ nonlocal v
736
+ v += 1
737
+ raise ValueError("nope")
738
+
739
+ f = ActorFuture(incr, nope)
740
+
741
+ with pytest.raises(ValueError):
742
+ f.get()
743
+
744
+ assert v == 6
745
+
746
+ with pytest.raises(ValueError):
747
+ f.result()
748
+
749
+ assert f.exception() is not None
750
+
751
+ assert v == 6
752
+
753
+ with pytest.raises(ValueError):
754
+ asyncio.run(awaitit(f))
755
+
756
+ assert v == 6
757
+
758
+ async def seven():
759
+ return 7
760
+
761
+ f = ActorFuture(seven)
762
+
763
+ assert 7 == f.get(timeout=0.001)
764
+
765
+ async def neverfinish():
766
+ f = asyncio.Future()
767
+ await f
768
+
769
+ f = ActorFuture(neverfinish)
770
+
771
+ with pytest.raises(asyncio.exceptions.TimeoutError):
772
+ f.get(timeout=0.1)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: torchmonarch-nightly
3
- Version: 2025.6.18
3
+ Version: 2025.6.20
4
4
  Summary: Monarch: Single controller library
5
5
  Author: Meta
6
6
  Author-email: oncall+monarch@xmail.facebook.com
@@ -1,17 +1,17 @@
1
1
  monarch/__init__.py,sha256=iUvWHc0-7Q2tovRoRxOIiA3TsefMXCbWl-jEfQ2djew,6897
2
- monarch/_rust_bindings.so,sha256=RlkNuWQ74oxTOEfmaVFsgESTEdMP84vug1sRY4xya60,40803008
2
+ monarch/_rust_bindings.so,sha256=sDdg6RjptgNPmFnFiDAgv36k_Or_Kz47aYaZ2M5EAao,41088032
3
3
  monarch/_testing.py,sha256=jOIOG6jcZBzvEvG_DwSnwCkaMVXvSun6sJAG6nXemww,7859
4
- monarch/actor_mesh.py,sha256=8hjIy0TSby33xfVXp_xZnqlPkxy3l6IGqEyPOhVtjvU,24197
5
- monarch/allocator.py,sha256=ylvYTf31o-PT385cYJPhi17uNbC4yl_RAraqD0fVe4g,4112
4
+ monarch/actor_mesh.py,sha256=m6QapbZHqYujXya28jW1II2wkBUV_nKGvxmWPSW9lsQ,24327
5
+ monarch/allocator.py,sha256=UEaVLntH4xQ8Lr84TbgcXusvuK8FhSMJmav-omztUbw,4473
6
6
  monarch/bootstrap_main.py,sha256=RCUQhJk07yMFiKp6HzQuqZFUpkgsT9kVEyimiwjn6_E,1827
7
7
  monarch/cached_remote_function.py,sha256=kYdB6r4OHx_T_uX4q3tCNcp1t2DJwF8tPTIahUiT2pU,8785
8
8
  monarch/debugger.py,sha256=AdlvOG3X-9Pw9c1DLQYEy4vjEfh0ZtwtsNJEFLFzN8o,13312
9
9
  monarch/fetch.py,sha256=61jxo7sx4QNUTkc0_rF5NaJROen4tKbAaiIjrXWLOvg,1705
10
- monarch/future.py,sha256=lcdFEe7m1shYPPuvZ1RkS6JUIChEKGBWe3v7x_nu4Hg,731
10
+ monarch/future.py,sha256=g1VYJl8ReBBS6VbikwWilnFqEr5qJDiSKid92AnWFV4,2058
11
11
  monarch/gradient_generator.py,sha256=Rl3dmXGceTdCc1mYBg2JciR88ywGPnW7TVkL86KwqEA,6366
12
12
  monarch/memory.py,sha256=ol86dBhFAJqg78iF25-BuK0wuwj1onR8FIioZ_B0gjw,1377
13
13
  monarch/mesh_controller.py,sha256=am1QP7dvn0OH1z9ADSKm41APs1HY_dHcBAhOVP-QDmE,10427
14
- monarch/monarch_controller,sha256=HucZG4CSJhkVpbHElarAp2LUz1xW5bMNnAR3TNjWKks,20335344
14
+ monarch/monarch_controller,sha256=sWOUMClz3JPUjZbppDWgdrPOAjbydygdRPDZ1kaAVC4,20328464
15
15
  monarch/notebook.py,sha256=zu9MKDFKf1-rCM2TqFSRJjMBeiWuKcJSyUFLvoZRQzs,25949
16
16
  monarch/opaque_module.py,sha256=oajOu_WD1hD4hxE8HDdO-tvWY7KDHWd7VaAhJEa5L2I,10446
17
17
  monarch/opaque_object.py,sha256=IVpll4pyuKZMo_EnPh4s0qnx8RlAcJrJ1yoLX6E75wQ,2782
@@ -46,7 +46,7 @@ monarch/common/_device_utils.py,sha256=gBpl23wMjppVAEzzj8U9HyX-B7Bs2_3ftiMAkzUS4
46
46
  monarch/common/_tensor_to_table.py,sha256=yRjCNwvtl188Z1Dwkx3ZU-Bh2mwYnQ0Lnue2RAztwvc,5753
47
47
  monarch/common/base_tensor.py,sha256=ujRzR6lWaeCdPv2JX0vCR-VsCWn-3SHaJIkZH1Sw9FQ,1159
48
48
  monarch/common/borrows.py,sha256=7KR62xoUat1T6FyADsdHsxVAVIJDvfJWUnPO-xx277U,5307
49
- monarch/common/client.py,sha256=axo37s_z17nYQGOZG5fi_0zUEJ_8qw7INjs-Kw2vaVo,24937
49
+ monarch/common/client.py,sha256=abYQqi-yFzG0ERvh3gMC5UgiWSezmM20kbxzalKpnf4,25806
50
50
  monarch/common/constants.py,sha256=ohvsVYMpfeWopv3KXDAeHWDFLukwc-OY37VRxpKNBE8,300
51
51
  monarch/common/context_manager.py,sha256=GOeyaFbyCqvQmkJ0oI7q6IxRd8_0mVyYKZRccI8iaug,1067
52
52
  monarch/common/controller_api.py,sha256=djGkK5aSd-V6pBkr3uBCXbfJv3OKf2o2VbBXJgFF2WI,3202
@@ -57,7 +57,7 @@ monarch/common/function_caching.py,sha256=HVdbWtv6Eea7ENMWi8iv36w1G1TaVuUJhkUX_J
57
57
  monarch/common/future.py,sha256=D1UJ_8Rvb8-VG9vNE-z7xz2m2otMd2HgB0rnA02nlvA,4681
58
58
  monarch/common/invocation.py,sha256=L4mSmzqlHMxo1Tb71hBU_M8aBZCRCOcb6vvPhvvewec,4195
59
59
  monarch/common/mast.py,sha256=XTzYljGR0aZ7GjmNMPgU2HyuL4HWSAy4IwE3kEDqdOw,7735
60
- monarch/common/messages.py,sha256=El7BoGZ2jlP8HyyE-S8wkiG9W8Ciw3_5JERnNrgOYHU,18278
60
+ monarch/common/messages.py,sha256=OFMd_4yBoMIHjdXcKcJDG88iERfViLG3QxTqzwV4Gnw,18289
61
61
  monarch/common/mock_cuda.py,sha256=x6ho1Ton6BbKjBZ5ZxnFOUaQM032X70wnpoUNB7Ci2w,1039
62
62
  monarch/common/opaque_ref.py,sha256=tWNvOC6CsjNPKD1JDx-8PSaeXqZC3eermgBExUPKML4,2871
63
63
  monarch/common/pickle_flatten.py,sha256=2mc-dPiZy7kRqAstyfMLnPuoGJwsBftYYEHyF_HOZw4,1313
@@ -65,7 +65,7 @@ monarch/common/pipe.py,sha256=9pTf8--3yOv4HpnJEhgcmc_JM6Az4uL1y72TSQA55dw,5013
65
65
  monarch/common/process_group.py,sha256=FbJ_AJRZYFkvQ68L2naRq64J_aNuAKe5kO0MWdn_x74,1662
66
66
  monarch/common/recording.py,sha256=hoI9VY_FyW_xVx-jmfsKydqX5vW2GulwcDWsBdUVOm8,4637
67
67
  monarch/common/reference.py,sha256=O26lkzEeVwj0S1xEy-OLqdHVnACmmlbQCUmXRrW4n1Q,938
68
- monarch/common/remote.py,sha256=qZWXkShX20l07TseQSpVECh2yXZaVKYUvQXkeEM-zvY,9220
68
+ monarch/common/remote.py,sha256=vklFYJvuaPpS8kAyFmRz-T-brfHvcZ1lPTC_-7DIwqM,8908
69
69
  monarch/common/selection.py,sha256=lpWFbZs3ArYy29e-53eoAVAjQFksf1RvZz9NvM0CUW4,308
70
70
  monarch/common/shape.py,sha256=B-7DI768ZhT8ECUNCJcI7DfCB7iDFGFH0r-HmXaAfcM,8296
71
71
  monarch/common/stream.py,sha256=_ejoxafHtdD10lLzznRCXKwrkZ_ZH9k_VTgiA5yfBrI,3583
@@ -107,8 +107,8 @@ monarch/timer/execution_timer.py,sha256=1YsrLIZirdohKOeFAU2H4UcONhQXHuctJbYcoX8I
107
107
  monarch/timer/execution_timer_test.py,sha256=CSxTv44fFZQURJlCBmYvysQI1aS_zEGZs_uxl9SOHak,4486
108
108
  monarch/tools/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
109
109
  monarch/tools/cli.py,sha256=EIdarsfuFX0WqRCe29_5GNKWJBhxx0lABalw3zPSagw,4977
110
- monarch/tools/commands.py,sha256=BfmXndJmU_cZP4cMPlknkxGca1NjqYd8_ReDePWksXw,6908
111
- monarch/tools/mesh_spec.py,sha256=JLykhgy1dClXiNbH1Qsl2fX5MbqplQAhl8LGoragvbo,3702
110
+ monarch/tools/commands.py,sha256=OuFDVAcl5LvBdBZ-HyemErR0IiDtiMMNgmGPD4MWTHY,8996
111
+ monarch/tools/mesh_spec.py,sha256=3Qp7Lu3pAa9tfaG-METsCmj-QXECQ6OsrPWiLydWvKc,3914
112
112
  monarch/tools/network.py,sha256=bRj-jOs5qDqnM3BcE9MSXCLS01hiMN4YSWfKZ_d7bc4,2182
113
113
  monarch/tools/components/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
114
114
  monarch/tools/components/hyperactor.py,sha256=Ryi1X07VLcaQVlpc4af65JNBbZtOb9IAlKxSKMZ1AW4,2120
@@ -136,7 +136,7 @@ tests/error_test_binary.py,sha256=BRj13wAROsUWx4jcxc07HYN2n-xyBNhnnRAhjqah-A0,55
136
136
  tests/sleep_binary.py,sha256=XfLYaAfwm9xgzM-svs8fhAeFhwYIg6SyVEnx4e6wbUw,1009
137
137
  tests/test_actor_error.py,sha256=-0UJCEpyzsBh-RdbGhDiG1-sRtu7bJPQWmtjUD0ad48,8526
138
138
  tests/test_alloc.py,sha256=D6DdQbtOZEvvnnc7LV-WyWFMk0Xb77eblH6Oz90zJTA,745
139
- tests/test_allocator.py,sha256=P11sQ95ADjzC_-CfPs3CEP80nP8sn7wW8vVPsmpSVoM,8164
139
+ tests/test_allocator.py,sha256=jaYWPVEFdcK0XmmEA1Y9uwkeBjhxb2iI1GUL6IZKh4s,8305
140
140
  tests/test_coalescing.py,sha256=JZ4YgQNlWWs7N-Z8KCCXQPANcuyyXEKjeHIXYbPnQhk,15606
141
141
  tests/test_controller.py,sha256=Rp_kW20zYT8ocsK5LX0Ha3LB9azS2LSKpp8n_dBlzVU,31384
142
142
  tests/test_device_mesh.py,sha256=DrbezYOM0thfP9MgLXb5-F0VoLOmSz5GR0GwjR_3bE4,5290
@@ -145,7 +145,7 @@ tests/test_future.py,sha256=cXzaNi2YDwVyjR541ScXmgktX1YFsKzbl8wep0DMVbk,3032
145
145
  tests/test_grad_generator.py,sha256=p4Pm4kMEeGldt2jUVAkGKCB0mLccKI28pltH6OTGbQA,3412
146
146
  tests/test_mock_cuda.py,sha256=5hisElxeLJ5MHw3KM9gwxBiXiMaG-Rm382u3AsQcDOI,3068
147
147
  tests/test_pdb_actor.py,sha256=5KJhuhcZDPWMdjC6eAtDdwnz1W7jNFXvIrMSFaCWaPw,3858
148
- tests/test_python_actors.py,sha256=ls0x_ie4i9KLuouecfxG_fHHZSZc2g_mQSAPJg70pgw,18949
148
+ tests/test_python_actors.py,sha256=du0AiGiKtVHOLkDUKu6gV75eYf_NoHDKV6utKzrplz4,21010
149
149
  tests/test_remote_functions.py,sha256=5nxYB8dfA9NT9f9Od9O3htgQtPbiRNiXZ1Kgtn75sOQ,50056
150
150
  tests/test_rust_backend.py,sha256=94S3R995ZkyIhEiBsM5flcjf5X7bscEAHBtInbTRFe8,7776
151
151
  tests/test_signal_safe_block_on.py,sha256=bmal0XgzJowZXJV6T1Blow5a-vZluYWusCThLMGxyTE,3336
@@ -155,9 +155,9 @@ tests/simulator/test_profiling.py,sha256=TGYCfzTLdkpIwnOuO6KApprmrgPIRQe60KRX3wk
155
155
  tests/simulator/test_simulator.py,sha256=LO8lA0ssY-OGEBL5ipEu74f97Y765TEwfUOv-DtIptM,14568
156
156
  tests/simulator/test_task.py,sha256=ipqBDuDAysuo1xOB9S5psaFvwe6VATD43IovCTSs0t4,2327
157
157
  tests/simulator/test_worker.py,sha256=QrWWIJ3HDgDLkBPRc2mwYPlOQoXQcj1qRfc0WUfKkFY,3507
158
- torchmonarch_nightly-2025.6.18.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
159
- torchmonarch_nightly-2025.6.18.dist-info/METADATA,sha256=lPDac3GQrS5MmEp41wt6YCWHIluJzBgFfPY37x0cKJM,2772
160
- torchmonarch_nightly-2025.6.18.dist-info/WHEEL,sha256=_wZSFk0d90K9wOBp8Q-UGxshyiJ987JoPiyUBNC6VLk,104
161
- torchmonarch_nightly-2025.6.18.dist-info/entry_points.txt,sha256=sqfQ16oZqjEvttUI-uj9BBXIIE6jt05bYFSmy-2hyXI,106
162
- torchmonarch_nightly-2025.6.18.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
163
- torchmonarch_nightly-2025.6.18.dist-info/RECORD,,
158
+ torchmonarch_nightly-2025.6.20.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
159
+ torchmonarch_nightly-2025.6.20.dist-info/METADATA,sha256=QKiDH01IYFpa492TDs5WzWeDRbjMKmpSAc3V9NpQ5YM,2772
160
+ torchmonarch_nightly-2025.6.20.dist-info/WHEEL,sha256=_wZSFk0d90K9wOBp8Q-UGxshyiJ987JoPiyUBNC6VLk,104
161
+ torchmonarch_nightly-2025.6.20.dist-info/entry_points.txt,sha256=sqfQ16oZqjEvttUI-uj9BBXIIE6jt05bYFSmy-2hyXI,106
162
+ torchmonarch_nightly-2025.6.20.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
163
+ torchmonarch_nightly-2025.6.20.dist-info/RECORD,,