torchmonarch-nightly 2025.7.25__cp311-cp311-manylinux2014_x86_64.whl → 2025.7.26__cp311-cp311-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monarch/_rust_bindings.so +0 -0
- monarch/_src/actor/actor_mesh.py +39 -35
- monarch/_src/actor/endpoint.py +36 -3
- monarch/_src/actor/event_loop.py +1 -1
- monarch/_src/actor/proc_mesh.py +17 -9
- monarch/_src/actor/tensor_engine_shim.py +5 -2
- monarch/actor/__init__.py +2 -0
- monarch/common/messages.py +9 -0
- monarch/common/remote.py +2 -2
- monarch/gradient/_gradient_generator.so +0 -0
- monarch/mesh_controller.py +76 -14
- monarch/monarch_controller +0 -0
- monarch/tools/cli.py +2 -2
- monarch/tools/commands.py +49 -27
- monarch/tools/components/hyperactor.py +5 -3
- monarch/tools/config/__init__.py +18 -1
- monarch/tools/config/defaults.py +2 -2
- monarch/tools/mesh_spec.py +4 -1
- tests/test_allocator.py +11 -15
- tests/test_env_before_cuda.py +2 -3
- tests/test_tensor_engine.py +27 -1
- {torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/METADATA +34 -1
- {torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/RECORD +27 -27
- {torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/WHEEL +0 -0
- {torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/entry_points.txt +0 -0
- {torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/licenses/LICENSE +0 -0
- {torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/top_level.txt +0 -0
monarch/_rust_bindings.so
CHANGED
Binary file
|
monarch/_src/actor/actor_mesh.py
CHANGED
@@ -65,6 +65,7 @@ from monarch._src.actor.endpoint import (
|
|
65
65
|
Endpoint,
|
66
66
|
EndpointProperty,
|
67
67
|
Extent,
|
68
|
+
NotAnEndpoint,
|
68
69
|
Propagator,
|
69
70
|
Selection,
|
70
71
|
)
|
@@ -76,7 +77,7 @@ from monarch._src.actor.pickle import flatten, unflatten
|
|
76
77
|
from monarch._src.actor.shape import MeshTrait, NDSlice
|
77
78
|
from monarch._src.actor.sync_state import fake_sync_state
|
78
79
|
|
79
|
-
from monarch._src.actor.tensor_engine_shim import actor_send
|
80
|
+
from monarch._src.actor.tensor_engine_shim import actor_rref, actor_send
|
80
81
|
|
81
82
|
if TYPE_CHECKING:
|
82
83
|
from monarch._src.actor.proc_mesh import ProcMesh
|
@@ -313,8 +314,7 @@ class ActorEndpoint(Endpoint[P, R]):
|
|
313
314
|
"""
|
314
315
|
self._signature.bind(None, *args, **kwargs)
|
315
316
|
objects, bytes = flatten((args, kwargs), _is_ref_or_mailbox)
|
316
|
-
|
317
|
-
if not refs:
|
317
|
+
if all(not hasattr(obj, "__monarch_ref__") for obj in objects):
|
318
318
|
message = PythonMessage(
|
319
319
|
PythonMessageKind.CallMethod(
|
320
320
|
self._name, None if port is None else port._port_ref
|
@@ -323,7 +323,7 @@ class ActorEndpoint(Endpoint[P, R]):
|
|
323
323
|
)
|
324
324
|
self._actor_mesh.cast(message, selection)
|
325
325
|
else:
|
326
|
-
actor_send(self, bytes,
|
326
|
+
actor_send(self, bytes, objects, port, selection)
|
327
327
|
shape = self._actor_mesh._shape
|
328
328
|
return Extent(shape.labels, shape.ndslice.sizes)
|
329
329
|
|
@@ -335,6 +335,26 @@ class ActorEndpoint(Endpoint[P, R]):
|
|
335
335
|
), "unexpected receiver type"
|
336
336
|
return PortTuple(p, PortReceiver(self._mailbox, self._supervise(r._receiver)))
|
337
337
|
|
338
|
+
def _rref(self, args, kwargs):
|
339
|
+
self._signature.bind(None, *args, **kwargs)
|
340
|
+
refs, bytes = flatten((args, kwargs), _is_ref_or_mailbox)
|
341
|
+
|
342
|
+
return actor_rref(self, bytes, refs)
|
343
|
+
|
344
|
+
|
345
|
+
def as_endpoint(
|
346
|
+
not_an_endpoint: Callable[P, R], *, propagate: Propagator = None
|
347
|
+
) -> Endpoint[P, R]:
|
348
|
+
if not isinstance(not_an_endpoint, NotAnEndpoint):
|
349
|
+
raise ValueError("expected an method of a spawned actor")
|
350
|
+
return ActorEndpoint(
|
351
|
+
not_an_endpoint._ref._actor_mesh_ref,
|
352
|
+
not_an_endpoint._name,
|
353
|
+
getattr(not_an_endpoint._ref, not_an_endpoint._name),
|
354
|
+
not_an_endpoint._ref._mailbox,
|
355
|
+
propagate,
|
356
|
+
)
|
357
|
+
|
338
358
|
|
339
359
|
class Accumulator(Generic[P, R, A]):
|
340
360
|
def __init__(
|
@@ -625,18 +645,23 @@ class _Actor:
|
|
625
645
|
f" This is likely due to an earlier error: {self._saved_error}"
|
626
646
|
)
|
627
647
|
raise AssertionError(error_message)
|
628
|
-
the_method = getattr(self.instance, method)
|
648
|
+
the_method = getattr(self.instance, method)
|
649
|
+
if isinstance(the_method, EndpointProperty):
|
650
|
+
module = the_method._method.__module__
|
651
|
+
the_method = functools.partial(the_method._method, self.instance)
|
652
|
+
else:
|
653
|
+
module = the_method.__module__
|
629
654
|
|
630
655
|
if inspect.iscoroutinefunction(the_method):
|
631
656
|
|
632
657
|
async def instrumented():
|
633
658
|
enter_span(
|
634
|
-
|
659
|
+
module,
|
635
660
|
method,
|
636
661
|
str(ctx.mailbox.actor_id),
|
637
662
|
)
|
638
663
|
try:
|
639
|
-
result = await the_method(
|
664
|
+
result = await the_method(*args, **kwargs)
|
640
665
|
self._maybe_exit_debugger()
|
641
666
|
except Exception as e:
|
642
667
|
logging.critical(
|
@@ -649,9 +674,9 @@ class _Actor:
|
|
649
674
|
|
650
675
|
result = await instrumented()
|
651
676
|
else:
|
652
|
-
enter_span(
|
677
|
+
enter_span(module, method, str(ctx.mailbox.actor_id))
|
653
678
|
with fake_sync_state():
|
654
|
-
result = the_method(
|
679
|
+
result = the_method(*args, **kwargs)
|
655
680
|
self._maybe_exit_debugger()
|
656
681
|
exit_span()
|
657
682
|
|
@@ -758,35 +783,14 @@ class ActorMeshRef(MeshTrait):
|
|
758
783
|
attr_name,
|
759
784
|
attr_value._method,
|
760
785
|
self._mailbox,
|
786
|
+
attr_value._propagator,
|
761
787
|
),
|
762
788
|
)
|
763
789
|
|
764
|
-
def __getattr__(self,
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
# At runtime, we still want to raise AttributeError for truly missing attributes
|
769
|
-
|
770
|
-
# Check if this is a method on the underlying class
|
771
|
-
if hasattr(self._class, name):
|
772
|
-
attr = getattr(self._class, name)
|
773
|
-
if isinstance(attr, EndpointProperty):
|
774
|
-
# Dynamically create the endpoint
|
775
|
-
endpoint = ActorEndpoint(
|
776
|
-
self._actor_mesh_ref,
|
777
|
-
name,
|
778
|
-
attr._method,
|
779
|
-
self._mailbox,
|
780
|
-
propagator=attr._propagator,
|
781
|
-
)
|
782
|
-
# Cache it for future use
|
783
|
-
setattr(self, name, endpoint)
|
784
|
-
return endpoint
|
785
|
-
|
786
|
-
# If we get here, it's truly not found
|
787
|
-
raise AttributeError(
|
788
|
-
f"'{self.__class__.__name__}' object has no attribute '{name}'"
|
789
|
-
)
|
790
|
+
def __getattr__(self, attr: str) -> NotAnEndpoint:
|
791
|
+
if attr in dir(self._class):
|
792
|
+
return NotAnEndpoint(self, attr)
|
793
|
+
raise AttributeError(attr)
|
790
794
|
|
791
795
|
def _create(
|
792
796
|
self,
|
monarch/_src/actor/endpoint.py
CHANGED
@@ -34,6 +34,7 @@ from monarch._src.actor.tensor_engine_shim import _cached_propagation, fake_call
|
|
34
34
|
|
35
35
|
if TYPE_CHECKING:
|
36
36
|
from monarch._src.actor.actor_mesh import (
|
37
|
+
ActorMeshRef,
|
37
38
|
HyPortReceiver,
|
38
39
|
OncePortReceiver,
|
39
40
|
Port,
|
@@ -182,11 +183,22 @@ class Endpoint(ABC, Generic[P, R]):
|
|
182
183
|
# pyre-ignore
|
183
184
|
send(self, args, kwargs)
|
184
185
|
|
186
|
+
@abstractmethod
|
187
|
+
def _rref(self, args, kwargs) -> Any: ...
|
188
|
+
|
189
|
+
def rref(self, *args: P.args, **kwargs: P.kwargs) -> R:
|
190
|
+
return self._rref(args, kwargs)
|
191
|
+
|
185
192
|
def _propagate(self, args, kwargs, fake_args, fake_kwargs):
|
186
193
|
if self._propagator_arg is None or self._propagator_arg == "cached":
|
187
194
|
if self._cache is None:
|
188
195
|
self._cache = {}
|
189
|
-
|
196
|
+
resolvable = getattr(self, "_resolvable", None)
|
197
|
+
if resolvable is None:
|
198
|
+
raise NotImplementedError(
|
199
|
+
"Cached propagation is not implemented for actor endpoints."
|
200
|
+
)
|
201
|
+
return _cached_propagation(self._cache, resolvable, args, kwargs)
|
190
202
|
elif self._propagator_arg == "inspect":
|
191
203
|
return None
|
192
204
|
elif self._propagator_arg == "mocked":
|
@@ -229,13 +241,34 @@ class EndpointProperty(Generic[P, R]):
|
|
229
241
|
return cast(Endpoint[P, R], self)
|
230
242
|
|
231
243
|
|
244
|
+
class NotAnEndpoint:
|
245
|
+
"""
|
246
|
+
Used as the dynamic value of functions on an ActorMeshRef that were not marked as endpoints.
|
247
|
+
This is used both to give a better error message (since we cannot prevent the type system from thinking they are methods),
|
248
|
+
and to provide the opportunity for someone to do endpoint(x.foo) on something that wasn't marked as an endpoint.
|
249
|
+
"""
|
250
|
+
|
251
|
+
def __init__(self, ref: "ActorMeshRef", name: str):
|
252
|
+
self._ref = ref
|
253
|
+
self._name = name
|
254
|
+
|
255
|
+
def __call__(self, *args, **kwargs) -> None:
|
256
|
+
raise RuntimeError(
|
257
|
+
f"Actor {self._ref._class}.{self._name} is not annotated as an endpoint. To call it as one, add a @endpoint decorator to it, or directly wrap it in one as_endpoint(obj.method).call(...)"
|
258
|
+
)
|
259
|
+
|
260
|
+
|
232
261
|
# This can't just be Callable because otherwise we are not
|
233
262
|
# allowed to use type arguments in the return value.
|
234
263
|
class EndpointIfy:
|
235
264
|
@overload
|
236
|
-
def __call__(
|
265
|
+
def __call__(
|
266
|
+
self, function: Callable[Concatenate[Any, P], Awaitable[R]]
|
267
|
+
) -> Endpoint[P, R]: ...
|
237
268
|
@overload
|
238
|
-
def __call__(
|
269
|
+
def __call__(
|
270
|
+
self, function: Callable[Concatenate[Any, P], R]
|
271
|
+
) -> Endpoint[P, R]: ...
|
239
272
|
|
240
273
|
def __call__(self, function: Any):
|
241
274
|
pass
|
monarch/_src/actor/event_loop.py
CHANGED
monarch/_src/actor/proc_mesh.py
CHANGED
@@ -43,7 +43,6 @@ from monarch._src.actor.actor_mesh import (
|
|
43
43
|
Actor,
|
44
44
|
ActorMeshRef,
|
45
45
|
fake_sync_state,
|
46
|
-
MonarchContext,
|
47
46
|
)
|
48
47
|
|
49
48
|
from monarch._src.actor.allocator import LocalAllocator, ProcessAllocator, SimAllocator
|
@@ -89,7 +88,7 @@ class SetupActor(Actor):
|
|
89
88
|
Typically used to setup the environment variables.
|
90
89
|
"""
|
91
90
|
|
92
|
-
def __init__(self, env: Callable[[
|
91
|
+
def __init__(self, env: Callable[[], None]) -> None:
|
93
92
|
"""
|
94
93
|
Initialize the setup actor with the user defined setup method.
|
95
94
|
"""
|
@@ -100,8 +99,7 @@ class SetupActor(Actor):
|
|
100
99
|
"""
|
101
100
|
Call the user defined setup method with the monarch context.
|
102
101
|
"""
|
103
|
-
|
104
|
-
self._setup_method(ctx)
|
102
|
+
self._setup_method()
|
105
103
|
|
106
104
|
|
107
105
|
T = TypeVar("T")
|
@@ -114,7 +112,7 @@ except ImportError:
|
|
114
112
|
|
115
113
|
|
116
114
|
async def _allocate_nonblocking(
|
117
|
-
alloc: Alloc, setup: Callable[[
|
115
|
+
alloc: Alloc, setup: Callable[[], None] | None = None
|
118
116
|
) -> "ProcMesh":
|
119
117
|
_proc_mesh = await HyProcMesh.allocate_nonblocking(alloc)
|
120
118
|
if setup is None:
|
@@ -211,7 +209,7 @@ class ProcMesh(MeshTrait):
|
|
211
209
|
|
212
210
|
@classmethod
|
213
211
|
def from_alloc(
|
214
|
-
self, alloc: Alloc, setup: Callable[[
|
212
|
+
self, alloc: Alloc, setup: Callable[[], None] | None = None
|
215
213
|
) -> Future["ProcMesh"]:
|
216
214
|
"""
|
217
215
|
Allocate a process mesh according to the provided alloc.
|
@@ -219,7 +217,17 @@ class ProcMesh(MeshTrait):
|
|
219
217
|
|
220
218
|
Arguments:
|
221
219
|
- `alloc`: The alloc to allocate according to.
|
222
|
-
- `setup`:
|
220
|
+
- `setup`: An optional lambda function to configure environment variables on the allocated mesh.
|
221
|
+
Use the `current_rank()` method within the lambda to obtain the rank.
|
222
|
+
|
223
|
+
Example of a setup method to initialize torch distributed environment variables:
|
224
|
+
```
|
225
|
+
def setup():
|
226
|
+
rank = current_rank()
|
227
|
+
os.environ["RANK"] = str(rank)
|
228
|
+
os.environ["WORLD_SIZE"] = str(len(rank.shape))
|
229
|
+
os.environ["LOCAL_RANK"] = str(rank["gpus"])
|
230
|
+
```
|
223
231
|
"""
|
224
232
|
return Future(
|
225
233
|
impl=lambda: _allocate_nonblocking(alloc, setup),
|
@@ -428,7 +436,7 @@ async def proc_mesh_nonblocking(
|
|
428
436
|
gpus: Optional[int] = None,
|
429
437
|
hosts: int = 1,
|
430
438
|
env: dict[str, str] | None = None,
|
431
|
-
setup: Callable[[
|
439
|
+
setup: Callable[[], None] | None = None,
|
432
440
|
) -> ProcMesh:
|
433
441
|
if gpus is None:
|
434
442
|
gpus = _local_device_count()
|
@@ -457,7 +465,7 @@ def proc_mesh(
|
|
457
465
|
gpus: Optional[int] = None,
|
458
466
|
hosts: int = 1,
|
459
467
|
env: dict[str, str] | None = None,
|
460
|
-
setup: Callable[[
|
468
|
+
setup: Callable[[], None] | None = None,
|
461
469
|
) -> Future[ProcMesh]:
|
462
470
|
return Future(
|
463
471
|
impl=lambda: proc_mesh_nonblocking(
|
@@ -19,7 +19,6 @@ time it is used.
|
|
19
19
|
|
20
20
|
if TYPE_CHECKING:
|
21
21
|
from monarch._src.actor.actor_mesh import ActorEndpoint, Port, Selection
|
22
|
-
from monarch._src.actor.endpoint import Endpoint
|
23
22
|
|
24
23
|
|
25
24
|
def shim(fn=None, *, module=None):
|
@@ -48,8 +47,12 @@ def actor_send(
|
|
48
47
|
) -> None: ...
|
49
48
|
|
50
49
|
|
50
|
+
@shim(module="monarch.mesh_controller")
|
51
|
+
def actor_rref(endpoint, args_kwargs_tuple: bytes, refs: Sequence[Any]): ...
|
52
|
+
|
53
|
+
|
51
54
|
@shim(module="monarch.common.remote")
|
52
|
-
def _cached_propagation(_cache, rfunction
|
55
|
+
def _cached_propagation(_cache, rfunction, args, kwargs) -> Any: ...
|
53
56
|
|
54
57
|
|
55
58
|
@shim(module="monarch.common.fake")
|
monarch/actor/__init__.py
CHANGED
@@ -12,6 +12,7 @@ from monarch._src.actor.actor_mesh import (
|
|
12
12
|
Accumulator,
|
13
13
|
Actor,
|
14
14
|
ActorError,
|
15
|
+
as_endpoint,
|
15
16
|
current_actor_name,
|
16
17
|
current_rank,
|
17
18
|
current_size,
|
@@ -35,6 +36,7 @@ __all__ = [
|
|
35
36
|
"Actor",
|
36
37
|
"ActorError",
|
37
38
|
"current_actor_name",
|
39
|
+
"as_endpoint",
|
38
40
|
"current_rank",
|
39
41
|
"current_size",
|
40
42
|
"endpoint",
|
monarch/common/messages.py
CHANGED
@@ -435,6 +435,15 @@ class SendResultOfActorCall(NamedTuple):
|
|
435
435
|
stream: tensor_worker.StreamRef
|
436
436
|
|
437
437
|
|
438
|
+
class CallActorMethod(NamedTuple):
|
439
|
+
seq: int
|
440
|
+
result: object
|
441
|
+
broker_id: Tuple[str, int]
|
442
|
+
local_state: Sequence[Tensor | tensor_worker.Ref]
|
443
|
+
mutates: List[tensor_worker.Ref]
|
444
|
+
stream: tensor_worker.StreamRef
|
445
|
+
|
446
|
+
|
438
447
|
class SplitComm(NamedTuple):
|
439
448
|
dims: Dims
|
440
449
|
device_mesh: DeviceMesh
|
monarch/common/remote.py
CHANGED
@@ -157,7 +157,7 @@ class Remote(Generic[P, R], Endpoint[P, R]):
|
|
157
157
|
def _maybe_resolvable(self):
|
158
158
|
return None if self._remote_impl is None else self._resolvable
|
159
159
|
|
160
|
-
def
|
160
|
+
def _rref(self, args, kwargs):
|
161
161
|
return dtensor_dispatch(
|
162
162
|
self._resolvable,
|
163
163
|
self._propagate,
|
@@ -352,7 +352,7 @@ _miss = 0
|
|
352
352
|
_hit = 0
|
353
353
|
|
354
354
|
|
355
|
-
def _cached_propagation(_cache, rfunction:
|
355
|
+
def _cached_propagation(_cache, rfunction: ResolvableFunction, args, kwargs):
|
356
356
|
tensors, shape_key = hashable_tensor_flatten(args, kwargs)
|
357
357
|
# pyre-ignore
|
358
358
|
inputs_group = TensorGroup([t._fake for t in tensors])
|
Binary file
|
monarch/mesh_controller.py
CHANGED
@@ -30,6 +30,7 @@ from monarch._rust_bindings.monarch_extension.client import ( # @manual=//monar
|
|
30
30
|
WorldState,
|
31
31
|
)
|
32
32
|
from monarch._rust_bindings.monarch_extension.mesh_controller import _Controller
|
33
|
+
from monarch._rust_bindings.monarch_extension.tensor_worker import Ref
|
33
34
|
from monarch._rust_bindings.monarch_hyperactor.actor import (
|
34
35
|
PythonMessage,
|
35
36
|
PythonMessageKind,
|
@@ -44,10 +45,12 @@ from monarch._src.actor.endpoint import Selection
|
|
44
45
|
from monarch._src.actor.shape import NDSlice
|
45
46
|
from monarch.common import device_mesh, messages, stream
|
46
47
|
from monarch.common.controller_api import TController
|
48
|
+
from monarch.common.function import ResolvableFunction
|
47
49
|
from monarch.common.invocation import Seq
|
48
50
|
from monarch.common.messages import Referenceable, SendResultOfActorCall
|
49
51
|
from monarch.common.stream import StreamRef
|
50
|
-
from monarch.common.tensor import InputChecker, Tensor
|
52
|
+
from monarch.common.tensor import dtensor_check, InputChecker, Tensor
|
53
|
+
from monarch.common.tree import flatten
|
51
54
|
from monarch.tensor_worker_main import _set_trace
|
52
55
|
|
53
56
|
if TYPE_CHECKING:
|
@@ -265,17 +268,36 @@ class RemoteException(Exception):
|
|
265
268
|
return "<exception formatting RemoteException>"
|
266
269
|
|
267
270
|
|
268
|
-
def
|
271
|
+
def _cast_call_method_indirect(
|
269
272
|
endpoint: ActorEndpoint,
|
273
|
+
selection: Selection,
|
274
|
+
client: MeshClient,
|
275
|
+
seq: Seq,
|
270
276
|
args_kwargs_tuple: bytes,
|
271
277
|
refs: Sequence[Any],
|
272
|
-
|
273
|
-
selection: Selection,
|
274
|
-
):
|
278
|
+
) -> Tuple[str, int]:
|
275
279
|
unflatten_args = [
|
276
280
|
UnflattenArg.PyObject if isinstance(ref, Tensor) else UnflattenArg.Mailbox
|
277
281
|
for ref in refs
|
278
282
|
]
|
283
|
+
broker_id: Tuple[str, int] = client._mesh_controller.broker_id
|
284
|
+
actor_msg = PythonMessage(
|
285
|
+
PythonMessageKind.CallMethodIndirect(
|
286
|
+
endpoint._name, broker_id, seq, unflatten_args
|
287
|
+
),
|
288
|
+
args_kwargs_tuple,
|
289
|
+
)
|
290
|
+
endpoint._actor_mesh.cast(actor_msg, selection)
|
291
|
+
return broker_id
|
292
|
+
|
293
|
+
|
294
|
+
def actor_send(
|
295
|
+
endpoint: ActorEndpoint,
|
296
|
+
args_kwargs_tuple: bytes,
|
297
|
+
refs: Sequence[Any],
|
298
|
+
port: Optional[Port[Any]],
|
299
|
+
selection: Selection,
|
300
|
+
):
|
279
301
|
tensors = [ref for ref in refs if isinstance(ref, Tensor)]
|
280
302
|
# we have some monarch references, we need to ensure their
|
281
303
|
# proc_mesh matches that of the tensors we sent to it
|
@@ -284,7 +306,7 @@ def actor_send(
|
|
284
306
|
if hasattr(t, "stream"):
|
285
307
|
chosen_stream = t.stream
|
286
308
|
break
|
287
|
-
with InputChecker(
|
309
|
+
with InputChecker(tensors, lambda x: f"actor_call({x})") as checker:
|
288
310
|
checker.check_mesh_stream_local(device_mesh._active, chosen_stream)
|
289
311
|
# TODO: move propagators into Endpoint abstraction and run the propagator to get the
|
290
312
|
# mutates
|
@@ -300,8 +322,6 @@ def actor_send(
|
|
300
322
|
|
301
323
|
client = cast(MeshClient, checker.mesh.client)
|
302
324
|
|
303
|
-
broker_id: Tuple[str, int] = client._mesh_controller.broker_id
|
304
|
-
|
305
325
|
stream_ref = chosen_stream._to_ref(client)
|
306
326
|
|
307
327
|
fut = (port, checker.mesh._ndslice) if port is not None else None
|
@@ -316,13 +336,9 @@ def actor_send(
|
|
316
336
|
# The message to the generic actor tells it to first wait on the broker to get the local arguments
|
317
337
|
# from the stream, then it will run the actor method, and send the result to response port.
|
318
338
|
|
319
|
-
|
320
|
-
|
321
|
-
endpoint._name, broker_id, ident, unflatten_args
|
322
|
-
),
|
323
|
-
args_kwargs_tuple,
|
339
|
+
broker_id = _cast_call_method_indirect(
|
340
|
+
endpoint, selection, client, ident, args_kwargs_tuple, refs
|
324
341
|
)
|
325
|
-
endpoint._actor_mesh.cast(actor_msg, selection)
|
326
342
|
worker_msg = SendResultOfActorCall(ident, broker_id, tensors, [], stream_ref)
|
327
343
|
client.send(checker.mesh._ndslice, worker_msg)
|
328
344
|
# we have to ask for status updates
|
@@ -330,3 +346,49 @@ def actor_send(
|
|
330
346
|
# enough work to count this future as finished,
|
331
347
|
# and all potential errors have been reported
|
332
348
|
client._request_status()
|
349
|
+
|
350
|
+
|
351
|
+
def actor_rref(endpoint, args_kwargs_tuple: bytes, refs: Sequence[Any]):
|
352
|
+
chosen_stream = stream._active
|
353
|
+
fake_result, dtensors, mutates, mesh = dtensor_check(
|
354
|
+
endpoint._propagate,
|
355
|
+
cast(ResolvableFunction, endpoint._name),
|
356
|
+
refs,
|
357
|
+
{},
|
358
|
+
device_mesh._active,
|
359
|
+
chosen_stream,
|
360
|
+
)
|
361
|
+
assert mesh is not None
|
362
|
+
|
363
|
+
fake_result_dtensors, unflatten_result = flatten(
|
364
|
+
fake_result, lambda x: isinstance(x, torch.Tensor)
|
365
|
+
)
|
366
|
+
result_dtensors = tuple(
|
367
|
+
Tensor(fake, mesh, chosen_stream) for fake in fake_result_dtensors
|
368
|
+
)
|
369
|
+
seq = mesh.client.new_node(result_dtensors + mutates, dtensors)
|
370
|
+
assert all(t.ref is not None for t in result_dtensors)
|
371
|
+
assert all(t.ref is not None for t in mutates)
|
372
|
+
result = result_msg = unflatten_result(result_dtensors)
|
373
|
+
if len(result_dtensors) == 0:
|
374
|
+
result_msg = None
|
375
|
+
|
376
|
+
broker_id = _cast_call_method_indirect(
|
377
|
+
endpoint, "all", mesh.client, seq, args_kwargs_tuple, refs
|
378
|
+
)
|
379
|
+
# note the device mesh has to be defined regardless so the remote functions
|
380
|
+
# can invoke mesh.rank("...")
|
381
|
+
|
382
|
+
mesh.define_remotely()
|
383
|
+
|
384
|
+
mesh._send(
|
385
|
+
messages.CallActorMethod(
|
386
|
+
seq,
|
387
|
+
result_msg,
|
388
|
+
broker_id,
|
389
|
+
refs,
|
390
|
+
cast("List[Ref]", mutates),
|
391
|
+
stream._active._to_ref(mesh.client),
|
392
|
+
)
|
393
|
+
)
|
394
|
+
return result
|
monarch/monarch_controller
CHANGED
Binary file
|
monarch/tools/cli.py
CHANGED
@@ -86,9 +86,9 @@ class CreateCmd:
|
|
86
86
|
else defaults.component_fn(config.scheduler)
|
87
87
|
)
|
88
88
|
component_args = component_args_from_cli(component_fn, args.component_args)
|
89
|
-
appdef = component_fn(**component_args)
|
89
|
+
config.appdef = component_fn(**component_args)
|
90
90
|
|
91
|
-
handle = create(config
|
91
|
+
handle = create(config)
|
92
92
|
print(handle)
|
93
93
|
|
94
94
|
|
monarch/tools/commands.py
CHANGED
@@ -7,18 +7,19 @@
|
|
7
7
|
# pyre-strict
|
8
8
|
|
9
9
|
import argparse
|
10
|
+
import asyncio
|
10
11
|
import inspect
|
11
12
|
import logging
|
12
13
|
import os
|
13
|
-
import
|
14
|
-
from datetime import timedelta
|
14
|
+
from datetime import datetime, timedelta
|
15
15
|
from typing import Any, Callable, Mapping, Optional, Union
|
16
16
|
|
17
|
+
from monarch.tools.components.hyperactor import DEFAULT_NAME
|
18
|
+
|
17
19
|
from monarch.tools.config import ( # @manual=//monarch/python/monarch/tools/config/meta:defaults
|
18
20
|
Config,
|
19
21
|
defaults,
|
20
22
|
)
|
21
|
-
|
22
23
|
from monarch.tools.mesh_spec import mesh_spec_from_metadata, ServerSpec
|
23
24
|
from torchx.runner import Runner # @manual=//torchx/runner:lib_core
|
24
25
|
from torchx.specs import AppDef, AppDryRunInfo, AppState, CfgVal, parse_app_handle
|
@@ -83,7 +84,7 @@ def component_args_from_cli(
|
|
83
84
|
|
84
85
|
def create(
|
85
86
|
config: Config,
|
86
|
-
|
87
|
+
name: str = DEFAULT_NAME,
|
87
88
|
) -> Union[str, AppDryRunInfo]:
|
88
89
|
"""Creates a monarch server by submitting it as a job to the target scheduler.
|
89
90
|
|
@@ -94,7 +95,7 @@ def create(
|
|
94
95
|
from monarch.tools.config import defaults
|
95
96
|
|
96
97
|
config = defaults.config(scheduler="slurm")
|
97
|
-
appdef = defaults.component_fn(scheduler=config.scheduler)()
|
98
|
+
config.appdef = defaults.component_fn(scheduler=config.scheduler)()
|
98
99
|
|
99
100
|
config.scheduler_args.update(
|
100
101
|
{
|
@@ -105,7 +106,7 @@ def create(
|
|
105
106
|
)
|
106
107
|
config.dryrun = True
|
107
108
|
|
108
|
-
create(config
|
109
|
+
create(config)
|
109
110
|
|
110
111
|
|
111
112
|
Args:
|
@@ -114,6 +115,7 @@ def create(
|
|
114
115
|
component_fn: a function that returns the AppDef (job def).
|
115
116
|
If not provided, defaults to the configured default for the scheduler
|
116
117
|
(in most cases ``monarch.tools.components.hyperactor.proc_mesh``)
|
118
|
+
name: the name of the job. If none, a default job name will be created.
|
117
119
|
"""
|
118
120
|
scheduler: str = config.scheduler
|
119
121
|
cfg: Mapping[str, CfgVal] = config.scheduler_args
|
@@ -122,6 +124,8 @@ def create(
|
|
122
124
|
os.environ["TORCHX_CONTEXT_NAME"] = os.getenv("TORCHX_CONTEXT_NAME", "monarch")
|
123
125
|
|
124
126
|
with torchx_runner() as runner:
|
127
|
+
appdef: AppDef = AppDef(name, config.appdef.roles, config.appdef.metadata)
|
128
|
+
|
125
129
|
info = runner.dryrun(appdef, scheduler, cfg, config.workspace)
|
126
130
|
|
127
131
|
info_json_fmt = AppDryRunInfo(
|
@@ -170,6 +174,8 @@ def info(server_handle: str) -> Optional[ServerSpec]:
|
|
170
174
|
# null-guard since some schedulers do not fill replica_status
|
171
175
|
if host_status := replica_status.get(role.name):
|
172
176
|
spec.hostnames = [h.hostname for h in host_status]
|
177
|
+
# the mesh status is based on the "least progressive" replica status
|
178
|
+
spec.state = min(h.state for h in host_status)
|
173
179
|
|
174
180
|
mesh_specs.append(spec)
|
175
181
|
|
@@ -211,6 +217,8 @@ async def server_ready(
|
|
211
217
|
|
212
218
|
"""
|
213
219
|
|
220
|
+
check_interval_seconds = check_interval.total_seconds()
|
221
|
+
start = datetime.now()
|
214
222
|
while True:
|
215
223
|
server_spec = info(server_handle)
|
216
224
|
|
@@ -220,42 +228,56 @@ async def server_ready(
|
|
220
228
|
if server_spec.state <= AppState.PENDING: # UNSUBMITTED or SUBMITTED or PENDING
|
221
229
|
# NOTE: TorchX currently does not have async APIs so need to loop-on-interval
|
222
230
|
# TODO maybe inverse exponential backoff instead of constant interval?
|
223
|
-
|
224
|
-
|
225
|
-
"
|
226
|
-
|
227
|
-
|
228
|
-
server_spec.state,
|
229
|
-
check_interval_seconds,
|
231
|
+
print(
|
232
|
+
f"Waiting for {server_handle} to be {AppState.RUNNING} (current: {server_spec.state}); "
|
233
|
+
f"will check again in {check_interval_seconds} seconds. "
|
234
|
+
f"Total wait time: {datetime.now() - start}",
|
235
|
+
end="\r",
|
230
236
|
)
|
231
|
-
|
237
|
+
await asyncio.sleep(check_interval_seconds)
|
232
238
|
continue
|
233
|
-
else:
|
234
|
-
return server_spec
|
235
|
-
|
236
239
|
|
240
|
+
# check if hosts are allocated for all the meshes
|
241
|
+
if server_spec.state == AppState.RUNNING:
|
242
|
+
running = True
|
243
|
+
for mesh_spec in server_spec.meshes:
|
244
|
+
if mesh_spec.state <= AppState.PENDING:
|
245
|
+
print(
|
246
|
+
f"Job {server_handle} is running but waiting for mesh {mesh_spec.name} "
|
247
|
+
f"to be {AppState.RUNNING} (current: {mesh_spec.state}); "
|
248
|
+
f"will check again in {check_interval_seconds} seconds. "
|
249
|
+
f"Total wait time: {datetime.now() - start}",
|
250
|
+
end="\r",
|
251
|
+
)
|
252
|
+
running = False
|
253
|
+
break
|
254
|
+
if not running:
|
255
|
+
await asyncio.sleep(check_interval_seconds)
|
256
|
+
continue
|
257
|
+
|
258
|
+
return server_spec
|
259
|
+
|
260
|
+
|
261
|
+
# TODO: this API is overloaded. Ideally, we do not need config to get or a handle to create.
|
237
262
|
async def get_or_create(
|
238
263
|
name: str,
|
239
264
|
config: Config,
|
240
|
-
appdef: AppDef,
|
241
265
|
check_interval: timedelta = _5_SECONDS,
|
242
266
|
) -> ServerSpec:
|
243
|
-
"""Waits for the server
|
267
|
+
"""Waits for the server based on identity `name` in the scheduler specified in the `config`
|
244
268
|
to be ready (e.g. RUNNING). If the server is not found then this function creates one
|
245
|
-
per the `
|
269
|
+
per the `config` spec, and waits for the server to be ready before returning.
|
246
270
|
|
247
271
|
Usage:
|
248
272
|
|
249
273
|
.. code-block:: python
|
250
274
|
|
251
|
-
import getpass
|
252
275
|
from monarch.tools.config import defaults
|
253
276
|
|
254
|
-
USER = getpass.getuser()
|
255
277
|
config = defaults.config(scheduler)
|
256
|
-
appdef = defaults.component_fn(config.scheduler)()
|
278
|
+
config.appdef = defaults.component_fn(config.scheduler)()
|
257
279
|
|
258
|
-
server_handle = get_or_create(
|
280
|
+
server_handle = get_or_create(name="my_job_name", config=config)
|
259
281
|
server_info = info(server_handle)
|
260
282
|
|
261
283
|
Returns: A `ServerSpec` containing information about either the existing or the newly
|
@@ -273,7 +295,7 @@ async def get_or_create(
|
|
273
295
|
)
|
274
296
|
|
275
297
|
# no dryrun (see assertion above) support so will always be a handle (str)
|
276
|
-
new_server_handle = str(create(config,
|
298
|
+
new_server_handle = str(create(config, name))
|
277
299
|
|
278
300
|
logger.info(f"created new `{new_server_handle}` waiting for it to be ready...")
|
279
301
|
|
@@ -289,10 +311,10 @@ async def get_or_create(
|
|
289
311
|
f"the new server `{new_server_handle}` has {server_info.state}"
|
290
312
|
)
|
291
313
|
|
292
|
-
|
314
|
+
print(f"\x1b[36mNew job `{new_server_handle}` is ready to serve. \x1b[0m")
|
293
315
|
return server_info
|
294
316
|
else:
|
295
|
-
|
317
|
+
print(f"\x1b[36mFound existing job `{server_handle}` ready to serve. \x1b[0m")
|
296
318
|
return server_info
|
297
319
|
|
298
320
|
|
@@ -9,6 +9,7 @@ import getpass
|
|
9
9
|
from typing import Optional
|
10
10
|
|
11
11
|
from monarch.tools import mesh_spec
|
12
|
+
from monarch.tools.config import UnnamedAppDef
|
12
13
|
from monarch.tools.mesh_spec import mesh_spec_from_str
|
13
14
|
from torchx import specs
|
14
15
|
|
@@ -16,17 +17,18 @@ _DEFAULT_MESHES = ["mesh_0:1:gpu.small"]
|
|
16
17
|
|
17
18
|
_USER: str = getpass.getuser()
|
18
19
|
|
20
|
+
DEFAULT_NAME: str = f"monarch-{_USER}"
|
21
|
+
|
19
22
|
__version__ = "latest" # TODO get version from monarch.__version_
|
20
23
|
|
21
24
|
|
22
25
|
def proc_mesh(
|
23
|
-
name: str = f"monarch-{_USER}",
|
24
26
|
image: str = f"ghcr.io/pytorch-labs/monarch:{__version__}", # TODO docker needs to be built and pushed to ghcr
|
25
27
|
meshes: list[str] = _DEFAULT_MESHES,
|
26
28
|
env: Optional[dict[str, str]] = None,
|
27
29
|
port: int = mesh_spec.DEFAULT_REMOTE_ALLOCATOR_PORT,
|
28
30
|
program: str = "monarch_bootstrap", # installed with monarch wheel (as console script)
|
29
|
-
) ->
|
31
|
+
) -> UnnamedAppDef:
|
30
32
|
"""
|
31
33
|
Args:
|
32
34
|
name: the name of the monarch server job
|
@@ -37,7 +39,7 @@ def proc_mesh(
|
|
37
39
|
program: path to the binary that the remote process allocator spawns on an allocation request
|
38
40
|
"""
|
39
41
|
|
40
|
-
appdef =
|
42
|
+
appdef = UnnamedAppDef()
|
41
43
|
|
42
44
|
for mesh in [mesh_spec_from_str(mesh) for mesh in meshes]:
|
43
45
|
mesh_role = specs.Role(
|
monarch/tools/config/__init__.py
CHANGED
@@ -6,15 +6,32 @@
|
|
6
6
|
|
7
7
|
# pyre-strict
|
8
8
|
from dataclasses import dataclass, field
|
9
|
-
from typing import Any, Optional
|
9
|
+
from typing import Any, Dict, List, Optional
|
10
|
+
|
11
|
+
from torchx.specs import Role
|
10
12
|
|
11
13
|
|
12
14
|
NOT_SET: str = "__NOT_SET__"
|
13
15
|
|
14
16
|
|
17
|
+
@dataclass
|
18
|
+
class UnnamedAppDef:
|
19
|
+
"""
|
20
|
+
A TorchX AppDef without a name.
|
21
|
+
"""
|
22
|
+
|
23
|
+
roles: List[Role] = field(default_factory=list)
|
24
|
+
metadata: Dict[str, str] = field(default_factory=dict)
|
25
|
+
|
26
|
+
|
15
27
|
@dataclass
|
16
28
|
class Config:
|
29
|
+
"""
|
30
|
+
All configs needed to schedule a mesh of allocators.
|
31
|
+
"""
|
32
|
+
|
17
33
|
scheduler: str = NOT_SET
|
18
34
|
scheduler_args: dict[str, Any] = field(default_factory=dict)
|
19
35
|
workspace: Optional[str] = None
|
20
36
|
dryrun: bool = False
|
37
|
+
appdef: UnnamedAppDef = UnnamedAppDef()
|
monarch/tools/config/defaults.py
CHANGED
@@ -11,7 +11,7 @@
|
|
11
11
|
from typing import Callable, Optional
|
12
12
|
|
13
13
|
from monarch.tools.components import hyperactor
|
14
|
-
from monarch.tools.config import Config
|
14
|
+
from monarch.tools.config import Config, UnnamedAppDef
|
15
15
|
|
16
16
|
from torchx import specs
|
17
17
|
from torchx.schedulers import (
|
@@ -23,7 +23,7 @@ from torchx.schedulers import (
|
|
23
23
|
)
|
24
24
|
|
25
25
|
|
26
|
-
def component_fn(scheduler: str) -> Callable[...,
|
26
|
+
def component_fn(scheduler: str) -> Callable[..., UnnamedAppDef]:
|
27
27
|
"""The default TorchX component function for the scheduler"""
|
28
28
|
return hyperactor.proc_mesh
|
29
29
|
|
monarch/tools/mesh_spec.py
CHANGED
@@ -9,6 +9,8 @@ import string
|
|
9
9
|
from dataclasses import dataclass, field
|
10
10
|
from typing import Any, Optional
|
11
11
|
|
12
|
+
from monarch.tools.config import UnnamedAppDef
|
13
|
+
|
12
14
|
from monarch.tools.network import get_sockaddr
|
13
15
|
from torchx import specs
|
14
16
|
from torchx.specs.api import is_terminal
|
@@ -39,6 +41,7 @@ class MeshSpec:
|
|
39
41
|
transport: str = "tcp"
|
40
42
|
port: int = DEFAULT_REMOTE_ALLOCATOR_PORT
|
41
43
|
hostnames: list[str] = field(default_factory=list)
|
44
|
+
state: specs.AppState = specs.AppState.UNSUBMITTED
|
42
45
|
|
43
46
|
def server_addrs(
|
44
47
|
self, transport: Optional[str] = None, port: Optional[int] = None
|
@@ -69,7 +72,7 @@ def _tag(mesh_name: str, tag_template: str) -> str:
|
|
69
72
|
return string.Template(tag_template).substitute(mesh_name=mesh_name)
|
70
73
|
|
71
74
|
|
72
|
-
def tag_as_metadata(mesh_spec: MeshSpec, appdef:
|
75
|
+
def tag_as_metadata(mesh_spec: MeshSpec, appdef: UnnamedAppDef) -> None:
|
73
76
|
appdef.metadata[_tag(mesh_spec.name, _TAG_HOST_TYPE)] = mesh_spec.host_type
|
74
77
|
appdef.metadata[_tag(mesh_spec.name, _TAG_GPUS)] = str(mesh_spec.gpus)
|
75
78
|
appdef.metadata[_tag(mesh_spec.name, _TAG_TRANSPORT)] = mesh_spec.transport
|
tests/test_allocator.py
CHANGED
@@ -33,7 +33,6 @@ from monarch._rust_bindings.monarch_hyperactor.channel import (
|
|
33
33
|
ChannelTransport,
|
34
34
|
)
|
35
35
|
|
36
|
-
from monarch._src.actor.actor_mesh import MonarchContext
|
37
36
|
from monarch._src.actor.allocator import (
|
38
37
|
ALLOC_LABEL_PROC_MESH_NAME,
|
39
38
|
LocalAllocator,
|
@@ -160,7 +159,7 @@ class TestSetupActorInAllocator(unittest.IsolatedAsyncioTestCase):
|
|
160
159
|
"TEST_ENV_VAR_3": "value_3",
|
161
160
|
}
|
162
161
|
|
163
|
-
def setup_multiple_env_vars(
|
162
|
+
def setup_multiple_env_vars() -> None:
|
164
163
|
for name, value in env_vars.items():
|
165
164
|
os.environ[name] = value
|
166
165
|
|
@@ -184,36 +183,33 @@ class TestSetupActorInAllocator(unittest.IsolatedAsyncioTestCase):
|
|
184
183
|
await proc_mesh.stop()
|
185
184
|
|
186
185
|
async def test_setup_lambda_with_context_info(self) -> None:
|
187
|
-
"""Test that the setup lambda can access
|
188
|
-
context_var_name: str = "
|
186
|
+
"""Test that the setup lambda can access rank information"""
|
187
|
+
context_var_name: str = "PROC_MESH_RANK_INFO"
|
189
188
|
|
190
|
-
def
|
191
|
-
context_info = f"
|
189
|
+
def setup_with_rank() -> None:
|
190
|
+
context_info = f"point_rank:{current_rank().rank}"
|
192
191
|
os.environ[context_var_name] = context_info
|
193
192
|
|
194
193
|
spec = AllocSpec(AllocConstraints(), gpus=1, hosts=1)
|
195
194
|
allocator = LocalAllocator()
|
196
195
|
alloc = await allocator.allocate(spec)
|
197
196
|
|
198
|
-
proc_mesh = await ProcMesh.from_alloc(alloc, setup=
|
197
|
+
proc_mesh = await ProcMesh.from_alloc(alloc, setup=setup_with_rank)
|
199
198
|
|
200
199
|
try:
|
201
200
|
actor = await proc_mesh.spawn("env_check", EnvCheckActor)
|
202
201
|
|
203
|
-
|
202
|
+
rank_info = await actor.get_env_var.call_one(context_var_name)
|
204
203
|
|
205
204
|
self.assertNotEqual(
|
206
|
-
|
205
|
+
rank_info,
|
207
206
|
"NOT_SET",
|
208
207
|
"Context information was not stored in the environment variable",
|
209
208
|
)
|
210
|
-
self.assertIn(
|
211
|
-
"proc_id:", context_info, "Context information does not contain proc_id"
|
212
|
-
)
|
213
209
|
self.assertIn(
|
214
210
|
"point_rank:0",
|
215
|
-
|
216
|
-
f"Context information {
|
211
|
+
rank_info,
|
212
|
+
f"Context information {rank_info} does not contain point_rank",
|
217
213
|
)
|
218
214
|
finally:
|
219
215
|
await proc_mesh.stop()
|
@@ -435,7 +431,7 @@ class TestRemoteAllocator(unittest.IsolatedAsyncioTestCase):
|
|
435
431
|
test_var_name: str = "TEST_ENV_VAR_FOR_PROC_MESH"
|
436
432
|
test_var_value: str = "test_value_123"
|
437
433
|
|
438
|
-
def setup_env_vars(
|
434
|
+
def setup_env_vars() -> None:
|
439
435
|
os.environ[test_var_name] = test_var_value
|
440
436
|
|
441
437
|
hosts = 2
|
tests/test_env_before_cuda.py
CHANGED
@@ -15,7 +15,6 @@ import cloudpickle
|
|
15
15
|
|
16
16
|
import torch
|
17
17
|
from monarch._rust_bindings.monarch_hyperactor.alloc import AllocConstraints, AllocSpec
|
18
|
-
from monarch._src.actor.actor_mesh import MonarchContext
|
19
18
|
from monarch._src.actor.allocator import LocalAllocator
|
20
19
|
from monarch._src.actor.proc_mesh import proc_mesh
|
21
20
|
from monarch.actor import Actor, endpoint, ProcMesh
|
@@ -70,7 +69,7 @@ class TestEnvBeforeCuda(unittest.IsolatedAsyncioTestCase):
|
|
70
69
|
"CUDA_LAUNCH_BLOCKING": "1",
|
71
70
|
}
|
72
71
|
|
73
|
-
def setup_cuda_env(
|
72
|
+
def setup_cuda_env() -> None:
|
74
73
|
for name, value in cuda_env_vars.items():
|
75
74
|
os.environ[name] = value
|
76
75
|
|
@@ -107,7 +106,7 @@ class TestEnvBeforeCuda(unittest.IsolatedAsyncioTestCase):
|
|
107
106
|
"CUDA_DEVICE_MAX_CONNECTIONS": "1",
|
108
107
|
}
|
109
108
|
|
110
|
-
def setup_cuda_env(
|
109
|
+
def setup_cuda_env() -> None:
|
111
110
|
for name, value in cuda_env_vars.items():
|
112
111
|
os.environ[name] = value
|
113
112
|
|
tests/test_tensor_engine.py
CHANGED
@@ -8,7 +8,7 @@ import monarch
|
|
8
8
|
import pytest
|
9
9
|
import torch
|
10
10
|
from monarch import remote
|
11
|
-
from monarch.actor import Actor, endpoint, proc_mesh
|
11
|
+
from monarch.actor import Actor, as_endpoint, endpoint, proc_mesh
|
12
12
|
from monarch.mesh_controller import spawn_tensor_engine
|
13
13
|
|
14
14
|
|
@@ -104,3 +104,29 @@ def test_actor_tensor_ordering() -> None:
|
|
104
104
|
results.append(counter.incr.call(1))
|
105
105
|
|
106
106
|
assert list(range(10)) == [r.get().item(hosts=0, gpus=0) for r in results]
|
107
|
+
|
108
|
+
|
109
|
+
class Linear(Actor):
|
110
|
+
def __init__(self, N: int, M: int):
|
111
|
+
self.weight = torch.zeros((N, M))
|
112
|
+
|
113
|
+
def forward(self, x) -> torch.Tensor:
|
114
|
+
return x @ self.weight
|
115
|
+
|
116
|
+
@endpoint(propagate="inspect")
|
117
|
+
def update(self, w: torch.Tensor) -> None:
|
118
|
+
self.weight += w
|
119
|
+
|
120
|
+
|
121
|
+
@two_gpu
|
122
|
+
def test_rref_actor() -> None:
|
123
|
+
pm = proc_mesh(gpus=1).get()
|
124
|
+
with pm.activate():
|
125
|
+
x = pm.spawn("linear", Linear, 3, 4).get()
|
126
|
+
|
127
|
+
y = torch.ones((4, 3))
|
128
|
+
t = as_endpoint(x.forward, propagate=lambda x: torch.rand(3, 4)).rref(y)
|
129
|
+
assert monarch.inspect(t.sum()).item() == 0
|
130
|
+
x.update.rref(torch.ones((3, 4)))
|
131
|
+
t = as_endpoint(x.forward, propagate=lambda x: torch.rand(3, 4)).rref(y)
|
132
|
+
assert monarch.inspect(t.sum()).item() == 3 * 4 * 4
|
{torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: torchmonarch-nightly
|
3
|
-
Version: 2025.7.
|
3
|
+
Version: 2025.7.26
|
4
4
|
Summary: Monarch: Single controller library
|
5
5
|
Author: Meta
|
6
6
|
Author-email: oncall+monarch@xmail.facebook.com
|
@@ -44,6 +44,8 @@ Note: Monarch is currently only supported on Linux systems
|
|
44
44
|
|
45
45
|
## Installation
|
46
46
|
|
47
|
+
### On Fedora distributions
|
48
|
+
|
47
49
|
`pip install torchmonarch-nightly`
|
48
50
|
|
49
51
|
or manually
|
@@ -88,6 +90,37 @@ pip install --no-build-isolation -e .
|
|
88
90
|
pytest python/tests/ -v -m "not oss_skip"
|
89
91
|
```
|
90
92
|
|
93
|
+
### On MacOS
|
94
|
+
|
95
|
+
You can also build Monarch to run locally on a MacOS system.
|
96
|
+
|
97
|
+
Note that this does not support tensor engine, which is tied to CUDA and RDMA (via ibverbs).
|
98
|
+
|
99
|
+
|
100
|
+
```sh
|
101
|
+
|
102
|
+
# Create and activate the conda environment
|
103
|
+
conda create -n monarchenv python=3.10 -y
|
104
|
+
conda activate monarchenv
|
105
|
+
|
106
|
+
# Install nightly rust toolchain
|
107
|
+
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
108
|
+
rustup toolchain install nightly
|
109
|
+
rustup default nightly
|
110
|
+
|
111
|
+
# Install build dependencies
|
112
|
+
pip install -r build-requirements.txt
|
113
|
+
# Install test dependencies
|
114
|
+
pip install -r python/tests/requirements.txt
|
115
|
+
|
116
|
+
# Build and install Monarch
|
117
|
+
USE_TENSOR_ENGINE=0 pip install --no-build-isolation .
|
118
|
+
# or setup for development
|
119
|
+
USE_TENSOR_ENGINE=0 pip install --no-build-isolation -e .
|
120
|
+
|
121
|
+
```
|
122
|
+
|
123
|
+
|
91
124
|
## Running examples
|
92
125
|
|
93
126
|
Check out the `examples/` directory for demonstrations of how to use Monarch's APIs.
|
{torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/RECORD
RENAMED
@@ -1,5 +1,5 @@
|
|
1
1
|
monarch/__init__.py,sha256=mgKiyD1kxky-1pvhMlNfF4VmxWnhi-FSYZNFzkW1BEM,7052
|
2
|
-
monarch/_rust_bindings.so,sha256=
|
2
|
+
monarch/_rust_bindings.so,sha256=pvUGrYVAW1JrVgti7IJsFSb7Va0vWTB95RvoOgAIXGE,47520848
|
3
3
|
monarch/_testing.py,sha256=_3MYNMq-_0T1qXCj2vxrW13GlWGdUuVFMskQF2Gsw_o,7877
|
4
4
|
monarch/actor_mesh.py,sha256=VtPU9syi_vUdwDSJJ639Z4Y_EcWZUScyoj0lQ88RQPs,421
|
5
5
|
monarch/bootstrap_main.py,sha256=39OZpNMrfvvNJf-iwuNzgslzYA_ItaRPHfXGn_V74N0,524
|
@@ -7,8 +7,8 @@ monarch/cached_remote_function.py,sha256=kYdB6r4OHx_T_uX4q3tCNcp1t2DJwF8tPTIahUi
|
|
7
7
|
monarch/fetch.py,sha256=JMxC8HdWMvpik0T4E6e-gfxvmNnOkA0ul4eo4R3Jg_o,1712
|
8
8
|
monarch/gradient_generator.py,sha256=Rl3dmXGceTdCc1mYBg2JciR88ywGPnW7TVkL86KwqEA,6366
|
9
9
|
monarch/memory.py,sha256=ol86dBhFAJqg78iF25-BuK0wuwj1onR8FIioZ_B0gjw,1377
|
10
|
-
monarch/mesh_controller.py,sha256=
|
11
|
-
monarch/monarch_controller,sha256=
|
10
|
+
monarch/mesh_controller.py,sha256=mOk2misobJun2AgR_ALjFoopAEcOPYQVrrAJXa18ZTs,13810
|
11
|
+
monarch/monarch_controller,sha256=1gXdTNolxIUp_88alJnW1onBHseo5PzuNnjEnBte4wI,21243128
|
12
12
|
monarch/notebook.py,sha256=zu9MKDFKf1-rCM2TqFSRJjMBeiWuKcJSyUFLvoZRQzs,25949
|
13
13
|
monarch/opaque_module.py,sha256=jCcg0DjbcEVXA9WNG0NhUzGteLHOJLTZEBvrIYJIAns,10436
|
14
14
|
monarch/opaque_object.py,sha256=x1LoX6RIMGh4ux52xIfhPgoh6PhZHdkf9bMccHW3DW0,2808
|
@@ -25,27 +25,27 @@ monarch/tensorboard.py,sha256=MnLgH5lbqeUJauEuirEgR6L_qYl2NGdtwZOWIAuOZao,2587
|
|
25
25
|
monarch/world_mesh.py,sha256=ob5dJWaC49Uw0xqClHBm8CQLvL4xKnjd4TGzk7k8NxI,980
|
26
26
|
monarch/_src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
27
27
|
monarch/_src/actor/__init__.py,sha256=4iK3nzQZmEPe0HGNY70fABBenw3lCVVaaF0xddF5Fa0,235
|
28
|
-
monarch/_src/actor/actor_mesh.py,sha256=
|
28
|
+
monarch/_src/actor/actor_mesh.py,sha256=guYD9nZHguLGJAvTisc3Q664ASkupcNC6z9iheeGFUQ,29188
|
29
29
|
monarch/_src/actor/allocator.py,sha256=WpHEK1SvjgF3GdIWIYUkonXli2-gQVKJVZPInl2RFQo,8212
|
30
30
|
monarch/_src/actor/bootstrap_main.py,sha256=e5eU3GvX60MWWmCty7VcZrAmukD29iJdWBysNgQ2o3A,2342
|
31
31
|
monarch/_src/actor/debugger.py,sha256=t2iAAxz03b2KZ89T3VjRc__7GzSf83R8gM81SDyX3-o,19532
|
32
32
|
monarch/_src/actor/device_utils.py,sha256=gBpl23wMjppVAEzzj8U9HyX-B7Bs2_3ftiMAkzUS4j4,577
|
33
|
-
monarch/_src/actor/endpoint.py,sha256=
|
34
|
-
monarch/_src/actor/event_loop.py,sha256=
|
33
|
+
monarch/_src/actor/endpoint.py,sha256=jM3XYWze6gB6ajE4AMojNFSN4IaaxgioNAErJkkywjE,9721
|
34
|
+
monarch/_src/actor/event_loop.py,sha256=2i4fKIkemBzua_t47BqVa2roZ6fWB6sbmMFPNx2zKN0,2832
|
35
35
|
monarch/_src/actor/future.py,sha256=jOGh1wfwKyGJxhl9t1P8eapXYywf8KwQldZCCbupmb8,4042
|
36
36
|
monarch/_src/actor/pdb_wrapper.py,sha256=-QxRktntdEO2LdHixBGKLboYtADyh8bEIAoa3gFwIEo,4161
|
37
37
|
monarch/_src/actor/pickle.py,sha256=jD_3E07OJmMIlcMOOrNFnIuRKZU2F_Q_BP-njDFXUNM,2044
|
38
|
-
monarch/_src/actor/proc_mesh.py,sha256=
|
38
|
+
monarch/_src/actor/proc_mesh.py,sha256=amF4fbO-33qHFudlS9WabYXIVh0Y_D_0nhCTxvOhpGg,16640
|
39
39
|
monarch/_src/actor/shape.py,sha256=B-7DI768ZhT8ECUNCJcI7DfCB7iDFGFH0r-HmXaAfcM,8296
|
40
40
|
monarch/_src/actor/sync_state.py,sha256=GB6bTAGztkcN8fZ9K7zXklOzjYzv6cvkJeBje20xFkE,471
|
41
|
-
monarch/_src/actor/tensor_engine_shim.py,sha256=
|
41
|
+
monarch/_src/actor/tensor_engine_shim.py,sha256=hupavQ2rjPwECaTlDAhY-eeiEY18Wyyx59MZHcSEcYM,1622
|
42
42
|
monarch/_src/actor/code_sync/__init__.py,sha256=qzWoFNJEJvEbqab0QuHbkvhdz6FHi7BOTw6-2B3p0A4,378
|
43
43
|
monarch/_src/actor/code_sync/auto_reload.py,sha256=kqXCQuSzjxMw8bcDLsUZiL_NImo4j2EScfNklwpltmU,6685
|
44
44
|
monarch/_src/actor/telemetry/__init__.py,sha256=sHA5fmFdWU9jcUJVszNFhbXbjRSIBmuDXDMwJrrE0hw,523
|
45
45
|
monarch/_src/actor/telemetry/rust_span_tracing.py,sha256=UvkywuwjQX7tIyLdKZbF-fcmI_aHporAejsTRTyJNNg,4445
|
46
46
|
monarch/_src/tensor_engine/__init__.py,sha256=Md3cCHD7Ano9kV15PqGbicgUO-RMdh4aVy1yKiDt_xE,208
|
47
47
|
monarch/_src/tensor_engine/rdma.py,sha256=KbhJXMuuHruYXnmxzB3BpkpcGsZ4hSu_7C6wF-EPhDk,6331
|
48
|
-
monarch/actor/__init__.py,sha256=
|
48
|
+
monarch/actor/__init__.py,sha256=F87BC7owDdH_yRjLvMu6pbICbajndsEbtWG2W53Rapo,1050
|
49
49
|
monarch/builtins/__init__.py,sha256=QcfnHZGbc2qktBg7DyZt2ruE6VahnIt4S8lEZLHdJqU,443
|
50
50
|
monarch/builtins/log.py,sha256=H1QkuVzwxyi36Zyv-XR0VN0QsNimBWwxE1__fjs0_2o,554
|
51
51
|
monarch/builtins/random.py,sha256=wPbvscg7u53EXpMFo885fO2XOlsyjrNAJ4rBxLzfxdg,1839
|
@@ -67,14 +67,14 @@ monarch/common/function_caching.py,sha256=HVdbWtv6Eea7ENMWi8iv36w1G1TaVuUJhkUX_J
|
|
67
67
|
monarch/common/future.py,sha256=D1UJ_8Rvb8-VG9vNE-z7xz2m2otMd2HgB0rnA02nlvA,4681
|
68
68
|
monarch/common/invocation.py,sha256=L4mSmzqlHMxo1Tb71hBU_M8aBZCRCOcb6vvPhvvewec,4195
|
69
69
|
monarch/common/mast.py,sha256=XTzYljGR0aZ7GjmNMPgU2HyuL4HWSAy4IwE3kEDqdOw,7735
|
70
|
-
monarch/common/messages.py,sha256=
|
70
|
+
monarch/common/messages.py,sha256=jwwJMVVx3gKd39AXcnRxjMr7lPJRLimHtZYel3zjq4o,18833
|
71
71
|
monarch/common/mock_cuda.py,sha256=x6ho1Ton6BbKjBZ5ZxnFOUaQM032X70wnpoUNB7Ci2w,1039
|
72
72
|
monarch/common/opaque_ref.py,sha256=tWNvOC6CsjNPKD1JDx-8PSaeXqZC3eermgBExUPKML4,2871
|
73
73
|
monarch/common/pipe.py,sha256=9pTf8--3yOv4HpnJEhgcmc_JM6Az4uL1y72TSQA55dw,5013
|
74
74
|
monarch/common/process_group.py,sha256=FbJ_AJRZYFkvQ68L2naRq64J_aNuAKe5kO0MWdn_x74,1662
|
75
75
|
monarch/common/recording.py,sha256=Q39Zhb3kT52NCPf4VVMox2WXjtXju5eTuvPMZ_QGW7o,4660
|
76
76
|
monarch/common/reference.py,sha256=O26lkzEeVwj0S1xEy-OLqdHVnACmmlbQCUmXRrW4n1Q,938
|
77
|
-
monarch/common/remote.py,sha256=
|
77
|
+
monarch/common/remote.py,sha256=Q2YpAo_fsdh22ElUNruxYyn-cNTecZr8POcHCGtuKyg,12129
|
78
78
|
monarch/common/selection.py,sha256=lpWFbZs3ArYy29e-53eoAVAjQFksf1RvZz9NvM0CUW4,308
|
79
79
|
monarch/common/stream.py,sha256=_ejoxafHtdD10lLzznRCXKwrkZ_ZH9k_VTgiA5yfBrI,3583
|
80
80
|
monarch/common/tensor.py,sha256=ysT51NClNF4FxV0DFLJJUNmCRaVy8uQuYWpLViyPLdY,29292
|
@@ -89,7 +89,7 @@ monarch/controller/rust_backend/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTY
|
|
89
89
|
monarch/controller/rust_backend/controller.py,sha256=8IYnVUiqEVKO9rGL3vKqcCSAhWJG1bYYQ0MoaMqsp78,9521
|
90
90
|
monarch/gradient/__init__.py,sha256=kqmzwt16mMpk0M3GhpgP_f7da4DGnaV9chDzbt66k4Q,308
|
91
91
|
monarch/gradient/_gradient_generator.pyi,sha256=6cX0UxaDt9NAlwgIhTgnweqGOf6qRhHiGnUzSWNCxdU,630
|
92
|
-
monarch/gradient/_gradient_generator.so,sha256=
|
92
|
+
monarch/gradient/_gradient_generator.so,sha256=VyBW9SsE1IJ5iiiq1Su0BgTR7vDBnbaRQj7yjAtBPUs,11531728
|
93
93
|
monarch/parallel/__init__.py,sha256=6920kIkhiX7AiyjYvyc1ad8ccP-bStJJ1sS5KkeN2P0,352
|
94
94
|
monarch/parallel/pipelining/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
95
95
|
monarch/parallel/pipelining/runtime.py,sha256=KK8TG1gUYEzSsquiZoPTWGSIC74mlncD7cYknKxfb3c,32470
|
@@ -115,15 +115,15 @@ monarch/timer/example_spmd.py,sha256=p8i3_tO1AmpwSkZryiSjgkh7qaEZ6QXp2Fy1qtPpECA
|
|
115
115
|
monarch/timer/execution_timer.py,sha256=1YsrLIZirdohKOeFAU2H4UcONhQXHuctJbYcoX8I6gY,6985
|
116
116
|
monarch/timer/execution_timer_test.py,sha256=CSxTv44fFZQURJlCBmYvysQI1aS_zEGZs_uxl9SOHak,4486
|
117
117
|
monarch/tools/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
118
|
-
monarch/tools/cli.py,sha256=
|
119
|
-
monarch/tools/commands.py,sha256=
|
120
|
-
monarch/tools/mesh_spec.py,sha256=
|
118
|
+
monarch/tools/cli.py,sha256=b3mKZnK-MwP7JwskTxHI0KcJXxSU6498jEb2ntVr_VM,5001
|
119
|
+
monarch/tools/commands.py,sha256=3xuvHcMwl0t6cWTVUxI_r8EqrJZnay0bkKxOijhlKrw,12126
|
120
|
+
monarch/tools/mesh_spec.py,sha256=in6txNRmA-UvveVSMHCjX6mGpofd3K8vl2Plz1eD6rg,7935
|
121
121
|
monarch/tools/network.py,sha256=mN8Fx9mervxM3VdFHRn4ZXt4z7yWxZp52BTxx2tfpus,2455
|
122
122
|
monarch/tools/utils.py,sha256=2GGUQQE0dLtzoKy40_tAsOfbSxE6krnL0WvwMgUBgmw,1213
|
123
123
|
monarch/tools/components/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
124
|
-
monarch/tools/components/hyperactor.py,sha256=
|
125
|
-
monarch/tools/config/__init__.py,sha256=
|
126
|
-
monarch/tools/config/defaults.py,sha256=
|
124
|
+
monarch/tools/components/hyperactor.py,sha256=gYZS8AcmoTuq48mRrZWWnyxQqaiwTNHv8YqHhHi799U,2169
|
125
|
+
monarch/tools/config/__init__.py,sha256=MLa6uvVJssN_zTciCvCMeCURWglchCuqE3zdqA-gh4U,869
|
126
|
+
monarch/tools/config/defaults.py,sha256=ZymyKr9fNnBIgsV-xz-KrtrXRLkJo3hymTqxjXXnBzs,1910
|
127
127
|
monarch/worker/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
128
128
|
monarch/worker/_testing_function.py,sha256=A81cVMKgdlO66XvoYcBCDrxIQIm3o3GgvcH_c8M9OmI,13480
|
129
129
|
monarch/worker/compiled_block.py,sha256=hYx1F6PAu0_BnpKAprP_nV9qJtk5XWO7mcwH3JPDioU,10114
|
@@ -146,12 +146,12 @@ tests/error_test_binary.py,sha256=cgdrnVI3SIzAFSRXTvASfiR8eKSMrZ7N3tSCLVkJo44,78
|
|
146
146
|
tests/sleep_binary.py,sha256=XfLYaAfwm9xgzM-svs8fhAeFhwYIg6SyVEnx4e6wbUw,1009
|
147
147
|
tests/test_actor_error.py,sha256=kEfj1XW_WPk2mplucFBuzCWU3UrvzwkKoHSLIZfyQr0,20945
|
148
148
|
tests/test_alloc.py,sha256=IW7yJSaKxhOYc8SJtFyREakDUwiKWq9M0CGgYyBYHoc,743
|
149
|
-
tests/test_allocator.py,sha256=
|
149
|
+
tests/test_allocator.py,sha256=4LcUB4QRNGDp0qBWAyLM6ektmoxpO922f-NcHZziJ_w,28762
|
150
150
|
tests/test_coalescing.py,sha256=JZ4YgQNlWWs7N-Z8KCCXQPANcuyyXEKjeHIXYbPnQhk,15606
|
151
151
|
tests/test_controller.py,sha256=CIMb-ApmBcBj1eCqccDUAbVyyJWMGooAha5gQk0AoeY,31452
|
152
152
|
tests/test_debugger.py,sha256=mtd_no7dAooBePoQ_TZOxtgzwd1-x6xkpzAFK1_Y8B8,13703
|
153
153
|
tests/test_device_mesh.py,sha256=DrbezYOM0thfP9MgLXb5-F0VoLOmSz5GR0GwjR_3bE4,5290
|
154
|
-
tests/test_env_before_cuda.py,sha256=
|
154
|
+
tests/test_env_before_cuda.py,sha256=K5zdpXNRZB8hXQJaTN_CftcGHb3vzzdKasu8KFUoiCg,5440
|
155
155
|
tests/test_fault_tolerance.py,sha256=u4wmG1z5MZ6PY6us5zUZHJh2pUC3L7i0wsUfRDNHmxA,14144
|
156
156
|
tests/test_future.py,sha256=cXzaNi2YDwVyjR541ScXmgktX1YFsKzbl8wep0DMVbk,3032
|
157
157
|
tests/test_grad_generator.py,sha256=p4Pm4kMEeGldt2jUVAkGKCB0mLccKI28pltH6OTGbQA,3412
|
@@ -163,15 +163,15 @@ tests/test_remote_functions.py,sha256=VT65W7htp1jCsP9-AsiO1dofhx-4OebWEOssDEgi3G
|
|
163
163
|
tests/test_rust_backend.py,sha256=3TLu8dSVEqyLhjHED2DoAEQHTpbBQcr3WI6K2eGZLZw,7861
|
164
164
|
tests/test_signal_safe_block_on.py,sha256=bmal0XgzJowZXJV6T1Blow5a-vZluYWusCThLMGxyTE,3336
|
165
165
|
tests/test_sim_backend.py,sha256=kT7MnPo5O9xxX8f7uZOpR9Tkuz5brjaOyK1g1NqHRlo,1398
|
166
|
-
tests/test_tensor_engine.py,sha256=
|
166
|
+
tests/test_tensor_engine.py,sha256=_F70SQiUCRVZcbq5JcP5XkGJFnul57pqBpu1rF9kipE,3591
|
167
167
|
tests/simulator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
168
168
|
tests/simulator/test_profiling.py,sha256=TGYCfzTLdkpIwnOuO6KApprmrgPIRQe60KRX3wkB0sg,4565
|
169
169
|
tests/simulator/test_simulator.py,sha256=LO8lA0ssY-OGEBL5ipEu74f97Y765TEwfUOv-DtIptM,14568
|
170
170
|
tests/simulator/test_task.py,sha256=ipqBDuDAysuo1xOB9S5psaFvwe6VATD43IovCTSs0t4,2327
|
171
171
|
tests/simulator/test_worker.py,sha256=QrWWIJ3HDgDLkBPRc2mwYPlOQoXQcj1qRfc0WUfKkFY,3507
|
172
|
-
torchmonarch_nightly-2025.7.
|
173
|
-
torchmonarch_nightly-2025.7.
|
174
|
-
torchmonarch_nightly-2025.7.
|
175
|
-
torchmonarch_nightly-2025.7.
|
176
|
-
torchmonarch_nightly-2025.7.
|
177
|
-
torchmonarch_nightly-2025.7.
|
172
|
+
torchmonarch_nightly-2025.7.26.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
|
173
|
+
torchmonarch_nightly-2025.7.26.dist-info/METADATA,sha256=rJuOVCi7kVf2R9tHhtMSlaWv80ybWS5g9MvMzmso5M8,3852
|
174
|
+
torchmonarch_nightly-2025.7.26.dist-info/WHEEL,sha256=JC9FVdjbTDi9l3EyrqUd11CgmN9LkBi1g5dFHayafwA,104
|
175
|
+
torchmonarch_nightly-2025.7.26.dist-info/entry_points.txt,sha256=60QVSpYVzkzS4iDOiLp0fsLxVp47X3J2l3v7W-59LMo,117
|
176
|
+
torchmonarch_nightly-2025.7.26.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
|
177
|
+
torchmonarch_nightly-2025.7.26.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
{torchmonarch_nightly-2025.7.25.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/top_level.txt
RENAMED
File without changes
|