torchmonarch-nightly 2025.7.1__cp310-cp310-manylinux2014_x86_64.whl → 2025.7.25__cp310-cp310-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monarch/__init__.py +13 -9
- monarch/_rust_bindings.so +0 -0
- monarch/{_monarch/selection → _src/actor}/__init__.py +3 -7
- monarch/_src/actor/actor_mesh.py +874 -0
- monarch/{allocator.py → _src/actor/allocator.py} +26 -17
- monarch/_src/actor/bootstrap_main.py +73 -0
- monarch/{code_sync.py → _src/actor/code_sync/__init__.py} +3 -1
- monarch/_src/actor/code_sync/auto_reload.py +223 -0
- monarch/_src/actor/debugger.py +565 -0
- monarch/_src/actor/endpoint.py +270 -0
- monarch/_src/actor/event_loop.py +97 -0
- monarch/_src/actor/future.py +100 -0
- monarch/{pdb_wrapper.py → _src/actor/pdb_wrapper.py} +47 -46
- monarch/{common/pickle_flatten.py → _src/actor/pickle.py} +26 -2
- monarch/_src/actor/proc_mesh.py +500 -0
- monarch/_src/actor/sync_state.py +18 -0
- monarch/{telemetry.py → _src/actor/telemetry/__init__.py} +1 -1
- monarch/_src/actor/telemetry/rust_span_tracing.py +159 -0
- monarch/_src/actor/tensor_engine_shim.py +56 -0
- monarch/_src/tensor_engine/rdma.py +180 -0
- monarch/_testing.py +3 -2
- monarch/actor/__init__.py +51 -0
- monarch/actor_mesh.py +6 -765
- monarch/bootstrap_main.py +8 -47
- monarch/common/client.py +1 -1
- monarch/common/controller_api.py +2 -1
- monarch/common/device_mesh.py +12 -2
- monarch/common/messages.py +12 -1
- monarch/common/recording.py +4 -3
- monarch/common/remote.py +135 -52
- monarch/common/tensor.py +2 -1
- monarch/controller/backend.py +2 -2
- monarch/controller/controller.py +2 -1
- monarch/controller/rust_backend/controller.py +2 -1
- monarch/fetch.py +3 -5
- monarch/mesh_controller.py +201 -139
- monarch/monarch_controller +0 -0
- monarch/opaque_module.py +4 -6
- monarch/opaque_object.py +3 -3
- monarch/proc_mesh.py +6 -309
- monarch/python_local_mesh.py +1 -1
- monarch/rust_backend_mesh.py +2 -1
- monarch/rust_local_mesh.py +4 -2
- monarch/sim_mesh.py +10 -19
- monarch/simulator/command_history.py +1 -1
- monarch/simulator/interface.py +2 -1
- monarch/simulator/mock_controller.py +1 -1
- monarch/simulator/simulator.py +1 -1
- monarch/tensor_engine/__init__.py +23 -0
- monarch/tensor_worker_main.py +3 -1
- monarch/tools/cli.py +3 -1
- monarch/tools/commands.py +95 -35
- monarch/tools/mesh_spec.py +55 -0
- monarch/tools/utils.py +38 -0
- monarch/worker/worker.py +1 -1
- monarch/world_mesh.py +2 -1
- monarch_supervisor/python_executable.py +6 -3
- tests/error_test_binary.py +48 -10
- tests/test_actor_error.py +370 -21
- tests/test_alloc.py +1 -1
- tests/test_allocator.py +373 -17
- tests/test_controller.py +2 -0
- tests/test_debugger.py +416 -0
- tests/test_env_before_cuda.py +162 -0
- tests/test_python_actors.py +184 -333
- tests/test_rdma.py +198 -0
- tests/test_remote_functions.py +40 -12
- tests/test_rust_backend.py +7 -5
- tests/test_sim_backend.py +1 -4
- tests/test_tensor_engine.py +55 -1
- {torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.25.dist-info}/METADATA +6 -1
- {torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.25.dist-info}/RECORD +80 -68
- torchmonarch_nightly-2025.7.25.dist-info/entry_points.txt +3 -0
- monarch/_monarch/hyperactor/__init__.py +0 -58
- monarch/_monarch/worker/debugger.py +0 -117
- monarch/_monarch/worker/logging.py +0 -107
- monarch/debugger.py +0 -379
- monarch/future.py +0 -76
- monarch/rdma.py +0 -162
- torchmonarch_nightly-2025.7.1.dist-info/entry_points.txt +0 -3
- /monarch/{_monarch/worker → _src}/__init__.py +0 -0
- /monarch/{common/_device_utils.py → _src/actor/device_utils.py} +0 -0
- /monarch/{common → _src/actor}/shape.py +0 -0
- /monarch/{_monarch → _src/tensor_engine}/__init__.py +0 -0
- {torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.25.dist-info}/WHEEL +0 -0
- {torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.25.dist-info}/licenses/LICENSE +0 -0
- {torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.25.dist-info}/top_level.txt +0 -0
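The file list shows the actor runtime and RDMA pieces moving under monarch/_src, re-exported through the new public packages monarch/actor and monarch/tensor_engine. As a minimal sketch (not an exhaustive list of re-exports), these are the import paths the new tests in this release rely on:

    # Import surface exercised by the new tests below; illustrative only.
    from monarch.actor import Actor, current_rank, endpoint, proc_mesh   # new actor API package
    from monarch.tensor_engine import RDMABuffer, is_available           # new RDMA / tensor-engine package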
tests/test_rdma.py
ADDED
@@ -0,0 +1,198 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import pytest
+
+import torch
+from monarch.actor import Actor, current_rank, endpoint, proc_mesh
+from monarch.tensor_engine import is_available as rdma_available, RDMABuffer
+
+
+needs_cuda = pytest.mark.skipif(
+    not torch.cuda.is_available(),
+    reason="CUDA not available",
+)
+needs_rdma = pytest.mark.skipif(
+    not rdma_available(),
+    reason="RDMA not available",
+)
+
+
+class ParameterServer(Actor):
+    def __init__(self):
+        self.params = torch.rand(10, 10)
+        self.grad_buffer = torch.rand(10, 10)
+
+    @endpoint
+    async def grad_handle(self) -> RDMABuffer:
+        byte_tensor = self.grad_buffer.view(torch.uint8).flatten()
+        buffer = RDMABuffer(byte_tensor)
+        return buffer
+
+    @endpoint
+    async def update(self):
+        self.params += 0.01 * self.grad_buffer
+
+    @endpoint
+    async def get_grad_buffer(self) -> torch.Tensor:
+        # just used for testing
+        return self.grad_buffer
+
+
+class ParameterClient(Actor):
+    def __init__(self, server, buffer):
+        self.server = server
+        byte_tensor = buffer.view(torch.uint8).flatten()
+        self.buffer = byte_tensor
+
+    @endpoint
+    async def upload(self, tensor):
+        gh = await self.server.grad_handle.call_one()
+        await gh.write_from(tensor)
+
+    @endpoint
+    async def download(self):
+        gh = await self.server.grad_handle.call_one()
+        await gh.read_into(self.buffer)
+
+    @endpoint
+    async def get_buffer(self):
+        return self.buffer
+
+
+@needs_rdma
+@needs_cuda
+async def test_proc_mesh_rdma():
+    proc = await proc_mesh(gpus=1)
+    server = await proc.spawn("server", ParameterServer)
+
+    # --- CPU TESTS ---
+    client_cpu = await proc.spawn(
+        "client_cpu", ParameterClient, server, torch.ones(10, 10)
+    )
+    x = await client_cpu.get_buffer.call_one()
+    assert torch.sum(x.view(torch.float32).view(10, 10)) == 100
+    zeros = torch.zeros(10, 10)
+    await client_cpu.upload.call_one(zeros.view(torch.uint8).flatten())
+    await client_cpu.download.call_one()
+    x = await client_cpu.get_buffer.call_one()
+    assert torch.sum(x.view(torch.float32).view(10, 10)) == 0
+
+    # --- Modify server's backing buffer directly ---
+    await server.update.call_one()
+
+    # Should reflect updated values
+    await client_cpu.download.call_one()
+
+    buffer = await client_cpu.get_buffer.call_one()
+    remote_grad = await server.get_grad_buffer.call_one()
+    assert torch.allclose(buffer.view(torch.float32).view(10, 10), remote_grad)
+
+    # --- GPU TESTS ---
+    client_gpu = await proc.spawn(
+        "client_gpu", ParameterClient, server, torch.ones(10, 10, device="cuda")
+    )
+    x = await client_gpu.get_buffer.call_one()
+    buffer = x.view(torch.float32).view(10, 10)
+    assert torch.sum(buffer) == 100
+    zeros = torch.zeros(10, 10, device="cuda")
+    await client_gpu.upload.call_one(zeros.view(torch.uint8).flatten())
+    await client_gpu.download.call_one()
+    x = await client_gpu.get_buffer.call_one()
+    buffer_gpu = x.view(torch.float32).view(10, 10)
+    assert torch.sum(buffer_gpu) == 0
+    # copying a tensor across hosts moves it to CPU
+    assert buffer_gpu.device.type == "cpu"
+
+    # Modify server state again
+    await server.update.call_one()
+    await client_gpu.download.call_one()
+    x = await client_gpu.get_buffer.call_one()
+    buffer_gpu = x.view(torch.float32).view(10, 10)
+    remote_grad = await server.get_grad_buffer.call_one()
+    assert torch.allclose(buffer_gpu.cpu(), remote_grad)
+
+
+class TrainerActor(Actor):
+    def __init__(self):
+        super().__init__()
+        # TODO - switch to CUDA once GPU support is added
+        self.trainer = torch.nn.Linear(10, 10).to("cpu")
+        self.trainer.weight.data.zero_()
+
+    @endpoint
+    async def init(self, gen):
+        ranks = current_rank()
+        self.gen = gen.slice(**ranks)
+
+    @endpoint
+    async def exchange_metadata(self):
+        byte_tensor = self.trainer.weight.data.view(torch.uint8).flatten()
+        self.handle = RDMABuffer(byte_tensor)
+        await self.gen.attach_weight_buffer.call(self.handle)
+
+    @endpoint
+    async def weights_ready(self):
+        self.trainer.weight.data.add_(1.0)
+
+
+class GeneratorActor(Actor):
+    def __init__(self):
+        super().__init__()
+        self.generator = torch.nn.Linear(10, 10).to("cuda")
+        self.step = 0
+
+    @endpoint
+    async def init(self, trainer):
+        ranks = current_rank()
+        self.trainer = trainer.slice(**ranks)
+
+    @endpoint
+    async def attach_weight_buffer(self, handle):
+        self.handle = handle
+
+    @endpoint
+    async def update_weights(self):
+        self.step += 1
+        byte_tensor = self.generator.weight.data.view(torch.uint8).flatten()
+        await self.handle.read_into(byte_tensor)
+        assert (
+            torch.sum(self.generator.weight.data) == self.step * 100
+        ), f"{torch.sum(self.generator.weight.data)=}, {self.step=}"
+
+
+@needs_rdma
+@needs_cuda
+async def test_gpu_trainer_generator():
+    trainer_proc = await proc_mesh(gpus=1)
+    gen_proc = await proc_mesh(gpus=1)
+    trainer = await trainer_proc.spawn("trainer", TrainerActor)
+    generator = await gen_proc.spawn("gen", GeneratorActor)
+
+    await generator.init.call(trainer)
+    await trainer.init.call(generator)
+    await trainer.exchange_metadata.call()
+
+    for _ in range(3):
+        await trainer.weights_ready.call()
+        await generator.update_weights.call()
+
+
+@needs_rdma
+@needs_cuda
+def test_gpu_trainer_generator_sync() -> None:
+    trainer_proc = proc_mesh(gpus=1).get()
+    gen_proc = proc_mesh(gpus=1).get()
+    trainer = trainer_proc.spawn("trainer", TrainerActor).get()
+    generator = gen_proc.spawn("gen", GeneratorActor).get()
+
+    generator.init.call(trainer).get()
+    trainer.init.call(generator).get()
+    trainer.exchange_metadata.call().get()
+
+    for _ in range(1):
+        trainer.weights_ready.call().get()
+        generator.update_weights.call().get()
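Condensed from tests/test_rdma.py above, a minimal sketch of the RDMABuffer round trip between two actors. The Owner/Peer actor names and the main() driver coroutine are illustrative; the RDMABuffer, write_from, proc_mesh, spawn, and endpoint calls follow the usage shown in the new test.

    import torch
    from monarch.actor import Actor, endpoint, proc_mesh
    from monarch.tensor_engine import RDMABuffer


    class Owner(Actor):
        """Owns a tensor and hands out an RDMA handle to its bytes."""

        def __init__(self):
            self.data = torch.zeros(10, 10)

        @endpoint
        async def handle(self) -> RDMABuffer:
            # RDMABuffer wraps a flat uint8 view of the backing storage.
            return RDMABuffer(self.data.view(torch.uint8).flatten())


    class Peer(Actor):
        """Pushes local bytes into the owner's buffer over RDMA."""

        def __init__(self, owner):
            self.owner = owner

        @endpoint
        async def push(self, tensor):
            buf = await self.owner.handle.call_one()
            # write_from copies the caller's bytes into the remote buffer.
            await buf.write_from(tensor.view(torch.uint8).flatten())


    async def main():
        # Mirrors the async test style above (single-GPU proc mesh).
        proc = await proc_mesh(gpus=1)
        owner = await proc.spawn("owner", Owner)
        peer = await proc.spawn("peer", Peer, owner)
        await peer.push.call_one(torch.ones(10, 10))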
tests/test_remote_functions.py
CHANGED
@@ -9,7 +9,6 @@ import itertools
 import math
 import sys
 import traceback
-from enum import Enum
 from typing import Callable, ContextManager, Tuple
 from unittest.mock import patch
 
@@ -25,16 +24,18 @@ from monarch import (
     Pipe,
     remote,
     remote_generator,
-    RemoteException,
+    RemoteException as OldRemoteException,
     Stream,
 )
+
 from monarch._testing import BackendType, TestingContext
 from monarch.builtins.log import log_remote
 from monarch.builtins.random import set_manual_seed_remote
 from monarch.cached_remote_function import remote_autograd_function
 from monarch.common import remote as remote_module
 from monarch.common.device_mesh import DeviceMesh
-from monarch.common.remote import Remote
+from monarch.common.remote import call_on_shard_and_fetch, Remote
+from monarch.mesh_controller import RemoteException as NewRemoteException
 
 from monarch.opaque_module import OpaqueModule
 from monarch.opaque_object import opaque_method, OpaqueObject
@@ -57,6 +58,8 @@ from monarch.worker._testing_function import (
 from monarch_supervisor.logging import fix_exception_lines
 from torch.distributed import ReduceOp
 
+RemoteException = (NewRemoteException, OldRemoteException)
+
 
 def custom_excepthook(exc_type, exc_value, exc_traceback):
     tb_lines = fix_exception_lines(
@@ -181,7 +184,9 @@ class RemoteFunctionsTestBase:
 # out is not counted as a failure, so we set a more restrictive timeout to
 # ensure we see a hard failure in CI.
 @pytest.mark.timeout(120)
-@pytest.mark.parametrize(
+@pytest.mark.parametrize(
+    "backend_type", [BackendType.PY, BackendType.RS, BackendType.MESH]
+)
 class TestRemoteFunctions(RemoteFunctionsTestBase):
     @classmethod
     def do_test_reduce_scatter_tensor(cls, backend_type, reduce_op, expected_tensor):
@@ -326,7 +331,7 @@ class TestRemoteFunctions(RemoteFunctionsTestBase):
         _ = fetch_shard(a).result(timeout=40)
 
     def test_set_device_inside_udf_fails_with_explanation(self, backend_type):
-        if backend_type
+        if backend_type != BackendType.RS:
             pytest.skip("Python support not planned for this test")
         with self.local_device_mesh(2, 2, backend_type):
             t = set_device_udf(2)
@@ -628,11 +633,10 @@ class TestRemoteFunctions(RemoteFunctionsTestBase):
         with self.local_device_mesh(2, 2, backend_type):
             assert (
                 "an argument processed"
-                ==
-
+                == call_on_shard_and_fetch(
+                    remote("monarch.worker._testing_function.do_some_processing"),
                     "an argument",
-                )
-                .result()
+                ).result()
             )
 
     def test_cached_remote_function(self, backend_type):
@@ -727,7 +731,7 @@ class TestRemoteFunctions(RemoteFunctionsTestBase):
 
         with self.local_device_mesh(2, 2, backend_type):
             a = torch.ones(())
-            assert
+            assert call_on_shard_and_fetch(check, bar(a, a)).result()
             # ensure we do not attempt to pickle closures
             close()
 
@@ -770,7 +774,7 @@ class TestRemoteFunctions(RemoteFunctionsTestBase):
 
         with self.local_device_mesh(1, 1, backend_type):
             # This should be a valid return than an exception to raise
-
+            call_on_shard_and_fetch(simple).result()
 
     def test_opaque_object(self, backend_type):
         with self.local_device_mesh(2, 2, backend_type):
@@ -948,10 +952,13 @@ class TestRemoteFunctions(RemoteFunctionsTestBase):
             x = outer_remote_function_that_calls_inner()
             try:
                 inspect(x)
-            except
+            except OldRemoteException as e:
                 backtrace = "\n".join([frame.name for frame in e.worker_frames])
                 assert "outer_remote_function" in backtrace
                 assert "inner_remote_function" in backtrace
+            except NewRemoteException as e:
+                assert "outer_remote_function" in e.worker_error_string
+                assert "inner_remote_function" in e.worker_error_string
 
     def test_remote_function_broadcast(self, backend_type):
         with self.local_device_mesh(2, 2, backend_type) as device_mesh:
@@ -1269,3 +1276,24 @@ def a_function_called_by_a_live_function(x):
 
 def a_live_function_call_by_a_live_function(x):
     return 3 * x
+
+
+@remote
+def return_them(x: torch.Tensor, y: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+    return (x, y)
+
+
+@pytest.mark.skipif(
+    torch.cuda.device_count() < 2,
+    reason="Not enough GPUs, this test requires at least 2 GPUs",
+)
+class TestMeshSpecific(RemoteFunctionsTestBase):
+    def test_value_mesh(self):
+        with self.local_device_mesh(2, 2, "mesh") as device_mesh:
+            x = device_mesh.rank("host")
+            y = device_mesh.rank("gpu")
+            r = return_them.call(x, y).get()
+
+            for p, (h, g) in r:
+                assert p["host"] == h.item()
+                assert p["gpu"] == g.item()
tests/test_rust_backend.py
CHANGED
@@ -17,6 +17,7 @@ import torch
 import torch.utils._python_dispatch
 from monarch import fetch_shard, no_mesh, remote, Stream
 from monarch.common.device_mesh import DeviceMesh
+from monarch.common.remote import call_on_shard_and_fetch
 from monarch.rust_local_mesh import local_meshes, LoggingLocation, SocketType
 from torch.nn.attention import sdpa_kernel, SDPBackend
 from torch.nn.functional import scaled_dot_product_attention
@@ -111,9 +112,10 @@ class TestRustBackend(TestCase):
         with local_mesh():
             assert (
                 "an argument processed"
-                ==
-
-
+                == call_on_shard_and_fetch(
+                    remote("monarch.worker._testing_function.do_some_processing"),
+                    "an argument",
+                ).result()
             )
 
     def test_brutal_shutdown(self):
@@ -143,8 +145,8 @@ class TestRustBackend(TestCase):
             return torch.isnan(t).any().item()
 
         t = torch.rand(3, 4)
-        res =
-            t, shard={"host": 0, "gpu": 0}
+        res = call_on_shard_and_fetch(
+            has_nan, t, shard={"host": 0, "gpu": 0}
         ).result()
 
         self.assertFalse(res)
tests/test_sim_backend.py
CHANGED
@@ -24,11 +24,8 @@ def local_sim_mesh(
     # TODO: support multiple gpus in a mesh.
     gpu_per_host: int = 1,
     activate: bool = True,
-    proxy_addr: Optional[str] = None,
 ) -> Generator[DeviceMesh, None, None]:
-    dms = sim_mesh(
-        n_meshes=1, hosts=hosts, gpus_per_host=gpu_per_host, proxy_addr=proxy_addr
-    )
+    dms = sim_mesh(n_meshes=1, hosts=hosts, gpus_per_host=gpu_per_host)
     dm = dms[0]
     try:
         if activate:
tests/test_tensor_engine.py
CHANGED
@@ -7,8 +7,9 @@
 import monarch
 import pytest
 import torch
+from monarch import remote
+from monarch.actor import Actor, endpoint, proc_mesh
 from monarch.mesh_controller import spawn_tensor_engine
-from monarch.proc_mesh import proc_mesh
 
 
 two_gpu = pytest.mark.skipif(
@@ -32,6 +33,14 @@ def test_tensor_engine() -> None:
     assert torch.allclose(torch.zeros(3, 4), r)
     assert torch.allclose(torch.zeros(3, 4), f)
 
+    @remote(propagate=lambda x: x)
+    def nope(x):
+        raise ValueError("nope")
+
+    with pytest.raises(monarch.mesh_controller.RemoteException):
+        with dm.activate():
+            monarch.inspect(nope(torch.zeros(3, 4)))
+
     dm.exit()
 
 
@@ -50,3 +59,48 @@ def test_proc_mesh_tensor_engine() -> None:
     assert a == 0
     assert b == 10
     assert c == 100
+
+
+class AddWithState(Actor):
+    def __init__(self, state: torch.Tensor):
+        super().__init__()
+        self.state = state
+
+    @endpoint
+    def forward(self, x) -> torch.Tensor:
+        return x + self.state
+
+
+@two_gpu
+def test_actor_with_tensors() -> None:
+    pm = proc_mesh(gpus=1).get()
+    with pm.activate():
+        x = pm.spawn("adder", AddWithState, torch.ones(())).get()
+        y = torch.ones(())
+        assert x.forward.call(y).get(timeout=5).item(hosts=0, gpus=0).item() == 2
+
+
+class Counter(Actor):
+    def __init__(self):
+        super().__init__()
+        self.c = 0
+
+    @endpoint
+    def incr(self, x) -> int:
+        self.c += 1
+        return self.c - 1
+
+
+@two_gpu
+def test_actor_tensor_ordering() -> None:
+    pm = proc_mesh(gpus=1).get()
+    with pm.activate():
+        counter = pm.spawn("a", Counter).get()
+        results = []
+        for _ in range(0, 10, 2):
+            # tensor engine call
+            results.append(counter.incr.call(torch.ones(())))
+            # non-tensor engine call
+            results.append(counter.incr.call(1))
+
+        assert list(range(10)) == [r.get().item(hosts=0, gpus=0) for r in results]
{torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.25.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: torchmonarch-nightly
-Version: 2025.7.1
+Version: 2025.7.25
 Summary: Monarch: Single controller library
 Author: Meta
 Author-email: oncall+monarch@xmail.facebook.com
@@ -15,6 +15,8 @@ Requires-Dist: numpy
 Requires-Dist: pyre-extensions
 Requires-Dist: cloudpickle
 Requires-Dist: torchx-nightly
+Requires-Dist: lark
+Requires-Dist: tabulate
 Dynamic: author
 Dynamic: author-email
 Dynamic: description
@@ -69,6 +71,9 @@ sudo dnf install clang-devel libnccl-devel
 conda install -c conda-forge clangdev nccl
 conda update -n monarchenv --all -c conda-forge -y
 
+# If you are building with RDMA support, build monarch with `USE_TENSOR_ENGINE=1 pip install --no-build-isolation .` and dnf install the following packages
+sudo dnf install -y libibverbs rdma-core libmlx5 libibverbs-devel rdma-core-devel
+
 # Install build dependencies
 pip install -r build-requirements.txt
 # Install test dependencies