torchmonarch-nightly 2025.6.27__cp313-cp313-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. monarch/__init__.py +189 -0
  2. monarch/_monarch/__init__.py +5 -0
  3. monarch/_monarch/hyperactor/__init__.py +58 -0
  4. monarch/_monarch/selection/__init__.py +13 -0
  5. monarch/_monarch/worker/__init__.py +0 -0
  6. monarch/_monarch/worker/debugger.py +117 -0
  7. monarch/_monarch/worker/logging.py +107 -0
  8. monarch/_rust_bindings.so +0 -0
  9. monarch/_testing.py +230 -0
  10. monarch/actor_mesh.py +761 -0
  11. monarch/allocator.py +220 -0
  12. monarch/bootstrap_main.py +59 -0
  13. monarch/builtins/__init__.py +14 -0
  14. monarch/builtins/log.py +22 -0
  15. monarch/builtins/random.py +68 -0
  16. monarch/cached_remote_function.py +257 -0
  17. monarch/code_sync.py +10 -0
  18. monarch/common/_C.pyi +11 -0
  19. monarch/common/_C.so +0 -0
  20. monarch/common/__init__.py +0 -0
  21. monarch/common/_coalescing.py +308 -0
  22. monarch/common/_device_utils.py +18 -0
  23. monarch/common/_tensor_to_table.py +172 -0
  24. monarch/common/base_tensor.py +28 -0
  25. monarch/common/borrows.py +143 -0
  26. monarch/common/client.py +690 -0
  27. monarch/common/constants.py +10 -0
  28. monarch/common/context_manager.py +40 -0
  29. monarch/common/controller_api.py +104 -0
  30. monarch/common/device_mesh.py +417 -0
  31. monarch/common/fake.py +55 -0
  32. monarch/common/function.py +160 -0
  33. monarch/common/function_caching.py +164 -0
  34. monarch/common/future.py +168 -0
  35. monarch/common/invocation.py +125 -0
  36. monarch/common/mast.py +221 -0
  37. monarch/common/messages.py +573 -0
  38. monarch/common/mock_cuda.py +41 -0
  39. monarch/common/opaque_ref.py +98 -0
  40. monarch/common/pickle_flatten.py +48 -0
  41. monarch/common/pipe.py +152 -0
  42. monarch/common/process_group.py +55 -0
  43. monarch/common/recording.py +127 -0
  44. monarch/common/reference.py +33 -0
  45. monarch/common/remote.py +297 -0
  46. monarch/common/selection.py +9 -0
  47. monarch/common/shape.py +229 -0
  48. monarch/common/stream.py +114 -0
  49. monarch/common/tensor.py +814 -0
  50. monarch/common/tensor_factory.py +31 -0
  51. monarch/common/tree.py +73 -0
  52. monarch/controller/__init__.py +7 -0
  53. monarch/controller/backend.py +223 -0
  54. monarch/controller/controller.py +223 -0
  55. monarch/controller/debugger.py +47 -0
  56. monarch/controller/history.py +90 -0
  57. monarch/controller/rust_backend/__init__.py +7 -0
  58. monarch/controller/rust_backend/controller.py +245 -0
  59. monarch/debugger.py +379 -0
  60. monarch/fetch.py +55 -0
  61. monarch/future.py +76 -0
  62. monarch/gradient/__init__.py +11 -0
  63. monarch/gradient/_gradient_generator.pyi +22 -0
  64. monarch/gradient/_gradient_generator.so +0 -0
  65. monarch/gradient_generator.py +185 -0
  66. monarch/memory.py +43 -0
  67. monarch/mesh_controller.py +271 -0
  68. monarch/monarch_controller +0 -0
  69. monarch/notebook.py +761 -0
  70. monarch/opaque_module.py +235 -0
  71. monarch/opaque_object.py +88 -0
  72. monarch/parallel/__init__.py +9 -0
  73. monarch/parallel/pipelining/__init__.py +7 -0
  74. monarch/parallel/pipelining/runtime.py +847 -0
  75. monarch/parallel/pipelining/schedule_ir.py +692 -0
  76. monarch/parallel/pipelining/scheduler.py +249 -0
  77. monarch/pdb_wrapper.py +135 -0
  78. monarch/proc_mesh.py +299 -0
  79. monarch/profiler.py +160 -0
  80. monarch/python_local_mesh.py +107 -0
  81. monarch/random.py +61 -0
  82. monarch/rdma.py +162 -0
  83. monarch/remote_class.py +114 -0
  84. monarch/rust_backend_mesh.py +280 -0
  85. monarch/rust_local_mesh.py +1402 -0
  86. monarch/sim_mesh.py +359 -0
  87. monarch/simulator/__init__.py +7 -0
  88. monarch/simulator/command_history.py +424 -0
  89. monarch/simulator/config.py +21 -0
  90. monarch/simulator/interface.py +59 -0
  91. monarch/simulator/ir.py +770 -0
  92. monarch/simulator/mock_controller.py +214 -0
  93. monarch/simulator/profiling.py +424 -0
  94. monarch/simulator/simulator.py +1052 -0
  95. monarch/simulator/task.py +255 -0
  96. monarch/simulator/tensor.py +373 -0
  97. monarch/simulator/trace.py +395 -0
  98. monarch/simulator/utils.py +41 -0
  99. monarch/simulator/worker.py +389 -0
  100. monarch/telemetry.py +19 -0
  101. monarch/tensor_worker_main.py +260 -0
  102. monarch/tensorboard.py +84 -0
  103. monarch/timer/__init__.py +21 -0
  104. monarch/timer/example_monarch.py +78 -0
  105. monarch/timer/example_spmd.py +55 -0
  106. monarch/timer/execution_timer.py +199 -0
  107. monarch/timer/execution_timer_test.py +131 -0
  108. monarch/tools/__init__.py +7 -0
  109. monarch/tools/cli.py +167 -0
  110. monarch/tools/commands.py +251 -0
  111. monarch/tools/components/__init__.py +7 -0
  112. monarch/tools/components/hyperactor.py +58 -0
  113. monarch/tools/config/__init__.py +20 -0
  114. monarch/tools/config/defaults.py +54 -0
  115. monarch/tools/mesh_spec.py +165 -0
  116. monarch/tools/network.py +69 -0
  117. monarch/worker/__init__.py +7 -0
  118. monarch/worker/_testing_function.py +481 -0
  119. monarch/worker/compiled_block.py +270 -0
  120. monarch/worker/debugger.py +125 -0
  121. monarch/worker/lines.py +47 -0
  122. monarch/worker/monitor.py +53 -0
  123. monarch/worker/worker.py +1191 -0
  124. monarch/world_mesh.py +34 -0
  125. monarch_supervisor/__init__.py +1044 -0
  126. monarch_supervisor/_testing.py +44 -0
  127. monarch_supervisor/function_call.py +30 -0
  128. monarch_supervisor/host.py +386 -0
  129. monarch_supervisor/launchers.py +145 -0
  130. monarch_supervisor/log_pstree.py +48 -0
  131. monarch_supervisor/logging.py +103 -0
  132. monarch_supervisor/python_executable.py +42 -0
  133. tests/__init__.py +0 -0
  134. tests/dispatch_bench.py +124 -0
  135. tests/dispatch_bench_helper.py +25 -0
  136. tests/error_test_binary.py +180 -0
  137. tests/simulator/__init__.py +0 -0
  138. tests/simulator/test_profiling.py +136 -0
  139. tests/simulator/test_simulator.py +411 -0
  140. tests/simulator/test_task.py +64 -0
  141. tests/simulator/test_worker.py +102 -0
  142. tests/sleep_binary.py +35 -0
  143. tests/test_actor_error.py +240 -0
  144. tests/test_alloc.py +25 -0
  145. tests/test_allocator.py +365 -0
  146. tests/test_coalescing.py +492 -0
  147. tests/test_controller.py +845 -0
  148. tests/test_device_mesh.py +132 -0
  149. tests/test_fault_tolerance.py +398 -0
  150. tests/test_future.py +94 -0
  151. tests/test_grad_generator.py +121 -0
  152. tests/test_mock_cuda.py +74 -0
  153. tests/test_pdb_actor.py +110 -0
  154. tests/test_python_actors.py +736 -0
  155. tests/test_remote_functions.py +1271 -0
  156. tests/test_rust_backend.py +217 -0
  157. tests/test_signal_safe_block_on.py +103 -0
  158. tests/test_sim_backend.py +54 -0
  159. tests/test_tensor_engine.py +52 -0
  160. torchmonarch_nightly-2025.6.27.dist-info/METADATA +94 -0
  161. torchmonarch_nightly-2025.6.27.dist-info/RECORD +165 -0
  162. torchmonarch_nightly-2025.6.27.dist-info/WHEEL +5 -0
  163. torchmonarch_nightly-2025.6.27.dist-info/entry_points.txt +3 -0
  164. torchmonarch_nightly-2025.6.27.dist-info/licenses/LICENSE +29 -0
  165. torchmonarch_nightly-2025.6.27.dist-info/top_level.txt +3 -0
@@ -0,0 +1,217 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-unsafe
8
+
9
+ from contextlib import contextmanager
10
+ from typing import Generator
11
+ from unittest import TestCase
12
+
13
+ import monarch
14
+
15
+ import pytest
16
+ import torch
17
+ import torch.utils._python_dispatch
18
+ from monarch import fetch_shard, no_mesh, remote, Stream
19
+ from monarch.common.device_mesh import DeviceMesh
20
+ from monarch.rust_local_mesh import local_meshes, LoggingLocation, SocketType
21
+ from torch.nn.attention import sdpa_kernel, SDPBackend
22
+ from torch.nn.functional import scaled_dot_product_attention
23
+
24
+
25
def simple_all_reduce(*args, **kwargs):
    """Return a tensor of ones with the same shape as the first positional argument.

    Any further positional arguments and all keyword arguments are accepted
    and ignored.
    """
    first = args[0]
    return torch.ones(first.shape)
27
+
28
+
29
# Rebind the module-level name to the handle returned by `remote(...)`. The
# plain Python function previously bound to this name is passed in as the
# `propagate` argument, and the target is given by its dotted path.
simple_all_reduce = remote(
    "monarch.worker._testing_function.simple_all_reduce_local",
    propagate=simple_all_reduce,
)
33
+
34
+
35
@contextmanager
def local_mesh(
    hosts: int = 1, gpu_per_host: int = 2, activate: bool = True
) -> Generator[DeviceMesh, None, None]:
    """Yield a device mesh created by ``monarch.rust_local_mesh.local_mesh``.

    The mesh is created with UNIX sockets and the default logging location.
    When ``activate`` is true the mesh is activated for the duration of the
    caller's ``with`` body. After the body finishes normally ``dm.exit()`` is
    called; if an ``Exception`` propagates instead, ``dm.client._shutdown`` is
    set to ``True`` and the exception is re-raised.
    """
    mesh_cm = monarch.rust_local_mesh.local_mesh(
        hosts=hosts,
        gpus_per_host=gpu_per_host,
        socket_type=SocketType.UNIX,
        logging_location=LoggingLocation.DEFAULT,
    )
    with mesh_cm as dm:
        try:
            if not activate:
                yield dm
            else:
                with dm.activate():
                    yield dm
            # Only reached when the caller's body completed without raising.
            dm.exit()
        except Exception:
            dm.client._shutdown = True
            raise
55
+
56
+
57
+ # Set global timeout--sandcastle's timeout is 600s. A test that sandcastle times
58
+ # out is not counted as a failure, so we set a more restrictive timeout to
59
+ # ensure we see a hard failure in CI.
60
@pytest.mark.timeout(120)
@pytest.mark.skipif(
    torch.cuda.device_count() < 2,
    reason="Not enough GPUs, this test requires at least 2 GPUs",
)
class TestRustBackend(TestCase):
    """Tests that drive tensors and remote functions through a local Rust mesh."""

    def test_local_mesh_setup(self):
        """Fetch a tensor mutated on the mesh and read it under ``no_mesh``."""
        with local_mesh():
            mesh_tensor = torch.zeros(3, 4)
            mesh_tensor.add_(1)
            pending = fetch_shard(mesh_tensor)

            with no_mesh.activate():
                fetched = pending.result()
            assert torch.equal(fetched, torch.ones(3, 4))

    def test_result_in_mesh(self):
        """Calling ``result()`` while the mesh is still active must work."""
        with local_mesh():
            mesh_tensor = torch.ones(3, 4)
            mesh_tensor.add_(-1)
            # Assert calling result() is fine within an active mesh.
            pending = fetch_shard(mesh_tensor)
            fetched = pending.result()
            assert torch.equal(fetched, torch.zeros(3, 4))

    def test_errors(self):
        """Mixing tensors across local/mesh/stream boundaries raises TypeError."""
        t = torch.rand(3, 4)
        with local_mesh(2, 2) as dm:
            y = torch.rand(3, 4)

            # `t` was created before the mesh was entered.
            with pytest.raises(TypeError, match="LOCAL_TENSOR"):
                t.add(y)

            # `x` is created under a sub-mesh while `y` was created under `dm`.
            with pytest.raises(TypeError, match="WRONG_MESH"):
                sub_mesh = dm(host=0)
                with sub_mesh.activate():
                    x = torch.rand(3, 4)
                    x.add(y)

            other = Stream("other")
            t = torch.rand(10).cuda()
            # `t` was created before `other` was activated.
            with pytest.raises(TypeError, match="WRONG_STREAM"), other.activate():
                t = t.reduce("host", "sum")

    def test_multi_hosts(self):
        """The same shard fetched twice is equal; a different shard is not."""
        with local_mesh(hosts=2, gpu_per_host=2):
            mesh_tensor = torch.rand(3, 4).cuda()
            first = fetch_shard(mesh_tensor, {"host": 1, "gpu": 0}).result()
            repeat = fetch_shard(mesh_tensor, {"host": 1, "gpu": 0}).result()
            elsewhere = fetch_shard(mesh_tensor, {"host": 0, "gpu": 1}).result()
            assert torch.equal(first, repeat)
            assert not torch.equal(first, elsewhere)

    def test_fetch_preprocess(self):
        """A remote function addressed by path returns the processed argument."""
        with local_mesh():
            do_some_processing = remote(
                "monarch.worker._testing_function.do_some_processing"
            )
            outcome = do_some_processing.call_on_shard_and_fetch(
                "an argument"
            ).result()
            assert "an argument processed" == outcome

    def test_brutal_shutdown(self):
        """Exit and deactivate a one-GPU mesh immediately after creating it."""
        mesh_cm = monarch.rust_local_mesh.local_mesh(
            hosts=1, gpus_per_host=1, socket_type=SocketType.UNIX
        )
        with mesh_cm as dm:
            dm.exit()
            dm.deactivate()

    def test_results_filtering(self):
        """Fetch the output of flash-attention SDPA and check its length."""
        with local_mesh(gpu_per_host=1):
            query, key, value = (
                torch.rand(1, 1, 1, 1, dtype=torch.float16, device="cuda")
                for _ in range(3)
            )
            with sdpa_kernel(backends=[SDPBackend.FLASH_ATTENTION]):
                # This function will send 9 results. Only 5 of them will be set.
                attention = scaled_dot_product_attention(query, key, value)
            pending = fetch_shard(attention)
            local_tensor = pending.result()
            assert len(local_tensor) == 1

    def test_live_function(self):
        """A function decorated with ``@remote`` inside the test can be called."""
        with local_mesh():

            @remote
            def has_nan(t):
                return torch.isnan(t).any().item()

            mesh_tensor = torch.rand(3, 4)
            pending = has_nan.call_on_shard_and_fetch(
                mesh_tensor, shard={"host": 0, "gpu": 0}
            )
            res = pending.result()

            self.assertFalse(res)

    def test_multiple_global_meshes(self):
        """
        Validate that a single client process can connect to multiple global
        meshes. The global meshes are distinct from each other to provide
        native failure domain isolation.
        """
        replicas = 4
        meshes_cm = local_meshes(
            meshes=replicas,
            hosts_per_mesh=1,
            gpus_per_host=1,
            socket_type=SocketType.UNIX,
            logging_location=LoggingLocation.DEFAULT,
        )
        with meshes_cm as groups:
            results = []
            # Mesh number `idx` produces a ones-vector of length idx + 1.
            for idx, group in enumerate(groups):
                with group.activate():
                    ones = torch.ones(idx + 1)
                    results.append(fetch_shard(ones).result())
            for idx in range(replicas):
                assert torch.equal(results[idx], torch.ones(idx + 1))

            for group in groups:
                group.exit()
                group.deactivate()

    def test_get_world_status(self) -> None:
        """``get_info()`` reports mesh labels and two device labels."""
        with local_mesh(gpu_per_host=2) as mesh:
            mesh_info = mesh.get_info()

            self.assertIsNotNone(mesh_info.mesh_labels)
            self.assertEqual(len(mesh_info.devices_labels), 2)

    def test_ivalue_problems(self) -> None:
        """Converting a CommandGroup holding a CallFunction must not crash."""
        with local_mesh(hosts=1, gpu_per_host=1):
            from typing import cast

            from monarch.common.messages import CallFunction, CommandGroup

            a = cast(monarch.Tensor, torch.rand(3, 4))
            result = monarch.Tensor(a._fake, a.mesh, a.stream)
            mul_tensor = monarch.common.function.ResolvableFunctionFromPath(
                "torch.ops.aten.mul.Tensor"
            )
            stream_ref = a.stream._to_ref(a.mesh.client)
            msg = CallFunction(
                0,
                result,
                (),
                mul_tensor,
                (2, a),
                {},
                stream_ref,
                a.mesh,
                [],
            )
            # Internally, this will call CallFunction(...).to_rust_message().
            # The 2 arg will be converted to an IValue tensor via rust + C++.
            # Then when the CommandGroup message gets converted to rust, it
            # will attempt to clone the rust CallFunction message, which will
            # attempt to clone the IValue tensor, which will cause a crash.
            # Upon attempting to clone the IValue tensor, our custom __torch_dispatch__
            # intercepts the following two calls:
            #   aten._to_copy.default () (2,) {'dtype': torch.float64, 'device': device(type='cpu')}
            #   aten.clone.default () (2,) {}

            with torch.utils._python_dispatch._disable_current_modes():
                CommandGroup([msg]).to_rust_message()
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/env python3
2
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ # All rights reserved.
4
+ #
5
+ # This source code is licensed under the BSD-style license found in the
6
+ # LICENSE file in the root directory of this source tree.
7
+
8
+ # pyre-strict
9
+
10
+ """
11
+ Integration test for signal_safe_block_on.
12
+
13
+ This test spawns a Python binary that calls a Rust function which sleeps indefinitely.
14
+ The test then sends SIGINT to the process and confirms that it exits properly,
15
+ verifying that signal_safe_block_on correctly handles signals.
16
+ """
17
+
18
+ import importlib.resources
19
+ import os
20
+ import signal
21
+ import subprocess
22
+ import time
23
+ import unittest
24
+
25
+ import pytest
26
+
27
+
28
+ # oss_skip: importlib not pulling resource correctly in git CI, needs to be revisited
29
class TestSignalSafeBlockOn(unittest.TestCase):
    """Integration test: a process blocked in Rust must still react to SIGINT."""

    # pyre-ignore[56]
    @pytest.mark.oss_skip
    def test_sigint_handling(self) -> None:
        """
        Test that a process using signal_safe_block_on can be interrupted with SIGINT.

        This test:
        1. Spawns a subprocess running sleep_binary.py
        2. Waits for it to start
        3. Sends SIGINT to the process
        4. Verifies that the process exits within a reasonable timeout

        To validate that it will behave in the same way as a Ctrl-C in the shell,
        we launch the process in its own process group and send the signal to the
        process group instead of the process itself.
        """
        test_bin = importlib.resources.files("monarch.python.tests").joinpath(
            "test_bin"
        )
        # Start the subprocess in a new session so that it leads its own
        # process group.
        process = subprocess.Popen(
            [str(test_bin)],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            start_new_session=True,
        )

        pgid = os.getpgid(process.pid)

        try:
            # Wait for the process to start and print its startup message.
            # A monotonic clock keeps the timeout immune to wall-clock jumps.
            # NOTE(review): readline() blocks until a line or EOF arrives, so a
            # child that hangs without printing anything is not bounded by
            # this timeout.
            start_time = time.monotonic()
            startup_timeout = 10  # seconds

            while time.monotonic() - start_time < startup_timeout:
                line = (process.stdout.readline() or "") if process.stdout else ""
                if "Starting sleep_binary" in line:
                    break
                # EOF on stdout plus a reaped exit status means the child died
                # before announcing itself; fail now instead of waiting out
                # the remaining startup timeout.
                if not line and process.poll() is not None:
                    self.fail(
                        "Subprocess exited during startup with code "
                        f"{process.returncode}"
                    )
                time.sleep(0.1)
            else:
                self.fail("Subprocess did not start properly within timeout")

            # Give the process a moment to enter the sleep_indefinitely_for_unit_tests function
            time.sleep(1)

            # Send SIGINT to the whole process group, as a terminal would.
            os.killpg(pgid, signal.SIGINT)

            # Wait for the process to exit with a timeout
            exit_timeout = 5  # seconds
            exit_time = time.monotonic()

            while time.monotonic() - exit_time < exit_timeout:
                if process.poll() is not None:
                    # Process has exited
                    break
                time.sleep(0.1)
            else:
                self.fail("Process did not exit after receiving SIGINT")

            # Check that the process exited with code 0 (clean exit)
            self.assertEqual(process.returncode, 0, "Process did not exit cleanly")

        finally:
            # Clean up in case the test fails. The child owns its process
            # group, so kill the group rather than only the leader.
            if process.poll() is None:
                try:
                    os.killpg(pgid, signal.SIGKILL)
                except ProcessLookupError:
                    # The group vanished between poll() and killpg().
                    pass
                process.wait()
            # Close our ends of the pipes so no file descriptors leak.
            for stream in (process.stdout, process.stderr):
                if stream is not None:
                    stream.close()
100
+
101
+
102
# Run the tests through unittest when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,54 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-unsafe
8
+
9
+ from contextlib import contextmanager
10
+ from typing import Generator, Optional
11
+ from unittest import TestCase
12
+
13
+ import pytest
14
+
15
+ import torch
16
+ from monarch import fetch_shard
17
+ from monarch.common.device_mesh import DeviceMesh
18
+ from monarch.sim_mesh import sim_mesh
19
+
20
+
21
@contextmanager
def local_sim_mesh(
    hosts: int = 1,
    # TODO: support multiple gpus in a mesh.
    gpu_per_host: int = 1,
    activate: bool = True,
    proxy_addr: Optional[str] = None,
) -> Generator[DeviceMesh, None, None]:
    """Yield the single device mesh created by ``sim_mesh(n_meshes=1, ...)``.

    When ``activate`` is true the mesh is activated for the duration of the
    caller's ``with`` body. After the body finishes normally ``dm.exit()`` is
    called; if an ``Exception`` propagates instead, ``dm.client._shutdown`` is
    set to ``True`` and the exception is re-raised.
    """
    meshes = sim_mesh(
        n_meshes=1,
        hosts=hosts,
        gpus_per_host=gpu_per_host,
        proxy_addr=proxy_addr,
    )
    dm = meshes[0]
    try:
        if not activate:
            yield dm
        else:
            with dm.activate():
                yield dm
        # Only reached when the caller's body completed without raising.
        dm.exit()
    except Exception:
        dm.client._shutdown = True
        raise
43
+
44
+
45
+ # oss_skip: importlib not pulling resource correctly in git CI, needs to be revisited
46
@pytest.mark.oss_skip
class TestSimBackend(TestCase):
    """Smoke test for the simulated mesh backend."""

    def test_local_mesh_setup(self):
        """Fetching a shard from the simulated mesh yields some result."""
        with local_sim_mesh():
            mesh_tensor = torch.zeros(3, 4)
            mesh_tensor.add_(1)
            pending = fetch_shard(mesh_tensor)
            fetched = pending.result()
            # Consider supporting a way to specify the return value in the
            # mock worker.
            assert fetched is not None
@@ -0,0 +1,52 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ import monarch
8
+ import pytest
9
+ import torch
10
+ from monarch.mesh_controller import spawn_tensor_engine
11
+ from monarch.proc_mesh import proc_mesh
12
+
13
+
14
# Reusable marker: skip the decorated test when fewer than two CUDA devices
# are reported by torch.
two_gpu = pytest.mark.skipif(
    torch.cuda.device_count() < 2,
    reason="Not enough GPUs, this test requires at least 2 GPUs",
)
18
+
19
+
20
@two_gpu
def test_tensor_engine() -> None:
    """Inspect a zero tensor on the spawned mesh and on its flattened view."""
    procs = proc_mesh(gpus=2).get()
    dm = spawn_tensor_engine(procs)

    with dm.activate():
        from_mesh = monarch.inspect(2 * torch.zeros(3, 4))

    flat = dm.flatten("all")
    with flat.activate():
        from_flat = monarch.inspect(2 * torch.zeros(3, 4), all=1)

    # Both inspected values are compared against a freshly built zero tensor.
    assert torch.allclose(torch.zeros(3, 4), from_mesh)
    assert torch.allclose(torch.zeros(3, 4), from_flat)

    dm.exit()
36
+
37
+
38
@two_gpu
def test_proc_mesh_tensor_engine() -> None:
    """Inspect a rank-derived tensor on a proc mesh and on a one-GPU slice of it."""
    pm = proc_mesh(gpus=2).get()
    with pm.activate():
        scaled_rank = 10 * pm.rank_tensor("gpus").cuda()
        at_gpu0 = monarch.inspect(scaled_rank, hosts=0, gpus=0)
        at_gpu1 = monarch.inspect(scaled_rank, hosts=0, gpus=1)

    one = pm.slice(gpus=1)
    with one.activate():
        moved = monarch.slice_mesh(scaled_rank, gpus=1).to_mesh(one)
        rescaled = monarch.inspect(moved * 10)

    assert at_gpu0 == 0
    assert at_gpu1 == 10
    assert rescaled == 100
@@ -0,0 +1,94 @@
1
+ Metadata-Version: 2.4
2
+ Name: torchmonarch-nightly
3
+ Version: 2025.6.27
4
+ Summary: Monarch: Single controller library
5
+ Author: Meta
6
+ Author-email: oncall+monarch@xmail.facebook.com
7
+ License: BSD-3-Clause
8
+ Requires-Python: >= 3.10
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: torch
12
+ Requires-Dist: pyzmq
13
+ Requires-Dist: requests
14
+ Requires-Dist: numpy
15
+ Requires-Dist: pyre-extensions
16
+ Requires-Dist: cloudpickle
17
+ Requires-Dist: torchx-nightly
18
+ Dynamic: author
19
+ Dynamic: author-email
20
+ Dynamic: description
21
+ Dynamic: description-content-type
22
+ Dynamic: license
23
+ Dynamic: license-file
24
+ Dynamic: requires-dist
25
+ Dynamic: requires-python
26
+ Dynamic: summary
27
+
28
+ # Monarch 🦋
29
+
30
+ **Monarch** is a distributed execution engine for PyTorch. Our overall goal is
31
+ to deliver the high-quality user experience that people get from single-GPU
32
+ PyTorch, but at cluster scale.
33
+
34
+ > ⚠️ **Early Development Warning** Monarch is currently in an experimental
35
+ > stage. You should expect bugs, incomplete features, and APIs that may change
36
+ > in future versions. The project welcomes bugfixes, but to make sure things are
37
+ > well coordinated you should discuss any significant change before starting the
38
+ > work. It's recommended that you signal your intention to contribute in the
39
+ > issue tracker, either by filing a new issue or by claiming an existing one.
40
+
41
+ Note: Monarch is currently only supported on Linux systems.
42
+
43
+ ## Installation
44
+
45
+ `pip install torchmonarch-nightly`
46
+
47
+ or manually
48
+
49
+ ```sh
50
+
51
+ # Create and activate the conda environment
52
+ conda create -n monarchenv python=3.10 -y
53
+ conda activate monarchenv
54
+
55
+ # Install nightly rust toolchain
56
+ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
57
+ rustup toolchain install nightly
58
+ rustup default nightly
59
+
60
+ # Install non-python dependencies
61
+ conda install libunwind -y
62
+
63
+ # Install the correct cuda and cuda-toolkit versions for your machine
64
+ sudo dnf install cuda-toolkit-12-0 cuda-12-0
65
+
66
+ # Install clang-dev and nccl-dev
67
+ sudo dnf install clang-devel libnccl-devel
68
+ # Or, in some environments, the following may be necessary instead
69
+ conda install -c conda-forge clangdev nccl
70
+ conda update -n monarchenv --all -c conda-forge -y
71
+
72
+ # Install build dependencies
73
+ pip install -r build-requirements.txt
74
+ # Install test dependencies
75
+ pip install -r python/tests/requirements.txt
76
+
77
+ # Build and install Monarch
78
+ pip install --no-build-isolation .
79
+ # or setup for development
80
+ pip install --no-build-isolation -e .
81
+
82
+ # Run unit tests. consider -s for more verbose output
83
+ pytest python/tests/ -v -m "not oss_skip"
84
+ ```
85
+
86
+ ## Running examples
87
+
88
+ Check out the `examples/` directory for demonstrations of how to use Monarch's APIs.
89
+
90
+ We'll be adding more examples as we stabilize and polish functionality!
91
+
92
+ ## License
93
+
94
+ Monarch is BSD-3 licensed, as found in the [LICENSE](LICENSE) file.