torchmonarch-nightly 2025.6.4__cp310-cp310-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monarch/__init__.py +189 -0
- monarch/_monarch/__init__.py +5 -0
- monarch/_monarch/hyperactor/__init__.py +74 -0
- monarch/_monarch/selection/__init__.py +13 -0
- monarch/_monarch/worker/__init__.py +0 -0
- monarch/_monarch/worker/debugger.py +117 -0
- monarch/_monarch/worker/logging.py +107 -0
- monarch/_rust_bindings.so +0 -0
- monarch/_testing.py +198 -0
- monarch/actor_mesh.py +692 -0
- monarch/allocator.py +62 -0
- monarch/bootstrap_main.py +75 -0
- monarch/builtins/__init__.py +14 -0
- monarch/builtins/log.py +22 -0
- monarch/builtins/random.py +69 -0
- monarch/cached_remote_function.py +257 -0
- monarch/common/_C.pyi +11 -0
- monarch/common/_C.so +0 -0
- monarch/common/__init__.py +0 -0
- monarch/common/_coalescing.py +308 -0
- monarch/common/_device_utils.py +18 -0
- monarch/common/_tensor_to_table.py +172 -0
- monarch/common/base_tensor.py +28 -0
- monarch/common/borrows.py +143 -0
- monarch/common/client.py +646 -0
- monarch/common/constants.py +10 -0
- monarch/common/context_manager.py +40 -0
- monarch/common/controller_api.py +104 -0
- monarch/common/device_mesh.py +443 -0
- monarch/common/fake.py +55 -0
- monarch/common/function.py +160 -0
- monarch/common/function_caching.py +164 -0
- monarch/common/future.py +168 -0
- monarch/common/invocation.py +125 -0
- monarch/common/mast.py +221 -0
- monarch/common/messages.py +572 -0
- monarch/common/mock_cuda.py +41 -0
- monarch/common/opaque_ref.py +98 -0
- monarch/common/pickle_flatten.py +48 -0
- monarch/common/pipe.py +152 -0
- monarch/common/process_group.py +55 -0
- monarch/common/recording.py +127 -0
- monarch/common/reference.py +33 -0
- monarch/common/remote.py +304 -0
- monarch/common/selection.py +9 -0
- monarch/common/shape.py +204 -0
- monarch/common/stream.py +111 -0
- monarch/common/tensor.py +793 -0
- monarch/common/tensor_factory.py +31 -0
- monarch/common/tree.py +73 -0
- monarch/controller/__init__.py +7 -0
- monarch/controller/backend.py +223 -0
- monarch/controller/controller.py +223 -0
- monarch/controller/debugger.py +47 -0
- monarch/controller/history.py +90 -0
- monarch/controller/rust_backend/__init__.py +7 -0
- monarch/controller/rust_backend/controller.py +245 -0
- monarch/fetch.py +55 -0
- monarch/future.py +25 -0
- monarch/gradient/__init__.py +11 -0
- monarch/gradient/_gradient_generator.pyi +22 -0
- monarch/gradient/_gradient_generator.so +0 -0
- monarch/gradient_generator.py +185 -0
- monarch/memory.py +43 -0
- monarch/monarch_controller +0 -0
- monarch/notebook.py +761 -0
- monarch/opaque_module.py +235 -0
- monarch/opaque_object.py +88 -0
- monarch/parallel/__init__.py +9 -0
- monarch/parallel/pipelining/__init__.py +7 -0
- monarch/parallel/pipelining/runtime.py +847 -0
- monarch/parallel/pipelining/schedule_ir.py +692 -0
- monarch/parallel/pipelining/scheduler.py +249 -0
- monarch/proc_mesh.py +188 -0
- monarch/profiler.py +160 -0
- monarch/python_local_mesh.py +107 -0
- monarch/random.py +61 -0
- monarch/rdma.py +190 -0
- monarch/remote_class.py +114 -0
- monarch/rust_backend_mesh.py +280 -0
- monarch/rust_local_mesh.py +1402 -0
- monarch/sim_mesh.py +357 -0
- monarch/simulator/__init__.py +7 -0
- monarch/simulator/command_history.py +424 -0
- monarch/simulator/config.py +21 -0
- monarch/simulator/interface.py +59 -0
- monarch/simulator/ir.py +770 -0
- monarch/simulator/mock_controller.py +214 -0
- monarch/simulator/profiling.py +424 -0
- monarch/simulator/simulator.py +1052 -0
- monarch/simulator/task.py +255 -0
- monarch/simulator/tensor.py +373 -0
- monarch/simulator/trace.py +395 -0
- monarch/simulator/utils.py +41 -0
- monarch/simulator/worker.py +389 -0
- monarch/tensor_worker_main.py +260 -0
- monarch/tensorboard.py +84 -0
- monarch/timer/__init__.py +21 -0
- monarch/timer/example_monarch.py +78 -0
- monarch/timer/example_spmd.py +55 -0
- monarch/timer/execution_timer.py +199 -0
- monarch/timer/execution_timer_test.py +131 -0
- monarch/tools/__init__.py +7 -0
- monarch/tools/cli.py +167 -0
- monarch/tools/commands.py +189 -0
- monarch/tools/components/__init__.py +7 -0
- monarch/tools/components/hyperactor.py +57 -0
- monarch/tools/config/__init__.py +20 -0
- monarch/tools/config/defaults.py +54 -0
- monarch/tools/mesh_spec.py +121 -0
- monarch/worker/__init__.py +7 -0
- monarch/worker/_testing_function.py +481 -0
- monarch/worker/compiled_block.py +270 -0
- monarch/worker/debugger.py +125 -0
- monarch/worker/lines.py +47 -0
- monarch/worker/monitor.py +53 -0
- monarch/worker/worker.py +1191 -0
- monarch/world_mesh.py +34 -0
- monarch_supervisor/__init__.py +1044 -0
- monarch_supervisor/_testing.py +44 -0
- monarch_supervisor/function_call.py +30 -0
- monarch_supervisor/host.py +386 -0
- monarch_supervisor/launchers.py +145 -0
- monarch_supervisor/log_pstree.py +48 -0
- monarch_supervisor/logging.py +103 -0
- monarch_supervisor/python_executable.py +42 -0
- tests/__init__.py +0 -0
- tests/dispatch_bench.py +124 -0
- tests/dispatch_bench_helper.py +25 -0
- tests/error_test_binary.py +139 -0
- tests/simulator/__init__.py +0 -0
- tests/simulator/test_profiling.py +136 -0
- tests/simulator/test_simulator.py +411 -0
- tests/simulator/test_task.py +64 -0
- tests/simulator/test_worker.py +102 -0
- tests/sleep_binary.py +35 -0
- tests/test_actor_error.py +112 -0
- tests/test_alloc.py +25 -0
- tests/test_coalescing.py +492 -0
- tests/test_controller.py +835 -0
- tests/test_device_mesh.py +132 -0
- tests/test_fault_tolerance.py +398 -0
- tests/test_future.py +94 -0
- tests/test_grad_generator.py +121 -0
- tests/test_mock_cuda.py +74 -0
- tests/test_pdb_actor.py +110 -0
- tests/test_python_actors.py +372 -0
- tests/test_remote_functions.py +1271 -0
- tests/test_rust_backend.py +182 -0
- tests/test_signal_safe_block_on.py +103 -0
- tests/test_sim_backend.py +54 -0
- torchmonarch_nightly-2025.6.4.dist-info/METADATA +94 -0
- torchmonarch_nightly-2025.6.4.dist-info/RECORD +157 -0
- torchmonarch_nightly-2025.6.4.dist-info/WHEEL +5 -0
- torchmonarch_nightly-2025.6.4.dist-info/entry_points.txt +3 -0
- torchmonarch_nightly-2025.6.4.dist-info/licenses/LICENSE +29 -0
- torchmonarch_nightly-2025.6.4.dist-info/top_level.txt +3 -0
@@ -0,0 +1,182 @@
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the BSD-style license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
# pyre-unsafe
|
8
|
+
|
9
|
+
from contextlib import contextmanager
|
10
|
+
from typing import Generator
|
11
|
+
from unittest import TestCase
|
12
|
+
|
13
|
+
import monarch
|
14
|
+
|
15
|
+
import pytest
|
16
|
+
import torch
|
17
|
+
from monarch import fetch_shard, no_mesh, remote, Stream
|
18
|
+
from monarch.common.device_mesh import DeviceMesh
|
19
|
+
from monarch.rust_local_mesh import local_meshes, LoggingLocation, SocketType
|
20
|
+
from torch.nn.attention import sdpa_kernel, SDPBackend
|
21
|
+
from torch.nn.functional import scaled_dot_product_attention
|
22
|
+
|
23
|
+
|
24
|
+
def simple_all_reduce(*args, **kwargs):
    """Return a tensor of ones shaped like the first positional argument.

    Only ``args[0].shape`` is read; every other positional and keyword
    argument is accepted and ignored.
    """
    first = args[0]
    return torch.ones(first.shape)
|
26
|
+
|
27
|
+
|
28
|
+
# Rebind the name to a remote function that targets
# monarch.worker._testing_function.simple_all_reduce_local. The plain Python
# function defined just above under the same name is handed over as the
# `propagate` argument, so this statement must stay after that definition.
simple_all_reduce = remote(
    "monarch.worker._testing_function.simple_all_reduce_local",
    propagate=simple_all_reduce,
)
|
32
|
+
|
33
|
+
|
34
|
+
@contextmanager
def local_mesh(
    hosts: int = 1, gpu_per_host: int = 2, activate: bool = True
) -> Generator[DeviceMesh, None, None]:
    """Yield a device mesh created by ``monarch.rust_local_mesh.local_mesh``.

    Args:
        hosts: Forwarded as ``hosts``.
        gpu_per_host: Forwarded as ``gpus_per_host``.
        activate: When true, the mesh is activated around the ``yield`` so
            the caller's body runs with it active; otherwise the mesh is
            yielded without being activated.

    The mesh is always created with a UNIX socket type and the default
    logging location. After the caller's body finishes normally, ``exit()``
    is called on the mesh. If an ``Exception`` escapes the body or
    ``exit()``, ``client._shutdown`` is set to ``True`` on the mesh before
    the exception is re-raised.
    """
    mesh_cm = monarch.rust_local_mesh.local_mesh(
        hosts=hosts,
        gpus_per_host=gpu_per_host,
        socket_type=SocketType.UNIX,
        logging_location=LoggingLocation.DEFAULT,
    )
    with mesh_cm as mesh:
        try:
            if not activate:
                yield mesh
            else:
                with mesh.activate():
                    yield mesh
            mesh.exit()
        except Exception:
            # Flag the client as shut down before propagating the failure.
            mesh.client._shutdown = True
            raise
|
54
|
+
|
55
|
+
|
56
|
+
# Set global timeout--sandcastle's timeout is 600s. A test that sandcastle times
|
57
|
+
# out is not counted as a failure, so we set a more restrictive timeout to
|
58
|
+
# ensure we see a hard failure in CI.
|
59
|
+
@pytest.mark.timeout(120)
@pytest.mark.skipif(
    torch.cuda.device_count() < 2,
    reason="Not enough GPUs, this test requires at least 2 GPUs",
)
class TestRustBackend(TestCase):
    """Tests that drive meshes created through ``monarch.rust_local_mesh``.

    The whole class is skipped when fewer than two CUDA devices are visible
    and every test is subject to a 120 second timeout.
    """

    def test_local_mesh_setup(self):
        """Fetch a tensor built in an active mesh and compare it locally."""
        with local_mesh():
            t = torch.zeros(3, 4)
            t.add_(1)
            fut = fetch_shard(t)

            # Resolve the future while no mesh is active.
            with no_mesh.activate():
                local_t = fut.result()
        assert torch.equal(local_t, torch.ones(3, 4))

    def test_result_in_mesh(self):
        """Call ``result()`` on a fetch future while the mesh is still active."""
        with local_mesh():
            t = torch.ones(3, 4)
            t.add_(-1)
            # Assert calling result() is fine within an active mesh.
            local_t = fetch_shard(t).result()
        assert torch.equal(local_t, torch.zeros(3, 4))

    def test_errors(self):
        """Check the TypeError messages raised for three kinds of misuse.

        The expected messages contain ``LOCAL_TENSOR``, ``WRONG_MESH`` and
        ``WRONG_STREAM`` respectively.
        """
        # Created before any mesh is entered.
        t = torch.rand(3, 4)
        with local_mesh(2, 2) as dm:
            y = torch.rand(3, 4)
            with pytest.raises(TypeError, match="LOCAL_TENSOR"):
                t.add(y)
            with pytest.raises(TypeError, match="WRONG_MESH"):
                # `x` is created with the host=0 sub-mesh active, `y` was
                # created with the full mesh active.
                sub_mesh = dm(host=0)
                with sub_mesh.activate():
                    x = torch.rand(3, 4)
                    x.add(y)
            other = Stream("other")
            t = torch.rand(10).cuda()
            with pytest.raises(TypeError, match="WRONG_STREAM"):
                # `t` was created before the "other" stream was activated.
                with other.activate():
                    t = t.reduce("host", "sum")

    def test_multi_hosts(self):
        """Fetch shards by host/gpu coordinates on a 2-host, 2-GPU mesh.

        Two fetches of the same coordinates must be equal; a fetch from
        different coordinates must differ.
        """
        with local_mesh(hosts=2, gpu_per_host=2):
            t = torch.rand(3, 4).cuda()
            local_t1 = fetch_shard(t, {"host": 1, "gpu": 0}).result()
            local_t2 = fetch_shard(t, {"host": 1, "gpu": 0}).result()
            local_t3 = fetch_shard(t, {"host": 0, "gpu": 1}).result()
        assert torch.equal(local_t1, local_t2)
        assert not torch.equal(local_t1, local_t3)

    def test_fetch_preprocess(self):
        """Call a remote function on a shard and compare the fetched string."""
        with local_mesh():
            assert (
                "an argument processed"
                == remote("monarch.worker._testing_function.do_some_processing")
                .call_on_shard_and_fetch("an argument")
                .result()
            )

    def test_brutal_shutdown(self):
        """Exit and deactivate a mesh explicitly inside its own ``with`` block."""
        with monarch.rust_local_mesh.local_mesh(
            hosts=1, gpus_per_host=1, socket_type=SocketType.UNIX
        ) as dm:
            dm.exit()
            dm.deactivate()

    def test_results_filtering(self):
        """Fetch the output of flash-attention SDPA on 1x1x1x1 fp16 inputs."""
        with local_mesh(gpu_per_host=1):
            query = torch.rand(1, 1, 1, 1, dtype=torch.float16, device="cuda")
            key = torch.rand(1, 1, 1, 1, dtype=torch.float16, device="cuda")
            value = torch.rand(1, 1, 1, 1, dtype=torch.float16, device="cuda")
            with sdpa_kernel(backends=[SDPBackend.FLASH_ATTENTION]):
                # This function will send 9 results. Only 5 of them will be set.
                t = scaled_dot_product_attention(query, key, value)
                fut = fetch_shard(t)
                local_tensor = fut.result()
        assert len(local_tensor) == 1

    def test_live_function(self):
        """Wrap a function defined in the test with ``remote`` and fetch its result."""
        with local_mesh():

            @remote
            def has_nan(t):
                return torch.isnan(t).any().item()

            t = torch.rand(3, 4)
            res = has_nan.call_on_shard_and_fetch(
                t, shard={"host": 0, "gpu": 0}
            ).result()

        self.assertFalse(res)

    def test_multiple_global_meshes(self):
        """
        This test is to validate we can have a single client process
        connecting to multiple global meshes. The global meshes are distinct
        from each other to provide native failure domain isolation.
        """
        replicas = 4
        with local_meshes(
            meshes=replicas,
            hosts_per_mesh=1,
            gpus_per_host=1,
            socket_type=SocketType.UNIX,
            logging_location=LoggingLocation.DEFAULT,
        ) as groups:
            results = []
            # Each mesh produces a ones tensor of a different length so the
            # fetched results can be told apart below.
            for i, group in enumerate(groups):
                with group.activate():
                    t = torch.ones(i + 1)
                    results.append(fetch_shard(t).result())
            for i in range(replicas):
                assert torch.equal(results[i], torch.ones(i + 1))

            for group in groups:
                group.exit()
                group.deactivate()

    def test_get_world_status(self) -> None:
        """Check the labels reported by ``get_info()`` on a two-GPU mesh."""
        with local_mesh(gpu_per_host=2) as mesh:
            mesh_info = mesh.get_info()

            self.assertIsNotNone(mesh_info.mesh_labels)
            self.assertEqual(len(mesh_info.devices_labels), 2)
|
@@ -0,0 +1,103 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
|
+
# All rights reserved.
|
4
|
+
#
|
5
|
+
# This source code is licensed under the BSD-style license found in the
|
6
|
+
# LICENSE file in the root directory of this source tree.
|
7
|
+
|
8
|
+
# pyre-strict
|
9
|
+
|
10
|
+
"""
|
11
|
+
Integration test for signal_safe_block_on.
|
12
|
+
|
13
|
+
This test spawns a Python binary that calls a Rust function which sleeps indefinitely.
|
14
|
+
The test then sends SIGINT to the process and confirms that it exits properly,
|
15
|
+
verifying that signal_safe_block_on correctly handles signals.
|
16
|
+
"""
|
17
|
+
|
18
|
+
import importlib.resources
|
19
|
+
import os
|
20
|
+
import signal
|
21
|
+
import subprocess
|
22
|
+
import time
|
23
|
+
import unittest
|
24
|
+
|
25
|
+
import pytest
|
26
|
+
|
27
|
+
|
28
|
+
# oss_skip: importlib not pulling resource correctly in git CI, needs to be revisited
|
29
|
+
class TestSignalSafeBlockOn(unittest.TestCase):
    """Integration test that SIGINT terminates a process in signal_safe_block_on."""

    # pyre-ignore[56]
    @pytest.mark.oss_skip
    def test_sigint_handling(self) -> None:
        """
        Test that a process using signal_safe_block_on can be interrupted with SIGINT.

        This test:
        1. Spawns a subprocess running sleep_binary.py
        2. Waits for it to start
        3. Sends SIGINT to the process
        4. Verifies that the process exits within a reasonable timeout

        To validate that it will behave in the same way as a ctrl-c in the shell,
        we launch the process in its own process group and send the signal to the
        process group instead of the process itself.
        """
        test_bin = importlib.resources.files("monarch.python.tests").joinpath(
            "test_bin"
        )
        # Start the subprocess
        process = subprocess.Popen(
            [str(test_bin)],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            start_new_session=True,
        )

        pgid = os.getpgid(process.pid)

        try:
            # Wait for the process to start and print its startup message.
            # A monotonic clock keeps the deadline immune to wall-clock jumps.
            startup_timeout = 10  # seconds
            startup_deadline = time.monotonic() + startup_timeout

            while time.monotonic() < startup_deadline:
                if process.stdout and "Starting sleep_binary" in (
                    process.stdout.readline() or ""
                ):
                    break
                time.sleep(0.1)
            else:
                self.fail("Subprocess did not start properly within timeout")

            # Give the process a moment to enter the sleep_indefinitely_for_unit_tests function
            time.sleep(1)

            # Send SIGINT to the whole process group, as a terminal would
            os.killpg(pgid, signal.SIGINT)

            # Wait for the process to exit with a timeout
            exit_timeout = 5  # seconds
            try:
                process.wait(timeout=exit_timeout)
            except subprocess.TimeoutExpired:
                self.fail("Process did not exit after receiving SIGINT")

            # Check that the process exited with code 0 (clean exit)
            self.assertEqual(process.returncode, 0, "Process did not exit cleanly")

        finally:
            # Clean up in case the test fails. While the leader has not been
            # reaped its pgid is still valid, so kill the whole group rather
            # than only the leader to avoid orphaning anything it spawned.
            if process.poll() is None:
                try:
                    os.killpg(pgid, signal.SIGKILL)
                except ProcessLookupError:
                    # The group disappeared between poll() and killpg().
                    pass
                process.wait()
            # Release the pipe file descriptors opened by Popen.
            for pipe in (process.stdout, process.stderr):
                if pipe is not None:
                    pipe.close()
|
100
|
+
|
101
|
+
|
102
|
+
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the BSD-style license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
# pyre-unsafe
|
8
|
+
|
9
|
+
from contextlib import contextmanager
|
10
|
+
from typing import Generator, Optional
|
11
|
+
from unittest import TestCase
|
12
|
+
|
13
|
+
import pytest
|
14
|
+
|
15
|
+
import torch
|
16
|
+
from monarch import fetch_shard
|
17
|
+
from monarch.common.device_mesh import DeviceMesh
|
18
|
+
from monarch.sim_mesh import sim_mesh
|
19
|
+
|
20
|
+
|
21
|
+
@contextmanager
def local_sim_mesh(
    hosts: int = 1,
    # TODO: support multiple gpus in a mesh.
    gpu_per_host: int = 1,
    activate: bool = True,
    proxy_addr: Optional[str] = None,
) -> Generator[DeviceMesh, None, None]:
    """Yield the first mesh returned by ``sim_mesh`` for a single-mesh setup.

    Args:
        hosts: Forwarded as ``hosts``.
        gpu_per_host: Forwarded as ``gpus_per_host``.
        activate: When true, the mesh is activated around the ``yield``;
            otherwise it is yielded without being activated.
        proxy_addr: Forwarded unchanged as ``proxy_addr``.

    After the caller's body finishes normally, ``exit()`` is called on the
    mesh. If an ``Exception`` escapes the body or ``exit()``,
    ``client._shutdown`` is set to ``True`` on the mesh before re-raising.
    """
    mesh = sim_mesh(
        n_meshes=1, hosts=hosts, gpus_per_host=gpu_per_host, proxy_addr=proxy_addr
    )[0]
    try:
        if not activate:
            yield mesh
        else:
            with mesh.activate():
                yield mesh
        mesh.exit()
    except Exception:
        # Flag the client as shut down before propagating the failure.
        mesh.client._shutdown = True
        raise
|
43
|
+
|
44
|
+
|
45
|
+
# oss_skip: importlib not pulling resource correctly in git CI, needs to be revisited
|
46
|
+
@pytest.mark.oss_skip
class TestSimBackend(TestCase):
    """Smoke test for meshes produced by ``local_sim_mesh``."""

    def test_local_mesh_setup(self):
        """Build a tensor in an active sim mesh and fetch a shard of it."""
        with local_sim_mesh():
            tensor = torch.zeros(3, 4)
            tensor.add_(1)
            fetched = fetch_shard(tensor).result()
        # consider support specifying the return value in the mock worker.
        assert fetched is not None
|
@@ -0,0 +1,94 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: torchmonarch-nightly
|
3
|
+
Version: 2025.6.4
|
4
|
+
Summary: Monarch: Single controller library
|
5
|
+
Author: Meta
|
6
|
+
Author-email: oncall+monarch@xmail.facebook.com
|
7
|
+
License: BSD-3-Clause
|
8
|
+
Requires-Python: >= 3.10
|
9
|
+
Description-Content-Type: text/markdown
|
10
|
+
License-File: LICENSE
|
11
|
+
Requires-Dist: torch
|
12
|
+
Requires-Dist: pyzmq
|
13
|
+
Requires-Dist: requests
|
14
|
+
Requires-Dist: numpy
|
15
|
+
Requires-Dist: pyre-extensions
|
16
|
+
Requires-Dist: cloudpickle
|
17
|
+
Requires-Dist: torchx-nightly
|
18
|
+
Dynamic: author
|
19
|
+
Dynamic: author-email
|
20
|
+
Dynamic: description
|
21
|
+
Dynamic: description-content-type
|
22
|
+
Dynamic: license
|
23
|
+
Dynamic: license-file
|
24
|
+
Dynamic: requires-dist
|
25
|
+
Dynamic: requires-python
|
26
|
+
Dynamic: summary
|
27
|
+
|
28
|
+
# Monarch 🦋
|
29
|
+
|
30
|
+
**Monarch** is a distributed execution engine for PyTorch. Our overall goal is
|
31
|
+
to deliver the high-quality user experience that people get from single-GPU
|
32
|
+
PyTorch, but at cluster scale.
|
33
|
+
|
34
|
+
> ⚠️ **Early Development Warning** Monarch is currently in an experimental
|
35
|
+
> stage. You should expect bugs, incomplete features, and APIs that may change
|
36
|
+
> in future versions. The project welcomes bugfixes, but to make sure things are
|
37
|
+
> well coordinated you should discuss any significant change before starting the
|
38
|
+
> work. It's recommended that you signal your intention to contribute in the
|
39
|
+
> issue tracker, either by filing a new issue or by claiming an existing one.
|
40
|
+
|
41
|
+
Note: Monarch is currently only supported on Linux systems
|
42
|
+
|
43
|
+
## Installation
|
44
|
+
|
45
|
+
`pip install torchmonarch`
|
46
|
+
|
47
|
+
or manually
|
48
|
+
|
49
|
+
```sh
|
50
|
+
|
51
|
+
# Create and activate the conda environment
|
52
|
+
conda create -n monarchenv python=3.10 -y
|
53
|
+
conda activate monarchenv
|
54
|
+
|
55
|
+
# Install nightly rust toolchain
|
56
|
+
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
57
|
+
rustup toolchain install nightly
|
58
|
+
rustup default nightly
|
59
|
+
|
60
|
+
# Install non-python dependencies
|
61
|
+
conda install libunwind -y
|
62
|
+
|
63
|
+
# Install the correct cuda and cuda-toolkit versions for your machine, as well as NCCL-dev
|
64
|
+
sudo dnf install cuda-toolkit-12-0 cuda-12-0 libnccl-devel
|
65
|
+
|
66
|
+
# Install clang dev
|
67
|
+
sudo dnf install clang-devel
|
68
|
+
# In some environments, the following may be necessary instead
|
69
|
+
conda install conda-forge::clangdev
|
70
|
+
conda update -n monarchenv --all -c conda-forge -y
|
71
|
+
|
72
|
+
# Install build dependencies
|
73
|
+
pip install -r build-requirements.txt
|
74
|
+
# Install test dependencies
|
75
|
+
pip install -r python/tests/requirements.txt
|
76
|
+
|
77
|
+
# Build and install Monarch
|
78
|
+
pip install --no-build-isolation .
|
79
|
+
# or setup for development
|
80
|
+
pip install --no-build-isolation -e .
|
81
|
+
|
82
|
+
# Run unit tests. Consider -s for more verbose output
|
83
|
+
pytest python/tests/ -v -m "not oss_skip"
|
84
|
+
```
|
85
|
+
|
86
|
+
## Running examples
|
87
|
+
|
88
|
+
Check out the `examples/` directory for demonstrations of how to use Monarch's APIs.
|
89
|
+
|
90
|
+
We'll be adding more examples as we stabilize and polish functionality!
|
91
|
+
|
92
|
+
## License
|
93
|
+
|
94
|
+
Monarch is BSD-3 licensed, as found in the [LICENSE](LICENSE) file.
|
@@ -0,0 +1,157 @@
|
|
1
|
+
monarch/__init__.py,sha256=iUvWHc0-7Q2tovRoRxOIiA3TsefMXCbWl-jEfQ2djew,6897
|
2
|
+
monarch/_rust_bindings.so,sha256=1o-iDDjlX_h91VItvO3P0iWkIiU2eIdbJ9VZXBpypDw,39087712
|
3
|
+
monarch/_testing.py,sha256=MN8DK1e-wzV0-R_nFW1b_7-O5oKfWvZ12BMGD4Z7PQk,6755
|
4
|
+
monarch/actor_mesh.py,sha256=kfky0QPtji5yDaZ_vVIbWncL7M_parqoxzECvZNc2_c,22301
|
5
|
+
monarch/allocator.py,sha256=_2DKFP9pSD33zDgH7xZJC8Tq7BQrCeQEUmMB7_xCT0Y,1784
|
6
|
+
monarch/bootstrap_main.py,sha256=_LgEvfI_kFHj2QWH8CLRBQI1tbxS0uWrnHqwzOVbjeI,2417
|
7
|
+
monarch/cached_remote_function.py,sha256=kYdB6r4OHx_T_uX4q3tCNcp1t2DJwF8tPTIahUiT2pU,8785
|
8
|
+
monarch/fetch.py,sha256=61jxo7sx4QNUTkc0_rF5NaJROen4tKbAaiIjrXWLOvg,1705
|
9
|
+
monarch/future.py,sha256=lcdFEe7m1shYPPuvZ1RkS6JUIChEKGBWe3v7x_nu4Hg,731
|
10
|
+
monarch/gradient_generator.py,sha256=Rl3dmXGceTdCc1mYBg2JciR88ywGPnW7TVkL86KwqEA,6366
|
11
|
+
monarch/memory.py,sha256=ol86dBhFAJqg78iF25-BuK0wuwj1onR8FIioZ_B0gjw,1377
|
12
|
+
monarch/monarch_controller,sha256=LXuIyuHLGVSrAdwcDbe8Kw99u8DWerRlvSst4i-O1_g,20689048
|
13
|
+
monarch/notebook.py,sha256=zu9MKDFKf1-rCM2TqFSRJjMBeiWuKcJSyUFLvoZRQzs,25949
|
14
|
+
monarch/opaque_module.py,sha256=oajOu_WD1hD4hxE8HDdO-tvWY7KDHWd7VaAhJEa5L2I,10446
|
15
|
+
monarch/opaque_object.py,sha256=IVpll4pyuKZMo_EnPh4s0qnx8RlAcJrJ1yoLX6E75wQ,2782
|
16
|
+
monarch/proc_mesh.py,sha256=sTMmwQLKqM0h-yY0mn8uSzOb9B_MX9DKWCI9EsyfD6s,6384
|
17
|
+
monarch/profiler.py,sha256=TQ9fnVM8H7smBWtYdB_6Irtzz8DBOmcp7U1T3wlUmco,4911
|
18
|
+
monarch/python_local_mesh.py,sha256=YsureIzR9uGlNVrKd4vRghxOXBeYabkt9lICRErfRAI,3536
|
19
|
+
monarch/random.py,sha256=f9QR7Esu4Vxqxs-KCf5QYyVqlWvXJ3-UtG90L_h4j40,1527
|
20
|
+
monarch/rdma.py,sha256=eWwYKurW-Y6j68m0xH8jeyE3bfmSgB5ZwM2j-RmbCHc,6397
|
21
|
+
monarch/remote_class.py,sha256=-OAowzU1aDP6i4ik_SjXntVUC9h4dqAzgqwohkQ6Grc,4167
|
22
|
+
monarch/rust_backend_mesh.py,sha256=1htC62of4MgFtkezWGlsxSFtKJdc0CIeqeSuOx7yu3M,9944
|
23
|
+
monarch/rust_local_mesh.py,sha256=7ASptybn3wy4J7eoBc7LhGW4j4AA6bigl5Kuhyflw8s,47405
|
24
|
+
monarch/sim_mesh.py,sha256=pJ4DDn35Y7CobsIwbkUefBbnEHaAc7Ro_7YQdNaP2Dg,12171
|
25
|
+
monarch/tensor_worker_main.py,sha256=Nbarl2sJKIddLeaRFsaUnqOerLHjzggUr9SqCr2_GYI,8300
|
26
|
+
monarch/tensorboard.py,sha256=MnLgH5lbqeUJauEuirEgR6L_qYl2NGdtwZOWIAuOZao,2587
|
27
|
+
monarch/world_mesh.py,sha256=GqZpFoVNJPxYa70rLYgv0vu8Vg1nXqx_GYERRb1E9Pc,975
|
28
|
+
monarch/_monarch/__init__.py,sha256=Md3cCHD7Ano9kV15PqGbicgUO-RMdh4aVy1yKiDt_xE,208
|
29
|
+
monarch/_monarch/hyperactor/__init__.py,sha256=H-9w80ejck1lBVfpqOLikT-mPLMLpi7ZZfqrmprMxL0,1748
|
30
|
+
monarch/_monarch/selection/__init__.py,sha256=47arOElvlK0uYcTNrd__1BwXSfsMosnVw4_tgu2hA-I,381
|
31
|
+
monarch/_monarch/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
32
|
+
monarch/_monarch/worker/debugger.py,sha256=JJZwRPTgQO2emz-hrMelkOSxJFIR3dV4ZA6e7ftYUKA,3614
|
33
|
+
monarch/_monarch/worker/logging.py,sha256=nJUkIuKhPqRZaNDOT7MVbFFjcITZQf_CiFRLFKJJqsw,3591
|
34
|
+
monarch/builtins/__init__.py,sha256=QcfnHZGbc2qktBg7DyZt2ruE6VahnIt4S8lEZLHdJqU,443
|
35
|
+
monarch/builtins/log.py,sha256=H1QkuVzwxyi36Zyv-XR0VN0QsNimBWwxE1__fjs0_2o,554
|
36
|
+
monarch/builtins/random.py,sha256=xVt0cJBRBhCOH1Eioy8O511rp7HKFSCVXRwjBy02K5I,1798
|
37
|
+
monarch/common/_C.pyi,sha256=kHY2G3ksMAjQJ6IcPb4F1bBh5knzw5RVVNhhBlEmwFU,314
|
38
|
+
monarch/common/_C.so,sha256=gVDCDUQSKiPHwLPIpyxcRgiv8uF_quH1LpgI5Lhle9Y,715600
|
39
|
+
monarch/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
40
|
+
monarch/common/_coalescing.py,sha256=HXf5cXAPSU_tpw9jFkzs2muytG_6sTZJSqSKV0XuFZE,10925
|
41
|
+
monarch/common/_device_utils.py,sha256=gBpl23wMjppVAEzzj8U9HyX-B7Bs2_3ftiMAkzUS4j4,577
|
42
|
+
monarch/common/_tensor_to_table.py,sha256=yRjCNwvtl188Z1Dwkx3ZU-Bh2mwYnQ0Lnue2RAztwvc,5753
|
43
|
+
monarch/common/base_tensor.py,sha256=ujRzR6lWaeCdPv2JX0vCR-VsCWn-3SHaJIkZH1Sw9FQ,1159
|
44
|
+
monarch/common/borrows.py,sha256=7KR62xoUat1T6FyADsdHsxVAVIJDvfJWUnPO-xx277U,5307
|
45
|
+
monarch/common/client.py,sha256=wOAnoaLmabrcv7mK_z_HVnk_ivGe5igPy3iWZI4LVZc,24517
|
46
|
+
monarch/common/constants.py,sha256=ohvsVYMpfeWopv3KXDAeHWDFLukwc-OY37VRxpKNBE8,300
|
47
|
+
monarch/common/context_manager.py,sha256=GOeyaFbyCqvQmkJ0oI7q6IxRd8_0mVyYKZRccI8iaug,1067
|
48
|
+
monarch/common/controller_api.py,sha256=djGkK5aSd-V6pBkr3uBCXbfJv3OKf2o2VbBXJgFF2WI,3202
|
49
|
+
monarch/common/device_mesh.py,sha256=PyVONLa0EDOzVobU-PK-mGAQyj1Dyo9dr__lDmx2uKY,13144
|
50
|
+
monarch/common/fake.py,sha256=h57Cggz2qXNqImZ7yPuOZOSe9-l9i553ki1z-YHlgQA,1801
|
51
|
+
monarch/common/function.py,sha256=V8kdgSRTvild2SpcewWa5IETX3QiWDZQ2BEIDFa5zz8,4374
|
52
|
+
monarch/common/function_caching.py,sha256=HVdbWtv6Eea7ENMWi8iv36w1G1TaVuUJhkUX_JxGx5A,5060
|
53
|
+
monarch/common/future.py,sha256=D1UJ_8Rvb8-VG9vNE-z7xz2m2otMd2HgB0rnA02nlvA,4681
|
54
|
+
monarch/common/invocation.py,sha256=L4mSmzqlHMxo1Tb71hBU_M8aBZCRCOcb6vvPhvvewec,4195
|
55
|
+
monarch/common/mast.py,sha256=XTzYljGR0aZ7GjmNMPgU2HyuL4HWSAy4IwE3kEDqdOw,7735
|
56
|
+
monarch/common/messages.py,sha256=El7BoGZ2jlP8HyyE-S8wkiG9W8Ciw3_5JERnNrgOYHU,18278
|
57
|
+
monarch/common/mock_cuda.py,sha256=x6ho1Ton6BbKjBZ5ZxnFOUaQM032X70wnpoUNB7Ci2w,1039
|
58
|
+
monarch/common/opaque_ref.py,sha256=tWNvOC6CsjNPKD1JDx-8PSaeXqZC3eermgBExUPKML4,2871
|
59
|
+
monarch/common/pickle_flatten.py,sha256=2mc-dPiZy7kRqAstyfMLnPuoGJwsBftYYEHyF_HOZw4,1313
|
60
|
+
monarch/common/pipe.py,sha256=9pTf8--3yOv4HpnJEhgcmc_JM6Az4uL1y72TSQA55dw,5013
|
61
|
+
monarch/common/process_group.py,sha256=FbJ_AJRZYFkvQ68L2naRq64J_aNuAKe5kO0MWdn_x74,1662
|
62
|
+
monarch/common/recording.py,sha256=hoI9VY_FyW_xVx-jmfsKydqX5vW2GulwcDWsBdUVOm8,4637
|
63
|
+
monarch/common/reference.py,sha256=O26lkzEeVwj0S1xEy-OLqdHVnACmmlbQCUmXRrW4n1Q,938
|
64
|
+
monarch/common/remote.py,sha256=qZWXkShX20l07TseQSpVECh2yXZaVKYUvQXkeEM-zvY,9220
|
65
|
+
monarch/common/selection.py,sha256=lpWFbZs3ArYy29e-53eoAVAjQFksf1RvZz9NvM0CUW4,308
|
66
|
+
monarch/common/shape.py,sha256=jEHneh190QI7zGOVAARpXtkxI9mXV1YbnycXlpYQGuc,7388
|
67
|
+
monarch/common/stream.py,sha256=J9UCqhSXSbKYFGtbKaqAq1Vgmg6DJcLzsXXm-tsBQ-w,3499
|
68
|
+
monarch/common/tensor.py,sha256=mSXiHoD0Up4m2RLdQcsbesaz2N4QCFS34UNNX3Dbldk,28842
|
69
|
+
monarch/common/tensor_factory.py,sha256=qm8NZx-5ezMAFjNLiXQvb66okm5XgdboB_GRarGOdN0,801
|
70
|
+
monarch/common/tree.py,sha256=1DG3siiE7ixBV6v5cwN8RT_17aJhYZTE-L3i7wZe2_c,2282
|
71
|
+
monarch/controller/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
72
|
+
monarch/controller/backend.py,sha256=zwYFjH4YL3JlABlPILfmzCayMtaYsAu9xfiFO5RkRHQ,7757
|
73
|
+
monarch/controller/controller.py,sha256=PrDK1ZxtUL11-y4vBlBwN94OuTrjBMDJvBlz8e49vqQ,8367
|
74
|
+
monarch/controller/debugger.py,sha256=7vVERDyXY5nH3GhIoCzNIwn2rm0H76ZJ6A4equ7gfvM,1272
|
75
|
+
monarch/controller/history.py,sha256=OZbQ75nFMXnxupw_OBlhiLVXCJ8lJKFw1SV3egvLUqc,3019
|
76
|
+
monarch/controller/rust_backend/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
77
|
+
monarch/controller/rust_backend/controller.py,sha256=-bZYE6u5sB9C0Cnc6NiBoBit9TvolKHRn05I-LUpB8I,9516
|
78
|
+
monarch/gradient/__init__.py,sha256=kqmzwt16mMpk0M3GhpgP_f7da4DGnaV9chDzbt66k4Q,308
|
79
|
+
monarch/gradient/_gradient_generator.pyi,sha256=6cX0UxaDt9NAlwgIhTgnweqGOf6qRhHiGnUzSWNCxdU,630
|
80
|
+
monarch/gradient/_gradient_generator.so,sha256=povu68MOK7Yx1HHCCrYdLIK7bKnyjz-ZEXycpCsNYZU,11456608
|
81
|
+
monarch/parallel/__init__.py,sha256=6920kIkhiX7AiyjYvyc1ad8ccP-bStJJ1sS5KkeN2P0,352
|
82
|
+
monarch/parallel/pipelining/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
83
|
+
monarch/parallel/pipelining/runtime.py,sha256=KK8TG1gUYEzSsquiZoPTWGSIC74mlncD7cYknKxfb3c,32470
|
84
|
+
monarch/parallel/pipelining/schedule_ir.py,sha256=eowdF7VxUwt4S2bfhzcOpLxECHFZ4tSoimGP_cVZ_CE,27918
|
85
|
+
monarch/parallel/pipelining/scheduler.py,sha256=Q0d8m8nGzeuFIG7nnKfkRnjxH5MbmfxzD438YcslEq0,10012
|
86
|
+
monarch/simulator/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
87
|
+
monarch/simulator/command_history.py,sha256=8ag4-zCjGQ-BhTDwPRGnrJi3fsy6rPTaTz8l9p3-5bU,16511
|
88
|
+
monarch/simulator/config.py,sha256=_LlL-7JG9hSwKPTB_KYleZCers8YKTGwcs0sfDlhEzQ,477
|
89
|
+
monarch/simulator/interface.py,sha256=GDxG2ppsDhLcxcKNslxufGXWlI0hV9jqDZgvaGJc90k,1894
|
90
|
+
monarch/simulator/ir.py,sha256=kgIsyF_gXmNdpvTXGgAQVk2s3DTCRO9TUxHbAit9zrk,29725
|
91
|
+
monarch/simulator/mock_controller.py,sha256=_2bF2A6YxANAQpdm5gi1Z7Wkk-5hRncRhmw2ZNJk5eU,7388
|
92
|
+
monarch/simulator/profiling.py,sha256=dWpp538BRnF15VTxPcTQurSo7Sfp6zdkTh6nL4j1T-k,14512
|
93
|
+
monarch/simulator/simulator.py,sha256=zzW3YaeeIndytbVpGTBZkbMinBMDcwD6vVWpQyNDZWg,38192
|
94
|
+
monarch/simulator/task.py,sha256=JhOyEdLd7u4uHzGR5ejBCyJJoD_Xn9TwQEQc37fL_RU,8600
|
95
|
+
monarch/simulator/tensor.py,sha256=1eyQblRI71L9o7tgk465FW91Eqyc3B4sIfz07R4_1eY,13240
|
96
|
+
monarch/simulator/trace.py,sha256=OhB1F3n7zBznP4LxyEHin1G3s02oLPsEBbCs-8wd_bU,11911
|
97
|
+
monarch/simulator/utils.py,sha256=0CoeeK6rWZlgQzyzK8l8gqF2cSmZAjL1EY0PB32e4y8,1217
|
98
|
+
monarch/simulator/worker.py,sha256=2ez0k557jSRMsnzutix0zeOEkAUa26HKwfp1zi_aHQI,14339
|
99
|
+
monarch/timer/__init__.py,sha256=tdM52Rn0d69HNmtqGwAUE7Py37I3yvLhH0vkUopG3ok,553
|
100
|
+
monarch/timer/example_monarch.py,sha256=pYDx-Dvxxfbk0w7t9DIVikIJBgDIrp7mi8CCdfgV0p0,2338
|
101
|
+
monarch/timer/example_spmd.py,sha256=p8i3_tO1AmpwSkZryiSjgkh7qaEZ6QXp2Fy1qtPpECA,1406
|
102
|
+
monarch/timer/execution_timer.py,sha256=1YsrLIZirdohKOeFAU2H4UcONhQXHuctJbYcoX8I6gY,6985
|
103
|
+
monarch/timer/execution_timer_test.py,sha256=CSxTv44fFZQURJlCBmYvysQI1aS_zEGZs_uxl9SOHak,4486
|
104
|
+
monarch/tools/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
105
|
+
monarch/tools/cli.py,sha256=66F7dr90bh27P3kOCmxwJkVmWv2v4wBrkifvwqwUwFE,4967
|
106
|
+
monarch/tools/commands.py,sha256=BfmXndJmU_cZP4cMPlknkxGca1NjqYd8_ReDePWksXw,6908
|
107
|
+
monarch/tools/mesh_spec.py,sha256=JLykhgy1dClXiNbH1Qsl2fX5MbqplQAhl8LGoragvbo,3702
|
108
|
+
monarch/tools/components/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
109
|
+
monarch/tools/components/hyperactor.py,sha256=h0gy3QYZD-YJ7FHppJgbTKe4zOuNjUCGZqRlkwwGkhg,2012
|
110
|
+
monarch/tools/config/__init__.py,sha256=OPSflEmJB2zxAaRVzzWSWXV5M5vlknLgpulGdW1ze5U,510
|
111
|
+
monarch/tools/config/defaults.py,sha256=34a3HQhyXqt9qR2SYMVCROoNsnwk37rIwLXXiKwqtog,1894
|
112
|
+
monarch/worker/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
113
|
+
monarch/worker/_testing_function.py,sha256=A81cVMKgdlO66XvoYcBCDrxIQIm3o3GgvcH_c8M9OmI,13480
|
114
|
+
monarch/worker/compiled_block.py,sha256=hYx1F6PAu0_BnpKAprP_nV9qJtk5XWO7mcwH3JPDioU,10114
|
115
|
+
monarch/worker/debugger.py,sha256=MeRiVFe6zY5ouWZEVxH2QADq-1nB7hN-HgD-_yx7Q-o,3729
|
116
|
+
monarch/worker/lines.py,sha256=8lIf1mRaU8jpzNxwLXg-jaxrsqBXCERK7lRdQvF0C5M,1347
|
117
|
+
monarch/worker/monitor.py,sha256=uVEVHtTj9Dg-V1np-TEY-mDYKy3c6j3hU2n7OhOzfgA,1664
|
118
|
+
monarch/worker/worker.py,sha256=lVUg0jL2N9xME8_jVujJLrpVyKg1ZjHa13C3dhjsnXk,42614
|
119
|
+
monarch_supervisor/__init__.py,sha256=4_eZvVfwoM-3yhQdAMzae7I87ITEjJMvjeBy4ROc2rs,37869
|
120
|
+
monarch_supervisor/_testing.py,sha256=T5LwhusP26UG0Wxyq0M_ng_pcS5DfzEPapKDsohWmaY,832
|
121
|
+
monarch_supervisor/function_call.py,sha256=munXfLO7Qoriz9HMwdmUNlP2-peznQ8ZcUBQfRaTKr4,1026
|
122
|
+
monarch_supervisor/host.py,sha256=ca7C8NtaGkt4FubKpQCXElN-6H3chprWanyfnDpdQZE,13822
|
123
|
+
monarch_supervisor/launchers.py,sha256=nITh9eLg8WrNogS9pXKV4j3R5fijvvTrDpjw2gpQeEU,5313
|
124
|
+
monarch_supervisor/log_pstree.py,sha256=P5k0LBobXnc3NvoumxVyZOtHWS7jbhyHf5oQ4rHvRO0,1341
|
125
|
+
monarch_supervisor/logging.py,sha256=15IuHENvEXDhSuMwSQWzw4sC_1Qzww2S1X_KYjzh_O0,3318
|
126
|
+
monarch_supervisor/python_executable.py,sha256=WfCiK3wdAvm9Jxx5jgjGF991NgGc9-oHULNb68n55Hs,1389
|
127
|
+
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
128
|
+
tests/dispatch_bench.py,sha256=sU_m-8KAjQgYTsxI5khV664NdgLLutidni69Rtowk98,3933
|
129
|
+
tests/dispatch_bench_helper.py,sha256=1ORgAMrRgjAjmmWeCHLLQd_bda9mJk0rS2ucEbRu28s,633
|
130
|
+
tests/error_test_binary.py,sha256=r9-mm4eDqaJYnBo3gXcuqwhpYq1HeH6xem3a4p8rakI,4600
|
131
|
+
tests/sleep_binary.py,sha256=XfLYaAfwm9xgzM-svs8fhAeFhwYIg6SyVEnx4e6wbUw,1009
|
132
|
+
tests/test_actor_error.py,sha256=YBDS6BKwZqgKTFtydEJt4qwJGXRfWx3hgxup9ayVbhY,3827
|
133
|
+
tests/test_alloc.py,sha256=D6DdQbtOZEvvnnc7LV-WyWFMk0Xb77eblH6Oz90zJTA,745
|
134
|
+
tests/test_coalescing.py,sha256=-KtAWzTaeXbyzltplfojavx0iFeeZnvej-tFTlu2p5k,15616
|
135
|
+
tests/test_controller.py,sha256=yxuVp2DG3TDKJlwuE3cFm9dbWMlbrYtG1uHfvVWRYbw,30935
|
136
|
+
tests/test_device_mesh.py,sha256=DrbezYOM0thfP9MgLXb5-F0VoLOmSz5GR0GwjR_3bE4,5290
|
137
|
+
tests/test_fault_tolerance.py,sha256=u4wmG1z5MZ6PY6us5zUZHJh2pUC3L7i0wsUfRDNHmxA,14144
|
138
|
+
tests/test_future.py,sha256=cXzaNi2YDwVyjR541ScXmgktX1YFsKzbl8wep0DMVbk,3032
|
139
|
+
tests/test_grad_generator.py,sha256=p4Pm4kMEeGldt2jUVAkGKCB0mLccKI28pltH6OTGbQA,3412
|
140
|
+
tests/test_mock_cuda.py,sha256=5hisElxeLJ5MHw3KM9gwxBiXiMaG-Rm382u3AsQcDOI,3068
|
141
|
+
tests/test_pdb_actor.py,sha256=5KJhuhcZDPWMdjC6eAtDdwnz1W7jNFXvIrMSFaCWaPw,3858
|
142
|
+
tests/test_python_actors.py,sha256=dY109ofFtmmni9wJWNVb3W7YQH_tMZWSIGovnuAsrUw,10786
|
143
|
+
tests/test_remote_functions.py,sha256=ExqYlRQWRabpGBuKvNIOa8Hwj-iXuP87Jfb9i5RhaGs,50066
|
144
|
+
tests/test_rust_backend.py,sha256=nXSa0ZQ0NniZm4PzvKhrWvVLD-RKvIWYkPXm1BEBXq8,6235
|
145
|
+
tests/test_signal_safe_block_on.py,sha256=bmal0XgzJowZXJV6T1Blow5a-vZluYWusCThLMGxyTE,3336
|
146
|
+
tests/test_sim_backend.py,sha256=RckCkHO3DxKsAGdZMcIzRnd6YJXwDim1D5-xbBbgKio,1473
|
147
|
+
tests/simulator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
148
|
+
tests/simulator/test_profiling.py,sha256=TGYCfzTLdkpIwnOuO6KApprmrgPIRQe60KRX3wkB0sg,4565
|
149
|
+
tests/simulator/test_simulator.py,sha256=LO8lA0ssY-OGEBL5ipEu74f97Y765TEwfUOv-DtIptM,14568
|
150
|
+
tests/simulator/test_task.py,sha256=ipqBDuDAysuo1xOB9S5psaFvwe6VATD43IovCTSs0t4,2327
|
151
|
+
tests/simulator/test_worker.py,sha256=QrWWIJ3HDgDLkBPRc2mwYPlOQoXQcj1qRfc0WUfKkFY,3507
|
152
|
+
torchmonarch_nightly-2025.6.4.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
|
153
|
+
torchmonarch_nightly-2025.6.4.dist-info/METADATA,sha256=ue9cLg7CJ5ULmdZ66_8ieYA-Eade2_lgcLKPFLFjHak,2768
|
154
|
+
torchmonarch_nightly-2025.6.4.dist-info/WHEEL,sha256=_wZSFk0d90K9wOBp8Q-UGxshyiJ987JoPiyUBNC6VLk,104
|
155
|
+
torchmonarch_nightly-2025.6.4.dist-info/entry_points.txt,sha256=sqfQ16oZqjEvttUI-uj9BBXIIE6jt05bYFSmy-2hyXI,106
|
156
|
+
torchmonarch_nightly-2025.6.4.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
|
157
|
+
torchmonarch_nightly-2025.6.4.dist-info/RECORD,,
|
@@ -0,0 +1,29 @@
|
|
1
|
+
BSD 3-Clause License
|
2
|
+
|
3
|
+
Copyright (c) Meta Platforms, Inc. and affiliates.
|
4
|
+
All rights reserved.
|
5
|
+
|
6
|
+
Redistribution and use in source and binary forms, with or without
|
7
|
+
modification, are permitted provided that the following conditions are met:
|
8
|
+
|
9
|
+
* Redistributions of source code must retain the above copyright notice, this
|
10
|
+
list of conditions and the following disclaimer.
|
11
|
+
|
12
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
13
|
+
this list of conditions and the following disclaimer in the documentation
|
14
|
+
and/or other materials provided with the distribution.
|
15
|
+
|
16
|
+
* Neither the name of the copyright holder nor the names of its
|
17
|
+
contributors may be used to endorse or promote products derived from
|
18
|
+
this software without specific prior written permission.
|
19
|
+
|
20
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
21
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
22
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
23
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
24
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
25
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
26
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
27
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
28
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
29
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|