torchmonarch-nightly 2025.8.2__cp310-cp310-manylinux2014_x86_64.whl → 2025.9.4__cp310-cp310-manylinux2014_x86_64.whl
This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- monarch/_rust_bindings.so +0 -0
- monarch/_src/actor/actor_mesh.py +504 -218
- monarch/_src/actor/allocator.py +75 -6
- monarch/_src/actor/bootstrap_main.py +7 -4
- monarch/_src/actor/code_sync/__init__.py +2 -0
- monarch/_src/actor/debugger/__init__.py +7 -0
- monarch/_src/actor/{debugger.py → debugger/debugger.py} +246 -135
- monarch/_src/actor/{pdb_wrapper.py → debugger/pdb_wrapper.py} +62 -23
- monarch/_src/actor/endpoint.py +27 -45
- monarch/_src/actor/future.py +86 -24
- monarch/_src/actor/host_mesh.py +125 -0
- monarch/_src/actor/logging.py +94 -0
- monarch/_src/actor/pickle.py +25 -0
- monarch/_src/actor/proc_mesh.py +423 -156
- monarch/_src/actor/python_extension_methods.py +90 -0
- monarch/_src/actor/shape.py +8 -1
- monarch/_src/actor/source_loader.py +45 -0
- monarch/_src/actor/telemetry/__init__.py +172 -0
- monarch/_src/actor/telemetry/rust_span_tracing.py +6 -39
- monarch/_src/debug_cli/__init__.py +7 -0
- monarch/_src/debug_cli/debug_cli.py +43 -0
- monarch/_src/tensor_engine/rdma.py +64 -9
- monarch/_testing.py +1 -3
- monarch/actor/__init__.py +24 -4
- monarch/common/_C.so +0 -0
- monarch/common/device_mesh.py +14 -0
- monarch/common/future.py +10 -0
- monarch/common/remote.py +14 -25
- monarch/common/tensor.py +12 -0
- monarch/debug_cli/__init__.py +7 -0
- monarch/debug_cli/__main__.py +12 -0
- monarch/fetch.py +2 -2
- monarch/gradient/_gradient_generator.so +0 -0
- monarch/gradient_generator.py +4 -2
- monarch/mesh_controller.py +34 -14
- monarch/monarch_controller +0 -0
- monarch/tools/colors.py +25 -0
- monarch/tools/commands.py +42 -7
- monarch/tools/components/hyperactor.py +6 -4
- monarch/tools/config/__init__.py +35 -12
- monarch/tools/config/defaults.py +15 -5
- monarch/tools/config/environment.py +45 -0
- monarch/tools/config/workspace.py +165 -0
- monarch/tools/mesh_spec.py +3 -3
- monarch/utils/__init__.py +9 -0
- monarch/utils/utils.py +78 -0
- tests/error_test_binary.py +5 -3
- tests/python_actor_test_binary.py +52 -0
- tests/test_actor_error.py +142 -14
- tests/test_alloc.py +1 -1
- tests/test_allocator.py +59 -72
- tests/test_debugger.py +639 -45
- tests/test_env_before_cuda.py +4 -4
- tests/test_mesh_trait.py +38 -0
- tests/test_python_actors.py +965 -75
- tests/test_rdma.py +7 -6
- tests/test_tensor_engine.py +6 -6
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.4.dist-info}/METADATA +82 -4
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.4.dist-info}/RECORD +63 -47
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.4.dist-info}/WHEEL +0 -0
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.4.dist-info}/entry_points.txt +0 -0
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.4.dist-info}/licenses/LICENSE +0 -0
- {torchmonarch_nightly-2025.8.2.dist-info → torchmonarch_nightly-2025.9.4.dist-info}/top_level.txt +0 -0
monarch/_src/actor/pickle.py
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
# This source code is licensed under the BSD-style license found in the
|
5
5
|
# LICENSE file in the root directory of this source tree.
|
6
6
|
|
7
|
+
# pyre-unsafe
|
8
|
+
|
7
9
|
import io
|
8
10
|
import pickle
|
9
11
|
from contextlib import contextmanager, ExitStack
|
@@ -17,6 +19,29 @@ except ImportError:
|
|
17
19
|
torch = None
|
18
20
|
|
19
21
|
|
22
|
+
_orig_function_getstate = cloudpickle.cloudpickle._function_getstate
|
23
|
+
|
24
|
+
|
25
|
+
# To ensure that the debugger and tracebacks work on remote hosts
|
26
|
+
# running code that was pickled by value, we need to monkeypatch
|
27
|
+
# cloudpickle to set the `__loader__` attribute inside `__globals__`
|
28
|
+
# for the unpickled function. That way, when the remote host tries
|
29
|
+
# to load the source code for the function, it will use the RemoteImportLoader
|
30
|
+
# to retrieve the source code from the root client, where it *ostensibly*
|
31
|
+
# exists.
|
32
|
+
def _function_getstate(func):
|
33
|
+
from monarch._src.actor.source_loader import RemoteImportLoader
|
34
|
+
|
35
|
+
state, slotstate = _orig_function_getstate(func)
|
36
|
+
slotstate["__globals__"]["__loader__"] = RemoteImportLoader(
|
37
|
+
func.__code__.co_filename
|
38
|
+
)
|
39
|
+
return state, slotstate
|
40
|
+
|
41
|
+
|
42
|
+
cloudpickle.cloudpickle._function_getstate = _function_getstate
|
43
|
+
|
44
|
+
|
20
45
|
class _Pickler(cloudpickle.Pickler):
|
21
46
|
def __init__(self, filter):
|
22
47
|
self.f = io.BytesIO()
|