torchmonarch-nightly 2025.6.27__cp311-cp311-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monarch/__init__.py +189 -0
- monarch/_monarch/__init__.py +5 -0
- monarch/_monarch/hyperactor/__init__.py +58 -0
- monarch/_monarch/selection/__init__.py +13 -0
- monarch/_monarch/worker/__init__.py +0 -0
- monarch/_monarch/worker/debugger.py +117 -0
- monarch/_monarch/worker/logging.py +107 -0
- monarch/_rust_bindings.so +0 -0
- monarch/_testing.py +230 -0
- monarch/actor_mesh.py +761 -0
- monarch/allocator.py +220 -0
- monarch/bootstrap_main.py +59 -0
- monarch/builtins/__init__.py +14 -0
- monarch/builtins/log.py +22 -0
- monarch/builtins/random.py +68 -0
- monarch/cached_remote_function.py +257 -0
- monarch/code_sync.py +10 -0
- monarch/common/_C.pyi +11 -0
- monarch/common/_C.so +0 -0
- monarch/common/__init__.py +0 -0
- monarch/common/_coalescing.py +308 -0
- monarch/common/_device_utils.py +18 -0
- monarch/common/_tensor_to_table.py +172 -0
- monarch/common/base_tensor.py +28 -0
- monarch/common/borrows.py +143 -0
- monarch/common/client.py +690 -0
- monarch/common/constants.py +10 -0
- monarch/common/context_manager.py +40 -0
- monarch/common/controller_api.py +104 -0
- monarch/common/device_mesh.py +417 -0
- monarch/common/fake.py +55 -0
- monarch/common/function.py +160 -0
- monarch/common/function_caching.py +164 -0
- monarch/common/future.py +168 -0
- monarch/common/invocation.py +125 -0
- monarch/common/mast.py +221 -0
- monarch/common/messages.py +573 -0
- monarch/common/mock_cuda.py +41 -0
- monarch/common/opaque_ref.py +98 -0
- monarch/common/pickle_flatten.py +48 -0
- monarch/common/pipe.py +152 -0
- monarch/common/process_group.py +55 -0
- monarch/common/recording.py +127 -0
- monarch/common/reference.py +33 -0
- monarch/common/remote.py +297 -0
- monarch/common/selection.py +9 -0
- monarch/common/shape.py +229 -0
- monarch/common/stream.py +114 -0
- monarch/common/tensor.py +814 -0
- monarch/common/tensor_factory.py +31 -0
- monarch/common/tree.py +73 -0
- monarch/controller/__init__.py +7 -0
- monarch/controller/backend.py +223 -0
- monarch/controller/controller.py +223 -0
- monarch/controller/debugger.py +47 -0
- monarch/controller/history.py +90 -0
- monarch/controller/rust_backend/__init__.py +7 -0
- monarch/controller/rust_backend/controller.py +245 -0
- monarch/debugger.py +379 -0
- monarch/fetch.py +55 -0
- monarch/future.py +76 -0
- monarch/gradient/__init__.py +11 -0
- monarch/gradient/_gradient_generator.pyi +22 -0
- monarch/gradient/_gradient_generator.so +0 -0
- monarch/gradient_generator.py +185 -0
- monarch/memory.py +43 -0
- monarch/mesh_controller.py +271 -0
- monarch/monarch_controller +0 -0
- monarch/notebook.py +761 -0
- monarch/opaque_module.py +235 -0
- monarch/opaque_object.py +88 -0
- monarch/parallel/__init__.py +9 -0
- monarch/parallel/pipelining/__init__.py +7 -0
- monarch/parallel/pipelining/runtime.py +847 -0
- monarch/parallel/pipelining/schedule_ir.py +692 -0
- monarch/parallel/pipelining/scheduler.py +249 -0
- monarch/pdb_wrapper.py +135 -0
- monarch/proc_mesh.py +299 -0
- monarch/profiler.py +160 -0
- monarch/python_local_mesh.py +107 -0
- monarch/random.py +61 -0
- monarch/rdma.py +162 -0
- monarch/remote_class.py +114 -0
- monarch/rust_backend_mesh.py +280 -0
- monarch/rust_local_mesh.py +1402 -0
- monarch/sim_mesh.py +359 -0
- monarch/simulator/__init__.py +7 -0
- monarch/simulator/command_history.py +424 -0
- monarch/simulator/config.py +21 -0
- monarch/simulator/interface.py +59 -0
- monarch/simulator/ir.py +770 -0
- monarch/simulator/mock_controller.py +214 -0
- monarch/simulator/profiling.py +424 -0
- monarch/simulator/simulator.py +1052 -0
- monarch/simulator/task.py +255 -0
- monarch/simulator/tensor.py +373 -0
- monarch/simulator/trace.py +395 -0
- monarch/simulator/utils.py +41 -0
- monarch/simulator/worker.py +389 -0
- monarch/telemetry.py +19 -0
- monarch/tensor_worker_main.py +260 -0
- monarch/tensorboard.py +84 -0
- monarch/timer/__init__.py +21 -0
- monarch/timer/example_monarch.py +78 -0
- monarch/timer/example_spmd.py +55 -0
- monarch/timer/execution_timer.py +199 -0
- monarch/timer/execution_timer_test.py +131 -0
- monarch/tools/__init__.py +7 -0
- monarch/tools/cli.py +167 -0
- monarch/tools/commands.py +251 -0
- monarch/tools/components/__init__.py +7 -0
- monarch/tools/components/hyperactor.py +58 -0
- monarch/tools/config/__init__.py +20 -0
- monarch/tools/config/defaults.py +54 -0
- monarch/tools/mesh_spec.py +165 -0
- monarch/tools/network.py +69 -0
- monarch/worker/__init__.py +7 -0
- monarch/worker/_testing_function.py +481 -0
- monarch/worker/compiled_block.py +270 -0
- monarch/worker/debugger.py +125 -0
- monarch/worker/lines.py +47 -0
- monarch/worker/monitor.py +53 -0
- monarch/worker/worker.py +1191 -0
- monarch/world_mesh.py +34 -0
- monarch_supervisor/__init__.py +1044 -0
- monarch_supervisor/_testing.py +44 -0
- monarch_supervisor/function_call.py +30 -0
- monarch_supervisor/host.py +386 -0
- monarch_supervisor/launchers.py +145 -0
- monarch_supervisor/log_pstree.py +48 -0
- monarch_supervisor/logging.py +103 -0
- monarch_supervisor/python_executable.py +42 -0
- tests/__init__.py +0 -0
- tests/dispatch_bench.py +124 -0
- tests/dispatch_bench_helper.py +25 -0
- tests/error_test_binary.py +180 -0
- tests/simulator/__init__.py +0 -0
- tests/simulator/test_profiling.py +136 -0
- tests/simulator/test_simulator.py +411 -0
- tests/simulator/test_task.py +64 -0
- tests/simulator/test_worker.py +102 -0
- tests/sleep_binary.py +35 -0
- tests/test_actor_error.py +240 -0
- tests/test_alloc.py +25 -0
- tests/test_allocator.py +365 -0
- tests/test_coalescing.py +492 -0
- tests/test_controller.py +845 -0
- tests/test_device_mesh.py +132 -0
- tests/test_fault_tolerance.py +398 -0
- tests/test_future.py +94 -0
- tests/test_grad_generator.py +121 -0
- tests/test_mock_cuda.py +74 -0
- tests/test_pdb_actor.py +110 -0
- tests/test_python_actors.py +736 -0
- tests/test_remote_functions.py +1271 -0
- tests/test_rust_backend.py +217 -0
- tests/test_signal_safe_block_on.py +103 -0
- tests/test_sim_backend.py +54 -0
- tests/test_tensor_engine.py +52 -0
- torchmonarch_nightly-2025.6.27.dist-info/METADATA +94 -0
- torchmonarch_nightly-2025.6.27.dist-info/RECORD +165 -0
- torchmonarch_nightly-2025.6.27.dist-info/WHEEL +5 -0
- torchmonarch_nightly-2025.6.27.dist-info/entry_points.txt +3 -0
- torchmonarch_nightly-2025.6.27.dist-info/licenses/LICENSE +29 -0
- torchmonarch_nightly-2025.6.27.dist-info/top_level.txt +3 -0
monarch/__init__.py
ADDED
@@ -0,0 +1,189 @@
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the BSD-style license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
# pyre-unsafe
|
8
|
+
|
9
|
+
from importlib import import_module as _import_module
|
10
|
+
from typing import TYPE_CHECKING
|
11
|
+
|
12
|
+
# Import before monarch to pre-load torch DSOs as, in exploded wheel flows,
|
13
|
+
# our RPATHs won't correctly find them.
|
14
|
+
import torch # noqa: F401
|
15
|
+
|
16
|
+
# submodules of monarch should not be imported in this
|
17
|
+
# top-level file because it will cause them to get
|
18
|
+
# loaded even if they are not actually being used.
|
19
|
+
# for instance if we import monarch.common.functions,
|
20
|
+
# we might not want to also import monarch.common.tensor,
|
21
|
+
# which recursively imports torch.
|
22
|
+
|
23
|
+
# Instead to expose functionality as part of the
|
24
|
+
# monarch.* API, import it inside the TYPE_CHECKING
|
25
|
+
# guard (so typechecker works), and then add it
|
26
|
+
# to the _public_api dict and __all__ list. These
|
27
|
+
# entries will get loaded on demand.
|
28
|
+
|
29
|
+
|
30
|
+
if TYPE_CHECKING:
|
31
|
+
from monarch import timer
|
32
|
+
from monarch.allocator import LocalAllocator, ProcessAllocator
|
33
|
+
from monarch.common._coalescing import coalescing
|
34
|
+
|
35
|
+
from monarch.common.device_mesh import (
|
36
|
+
DeviceMesh,
|
37
|
+
get_active_mesh,
|
38
|
+
no_mesh,
|
39
|
+
RemoteProcessGroup,
|
40
|
+
slice_mesh,
|
41
|
+
to_mesh,
|
42
|
+
)
|
43
|
+
|
44
|
+
from monarch.common.function import resolvers as function_resolvers
|
45
|
+
|
46
|
+
from monarch.common.future import Future
|
47
|
+
|
48
|
+
from monarch.common.invocation import RemoteException
|
49
|
+
from monarch.common.opaque_ref import OpaqueRef
|
50
|
+
from monarch.common.pipe import create_pipe, Pipe, remote_generator
|
51
|
+
from monarch.common.remote import remote
|
52
|
+
from monarch.common.selection import Selection
|
53
|
+
from monarch.common.shape import NDSlice, Shape
|
54
|
+
from monarch.common.stream import get_active_stream, Stream
|
55
|
+
from monarch.common.tensor import reduce, reduce_, Tensor
|
56
|
+
from monarch.fetch import fetch_shard, inspect, show
|
57
|
+
from monarch.future import ActorFuture
|
58
|
+
from monarch.gradient_generator import grad_function, grad_generator
|
59
|
+
from monarch.notebook import mast_mesh, reserve_torchx as mast_reserve
|
60
|
+
from monarch.python_local_mesh import python_local_mesh
|
61
|
+
from monarch.rust_backend_mesh import (
|
62
|
+
rust_backend_mesh,
|
63
|
+
rust_backend_meshes,
|
64
|
+
rust_mast_mesh,
|
65
|
+
)
|
66
|
+
from monarch.rust_local_mesh import local_mesh, local_meshes, SocketType
|
67
|
+
from monarch.simulator.config import set_meta # noqa
|
68
|
+
from monarch.simulator.interface import Simulator
|
69
|
+
from monarch.world_mesh import world_mesh
|
70
|
+
|
71
|
+
|
72
|
+
# Lazy-export table: public attribute name -> (module to import, attribute
# to read from that module). __getattr__ consults it on first access, so the
# submodules below are imported only when one of their names is requested.
_public_api = {
    "coalescing": ("monarch.common._coalescing", "coalescing"),
    "remote": ("monarch.common.remote", "remote"),
    "DeviceMesh": ("monarch.common.device_mesh", "DeviceMesh"),
    "get_active_mesh": ("monarch.common.device_mesh", "get_active_mesh"),
    "no_mesh": ("monarch.common.device_mesh", "no_mesh"),
    "RemoteProcessGroup": ("monarch.common.device_mesh", "RemoteProcessGroup"),
    # exported under a different name than the one it has in its module
    "function_resolvers": ("monarch.common.function", "resolvers"),
    "Future": ("monarch.common.future", "Future"),
    "RemoteException": ("monarch.common.invocation", "RemoteException"),
    "Shape": ("monarch.common.shape", "Shape"),
    "NDSlice": ("monarch.common.shape", "NDSlice"),
    "Selection": ("monarch.common.selection", "Selection"),
    "OpaqueRef": ("monarch.common.opaque_ref", "OpaqueRef"),
    "create_pipe": ("monarch.common.pipe", "create_pipe"),
    "Pipe": ("monarch.common.pipe", "Pipe"),
    "remote_generator": ("monarch.common.pipe", "remote_generator"),
    "get_active_stream": ("monarch.common.stream", "get_active_stream"),
    "Stream": ("monarch.common.stream", "Stream"),
    "Tensor": ("monarch.common.tensor", "Tensor"),
    "reduce": ("monarch.common.tensor", "reduce"),
    "reduce_": ("monarch.common.tensor", "reduce_"),
    "to_mesh": ("monarch.common.device_mesh", "to_mesh"),
    "slice_mesh": ("monarch.common.device_mesh", "slice_mesh"),
    "call_on_shard_and_fetch": ("monarch.fetch", "call_on_shard_and_fetch"),
    "fetch_shard": ("monarch.fetch", "fetch_shard"),
    "inspect": ("monarch.fetch", "inspect"),
    "show": ("monarch.fetch", "show"),
    "grad_function": ("monarch.gradient_generator", "grad_function"),
    "grad_generator": ("monarch.gradient_generator", "grad_generator"),
    "python_local_mesh": ("monarch.python_local_mesh", "python_local_mesh"),
    "mast_mesh": ("monarch.notebook", "mast_mesh"),
    # exported under a different name than the one it has in its module
    "mast_reserve": ("monarch.notebook", "reserve_torchx"),
    "rust_backend_mesh": ("monarch.rust_backend_mesh", "rust_backend_mesh"),
    "rust_backend_meshes": ("monarch.rust_backend_mesh", "rust_backend_meshes"),
    "local_mesh": ("monarch.rust_local_mesh", "local_mesh"),
    "local_meshes": ("monarch.rust_local_mesh", "local_meshes"),
    "SocketType": ("monarch.rust_local_mesh", "SocketType"),
    "rust_mast_mesh": ("monarch.rust_backend_mesh", "rust_mast_mesh"),
    "set_meta": ("monarch.simulator.config", "set_meta"),
    "Simulator": ("monarch.simulator.interface", "Simulator"),
    "world_mesh": ("monarch.world_mesh", "world_mesh"),
    # NOTE(review): the TYPE_CHECKING block imports `timer` as a submodule
    # (`from monarch import timer`), while this entry reads an attribute named
    # "timer" from monarch.timer -- confirm that attribute exists.
    "timer": ("monarch.timer", "timer"),
    "ProcessAllocator": ("monarch.allocator", "ProcessAllocator"),
    "LocalAllocator": ("monarch.allocator", "LocalAllocator"),
    "ActorFuture": ("monarch.future", "ActorFuture"),
    "builtins": ("monarch.builtins", "builtins"),
}
|
120
|
+
|
121
|
+
|
122
|
+
def __getattr__(name):
    """Resolve a name listed in ``_public_api`` on first access.

    The target module is imported, the attribute is read from it, and the
    result is stored in this module's globals so that later lookups of the
    same name find it directly.

    Raises:
        AttributeError: if ``name`` is not a key of ``_public_api``.
    """
    target = _public_api.get(name)
    if target is None:
        raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
    module_path, attr_name = target
    value = getattr(_import_module(module_path), attr_name)
    # Cache the resolved object under the public name.
    globals()[name] = value
    return value
|
130
|
+
|
131
|
+
|
132
|
+
# IN_PAR records whether the `__manifest__` module can be imported
# (presumably only available inside a PAR build -- confirm).
try:
    from __manifest__ import fbmake  # noqa
except ImportError:
    IN_PAR = False
else:
    IN_PAR = True
|
138
|
+
|
139
|
+
# The public names are spelled out here instead of being derived from the keys
# of _public_api; otherwise tools think the imports are unused.
__all__ = [
    "coalescing",
    "DeviceMesh",
    "get_active_mesh",
    "no_mesh",
    "remote",
    "RemoteProcessGroup",
    "function_resolvers",
    "Future",
    "RemoteException",
    "Shape",
    "Selection",
    "NDSlice",
    "OpaqueRef",
    "create_pipe",
    "Pipe",
    "remote_generator",
    "get_active_stream",
    "Stream",
    "Tensor",
    "reduce",
    "reduce_",
    "to_mesh",
    "slice_mesh",
    "call_on_shard_and_fetch",
    "fetch_shard",
    "inspect",
    "show",
    "grad_function",
    "grad_generator",
    "python_local_mesh",
    "mast_mesh",
    "mast_reserve",
    "rust_backend_mesh",
    "rust_backend_meshes",
    "local_mesh",
    "local_meshes",
    "SocketType",
    "rust_mast_mesh",
    "set_meta",
    "Simulator",
    "world_mesh",
    "timer",
    "ProcessAllocator",
    "LocalAllocator",
    "ActorFuture",
    "builtins",
]
# Import-time sanity check: the explicit list and the lazy-export table must
# name exactly the same set of attributes.
assert sorted(__all__) == sorted(_public_api)
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the BSD-style license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
# pyre-strict
|
8
|
+
|
9
|
+
from monarch._rust_bindings.monarch_hyperactor.actor import PythonMessage
|
10
|
+
|
11
|
+
from monarch._rust_bindings.monarch_hyperactor.alloc import ( # @manual=//monarch/monarch_extension:monarch_extension
|
12
|
+
LocalAllocatorBase,
|
13
|
+
)
|
14
|
+
|
15
|
+
from monarch._rust_bindings.monarch_hyperactor.mailbox import Mailbox, PortId
|
16
|
+
|
17
|
+
from monarch._rust_bindings.monarch_hyperactor.proc import ( # @manual=//monarch/monarch_extension:monarch_extension
|
18
|
+
ActorId,
|
19
|
+
Alloc,
|
20
|
+
AllocConstraints,
|
21
|
+
AllocSpec,
|
22
|
+
init_proc,
|
23
|
+
Proc,
|
24
|
+
Serialized,
|
25
|
+
)
|
26
|
+
|
27
|
+
from monarch._rust_bindings.monarch_hyperactor.shape import ( # @manual=//monarch/monarch_extension:monarch_extension
|
28
|
+
Shape,
|
29
|
+
)
|
30
|
+
|
31
|
+
# Only names that this module actually imports are exported. A name listed in
# __all__ that the module does not define makes `from <module> import *` fail
# with AttributeError, and each name needs to appear only once.
# NOTE(review): if further bindings (e.g. port or mesh types) are meant to be
# re-exported from here, add the corresponding import together with the entry.
__all__ = [
    "init_proc",
    "ActorId",
    "Alloc",
    "AllocSpec",
    "PortId",
    "Proc",
    "Serialized",
    "PythonMessage",
    "Mailbox",
    "AllocConstraints",
    "Shape",
    "LocalAllocatorBase",
]
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the BSD-style license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
from monarch._rust_bindings.monarch_hyperactor.selection import ( # @manual=//monarch/monarch_extension:monarch_extension
|
8
|
+
Selection,
|
9
|
+
)
|
10
|
+
|
11
|
+
# Re-export the Selection binding imported above as this module's public API.
__all__ = ["Selection"]
|
File without changes
|
@@ -0,0 +1,117 @@
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the BSD-style license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
# pyre-unsafe
|
8
|
+
|
9
|
+
import bdb
|
10
|
+
import io
|
11
|
+
import logging
|
12
|
+
import pdb # noqa
|
13
|
+
import sys
|
14
|
+
from typing import cast, Optional
|
15
|
+
|
16
|
+
from monarch._rust_bindings.monarch_extension import debugger
|
17
|
+
from monarch._rust_bindings.monarch_messages.debugger import DebuggerAction
|
18
|
+
|
19
|
+
logger = logging.getLogger(__name__)
|
20
|
+
|
21
|
+
|
22
|
+
def _set_trace(*, header=None):
    """Create a ``PdbWrapper`` and start tracing from the caller.

    Args:
        header: optional text handed to ``PdbWrapper``; it is printed once
            the debugger session starts.
    """
    # Keep this a direct call from this frame: PdbWrapper.setup() moves up a
    # fixed number of frames to reach the user's code.
    PdbWrapper(header).set_trace()
|
25
|
+
|
26
|
+
|
27
|
+
class PdbWrapper(pdb.Pdb):
    """``pdb.Pdb`` whose input and output go through a ``debugger.PdbActor``.

    Output is sent via ``WriteWrapper`` and input is read via ``ReadWrapper``,
    both backed by the same actor. The actor is also used to announce pauses
    and to detach when the session ends.
    """

    def __init__(self, header: Optional[str]):
        """Create the backing actor and wire it in as pdb's stdin/stdout.

        Args:
            header: optional text shown when ``set_trace`` starts a session.
        """
        actor = debugger.PdbActor()
        self._actor = actor
        self.header = header
        super().__init__(
            # pyre-ignore
            stdout=WriteWrapper(actor),
            stdin=ReadWrapper.create(actor),
        )
        # True until setup() has run once and repositioned the current frame.
        self._first = True

    def setup(self, *args, **kwargs):
        """Run the base ``setup`` and, the first time only, move up two frames."""
        result = super().setup(*args, **kwargs)
        if not self._first:
            return result
        self._first = False
        # When the debugger is entered we want to show the user's stack frame
        # rather than the nested frames of this wrapper, so that locals,
        # listings, etc. refer to the user's code. Moving up 2 frames gets us
        # to that frame.
        self.do_up(2)
        return result

    def set_continue(self) -> None:
        """Continue execution; detach from the controller if no breakpoints remain."""
        result = super().set_continue()
        if self.breaks:
            # Breakpoints are still set, so this debugger may be used again.
            return result
        # No more breakpoints: this debugger will not be used again, so
        # detach from the controller io.
        self._actor.send(DebuggerAction.Detach())
        self._actor.drain_and_stop()
        # Break the reference cycle with itself before we exit.
        self.stdin = sys.stdin
        self.stdout = sys.stdout
        return result

    def set_trace(self):
        """Announce the pause and start tracing unless the controller declines.

        Raises:
            RuntimeError: if the reply is neither ``Detach`` nor ``Attach``.
        """
        self._actor.send(DebuggerAction.Paused())
        reply = self._actor.receive()
        # The controller may ignore this request to debug by answering with
        # a "detach" message immediately.
        if isinstance(reply, DebuggerAction.Detach):
            return
        if not isinstance(reply, DebuggerAction.Attach):
            raise RuntimeError(f"unexpected debugger message {reply}")
        if self.header:
            self.message(self.header)
        # Must be called directly from this method: setup() assumes a fixed
        # number of wrapper frames between here and the user's code.
        super().set_trace()

    def set_quit(self):
        """Detach from the controller, stop the actor, then quit the debugger."""
        self._actor.send(DebuggerAction.Detach())
        self._actor.drain_and_stop()
        super().set_quit()
|
80
|
+
|
81
|
+
|
82
|
+
class ReadWrapper(io.RawIOBase):
    """Raw input stream that obtains its bytes from a ``debugger.PdbActor``."""

    def __init__(self, actor: debugger.PdbActor):
        self._actor = actor

    def readinto(self, b):
        """Request up to ``len(b)`` bytes from the actor and copy them into ``b``.

        Returns:
            The number of bytes written into ``b``.

        Raises:
            bdb.BdbQuit: if the actor answers with ``DebuggerAction.Detach``.
        """
        self._actor.send(DebuggerAction.Read(len(b)))
        reply = self._actor.receive()
        if isinstance(reply, DebuggerAction.Detach):
            raise bdb.BdbQuit
        # Any reply other than Detach is expected to carry the input bytes.
        assert isinstance(reply, DebuggerAction.Write)
        reply = cast(DebuggerAction.Write, reply)
        data = debugger.get_bytes_from_write_action(reply)
        count = len(data)
        # The reply must fit into the buffer we were given.
        assert count <= len(b)
        b[:count] = data
        return count

    def readable(self) -> bool:
        return True

    @classmethod
    def create(cls, actor: debugger.PdbActor):
        """Return a buffered text stream layered on a new ``ReadWrapper``."""
        raw = cls(actor)
        return io.TextIOWrapper(io.BufferedReader(raw))
|
104
|
+
|
105
|
+
|
106
|
+
class WriteWrapper:
    """Minimal text output stream that forwards writes to a ``debugger.PdbActor``."""

    def __init__(self, actor: debugger.PdbActor):
        self._actor = actor

    def writable(self) -> bool:
        return True

    def write(self, s: str) -> int:
        """Send ``s`` (encoded to bytes) to the actor as a ``Write`` action.

        Returns:
            The number of characters written, as the text-stream ``write``
            protocol expects.
        """
        self._actor.send(DebuggerAction.Write(s.encode()))
        return len(s)

    def flush(self):
        # Each write is sent immediately, so there is nothing to flush.
        pass
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the BSD-style license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
# pyre-unsafe
|
8
|
+
import logging
|
9
|
+
import os
|
10
|
+
import socket
|
11
|
+
import sys
|
12
|
+
from pathlib import Path
|
13
|
+
|
14
|
+
logger = logging.getLogger(__name__)
|
15
|
+
|
16
|
+
## NOTE THIS FILE IS A DIRECT COPY OF ~/fbsource/fbcode/monarch/python/monarch_supervisor/logging.py
|
17
|
+
## It is copied here at this time to avoid pulling in the monarch python supervisor as dependency since there's
|
18
|
+
## an expectation that the other one will be removed soon.
|
19
|
+
|
20
|
+
|
21
|
+
def _handle_unhandled_exception(*args):
    """Log an uncaught exception at ERROR level.

    Installed as ``sys.excepthook`` by ``initialize_logging``; the positional
    arguments it receives are passed through to the logger as ``exc_info``.
    """
    logger.error("Uncaught exception", exc_info=args)
|
23
|
+
|
24
|
+
|
25
|
+
# One-letter level tags used at the start of each formatted log line, keyed
# by the logging level name.
_glog_level_to_abbr = {
    # glog has no DEBUG; its closest notion is VERBOSE, hence "V".
    "DEBUG": "V",
    "INFO": "I",
    "WARNING": "W",
    "ERROR": "E",
    "CRITICAL": "C",
}
|
32
|
+
|
33
|
+
|
34
|
+
def fix_exception_lines(tb_lines):
    """Rewrite traceback frame lines into a compact ``File <path>:<lineno>`` form.

    A frame line such as ``File "a.py", line 3, in foo`` (with any amount of
    leading whitespace) becomes `` File a.py:3, in foo``. Every other line is
    returned with surrounding whitespace stripped.

    Args:
        tb_lines: iterable of traceback lines, one string per line.

    Returns:
        A new list with one output line per input line.
    """
    frame_prefix = 'File "'
    formatted_lines = []
    for line in tb_lines:
        stripped = line.strip()
        if stripped.startswith(frame_prefix):
            # Split on the closing quote plus ", line " rather than on every
            # comma, so a comma inside the file path does not break parsing.
            file_info, marker, rest = stripped[len(frame_prefix):].rpartition(
                '", line '
            )
            if marker:
                line_info, comma, tail = rest.partition(",")
                new_line = f" File {file_info}:{line_info.strip()}"
                if comma:
                    # Whatever follows the line number (e.g. "in <func>").
                    new_line += ", " + tail.strip()
                formatted_lines.append(new_line)
                continue
        # Not a well-formed frame line: keep the text, drop the indentation.
        formatted_lines.append(stripped)
    return formatted_lines
|
50
|
+
|
51
|
+
|
52
|
+
class _Formatter(logging.Formatter):
    """Formatter that prefixes every output line with a glog-style header.

    Each line of the message (and of any exception or stack text) is emitted
    as ``<level letter><mmdd HH:MM:SS>.<microseconds> <file>:<line>]<suffix> <text>``.
    """

    def __init__(self, suffix):
        """Store the text placed after ``]`` in every line prefix.

        Args:
            suffix: string appended to the prefix of each line.
        """
        # Initialise the base class so inherited helpers that rely on its
        # state (style, format string, date format) keep working.
        super().__init__()
        self.suffix = suffix

    def format(self, record):
        """Return ``record`` rendered as one or more prefixed lines."""
        message = record.getMessage()
        asctime = self.formatTime(record, "%m%d %H:%M:%S")

        lines = message.strip().split("\n")
        if record.exc_info:
            # Traceback lines are compacted before being appended.
            exc_info = fix_exception_lines(
                self.formatException(record.exc_info).split("\n")
            )
            lines.extend(exc_info)
        if record.stack_info:
            stack_info = self.formatStack(record.stack_info)
            lines.extend(stack_info.strip().split("\n"))

        # Unknown level names fall back to their first character.
        shortlevel = _glog_level_to_abbr.get(record.levelname, record.levelname[0])

        # record.msecs is in milliseconds; scale it to a 6-digit microsecond field.
        prefix = (
            f"{shortlevel}{asctime}.{int(record.msecs*1000):06d} "
            f"{record.filename}:"
            f"{record.lineno}]{self.suffix}"
        )
        return "\n".join(f"{prefix} {line}" for line in lines)
|
78
|
+
|
79
|
+
|
80
|
+
def initialize_logging(process_name=None):
    """Attach a handler to the root logger and log uncaught exceptions.

    Configuration is read from the environment:

    * ``TORCH_MONARCH_LOG_FOLDER`` -- if set, records go to a file inside this
      folder (created if missing); otherwise a ``logging.StreamHandler`` with
      default arguments is used.
    * ``TORCH_MONARCH_LOG_LEVEL`` -- level applied to both the handler and the
      root logger; defaults to ``"INFO"``.

    Args:
        process_name: optional name used in each line's prefix and, with
            ``/`` replaced by ``_``, as the log file's base name.
    """
    log_level = os.environ.get("TORCH_MONARCH_LOG_LEVEL", "INFO")
    log_folder = os.environ.get("TORCH_MONARCH_LOG_FOLDER")

    if log_folder is None:
        handler = logging.StreamHandler()
    else:
        folder = Path(log_folder)
        folder.mkdir(parents=True, exist_ok=True)
        # NOTE(review): the fallback base name already ends in ".log", so the
        # resulting file is "logfile.log.log" -- confirm whether that is intended.
        base_name = process_name.replace("/", "_") if process_name else "logfile.log"
        handler = logging.FileHandler(folder / f"{base_name}.log")

    suffix = "" if process_name is None else f" {process_name}:"
    handler.setFormatter(_Formatter(suffix))
    handler.setLevel(log_level)

    root = logging.root
    root.setLevel(log_level)
    root.addHandler(handler)

    # Route uncaught exceptions through the logger.
    sys.excepthook = _handle_unhandled_exception
|
101
|
+
|
102
|
+
|
103
|
+
def gethostname():
    """Return this machine's hostname with any ".facebook.com" removed."""
    return socket.gethostname().replace(".facebook.com", "")
|
Binary file
|