torchmonarch-nightly 2025.6.27__cp313-cp313-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monarch/__init__.py +189 -0
- monarch/_monarch/__init__.py +5 -0
- monarch/_monarch/hyperactor/__init__.py +58 -0
- monarch/_monarch/selection/__init__.py +13 -0
- monarch/_monarch/worker/__init__.py +0 -0
- monarch/_monarch/worker/debugger.py +117 -0
- monarch/_monarch/worker/logging.py +107 -0
- monarch/_rust_bindings.so +0 -0
- monarch/_testing.py +230 -0
- monarch/actor_mesh.py +761 -0
- monarch/allocator.py +220 -0
- monarch/bootstrap_main.py +59 -0
- monarch/builtins/__init__.py +14 -0
- monarch/builtins/log.py +22 -0
- monarch/builtins/random.py +68 -0
- monarch/cached_remote_function.py +257 -0
- monarch/code_sync.py +10 -0
- monarch/common/_C.pyi +11 -0
- monarch/common/_C.so +0 -0
- monarch/common/__init__.py +0 -0
- monarch/common/_coalescing.py +308 -0
- monarch/common/_device_utils.py +18 -0
- monarch/common/_tensor_to_table.py +172 -0
- monarch/common/base_tensor.py +28 -0
- monarch/common/borrows.py +143 -0
- monarch/common/client.py +690 -0
- monarch/common/constants.py +10 -0
- monarch/common/context_manager.py +40 -0
- monarch/common/controller_api.py +104 -0
- monarch/common/device_mesh.py +417 -0
- monarch/common/fake.py +55 -0
- monarch/common/function.py +160 -0
- monarch/common/function_caching.py +164 -0
- monarch/common/future.py +168 -0
- monarch/common/invocation.py +125 -0
- monarch/common/mast.py +221 -0
- monarch/common/messages.py +573 -0
- monarch/common/mock_cuda.py +41 -0
- monarch/common/opaque_ref.py +98 -0
- monarch/common/pickle_flatten.py +48 -0
- monarch/common/pipe.py +152 -0
- monarch/common/process_group.py +55 -0
- monarch/common/recording.py +127 -0
- monarch/common/reference.py +33 -0
- monarch/common/remote.py +297 -0
- monarch/common/selection.py +9 -0
- monarch/common/shape.py +229 -0
- monarch/common/stream.py +114 -0
- monarch/common/tensor.py +814 -0
- monarch/common/tensor_factory.py +31 -0
- monarch/common/tree.py +73 -0
- monarch/controller/__init__.py +7 -0
- monarch/controller/backend.py +223 -0
- monarch/controller/controller.py +223 -0
- monarch/controller/debugger.py +47 -0
- monarch/controller/history.py +90 -0
- monarch/controller/rust_backend/__init__.py +7 -0
- monarch/controller/rust_backend/controller.py +245 -0
- monarch/debugger.py +379 -0
- monarch/fetch.py +55 -0
- monarch/future.py +76 -0
- monarch/gradient/__init__.py +11 -0
- monarch/gradient/_gradient_generator.pyi +22 -0
- monarch/gradient/_gradient_generator.so +0 -0
- monarch/gradient_generator.py +185 -0
- monarch/memory.py +43 -0
- monarch/mesh_controller.py +271 -0
- monarch/monarch_controller +0 -0
- monarch/notebook.py +761 -0
- monarch/opaque_module.py +235 -0
- monarch/opaque_object.py +88 -0
- monarch/parallel/__init__.py +9 -0
- monarch/parallel/pipelining/__init__.py +7 -0
- monarch/parallel/pipelining/runtime.py +847 -0
- monarch/parallel/pipelining/schedule_ir.py +692 -0
- monarch/parallel/pipelining/scheduler.py +249 -0
- monarch/pdb_wrapper.py +135 -0
- monarch/proc_mesh.py +299 -0
- monarch/profiler.py +160 -0
- monarch/python_local_mesh.py +107 -0
- monarch/random.py +61 -0
- monarch/rdma.py +162 -0
- monarch/remote_class.py +114 -0
- monarch/rust_backend_mesh.py +280 -0
- monarch/rust_local_mesh.py +1402 -0
- monarch/sim_mesh.py +359 -0
- monarch/simulator/__init__.py +7 -0
- monarch/simulator/command_history.py +424 -0
- monarch/simulator/config.py +21 -0
- monarch/simulator/interface.py +59 -0
- monarch/simulator/ir.py +770 -0
- monarch/simulator/mock_controller.py +214 -0
- monarch/simulator/profiling.py +424 -0
- monarch/simulator/simulator.py +1052 -0
- monarch/simulator/task.py +255 -0
- monarch/simulator/tensor.py +373 -0
- monarch/simulator/trace.py +395 -0
- monarch/simulator/utils.py +41 -0
- monarch/simulator/worker.py +389 -0
- monarch/telemetry.py +19 -0
- monarch/tensor_worker_main.py +260 -0
- monarch/tensorboard.py +84 -0
- monarch/timer/__init__.py +21 -0
- monarch/timer/example_monarch.py +78 -0
- monarch/timer/example_spmd.py +55 -0
- monarch/timer/execution_timer.py +199 -0
- monarch/timer/execution_timer_test.py +131 -0
- monarch/tools/__init__.py +7 -0
- monarch/tools/cli.py +167 -0
- monarch/tools/commands.py +251 -0
- monarch/tools/components/__init__.py +7 -0
- monarch/tools/components/hyperactor.py +58 -0
- monarch/tools/config/__init__.py +20 -0
- monarch/tools/config/defaults.py +54 -0
- monarch/tools/mesh_spec.py +165 -0
- monarch/tools/network.py +69 -0
- monarch/worker/__init__.py +7 -0
- monarch/worker/_testing_function.py +481 -0
- monarch/worker/compiled_block.py +270 -0
- monarch/worker/debugger.py +125 -0
- monarch/worker/lines.py +47 -0
- monarch/worker/monitor.py +53 -0
- monarch/worker/worker.py +1191 -0
- monarch/world_mesh.py +34 -0
- monarch_supervisor/__init__.py +1044 -0
- monarch_supervisor/_testing.py +44 -0
- monarch_supervisor/function_call.py +30 -0
- monarch_supervisor/host.py +386 -0
- monarch_supervisor/launchers.py +145 -0
- monarch_supervisor/log_pstree.py +48 -0
- monarch_supervisor/logging.py +103 -0
- monarch_supervisor/python_executable.py +42 -0
- tests/__init__.py +0 -0
- tests/dispatch_bench.py +124 -0
- tests/dispatch_bench_helper.py +25 -0
- tests/error_test_binary.py +180 -0
- tests/simulator/__init__.py +0 -0
- tests/simulator/test_profiling.py +136 -0
- tests/simulator/test_simulator.py +411 -0
- tests/simulator/test_task.py +64 -0
- tests/simulator/test_worker.py +102 -0
- tests/sleep_binary.py +35 -0
- tests/test_actor_error.py +240 -0
- tests/test_alloc.py +25 -0
- tests/test_allocator.py +365 -0
- tests/test_coalescing.py +492 -0
- tests/test_controller.py +845 -0
- tests/test_device_mesh.py +132 -0
- tests/test_fault_tolerance.py +398 -0
- tests/test_future.py +94 -0
- tests/test_grad_generator.py +121 -0
- tests/test_mock_cuda.py +74 -0
- tests/test_pdb_actor.py +110 -0
- tests/test_python_actors.py +736 -0
- tests/test_remote_functions.py +1271 -0
- tests/test_rust_backend.py +217 -0
- tests/test_signal_safe_block_on.py +103 -0
- tests/test_sim_backend.py +54 -0
- tests/test_tensor_engine.py +52 -0
- torchmonarch_nightly-2025.6.27.dist-info/METADATA +94 -0
- torchmonarch_nightly-2025.6.27.dist-info/RECORD +165 -0
- torchmonarch_nightly-2025.6.27.dist-info/WHEEL +5 -0
- torchmonarch_nightly-2025.6.27.dist-info/entry_points.txt +3 -0
- torchmonarch_nightly-2025.6.27.dist-info/licenses/LICENSE +29 -0
- torchmonarch_nightly-2025.6.27.dist-info/top_level.txt +3 -0
@@ -0,0 +1,270 @@
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the BSD-style license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
# pyre-unsafe
|
8
|
+
import logging
|
9
|
+
from collections import defaultdict
|
10
|
+
from contextlib import contextmanager
|
11
|
+
from dataclasses import dataclass
|
12
|
+
from typing import Any, Callable, Dict, List, Optional, Sequence, Set, TYPE_CHECKING
|
13
|
+
|
14
|
+
import torch.fx
|
15
|
+
from monarch.common.messages import DependentOnError
|
16
|
+
from monarch.common.tree import tree_map
|
17
|
+
from torch.fx.proxy import GraphAppendingTracer
|
18
|
+
|
19
|
+
from .lines import Lines
|
20
|
+
|
21
|
+
if TYPE_CHECKING:
|
22
|
+
from .worker import Cell, Stream
|
23
|
+
|
24
|
+
logger = logging.getLogger(__name__)
|
25
|
+
|
26
|
+
|
27
|
+
class Symbol:
    """A bare identifier whose ``repr`` is the identifier text itself.

    Code generation formats arguments with ``repr``; wrapping a name in a
    ``Symbol`` makes it render as ``name`` rather than as the quoted string
    ``'name'``.
    """

    def __init__(self, name: str) -> None:
        # The identifier text; returned verbatim by __repr__.
        self.name = name

    def __repr__(self) -> str:
        """Return the raw identifier, without quotes."""
        return self.name
|
33
|
+
|
34
|
+
|
35
|
+
@dataclass
class ErrorContext:
    """Per-line error-recovery information attached to generated code.

    The error handler built in ``CompiledBlock.emit_stream`` looks this up
    from the line number at which the generated function raised.
    """

    # Identifier handed to ``stream.report_error`` for a new (non-dependent)
    # failure; when it is None such a failure is re-raised instead.
    ident: Optional[int]
    # Index into the stream's fallback-function list from which the error
    # handler resumes running fallback functions.
    fallback_resume_offset: int
|
39
|
+
|
40
|
+
|
41
|
+
class _BlockTracer(GraphAppendingTracer):
    """Graph-appending tracer that tags every node it creates with an error context."""

    def __init__(self, ctx: "ErrorContext", graph: torch.fx.Graph):
        """Append to ``graph``, stamping ``ctx`` on each node created through this tracer."""
        super().__init__(graph)
        self._error_context = ctx

    def create_node(self, *args, **kwargs):
        """Create the node as the base tracer would, then set its ``context`` attribute."""
        node = super().create_node(*args, **kwargs)
        node.context = self._error_context
        return node
|
50
|
+
|
51
|
+
|
52
|
+
class CompiledBlock:
    """Records operations into one ``torch.fx`` graph per stream and compiles each to Python.

    Nodes are appended through ``call_function``, ``define_formal`` and
    ``input_cell`` while a stream is selected with ``record_to``.  ``emit``
    then turns every graph into a function ``impl(results, actuals)`` stored
    in ``self.impls``.
    """

    def __init__(self):
        # One graph per stream, created lazily on first access.
        self.graphs: Dict["Stream", torch.fx.Graph] = defaultdict(
            lambda: torch.fx.Graph()
        )
        # Indices that have been passed to define_formal / define_result.
        self.used_formals: Set[int] = set()
        self.used_results: Set[int] = set()
        # Node -> index into the `results` argument of the generated function.
        self.results: Dict[torch.fx.Node, int] = {}
        # Per-stream callables that the generated error handler runs after a failure.
        self.fallback: Dict["Stream", List[Callable]] = defaultdict(list)
        # Stream currently selected by record_to (None outside of it).
        self.recording_stream: Optional["Stream"] = None
        self.defined_borrows = {}  # dict not set to preserve order
        self.defined_cells: Dict["Cell", int] = {}  # dict not set to preserve order
        # Cells passed to mutates() that this block did not define, mapped to
        # the stream that was recording at the time.
        self.mutated_cells: Dict["Cell", "Stream"] = {}
        self.current_context = ErrorContext(None, 0)
        # Compiled function per stream; populated by emit().
        self.impls: Dict["Stream", Callable] = {}

    def call_function(self, *args, **kwargs):
        """Append a ``call_function`` node to the recording graph and tag it with the current context."""
        n = self.recording_graph.call_function(*args, **kwargs)
        n.context = self.current_context
        return n

    def define_formal(self, stream: "Stream", argument_index: int):
        """Append a node to ``stream``'s graph that loads ``actuals[argument_index]``."""
        self.used_formals.add(argument_index)
        n = self.graphs[stream].call_module("formal", (argument_index,))
        # pyre-ignore
        n.context = self.current_context
        return n

    def define_result(self, node: torch.fx.Node, output_index: int):
        """Mark ``node`` as the value stored into ``results[output_index]``."""
        self.used_results.add(output_index)
        self.results[node] = output_index

    def input_cell(self, cell: "Cell"):
        """Append a node to the recording graph that loads its value from ``cell``."""
        n = self.recording_graph.call_module("input_cell", (cell,))
        n.context = self.current_context
        return n

    def proxy(self, n: torch.fx.Node):
        """Wrap ``n`` in a ``torch.fx.Proxy`` whose new nodes carry the current context."""
        return torch.fx.Proxy(n, _BlockTracer(self.current_context, n.graph))

    def mutates(self, results: Sequence["Cell"]):
        """Record that the recording stream mutates each cell not defined by this block."""
        for r in results:
            if r not in self.defined_cells:
                assert self.recording_stream is not None
                self.mutated_cells[r] = self.recording_stream

    @property
    def recording_graph(self):
        """The graph of the stream currently being recorded to."""
        return self.graphs[self.recording_stream]

    @contextmanager
    def record_to(self, stream: "Stream"):
        """Select ``stream`` as the recording target for the duration of the ``with`` body.

        A fresh ``ErrorContext`` is installed whose resume offset is the number
        of fallback functions registered for ``stream`` so far.  The previous
        stream and context are restored on exit, even if the body raises.
        """
        orig, self.recording_stream = self.recording_stream, stream
        ctx = ErrorContext(None, len(self.fallback[stream]))
        orig_context, self.current_context = self.current_context, ctx
        try:
            yield
        finally:
            self.recording_stream = orig
            self.current_context = orig_context

    def emit_stream(self, stream: "Stream"):
        """Compile the graph recorded for ``stream`` into a Python function and return it."""
        # Generated function looks like this:

        # def impl(results, actuals):
        #     fallback, cell0, add, = EXTERNAL  # values bound into this code
        #     _exception = None
        #     try:
        #         a = cell0.get()
        #         b = actuals[0].get()
        #         r = add(a, b)
        #         t = add(r, r)
        #         r2 = add(r, t)
        #         del t
        #         results[2].set(r)
        #         results[4].set(r2)
        #     except Exception as e:
        #         _exception = e
        #     # error recovery, fallback to
        #     # interpreter code that can handle some values failing
        #     if _exception is not None: return fallback(results, _exception)

        graph: torch.fx.Graph = self.graphs[stream]
        fallback_functions = self.fallback[stream]

        # Non-node values referenced by the generated code.  They are handed
        # over through the EXTERNAL global and unpacked into local names.
        external: List[Any] = []
        external_names: List[str] = []
        external_id_to_name: Dict[int, Symbol] = {}

        def arg_map(x):
            # Nodes print as their own name; any other object gets (or reuses)
            # a unique symbol bound through EXTERNAL.
            if isinstance(x, torch.fx.Node):
                return x
            elif id(x) in external_id_to_name:
                return external_id_to_name[id(x)]
            else:
                candidate = getattr(x, "__name__", "external")
                sym = Symbol(graph._graph_namespace.create_name(candidate, None))
                external_names.append(sym.name)
                external.append(x)
                external_id_to_name[id(x)] = sym
                return sym

        lines = Lines()
        body = Lines()

        def fallback(results, exc):
            # Map the failing line of the generated function back to the
            # ErrorContext recorded for it.
            lineno = exc.__traceback__.tb_lineno
            error_context: ErrorContext = lines.get_context(lineno)
            # report new errors and set
            # defined identifiers for currently failing
            # op to DependentOnError
            if not isinstance(exc, DependentOnError):
                if error_context is None or error_context.ident is None:
                    raise exc
                exc = stream.report_error(
                    stream.current_recording,
                    error_context.ident,
                    exc,
                )

            # set exceptions on all the values this stream was responsible for.
            # this is the explicitly passed cell outputs, and all
            # the cells we mutated.
            for c, s in self.mutated_cells.items():
                if s is stream:
                    c.set(exc)
            for r, i in self.results.items():
                if r.graph is graph:
                    results[i].set(exc)

            # some ops we have to run despite errors such as
            # borrows, collectives, send_tensor
            # we run these universally here.
            # Note that all of these are ok loading from cells with dependent
            # on error status.
            inst_range = range(
                error_context.fallback_resume_offset, len(fallback_functions)
            )
            for inst in inst_range:
                fallback_functions[inst]()

        fallback_sym = arg_map(fallback)

        # figure out the last use of each node that isn't
        # live out, so that we appropriately `del` the variable.
        seen = {r for r in self.results.keys() if r.graph is graph}
        last_uses = defaultdict(list)
        for node in reversed(graph.nodes):
            for n in node.all_input_nodes:
                if n not in seen:
                    last_uses[node].append(n)
                    seen.add(n)

        # generate the repeat body
        for node in graph.nodes:
            if node.op == "call_module":
                if node.target == "input_cell":
                    # each input goes into the prologue where we issue a load from the
                    # cell it came from.
                    (cell_obj,) = node.args
                    cell = arg_map(cell_obj)
                    with body.context(node.context):
                        body.emit(f"        {node.name} = {cell}.get()")
                elif node.target == "formal":
                    with body.context(node.context):
                        (i,) = node.args
                        body.emit(f"        {node.name} = actuals[{i}].get()")
            else:
                assert node.op == "call_function"
                fn = arg_map(node.target)
                args, kwargs = tree_map(arg_map, (node.args, node.kwargs))
                rendered_args = [
                    *(repr(a) for a in args),
                    *(f"{k}={repr(v)}" for k, v in kwargs.items()),
                ]
                # Only bind the result to a name when something consumes it.
                assign = ""
                if node in seen:
                    assign = f"{node.name} = "
                with body.context(node.context):
                    body.emit(f"        {assign}{fn}({', '.join(rendered_args)})")
                # some inputs to this node may no longer be used in the body
                # of the loop. We explicitly del them so their lifetime
                # is not longer than it was originally without compilation.
                to_delete = [repr(d) for d in last_uses[node]]
                if to_delete:
                    body.emit(f"        del {', '.join(to_delete)}")
        for r, i in self.results.items():
            if r.graph is not graph:
                continue
            body.emit(f"        results[{i}].set({r})")

        # A `try:` needs at least one statement in its suite.
        if not body.text().strip():
            body.emit("        pass")

        lines.emit("def impl(results, actuals):")
        # The trailing comma keeps this a sequence unpack even when the
        # fallback handler is the only external; without it a single name
        # would be bound to the whole EXTERNAL list.
        lines.emit(f"    {', '.join(external_names)}, = EXTERNAL")
        lines.emit("    _exception = None")
        lines.emit("    try:")
        lines.emit_lines(body)
        lines.emit("    except Exception as e:")
        lines.emit("        _exception = e")
        # we do not call `fallback` inside of the exception block because we
        # do not want future exceptions to have the stack trace of e attached.
        lines.emit(
            f"    if _exception is not None: return {fallback_sym}(results, _exception)"
        )

        gbls = {"EXTERNAL": external}
        text = lines.text()
        logger.debug("Compiled\n%s", text)
        exec(text, gbls)
        return gbls["impl"]

    def emit(self):
        """Compile every recorded stream, then mark the block's defined cells as errored."""
        self.impls = {stream: self.emit_stream(stream) for stream in self.graphs.keys()}

        # fallback functions for borrows/reduce/send read directly from these cells
        # we need to make sure they are set to errors so that they work correctly.
        # it always gets an error value
        err = DependentOnError(-1)
        for cell in self.defined_cells:
            cell.set(err)

        self.defined_cells.clear()
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the BSD-style license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
# pyre-unsafe
|
8
|
+
|
9
|
+
import bdb
|
10
|
+
import io
|
11
|
+
import logging
|
12
|
+
import pdb # noqa
|
13
|
+
import sys
|
14
|
+
from typing import Optional, TYPE_CHECKING
|
15
|
+
|
16
|
+
from monarch.common import messages
|
17
|
+
|
18
|
+
logger = logging.getLogger(__name__)
|
19
|
+
|
20
|
+
if TYPE_CHECKING:
|
21
|
+
from .worker import Stream, Worker
|
22
|
+
|
23
|
+
# The set_trace that pdb exposed when this module was imported; used when
# the calling thread has no worker stream.
_orig_set_trace = pdb.set_trace


def _set_trace(*, header=None):
    """Enter the debugger, relaying through the worker when a stream is active.

    If the thread-local stream is None, this defers to the original
    ``pdb.set_trace`` and returns.  Otherwise a ``PdbWrapper`` bound to the
    stream is created and its ``set_trace`` is invoked.

    Args:
        header: optional text shown before the debugger prompt.
    """
    from .worker import _tls

    stream = _tls.stream
    if stream is None:
        _orig_set_trace(header=header)
        # PdbWrapper dereferences `stream.worker`, so it must not be built
        # without a stream.
        return
    ds = PdbWrapper(stream, header)
    ds.set_trace()
|
34
|
+
|
35
|
+
|
36
|
+
class PdbWrapper(pdb.Pdb):
    """``pdb.Pdb`` whose stdin/stdout are exchanged as messages via the worker.

    Output goes through ``WriteWrapper`` and input through ``ReadWrapper``;
    both send ``messages.DebuggerMessage`` objects for this stream using
    ``_send``.
    """

    def __init__(self, stream: "Stream", header: Optional[str]):
        self.stream = stream
        self.worker: "Worker" = self.stream.worker
        self.header = header
        # pyre-ignore
        super().__init__(stdout=WriteWrapper(self), stdin=ReadWrapper.create(self))
        # True until the first call to setup() has adjusted the frame.
        self._first = True

    def setup(self, *args, **kwargs):
        """Run the base ``setup`` and, on the first call only, move up two frames."""
        result = super().setup(*args, **kwargs)
        if not self._first:
            return result
        self._first = False
        # when we enter the debugger, we want to present the user's stack frame
        # not the nested one inside session.run. This means that the local
        # variables are what gets printed, etc. To do this
        # we first execute up 2 to get to that frame.
        self.do_up(2)
        return result

    def set_continue(self) -> None:
        """Continue execution; detach from the controller once no breakpoints remain."""
        result = super().set_continue()
        if self.breaks:
            return result
        # no more breakpoints so this debugger will not
        # be used again, and we detach from the controller io.
        self._send("detach")
        # break cycle with itself before we exit
        self.stdin = sys.stdin
        self.stdout = sys.stdout
        return result

    def _send(self, action):
        """Schedule on the worker the sending of a ``DebuggerMessage`` carrying ``action``."""

        def deliver():
            return self.worker.q.send(
                messages.DebuggerMessage(self.stream.id, action)
            )

        self.worker.schedule(deliver)

    def set_trace(self):
        """Announce the pause, wait for the controller's reply, then start tracing.

        Raises:
            RuntimeError: if the reply is neither "attach" nor "detach".
        """
        self._send("paused")
        message = self.stream.debugger_queue.get()
        # we give the controller the option to ignore this request to debug
        # by issuing a "detach" message immediately.
        if message == "attach":
            pass
        elif message == "detach":
            return
        else:
            raise RuntimeError(f"unexpected debugger message {message}")
        if self.header:
            self.message(self.header)
        super().set_trace()
|
87
|
+
|
88
|
+
|
89
|
+
class ReadWrapper(io.RawIOBase):
    """Raw input stream that obtains its bytes by asking through the debugger session."""

    def __init__(self, session: "PdbWrapper"):
        self.session = session

    def readinto(self, b):
        """Request up to ``len(b)`` bytes, copy the reply into ``b`` and return its length.

        Raises:
            bdb.BdbQuit: if the reply taken from the debugger queue is "detach".
        """
        owner = self.session
        owner._send(messages.DebuggerRead(len(b)))
        response = owner.stream.debugger_queue.get()
        if response == "detach":
            # this gets injected by the worker event loop to
            # get the worker thread to exit on an Exit command.
            raise bdb.BdbQuit
        assert isinstance(response, messages.DebuggerWrite) and len(
            response.payload
        ) <= len(b)
        payload = response.payload
        count = len(payload)
        b[:count] = payload
        return count

    def readable(self) -> bool:
        """This stream always supports reading."""
        return True

    @classmethod
    def create(cls, session: "PdbWrapper"):
        """Build a text-mode reader layered on a buffered ``ReadWrapper`` for ``session``."""
        raw = cls(session)
        buffered = io.BufferedReader(raw)
        return io.TextIOWrapper(buffered)
|
112
|
+
|
113
|
+
|
114
|
+
class WriteWrapper:
    """Minimal writable file-like object that forwards text through the debugger session."""

    def __init__(self, session: "PdbWrapper"):
        self.session = session

    def writable(self) -> bool:
        """This object always accepts writes."""
        return True

    def write(self, s: str):
        """Encode ``s`` to bytes and send it as a ``messages.DebuggerWrite``."""
        payload = s.encode()
        self.session._send(messages.DebuggerWrite(payload))

    def flush(self):
        """No-op; ``write`` keeps no local buffer."""
        return None
|
monarch/worker/lines.py
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the BSD-style license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
# pyre-unsafe
|
8
|
+
from contextlib import contextmanager
|
9
|
+
from typing import Any, List
|
10
|
+
|
11
|
+
|
12
|
+
class Lines:
    """
    Line-oriented text buffer that remembers a context object for every line.

    The context in effect when a line is emitted is stored alongside it and
    can be retrieved later by 1-based line number.
    """

    def __init__(self, context=None):
        self._lines: List[str] = []
        # Parallel to _lines: _context[i] is the context of _lines[i].
        self._context: List[Any] = []
        self._current_context = context

    def get_context(self, lineno) -> Any:
        """Return the context recorded for the 1-based line number ``lineno``."""
        index = lineno - 1
        return self._context[index]

    @contextmanager
    def context(self, obj: Any):
        """Make ``obj`` the current context inside the ``with`` body, restoring the old one after."""
        previous = self._current_context
        self._current_context = obj
        try:
            yield
        finally:
            self._current_context = previous

    def emit(self, lines: str) -> None:
        """Append ``lines`` (split on newlines), tagging each new line with the current context."""
        self._lines += lines.split("\n")
        missing = len(self._lines) - len(self._context)
        if missing > 0:
            self._context.extend([self._current_context] * missing)

    def emit_lines(self, lines: "Lines") -> None:
        """
        Append the contents of another Lines object,
        keeping the context it recorded for each line.
        """
        self._lines += lines._lines
        self._context += lines._context

    def text(self) -> str:
        """Return all emitted lines joined with newlines."""
        return "\n".join(self._lines)
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the BSD-style license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
# pyre-strict
|
8
|
+
|
9
|
+
import math
|
10
|
+
import queue
|
11
|
+
import threading
|
12
|
+
from typing import Callable, Optional, Tuple
|
13
|
+
|
14
|
+
from monarch_supervisor import TTL
|
15
|
+
|
16
|
+
|
17
|
+
class Monitor:
    """Background thread that runs a callback when a reported event's timeout lapses."""

    def __init__(self) -> None:
        self.thread = threading.Thread(target=self._main, daemon=True, name="monitor")
        # Queue of (callback, ttl) pairs; the value returned by ttl() is used
        # as the wait time for the next event.
        self.events: queue.Queue[Tuple[Callable[[], None], Callable[[], float]]] = (
            queue.Queue()
        )
        # Seed the queue with a no-op event that has no timeout.
        self.events.put((lambda: None, TTL(None)))

    def start(self) -> None:
        """Launch the monitor thread."""
        self.thread.start()

    def _main(self) -> None:
        """Thread body: wait for the next event, firing the current callback on timeout."""
        on_expire, remaining = self.events.get()
        while True:
            try:
                wait = remaining()
                if wait == math.inf:
                    # An infinite TTL means block until the next event arrives.
                    wait = None
                upcoming = self.events.get(timeout=wait)
            except queue.Empty:
                # No new event arrived in time: run the callback, then wait
                # without a deadline for the next event.
                on_expire()
                upcoming = self.events.get(timeout=None)

            on_expire, remaining = upcoming

    def __call__(
        self,
        debug_fn: Callable[[], None] = lambda: None,
        timeout: Optional[float] = None,
    ) -> None:
        """Report a new event with the provided timeout.

        If a timeout is specified and no further event is reported before it
        expires, the provided debug_fn is called."""
        self.events.put((debug_fn, TTL(timeout)))
|