torchmonarch-nightly 2025.6.27__cp312-cp312-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. monarch/__init__.py +189 -0
  2. monarch/_monarch/__init__.py +5 -0
  3. monarch/_monarch/hyperactor/__init__.py +58 -0
  4. monarch/_monarch/selection/__init__.py +13 -0
  5. monarch/_monarch/worker/__init__.py +0 -0
  6. monarch/_monarch/worker/debugger.py +117 -0
  7. monarch/_monarch/worker/logging.py +107 -0
  8. monarch/_rust_bindings.so +0 -0
  9. monarch/_testing.py +230 -0
  10. monarch/actor_mesh.py +761 -0
  11. monarch/allocator.py +220 -0
  12. monarch/bootstrap_main.py +59 -0
  13. monarch/builtins/__init__.py +14 -0
  14. monarch/builtins/log.py +22 -0
  15. monarch/builtins/random.py +68 -0
  16. monarch/cached_remote_function.py +257 -0
  17. monarch/code_sync.py +10 -0
  18. monarch/common/_C.pyi +11 -0
  19. monarch/common/_C.so +0 -0
  20. monarch/common/__init__.py +0 -0
  21. monarch/common/_coalescing.py +308 -0
  22. monarch/common/_device_utils.py +18 -0
  23. monarch/common/_tensor_to_table.py +172 -0
  24. monarch/common/base_tensor.py +28 -0
  25. monarch/common/borrows.py +143 -0
  26. monarch/common/client.py +690 -0
  27. monarch/common/constants.py +10 -0
  28. monarch/common/context_manager.py +40 -0
  29. monarch/common/controller_api.py +104 -0
  30. monarch/common/device_mesh.py +417 -0
  31. monarch/common/fake.py +55 -0
  32. monarch/common/function.py +160 -0
  33. monarch/common/function_caching.py +164 -0
  34. monarch/common/future.py +168 -0
  35. monarch/common/invocation.py +125 -0
  36. monarch/common/mast.py +221 -0
  37. monarch/common/messages.py +573 -0
  38. monarch/common/mock_cuda.py +41 -0
  39. monarch/common/opaque_ref.py +98 -0
  40. monarch/common/pickle_flatten.py +48 -0
  41. monarch/common/pipe.py +152 -0
  42. monarch/common/process_group.py +55 -0
  43. monarch/common/recording.py +127 -0
  44. monarch/common/reference.py +33 -0
  45. monarch/common/remote.py +297 -0
  46. monarch/common/selection.py +9 -0
  47. monarch/common/shape.py +229 -0
  48. monarch/common/stream.py +114 -0
  49. monarch/common/tensor.py +814 -0
  50. monarch/common/tensor_factory.py +31 -0
  51. monarch/common/tree.py +73 -0
  52. monarch/controller/__init__.py +7 -0
  53. monarch/controller/backend.py +223 -0
  54. monarch/controller/controller.py +223 -0
  55. monarch/controller/debugger.py +47 -0
  56. monarch/controller/history.py +90 -0
  57. monarch/controller/rust_backend/__init__.py +7 -0
  58. monarch/controller/rust_backend/controller.py +245 -0
  59. monarch/debugger.py +379 -0
  60. monarch/fetch.py +55 -0
  61. monarch/future.py +76 -0
  62. monarch/gradient/__init__.py +11 -0
  63. monarch/gradient/_gradient_generator.pyi +22 -0
  64. monarch/gradient/_gradient_generator.so +0 -0
  65. monarch/gradient_generator.py +185 -0
  66. monarch/memory.py +43 -0
  67. monarch/mesh_controller.py +271 -0
  68. monarch/monarch_controller +0 -0
  69. monarch/notebook.py +761 -0
  70. monarch/opaque_module.py +235 -0
  71. monarch/opaque_object.py +88 -0
  72. monarch/parallel/__init__.py +9 -0
  73. monarch/parallel/pipelining/__init__.py +7 -0
  74. monarch/parallel/pipelining/runtime.py +847 -0
  75. monarch/parallel/pipelining/schedule_ir.py +692 -0
  76. monarch/parallel/pipelining/scheduler.py +249 -0
  77. monarch/pdb_wrapper.py +135 -0
  78. monarch/proc_mesh.py +299 -0
  79. monarch/profiler.py +160 -0
  80. monarch/python_local_mesh.py +107 -0
  81. monarch/random.py +61 -0
  82. monarch/rdma.py +162 -0
  83. monarch/remote_class.py +114 -0
  84. monarch/rust_backend_mesh.py +280 -0
  85. monarch/rust_local_mesh.py +1402 -0
  86. monarch/sim_mesh.py +359 -0
  87. monarch/simulator/__init__.py +7 -0
  88. monarch/simulator/command_history.py +424 -0
  89. monarch/simulator/config.py +21 -0
  90. monarch/simulator/interface.py +59 -0
  91. monarch/simulator/ir.py +770 -0
  92. monarch/simulator/mock_controller.py +214 -0
  93. monarch/simulator/profiling.py +424 -0
  94. monarch/simulator/simulator.py +1052 -0
  95. monarch/simulator/task.py +255 -0
  96. monarch/simulator/tensor.py +373 -0
  97. monarch/simulator/trace.py +395 -0
  98. monarch/simulator/utils.py +41 -0
  99. monarch/simulator/worker.py +389 -0
  100. monarch/telemetry.py +19 -0
  101. monarch/tensor_worker_main.py +260 -0
  102. monarch/tensorboard.py +84 -0
  103. monarch/timer/__init__.py +21 -0
  104. monarch/timer/example_monarch.py +78 -0
  105. monarch/timer/example_spmd.py +55 -0
  106. monarch/timer/execution_timer.py +199 -0
  107. monarch/timer/execution_timer_test.py +131 -0
  108. monarch/tools/__init__.py +7 -0
  109. monarch/tools/cli.py +167 -0
  110. monarch/tools/commands.py +251 -0
  111. monarch/tools/components/__init__.py +7 -0
  112. monarch/tools/components/hyperactor.py +58 -0
  113. monarch/tools/config/__init__.py +20 -0
  114. monarch/tools/config/defaults.py +54 -0
  115. monarch/tools/mesh_spec.py +165 -0
  116. monarch/tools/network.py +69 -0
  117. monarch/worker/__init__.py +7 -0
  118. monarch/worker/_testing_function.py +481 -0
  119. monarch/worker/compiled_block.py +270 -0
  120. monarch/worker/debugger.py +125 -0
  121. monarch/worker/lines.py +47 -0
  122. monarch/worker/monitor.py +53 -0
  123. monarch/worker/worker.py +1191 -0
  124. monarch/world_mesh.py +34 -0
  125. monarch_supervisor/__init__.py +1044 -0
  126. monarch_supervisor/_testing.py +44 -0
  127. monarch_supervisor/function_call.py +30 -0
  128. monarch_supervisor/host.py +386 -0
  129. monarch_supervisor/launchers.py +145 -0
  130. monarch_supervisor/log_pstree.py +48 -0
  131. monarch_supervisor/logging.py +103 -0
  132. monarch_supervisor/python_executable.py +42 -0
  133. tests/__init__.py +0 -0
  134. tests/dispatch_bench.py +124 -0
  135. tests/dispatch_bench_helper.py +25 -0
  136. tests/error_test_binary.py +180 -0
  137. tests/simulator/__init__.py +0 -0
  138. tests/simulator/test_profiling.py +136 -0
  139. tests/simulator/test_simulator.py +411 -0
  140. tests/simulator/test_task.py +64 -0
  141. tests/simulator/test_worker.py +102 -0
  142. tests/sleep_binary.py +35 -0
  143. tests/test_actor_error.py +240 -0
  144. tests/test_alloc.py +25 -0
  145. tests/test_allocator.py +365 -0
  146. tests/test_coalescing.py +492 -0
  147. tests/test_controller.py +845 -0
  148. tests/test_device_mesh.py +132 -0
  149. tests/test_fault_tolerance.py +398 -0
  150. tests/test_future.py +94 -0
  151. tests/test_grad_generator.py +121 -0
  152. tests/test_mock_cuda.py +74 -0
  153. tests/test_pdb_actor.py +110 -0
  154. tests/test_python_actors.py +736 -0
  155. tests/test_remote_functions.py +1271 -0
  156. tests/test_rust_backend.py +217 -0
  157. tests/test_signal_safe_block_on.py +103 -0
  158. tests/test_sim_backend.py +54 -0
  159. tests/test_tensor_engine.py +52 -0
  160. torchmonarch_nightly-2025.6.27.dist-info/METADATA +94 -0
  161. torchmonarch_nightly-2025.6.27.dist-info/RECORD +165 -0
  162. torchmonarch_nightly-2025.6.27.dist-info/WHEEL +5 -0
  163. torchmonarch_nightly-2025.6.27.dist-info/entry_points.txt +3 -0
  164. torchmonarch_nightly-2025.6.27.dist-info/licenses/LICENSE +29 -0
  165. torchmonarch_nightly-2025.6.27.dist-info/top_level.txt +3 -0
monarch/__init__.py ADDED
@@ -0,0 +1,189 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-unsafe
8
+
9
+ from importlib import import_module as _import_module
10
+ from typing import TYPE_CHECKING
11
+
12
+ # Import before monarch to pre-load torch DSOs as, in exploded wheel flows,
13
+ # our RPATHs won't correctly find them.
14
+ import torch # noqa: F401
15
+
16
+ # submodules of monarch should not be imported in this
17
+ # top-level file because it will cause them to get
18
+ # loaded even if they are not actually being used.
19
+ # for instance if we import monarch.common.functions,
20
+ # we might not want to also import monarch.common.tensor,
21
+ # which recursively imports torch.
22
+
23
+ # Instead to expose functionality as part of the
24
+ # monarch.* API, import it inside the TYPE_CHECKING
25
+ # guard (so typechecker works), and then add it
26
+ # to the _public_api dict and __all__ list. These
27
+ # entries will get loaded on demand.
28
+
29
+
30
+ if TYPE_CHECKING:
31
+ from monarch import timer
32
+ from monarch.allocator import LocalAllocator, ProcessAllocator
33
+ from monarch.common._coalescing import coalescing
34
+
35
+ from monarch.common.device_mesh import (
36
+ DeviceMesh,
37
+ get_active_mesh,
38
+ no_mesh,
39
+ RemoteProcessGroup,
40
+ slice_mesh,
41
+ to_mesh,
42
+ )
43
+
44
+ from monarch.common.function import resolvers as function_resolvers
45
+
46
+ from monarch.common.future import Future
47
+
48
+ from monarch.common.invocation import RemoteException
49
+ from monarch.common.opaque_ref import OpaqueRef
50
+ from monarch.common.pipe import create_pipe, Pipe, remote_generator
51
+ from monarch.common.remote import remote
52
+ from monarch.common.selection import Selection
53
+ from monarch.common.shape import NDSlice, Shape
54
+ from monarch.common.stream import get_active_stream, Stream
55
+ from monarch.common.tensor import reduce, reduce_, Tensor
56
+ from monarch.fetch import fetch_shard, inspect, show
57
+ from monarch.future import ActorFuture
58
+ from monarch.gradient_generator import grad_function, grad_generator
59
+ from monarch.notebook import mast_mesh, reserve_torchx as mast_reserve
60
+ from monarch.python_local_mesh import python_local_mesh
61
+ from monarch.rust_backend_mesh import (
62
+ rust_backend_mesh,
63
+ rust_backend_meshes,
64
+ rust_mast_mesh,
65
+ )
66
+ from monarch.rust_local_mesh import local_mesh, local_meshes, SocketType
67
+ from monarch.simulator.config import set_meta # noqa
68
+ from monarch.simulator.interface import Simulator
69
+ from monarch.world_mesh import world_mesh
70
+
71
+
72
# Lazy-export table consulted by the module-level ``__getattr__``.
# Each key is a name exposed as ``monarch.<key>``; the value is the pair
# (module to import on first access, attribute to read from that module).
# The public name and the attribute name may differ (see
# "function_resolvers" and "mast_reserve").
_public_api = {
    # core remote-execution primitives
    "coalescing": ("monarch.common._coalescing", "coalescing"),
    "remote": ("monarch.common.remote", "remote"),
    "DeviceMesh": ("monarch.common.device_mesh", "DeviceMesh"),
    "get_active_mesh": ("monarch.common.device_mesh", "get_active_mesh"),
    "no_mesh": ("monarch.common.device_mesh", "no_mesh"),
    "RemoteProcessGroup": ("monarch.common.device_mesh", "RemoteProcessGroup"),
    "function_resolvers": ("monarch.common.function", "resolvers"),
    "Future": ("monarch.common.future", "Future"),
    "RemoteException": ("monarch.common.invocation", "RemoteException"),
    "Shape": ("monarch.common.shape", "Shape"),
    "NDSlice": ("monarch.common.shape", "NDSlice"),
    "Selection": ("monarch.common.selection", "Selection"),
    "OpaqueRef": ("monarch.common.opaque_ref", "OpaqueRef"),
    "create_pipe": ("monarch.common.pipe", "create_pipe"),
    "Pipe": ("monarch.common.pipe", "Pipe"),
    "remote_generator": ("monarch.common.pipe", "remote_generator"),
    "get_active_stream": ("monarch.common.stream", "get_active_stream"),
    "Stream": ("monarch.common.stream", "Stream"),
    "Tensor": ("monarch.common.tensor", "Tensor"),
    "reduce": ("monarch.common.tensor", "reduce"),
    "reduce_": ("monarch.common.tensor", "reduce_"),
    "to_mesh": ("monarch.common.device_mesh", "to_mesh"),
    "slice_mesh": ("monarch.common.device_mesh", "slice_mesh"),
    # fetching / inspecting remote values
    "call_on_shard_and_fetch": ("monarch.fetch", "call_on_shard_and_fetch"),
    "fetch_shard": ("monarch.fetch", "fetch_shard"),
    "inspect": ("monarch.fetch", "inspect"),
    "show": ("monarch.fetch", "show"),
    "grad_function": ("monarch.gradient_generator", "grad_function"),
    "grad_generator": ("monarch.gradient_generator", "grad_generator"),
    # mesh constructors
    "python_local_mesh": ("monarch.python_local_mesh", "python_local_mesh"),
    "mast_mesh": ("monarch.notebook", "mast_mesh"),
    "mast_reserve": ("monarch.notebook", "reserve_torchx"),
    "rust_backend_mesh": ("monarch.rust_backend_mesh", "rust_backend_mesh"),
    "rust_backend_meshes": ("monarch.rust_backend_mesh", "rust_backend_meshes"),
    "local_mesh": ("monarch.rust_local_mesh", "local_mesh"),
    "local_meshes": ("monarch.rust_local_mesh", "local_meshes"),
    "SocketType": ("monarch.rust_local_mesh", "SocketType"),
    "rust_mast_mesh": ("monarch.rust_backend_mesh", "rust_mast_mesh"),
    # simulator
    "set_meta": ("monarch.simulator.config", "set_meta"),
    "Simulator": ("monarch.simulator.interface", "Simulator"),
    "world_mesh": ("monarch.world_mesh", "world_mesh"),
    # miscellaneous
    "timer": ("monarch.timer", "timer"),
    "ProcessAllocator": ("monarch.allocator", "ProcessAllocator"),
    "LocalAllocator": ("monarch.allocator", "LocalAllocator"),
    "ActorFuture": ("monarch.future", "ActorFuture"),
    "builtins": ("monarch.builtins", "builtins"),
}
120
+
121
+
122
+ def __getattr__(name):
123
+ if name in _public_api:
124
+ module_path, attr_name = _public_api[name]
125
+ module = _import_module(module_path)
126
+ result = getattr(module, attr_name)
127
+ globals()[name] = result
128
+ return result
129
+ raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
130
+
131
+
132
# IN_PAR records whether ``__manifest__.fbmake`` can be imported.
try:
    from __manifest__ import fbmake  # noqa
except ImportError:
    IN_PAR = False
else:
    IN_PAR = True
138
+
139
+ # we have to explicitly list this rather than just take the keys of the _public_api
140
+ # otherwise tools think the imports are unused
141
# Explicit, literal export list; it must name exactly the keys of
# ``_public_api`` (checked by the assert that follows the list).
__all__ = [
    "coalescing",
    "DeviceMesh",
    "get_active_mesh",
    "no_mesh",
    "remote",
    "RemoteProcessGroup",
    "function_resolvers",
    "Future",
    "RemoteException",
    "Shape",
    "Selection",
    "NDSlice",
    "OpaqueRef",
    "create_pipe",
    "Pipe",
    "remote_generator",
    "get_active_stream",
    "Stream",
    "Tensor",
    "reduce",
    "reduce_",
    "to_mesh",
    "slice_mesh",
    "call_on_shard_and_fetch",
    "fetch_shard",
    "inspect",
    "show",
    "grad_function",
    "grad_generator",
    "python_local_mesh",
    "mast_mesh",
    "mast_reserve",
    "rust_backend_mesh",
    "rust_backend_meshes",
    "local_mesh",
    "local_meshes",
    "SocketType",
    "rust_mast_mesh",
    "set_meta",
    "Simulator",
    "world_mesh",
    "timer",
    "ProcessAllocator",
    "LocalAllocator",
    "ActorFuture",
    "builtins",
]
# Import-time guard: the export list and the lazy table must not drift apart.
assert sorted(__all__) == sorted(_public_api)
@@ -0,0 +1,5 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
@@ -0,0 +1,58 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-strict
8
+
9
+ from monarch._rust_bindings.monarch_hyperactor.actor import PythonMessage
10
+
11
+ from monarch._rust_bindings.monarch_hyperactor.alloc import ( # @manual=//monarch/monarch_extension:monarch_extension
12
+ LocalAllocatorBase,
13
+ )
14
+
15
+ from monarch._rust_bindings.monarch_hyperactor.mailbox import Mailbox, PortId
16
+
17
+ from monarch._rust_bindings.monarch_hyperactor.proc import ( # @manual=//monarch/monarch_extension:monarch_extension
18
+ ActorId,
19
+ Alloc,
20
+ AllocConstraints,
21
+ AllocSpec,
22
+ init_proc,
23
+ Proc,
24
+ Serialized,
25
+ )
26
+
27
+ from monarch._rust_bindings.monarch_hyperactor.shape import ( # @manual=//monarch/monarch_extension:monarch_extension
28
+ Shape,
29
+ )
30
+
31
# Export only the names this module actually imports. A name in ``__all__``
# that the module does not define makes
# ``from monarch._monarch.hyperactor import *`` raise AttributeError, and
# each name only needs to be listed once.
__all__ = [
    "ActorId",
    "Alloc",
    "AllocConstraints",
    "AllocSpec",
    "init_proc",
    "LocalAllocatorBase",
    "Mailbox",
    "PortId",
    "Proc",
    "PythonMessage",
    "Serialized",
    "Shape",
]
@@ -0,0 +1,13 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ from monarch._rust_bindings.monarch_hyperactor.selection import ( # @manual=//monarch/monarch_extension:monarch_extension
8
+ Selection,
9
+ )
10
+
11
# Sole public name re-exported from the Rust bindings.
__all__ = ["Selection"]
File without changes
@@ -0,0 +1,117 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-unsafe
8
+
9
+ import bdb
10
+ import io
11
+ import logging
12
+ import pdb # noqa
13
+ import sys
14
+ from typing import cast, Optional
15
+
16
+ from monarch._rust_bindings.monarch_extension import debugger
17
+ from monarch._rust_bindings.monarch_messages.debugger import DebuggerAction
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
def _set_trace(*, header=None):
    """Create a ``PdbWrapper`` with ``header`` and start tracing with it.

    Args:
        header: optional text handed to ``PdbWrapper``; keyword-only.
    """
    # Keep this a direct call from this frame: PdbWrapper.setup() moves up a
    # fixed number of frames on first entry.
    PdbWrapper(header).set_trace()
25
+
26
+
27
class PdbWrapper(pdb.Pdb):
    """``pdb.Pdb`` whose input and output go through a ``debugger.PdbActor``.

    stdout is a ``WriteWrapper`` and stdin a ``ReadWrapper`` built around the
    same actor, so all debugger I/O is exchanged as ``DebuggerAction``
    messages instead of using the process's own terminal.
    """

    def __init__(self, header: Optional[str]):
        """Create the actor and wire it in as this debugger's stdin/stdout.

        Args:
            header: optional message printed by ``set_trace`` before the
                debugger starts.
        """
        self._actor = debugger.PdbActor()
        self.header = header
        super().__init__(
            # pyre-ignore
            stdout=WriteWrapper(self._actor),
            stdin=ReadWrapper.create(self._actor),
        )
        # True until setup() has run once; see setup().
        self._first = True

    def setup(self, *args, **kwargs):
        """Run the base ``setup`` and, the first time only, move up two frames."""
        r = super().setup(*args, **kwargs)
        if self._first:
            self._first = False
            # when we enter the debugger, we want to present the user's stack frame
            # not the nested one inside session.run. This means that the local
            # variables are what gets printed, etc. To do this
            # we first execute up 2 to get to that frame.
            self.do_up(2)
        return r

    def set_continue(self) -> None:
        """Continue execution; detach from the actor if no breakpoints remain."""
        r = super().set_continue()
        if not self.breaks:
            # no more breakpoints so this debugger will not
            # be used again, and we detach from the controller io.
            self._actor.send(DebuggerAction.Detach())
            self._actor.drain_and_stop()
            # break cycle with itself before we exit
            self.stdin = sys.stdin
            self.stdout = sys.stdout
        return r

    def set_trace(self):
        """Announce a pause to the actor and start tracing if it attaches.

        Sends ``DebuggerAction.Paused`` and waits for one reply. ``Detach``
        returns without entering the debugger, ``Attach`` proceeds, and any
        other message raises ``RuntimeError``.
        """
        self._actor.send(DebuggerAction.Paused())
        message = self._actor.receive()
        # we give the controller the option to ignore this request to debug
        # by issuing a "detach" message immediately.
        if isinstance(message, DebuggerAction.Detach):
            return
        elif isinstance(message, DebuggerAction.Attach):
            pass
        else:
            raise RuntimeError(f"unexpected debugger message {message}")
        if self.header:
            self.message(self.header)
        super().set_trace()

    def set_quit(self):
        """Send ``Detach``, drain and stop the actor, then quit the debugger."""
        self._actor.send(DebuggerAction.Detach())
        self._actor.drain_and_stop()
        super().set_quit()
80
+
81
+
82
class ReadWrapper(io.RawIOBase):
    """Readable raw stream that obtains its bytes from a ``debugger.PdbActor``."""

    def __init__(self, actor: debugger.PdbActor):
        self._actor = actor

    def readinto(self, b):
        """Request up to ``len(b)`` bytes from the actor and copy them into ``b``.

        Returns:
            The number of bytes written into ``b``.

        Raises:
            bdb.BdbQuit: if the actor answers with ``DebuggerAction.Detach``.
        """
        capacity = len(b)
        self._actor.send(DebuggerAction.Read(capacity))
        reply = self._actor.receive()
        if isinstance(reply, DebuggerAction.Detach):
            raise bdb.BdbQuit
        assert isinstance(reply, DebuggerAction.Write)
        data = debugger.get_bytes_from_write_action(
            cast(DebuggerAction.Write, reply)
        )
        received = len(data)
        assert received <= capacity
        b[:received] = data
        return received

    def readable(self) -> bool:
        """This stream always supports reading."""
        return True

    @classmethod
    def create(cls, actor: debugger.PdbActor):
        """Return a buffered text stream layered over a new ``ReadWrapper``."""
        raw_stream = cls(actor)
        buffered = io.BufferedReader(raw_stream)
        return io.TextIOWrapper(buffered)
104
+
105
+
106
class WriteWrapper:
    """Minimal writable object that sends written text to a ``debugger.PdbActor``."""

    def __init__(self, actor: debugger.PdbActor):
        self._actor = actor

    def writable(self) -> bool:
        """This object always accepts writes."""
        return True

    def write(self, s: str):
        """Encode ``s`` and send it to the actor as a ``DebuggerAction.Write``."""
        encoded = s.encode()
        self._actor.send(DebuggerAction.Write(encoded))

    def flush(self):
        """Do nothing; every ``write`` is sent immediately."""
        pass
@@ -0,0 +1,107 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # pyre-unsafe
8
+ import logging
9
+ import os
10
+ import socket
11
+ import sys
12
+ from pathlib import Path
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ ## NOTE THIS FILE IS A DIRECT COPY OF ~/fbsource/fbcode/monarch/python/monarch_supervisor/logging.py
17
+ ## It is copied here at this time to avoid pulling in the monarch python supervisor as dependency since there's
18
+ ## an expectation that the other one will be removed soon.
19
+
20
+
21
def _handle_unhandled_exception(*exc_details):
    """Log an uncaught exception at ERROR level on the module logger.

    The positional arguments are passed through unchanged as ``exc_info``.
    ``initialize_logging`` installs this function as ``sys.excepthook``.
    """
    logger.error("Uncaught exception", exc_info=exc_details)
23
+
24
+
25
# Single-letter level tags used in the glog-style line prefix.
# DEBUG maps to "V" because glog calls that level VERBOSE.
_glog_level_to_abbr = dict(
    DEBUG="V",
    INFO="I",
    WARNING="W",
    ERROR="E",
    CRITICAL="C",
)
32
+
33
+
34
def fix_exception_lines(tb_lines):
    """Rewrite traceback frame lines into a compact ``File path:lineno`` form.

    A frame line of the shape ``File "path", line N[, in func]`` is
    recognised whatever its leading indentation and becomes
    `` File path:N[, in func]``. Every other line is returned with
    surrounding whitespace stripped.

    Args:
        tb_lines: iterable of traceback lines (strings without newlines).

    Returns:
        A new list with one formatted string per input line.
    """
    frame_prefix = 'File "'
    line_marker = '", line '
    formatted_lines = []
    for line in tb_lines:
        stripped = line.strip()
        if stripped.startswith(frame_prefix):
            # Split on the literal '", line ' marker rather than on commas so
            # that paths containing commas stay intact.
            path, marker, tail = stripped[len(frame_prefix):].partition(line_marker)
            if marker:
                line_info, comma, rest = tail.partition(",")
                new_line = f" File {path}:{line_info.strip()}"
                if comma:
                    new_line += ", " + rest.strip()
                formatted_lines.append(new_line)
                continue
        # Not a well-formed frame line: pass it through stripped instead of
        # failing on a missing field.
        formatted_lines.append(stripped)
    return formatted_lines
50
+
51
+
52
class _Formatter(logging.Formatter):
    """Formatter that prefixes every output line with a glog-style header.

    The header is ``<level letter><MMDD HH:MM:SS>.<microseconds>
    <filename>:<lineno>]<suffix>``. Multi-line messages, exception tracebacks
    and stack info are split so that each line carries the same header.
    """

    def __init__(self, suffix):
        """Store ``suffix``, the text appended right after the ``]`` of the header.

        Args:
            suffix: string placed at the end of every line prefix.
        """
        # Initialise the base class so inherited state (``_style``, ``_fmt``,
        # ``datefmt``) exists; without it any base-class method that reads
        # that state raises AttributeError.
        super().__init__()
        self.suffix = suffix

    def format(self, record):
        """Return ``record`` rendered as newline-joined, individually prefixed lines."""
        message = record.getMessage()
        asctime = self.formatTime(record, "%m%d %H:%M:%S")

        lines = message.strip().split("\n")
        if record.exc_info:
            exc_info = fix_exception_lines(
                self.formatException(record.exc_info).split("\n")
            )
            lines.extend(exc_info)
        if record.stack_info:
            stack_info = self.formatStack(record.stack_info)
            lines.extend(stack_info.strip().split("\n"))

        # Unknown level names fall back to their first character.
        shortlevel = _glog_level_to_abbr.get(record.levelname, record.levelname[0])

        # record.msecs is in milliseconds; scale to a 6-digit microsecond field.
        prefix = (
            f"{shortlevel}{asctime}.{int(record.msecs*1000):06d} "
            f"{record.filename}:"
            f"{record.lineno}]{self.suffix}"
        )
        return "\n".join(f"{prefix} {line}" for line in lines)
78
+
79
+
80
def initialize_logging(process_name=None):
    """Attach a glog-style handler to the root logger and hook uncaught exceptions.

    Reads two environment variables:

    * ``TORCH_MONARCH_LOG_FOLDER`` - if set, logs go to a file inside that
      folder (created if missing); otherwise a ``StreamHandler`` is used.
    * ``TORCH_MONARCH_LOG_LEVEL`` - level for the handler and the root
      logger, defaulting to ``"INFO"``.

    Args:
        process_name: optional name used as the per-line suffix and, with
            ``/`` replaced by ``_``, as the log file stem. When it is falsy
            the file is named ``logfile.log``.

    Side effects:
        Adds a handler to ``logging.root``, sets the root level and replaces
        ``sys.excepthook`` with ``_handle_unhandled_exception``.
    """
    log_folder = os.environ.get("TORCH_MONARCH_LOG_FOLDER")
    log_level = os.environ.get("TORCH_MONARCH_LOG_LEVEL", "INFO")
    suffix = "" if process_name is None else f" {process_name}:"
    if log_folder is not None:
        log_folder_path = Path(log_folder)
        log_folder_path.mkdir(parents=True, exist_ok=True)
        # ".log" is appended below, so the fallback stem must not already
        # carry an extension (it previously produced "logfile.log.log").
        safe_process_name = (
            process_name.replace("/", "_") if process_name else "logfile"
        )
        log_file_name = f"{safe_process_name}.log"
        log_file_path = log_folder_path / log_file_name
        handler = logging.FileHandler(log_file_path)
    else:
        handler = logging.StreamHandler()
    handler.setFormatter(_Formatter(suffix))
    handler.setLevel(log_level)
    logging.root.setLevel(log_level)
    logging.root.addHandler(handler)
    sys.excepthook = _handle_unhandled_exception
101
+
102
+
103
def gethostname():
    """Return this machine's hostname with any ".facebook.com" removed."""
    return socket.gethostname().replace(".facebook.com", "")
Binary file