torchmonarch-nightly 2025.7.1__cp312-cp312-manylinux2014_x86_64.whl → 2025.7.26__cp312-cp312-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monarch/__init__.py +13 -9
- monarch/_rust_bindings.so +0 -0
- monarch/{_monarch/selection → _src/actor}/__init__.py +3 -7
- monarch/_src/actor/actor_mesh.py +878 -0
- monarch/{allocator.py → _src/actor/allocator.py} +26 -17
- monarch/_src/actor/bootstrap_main.py +73 -0
- monarch/{code_sync.py → _src/actor/code_sync/__init__.py} +3 -1
- monarch/_src/actor/code_sync/auto_reload.py +223 -0
- monarch/_src/actor/debugger.py +565 -0
- monarch/_src/actor/endpoint.py +303 -0
- monarch/_src/actor/event_loop.py +97 -0
- monarch/_src/actor/future.py +100 -0
- monarch/{pdb_wrapper.py → _src/actor/pdb_wrapper.py} +47 -46
- monarch/{common/pickle_flatten.py → _src/actor/pickle.py} +26 -2
- monarch/_src/actor/proc_mesh.py +508 -0
- monarch/_src/actor/sync_state.py +18 -0
- monarch/{telemetry.py → _src/actor/telemetry/__init__.py} +1 -1
- monarch/_src/actor/telemetry/rust_span_tracing.py +159 -0
- monarch/_src/actor/tensor_engine_shim.py +59 -0
- monarch/_src/tensor_engine/rdma.py +180 -0
- monarch/_testing.py +3 -2
- monarch/actor/__init__.py +53 -0
- monarch/actor_mesh.py +6 -765
- monarch/bootstrap_main.py +8 -47
- monarch/common/client.py +1 -1
- monarch/common/controller_api.py +2 -1
- monarch/common/device_mesh.py +12 -2
- monarch/common/messages.py +21 -1
- monarch/common/recording.py +4 -3
- monarch/common/remote.py +135 -52
- monarch/common/tensor.py +2 -1
- monarch/controller/backend.py +2 -2
- monarch/controller/controller.py +2 -1
- monarch/controller/rust_backend/controller.py +2 -1
- monarch/fetch.py +3 -5
- monarch/gradient/_gradient_generator.so +0 -0
- monarch/mesh_controller.py +263 -139
- monarch/monarch_controller +0 -0
- monarch/opaque_module.py +4 -6
- monarch/opaque_object.py +3 -3
- monarch/proc_mesh.py +6 -309
- monarch/python_local_mesh.py +1 -1
- monarch/rust_backend_mesh.py +2 -1
- monarch/rust_local_mesh.py +4 -2
- monarch/sim_mesh.py +10 -19
- monarch/simulator/command_history.py +1 -1
- monarch/simulator/interface.py +2 -1
- monarch/simulator/mock_controller.py +1 -1
- monarch/simulator/simulator.py +1 -1
- monarch/tensor_engine/__init__.py +23 -0
- monarch/tensor_worker_main.py +3 -1
- monarch/tools/cli.py +3 -1
- monarch/tools/commands.py +129 -47
- monarch/tools/components/hyperactor.py +5 -3
- monarch/tools/config/__init__.py +18 -1
- monarch/tools/config/defaults.py +2 -2
- monarch/tools/mesh_spec.py +59 -1
- monarch/tools/utils.py +38 -0
- monarch/worker/worker.py +1 -1
- monarch/world_mesh.py +2 -1
- monarch_supervisor/python_executable.py +6 -3
- tests/error_test_binary.py +48 -10
- tests/test_actor_error.py +370 -21
- tests/test_alloc.py +1 -1
- tests/test_allocator.py +369 -17
- tests/test_controller.py +2 -0
- tests/test_debugger.py +416 -0
- tests/test_env_before_cuda.py +161 -0
- tests/test_python_actors.py +184 -333
- tests/test_rdma.py +198 -0
- tests/test_remote_functions.py +40 -12
- tests/test_rust_backend.py +7 -5
- tests/test_sim_backend.py +1 -4
- tests/test_tensor_engine.py +81 -1
- {torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/METADATA +39 -1
- {torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/RECORD +84 -72
- torchmonarch_nightly-2025.7.26.dist-info/entry_points.txt +3 -0
- monarch/_monarch/hyperactor/__init__.py +0 -58
- monarch/_monarch/worker/debugger.py +0 -117
- monarch/_monarch/worker/logging.py +0 -107
- monarch/debugger.py +0 -379
- monarch/future.py +0 -76
- monarch/rdma.py +0 -162
- torchmonarch_nightly-2025.7.1.dist-info/entry_points.txt +0 -3
- /monarch/{_monarch/worker → _src}/__init__.py +0 -0
- /monarch/{common/_device_utils.py → _src/actor/device_utils.py} +0 -0
- /monarch/{common → _src/actor}/shape.py +0 -0
- /monarch/{_monarch → _src/tensor_engine}/__init__.py +0 -0
- {torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/WHEEL +0 -0
- {torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/licenses/LICENSE +0 -0
- {torchmonarch_nightly-2025.7.1.dist-info → torchmonarch_nightly-2025.7.26.dist-info}/top_level.txt +0 -0
@@ -1,41 +1,51 @@
|
|
1
|
-
monarch/__init__.py,sha256=
|
2
|
-
monarch/_rust_bindings.so,sha256=
|
3
|
-
monarch/_testing.py,sha256=
|
4
|
-
monarch/actor_mesh.py,sha256
|
5
|
-
monarch/
|
6
|
-
monarch/bootstrap_main.py,sha256=RCUQhJk07yMFiKp6HzQuqZFUpkgsT9kVEyimiwjn6_E,1827
|
1
|
+
monarch/__init__.py,sha256=mgKiyD1kxky-1pvhMlNfF4VmxWnhi-FSYZNFzkW1BEM,7052
|
2
|
+
monarch/_rust_bindings.so,sha256=g6Qn8KSeX9x-QBtLRLDqPXjDIqsdVXDdNSAMhG-Ftos,47707960
|
3
|
+
monarch/_testing.py,sha256=_3MYNMq-_0T1qXCj2vxrW13GlWGdUuVFMskQF2Gsw_o,7877
|
4
|
+
monarch/actor_mesh.py,sha256=VtPU9syi_vUdwDSJJ639Z4Y_EcWZUScyoj0lQ88RQPs,421
|
5
|
+
monarch/bootstrap_main.py,sha256=39OZpNMrfvvNJf-iwuNzgslzYA_ItaRPHfXGn_V74N0,524
|
7
6
|
monarch/cached_remote_function.py,sha256=kYdB6r4OHx_T_uX4q3tCNcp1t2DJwF8tPTIahUiT2pU,8785
|
8
|
-
monarch/
|
9
|
-
monarch/debugger.py,sha256=AizU8MWBdloe0wj1ysxlOXmUhCwGoShVH_xGfVBCQjs,13354
|
10
|
-
monarch/fetch.py,sha256=61jxo7sx4QNUTkc0_rF5NaJROen4tKbAaiIjrXWLOvg,1705
|
11
|
-
monarch/future.py,sha256=g1VYJl8ReBBS6VbikwWilnFqEr5qJDiSKid92AnWFV4,2058
|
7
|
+
monarch/fetch.py,sha256=JMxC8HdWMvpik0T4E6e-gfxvmNnOkA0ul4eo4R3Jg_o,1712
|
12
8
|
monarch/gradient_generator.py,sha256=Rl3dmXGceTdCc1mYBg2JciR88ywGPnW7TVkL86KwqEA,6366
|
13
9
|
monarch/memory.py,sha256=ol86dBhFAJqg78iF25-BuK0wuwj1onR8FIioZ_B0gjw,1377
|
14
|
-
monarch/mesh_controller.py,sha256=
|
15
|
-
monarch/monarch_controller,sha256=
|
10
|
+
monarch/mesh_controller.py,sha256=mOk2misobJun2AgR_ALjFoopAEcOPYQVrrAJXa18ZTs,13810
|
11
|
+
monarch/monarch_controller,sha256=qT8YsQcQzd28wlNXtzKvitavskPXoPQmifgopbo8Zjw,21250240
|
16
12
|
monarch/notebook.py,sha256=zu9MKDFKf1-rCM2TqFSRJjMBeiWuKcJSyUFLvoZRQzs,25949
|
17
|
-
monarch/opaque_module.py,sha256=
|
18
|
-
monarch/opaque_object.py,sha256=
|
19
|
-
monarch/
|
20
|
-
monarch/proc_mesh.py,sha256=UX8qthL0RSPwOf7I5dLHejVAQrZtYAERGhBeUDR4Xfw,10950
|
13
|
+
monarch/opaque_module.py,sha256=jCcg0DjbcEVXA9WNG0NhUzGteLHOJLTZEBvrIYJIAns,10436
|
14
|
+
monarch/opaque_object.py,sha256=x1LoX6RIMGh4ux52xIfhPgoh6PhZHdkf9bMccHW3DW0,2808
|
15
|
+
monarch/proc_mesh.py,sha256=lrWn-fN4xnWCF72FR2VkgrN5MlgEYPbkhnzgImjC-ak,419
|
21
16
|
monarch/profiler.py,sha256=TQ9fnVM8H7smBWtYdB_6Irtzz8DBOmcp7U1T3wlUmco,4911
|
22
|
-
monarch/python_local_mesh.py,sha256=
|
17
|
+
monarch/python_local_mesh.py,sha256=jYIhkPuPAL1V1l2wcozGZCS_dMardmjw6FDUntcgGSA,3539
|
23
18
|
monarch/random.py,sha256=f9QR7Esu4Vxqxs-KCf5QYyVqlWvXJ3-UtG90L_h4j40,1527
|
24
|
-
monarch/rdma.py,sha256=1pNh11S_FWeETRgkdUpauTMUlodrRohIq1UfQjKVnN8,5418
|
25
19
|
monarch/remote_class.py,sha256=-OAowzU1aDP6i4ik_SjXntVUC9h4dqAzgqwohkQ6Grc,4167
|
26
|
-
monarch/rust_backend_mesh.py,sha256=
|
27
|
-
monarch/rust_local_mesh.py,sha256=
|
28
|
-
monarch/sim_mesh.py,sha256=
|
29
|
-
monarch/
|
30
|
-
monarch/tensor_worker_main.py,sha256=Nbarl2sJKIddLeaRFsaUnqOerLHjzggUr9SqCr2_GYI,8300
|
20
|
+
monarch/rust_backend_mesh.py,sha256=B3RpAqK0YNZnubD4OwBvCV0Yt-v96Zn_l7aKL1DTRK4,9949
|
21
|
+
monarch/rust_local_mesh.py,sha256=XGOhMzz3KHO2l5mh5Mfp64CJDwWX0z4eaB9B2IkDulQ,47402
|
22
|
+
monarch/sim_mesh.py,sha256=l8JoErzM_-YM1B8VSle6P7oyzU9-wC4QWxVviUtC9rU,11720
|
23
|
+
monarch/tensor_worker_main.py,sha256=vOJMJQThwCNDMgCqfP2At1fHm3vDSH0BSaMbV2N96Vs,8353
|
31
24
|
monarch/tensorboard.py,sha256=MnLgH5lbqeUJauEuirEgR6L_qYl2NGdtwZOWIAuOZao,2587
|
32
|
-
monarch/world_mesh.py,sha256=
|
33
|
-
monarch/
|
34
|
-
monarch/
|
35
|
-
monarch/
|
36
|
-
monarch/
|
37
|
-
monarch/
|
38
|
-
monarch/
|
25
|
+
monarch/world_mesh.py,sha256=ob5dJWaC49Uw0xqClHBm8CQLvL4xKnjd4TGzk7k8NxI,980
|
26
|
+
monarch/_src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
27
|
+
monarch/_src/actor/__init__.py,sha256=4iK3nzQZmEPe0HGNY70fABBenw3lCVVaaF0xddF5Fa0,235
|
28
|
+
monarch/_src/actor/actor_mesh.py,sha256=guYD9nZHguLGJAvTisc3Q664ASkupcNC6z9iheeGFUQ,29188
|
29
|
+
monarch/_src/actor/allocator.py,sha256=WpHEK1SvjgF3GdIWIYUkonXli2-gQVKJVZPInl2RFQo,8212
|
30
|
+
monarch/_src/actor/bootstrap_main.py,sha256=e5eU3GvX60MWWmCty7VcZrAmukD29iJdWBysNgQ2o3A,2342
|
31
|
+
monarch/_src/actor/debugger.py,sha256=t2iAAxz03b2KZ89T3VjRc__7GzSf83R8gM81SDyX3-o,19532
|
32
|
+
monarch/_src/actor/device_utils.py,sha256=gBpl23wMjppVAEzzj8U9HyX-B7Bs2_3ftiMAkzUS4j4,577
|
33
|
+
monarch/_src/actor/endpoint.py,sha256=jM3XYWze6gB6ajE4AMojNFSN4IaaxgioNAErJkkywjE,9721
|
34
|
+
monarch/_src/actor/event_loop.py,sha256=2i4fKIkemBzua_t47BqVa2roZ6fWB6sbmMFPNx2zKN0,2832
|
35
|
+
monarch/_src/actor/future.py,sha256=jOGh1wfwKyGJxhl9t1P8eapXYywf8KwQldZCCbupmb8,4042
|
36
|
+
monarch/_src/actor/pdb_wrapper.py,sha256=-QxRktntdEO2LdHixBGKLboYtADyh8bEIAoa3gFwIEo,4161
|
37
|
+
monarch/_src/actor/pickle.py,sha256=jD_3E07OJmMIlcMOOrNFnIuRKZU2F_Q_BP-njDFXUNM,2044
|
38
|
+
monarch/_src/actor/proc_mesh.py,sha256=amF4fbO-33qHFudlS9WabYXIVh0Y_D_0nhCTxvOhpGg,16640
|
39
|
+
monarch/_src/actor/shape.py,sha256=B-7DI768ZhT8ECUNCJcI7DfCB7iDFGFH0r-HmXaAfcM,8296
|
40
|
+
monarch/_src/actor/sync_state.py,sha256=GB6bTAGztkcN8fZ9K7zXklOzjYzv6cvkJeBje20xFkE,471
|
41
|
+
monarch/_src/actor/tensor_engine_shim.py,sha256=hupavQ2rjPwECaTlDAhY-eeiEY18Wyyx59MZHcSEcYM,1622
|
42
|
+
monarch/_src/actor/code_sync/__init__.py,sha256=qzWoFNJEJvEbqab0QuHbkvhdz6FHi7BOTw6-2B3p0A4,378
|
43
|
+
monarch/_src/actor/code_sync/auto_reload.py,sha256=kqXCQuSzjxMw8bcDLsUZiL_NImo4j2EScfNklwpltmU,6685
|
44
|
+
monarch/_src/actor/telemetry/__init__.py,sha256=sHA5fmFdWU9jcUJVszNFhbXbjRSIBmuDXDMwJrrE0hw,523
|
45
|
+
monarch/_src/actor/telemetry/rust_span_tracing.py,sha256=UvkywuwjQX7tIyLdKZbF-fcmI_aHporAejsTRTyJNNg,4445
|
46
|
+
monarch/_src/tensor_engine/__init__.py,sha256=Md3cCHD7Ano9kV15PqGbicgUO-RMdh4aVy1yKiDt_xE,208
|
47
|
+
monarch/_src/tensor_engine/rdma.py,sha256=KbhJXMuuHruYXnmxzB3BpkpcGsZ4hSu_7C6wF-EPhDk,6331
|
48
|
+
monarch/actor/__init__.py,sha256=F87BC7owDdH_yRjLvMu6pbICbajndsEbtWG2W53Rapo,1050
|
39
49
|
monarch/builtins/__init__.py,sha256=QcfnHZGbc2qktBg7DyZt2ruE6VahnIt4S8lEZLHdJqU,443
|
40
50
|
monarch/builtins/log.py,sha256=H1QkuVzwxyi36Zyv-XR0VN0QsNimBWwxE1__fjs0_2o,554
|
41
51
|
monarch/builtins/random.py,sha256=wPbvscg7u53EXpMFo885fO2XOlsyjrNAJ4rBxLzfxdg,1839
|
@@ -43,85 +53,84 @@ monarch/common/_C.pyi,sha256=kHY2G3ksMAjQJ6IcPb4F1bBh5knzw5RVVNhhBlEmwFU,314
|
|
43
53
|
monarch/common/_C.so,sha256=x5DsOw_PNCPUHJ1kjEMUaO_dyWdKLyU6f0CyGIc8Q-k,715160
|
44
54
|
monarch/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
45
55
|
monarch/common/_coalescing.py,sha256=HXf5cXAPSU_tpw9jFkzs2muytG_6sTZJSqSKV0XuFZE,10925
|
46
|
-
monarch/common/_device_utils.py,sha256=gBpl23wMjppVAEzzj8U9HyX-B7Bs2_3ftiMAkzUS4j4,577
|
47
56
|
monarch/common/_tensor_to_table.py,sha256=yRjCNwvtl188Z1Dwkx3ZU-Bh2mwYnQ0Lnue2RAztwvc,5753
|
48
57
|
monarch/common/base_tensor.py,sha256=ujRzR6lWaeCdPv2JX0vCR-VsCWn-3SHaJIkZH1Sw9FQ,1159
|
49
58
|
monarch/common/borrows.py,sha256=7KR62xoUat1T6FyADsdHsxVAVIJDvfJWUnPO-xx277U,5307
|
50
|
-
monarch/common/client.py,sha256=
|
59
|
+
monarch/common/client.py,sha256=RIs-YFTb7yLEV6njVhsIuD2ofnLYGtT1dAoU5htk4Vw,25810
|
51
60
|
monarch/common/constants.py,sha256=ohvsVYMpfeWopv3KXDAeHWDFLukwc-OY37VRxpKNBE8,300
|
52
61
|
monarch/common/context_manager.py,sha256=GOeyaFbyCqvQmkJ0oI7q6IxRd8_0mVyYKZRccI8iaug,1067
|
53
|
-
monarch/common/controller_api.py,sha256=
|
54
|
-
monarch/common/device_mesh.py,sha256=
|
62
|
+
monarch/common/controller_api.py,sha256=G-8BMuW3rtySng-EGjXAvw8IVqXcK6lymKuZGyyW_Ow,3207
|
63
|
+
monarch/common/device_mesh.py,sha256=YUv1TfpoxqIcHQ7eVS5KhHDPK9o95RyBLuf2Cf4w74o,12550
|
55
64
|
monarch/common/fake.py,sha256=h57Cggz2qXNqImZ7yPuOZOSe9-l9i553ki1z-YHlgQA,1801
|
56
65
|
monarch/common/function.py,sha256=V8kdgSRTvild2SpcewWa5IETX3QiWDZQ2BEIDFa5zz8,4374
|
57
66
|
monarch/common/function_caching.py,sha256=HVdbWtv6Eea7ENMWi8iv36w1G1TaVuUJhkUX_JxGx5A,5060
|
58
67
|
monarch/common/future.py,sha256=D1UJ_8Rvb8-VG9vNE-z7xz2m2otMd2HgB0rnA02nlvA,4681
|
59
68
|
monarch/common/invocation.py,sha256=L4mSmzqlHMxo1Tb71hBU_M8aBZCRCOcb6vvPhvvewec,4195
|
60
69
|
monarch/common/mast.py,sha256=XTzYljGR0aZ7GjmNMPgU2HyuL4HWSAy4IwE3kEDqdOw,7735
|
61
|
-
monarch/common/messages.py,sha256=
|
70
|
+
monarch/common/messages.py,sha256=jwwJMVVx3gKd39AXcnRxjMr7lPJRLimHtZYel3zjq4o,18833
|
62
71
|
monarch/common/mock_cuda.py,sha256=x6ho1Ton6BbKjBZ5ZxnFOUaQM032X70wnpoUNB7Ci2w,1039
|
63
72
|
monarch/common/opaque_ref.py,sha256=tWNvOC6CsjNPKD1JDx-8PSaeXqZC3eermgBExUPKML4,2871
|
64
|
-
monarch/common/pickle_flatten.py,sha256=2mc-dPiZy7kRqAstyfMLnPuoGJwsBftYYEHyF_HOZw4,1313
|
65
73
|
monarch/common/pipe.py,sha256=9pTf8--3yOv4HpnJEhgcmc_JM6Az4uL1y72TSQA55dw,5013
|
66
74
|
monarch/common/process_group.py,sha256=FbJ_AJRZYFkvQ68L2naRq64J_aNuAKe5kO0MWdn_x74,1662
|
67
|
-
monarch/common/recording.py,sha256=
|
75
|
+
monarch/common/recording.py,sha256=Q39Zhb3kT52NCPf4VVMox2WXjtXju5eTuvPMZ_QGW7o,4660
|
68
76
|
monarch/common/reference.py,sha256=O26lkzEeVwj0S1xEy-OLqdHVnACmmlbQCUmXRrW4n1Q,938
|
69
|
-
monarch/common/remote.py,sha256=
|
77
|
+
monarch/common/remote.py,sha256=Q2YpAo_fsdh22ElUNruxYyn-cNTecZr8POcHCGtuKyg,12129
|
70
78
|
monarch/common/selection.py,sha256=lpWFbZs3ArYy29e-53eoAVAjQFksf1RvZz9NvM0CUW4,308
|
71
|
-
monarch/common/shape.py,sha256=B-7DI768ZhT8ECUNCJcI7DfCB7iDFGFH0r-HmXaAfcM,8296
|
72
79
|
monarch/common/stream.py,sha256=_ejoxafHtdD10lLzznRCXKwrkZ_ZH9k_VTgiA5yfBrI,3583
|
73
|
-
monarch/common/tensor.py,sha256=
|
80
|
+
monarch/common/tensor.py,sha256=ysT51NClNF4FxV0DFLJJUNmCRaVy8uQuYWpLViyPLdY,29292
|
74
81
|
monarch/common/tensor_factory.py,sha256=qm8NZx-5ezMAFjNLiXQvb66okm5XgdboB_GRarGOdN0,801
|
75
82
|
monarch/common/tree.py,sha256=1DG3siiE7ixBV6v5cwN8RT_17aJhYZTE-L3i7wZe2_c,2282
|
76
83
|
monarch/controller/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
77
|
-
monarch/controller/backend.py,sha256=
|
78
|
-
monarch/controller/controller.py,sha256=
|
84
|
+
monarch/controller/backend.py,sha256=FVJFXqGM4CQbt9AOxc3SYLv3xbpxJpGrLovV5YmnxMA,7761
|
85
|
+
monarch/controller/controller.py,sha256=xoonWAjCPrEGNdVHEwaIcSsDKg18-9yD_Ivvr-0U4CE,8372
|
79
86
|
monarch/controller/debugger.py,sha256=7vVERDyXY5nH3GhIoCzNIwn2rm0H76ZJ6A4equ7gfvM,1272
|
80
87
|
monarch/controller/history.py,sha256=OZbQ75nFMXnxupw_OBlhiLVXCJ8lJKFw1SV3egvLUqc,3019
|
81
88
|
monarch/controller/rust_backend/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
82
|
-
monarch/controller/rust_backend/controller.py,sha256
|
89
|
+
monarch/controller/rust_backend/controller.py,sha256=8IYnVUiqEVKO9rGL3vKqcCSAhWJG1bYYQ0MoaMqsp78,9521
|
83
90
|
monarch/gradient/__init__.py,sha256=kqmzwt16mMpk0M3GhpgP_f7da4DGnaV9chDzbt66k4Q,308
|
84
91
|
monarch/gradient/_gradient_generator.pyi,sha256=6cX0UxaDt9NAlwgIhTgnweqGOf6qRhHiGnUzSWNCxdU,630
|
85
|
-
monarch/gradient/_gradient_generator.so,sha256=
|
92
|
+
monarch/gradient/_gradient_generator.so,sha256=oUxX5Ww47ioFGvxKaE4IFgrCZGuK_-uGKMe0P6aAIKs,11535816
|
86
93
|
monarch/parallel/__init__.py,sha256=6920kIkhiX7AiyjYvyc1ad8ccP-bStJJ1sS5KkeN2P0,352
|
87
94
|
monarch/parallel/pipelining/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
88
95
|
monarch/parallel/pipelining/runtime.py,sha256=KK8TG1gUYEzSsquiZoPTWGSIC74mlncD7cYknKxfb3c,32470
|
89
96
|
monarch/parallel/pipelining/schedule_ir.py,sha256=eowdF7VxUwt4S2bfhzcOpLxECHFZ4tSoimGP_cVZ_CE,27918
|
90
97
|
monarch/parallel/pipelining/scheduler.py,sha256=Q0d8m8nGzeuFIG7nnKfkRnjxH5MbmfxzD438YcslEq0,10012
|
91
98
|
monarch/simulator/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
92
|
-
monarch/simulator/command_history.py,sha256=
|
99
|
+
monarch/simulator/command_history.py,sha256=fmoKNp-8D-GeF2Hev0LGfpgKPuAMCiLtU1Gyf_iyylc,16515
|
93
100
|
monarch/simulator/config.py,sha256=_LlL-7JG9hSwKPTB_KYleZCers8YKTGwcs0sfDlhEzQ,477
|
94
|
-
monarch/simulator/interface.py,sha256=
|
101
|
+
monarch/simulator/interface.py,sha256=McvyZz0rkvTCJjzmzqmabSHPDoeYbJS2Kzm6QNBBNVo,1899
|
95
102
|
monarch/simulator/ir.py,sha256=kgIsyF_gXmNdpvTXGgAQVk2s3DTCRO9TUxHbAit9zrk,29725
|
96
|
-
monarch/simulator/mock_controller.py,sha256=
|
103
|
+
monarch/simulator/mock_controller.py,sha256=9m8A9RckMF6Q-Den4-loCOYZxVGA5jpeLxs_oFvjihk,7392
|
97
104
|
monarch/simulator/profiling.py,sha256=dWpp538BRnF15VTxPcTQurSo7Sfp6zdkTh6nL4j1T-k,14512
|
98
|
-
monarch/simulator/simulator.py,sha256=
|
105
|
+
monarch/simulator/simulator.py,sha256=Nz29n775ckbFz9_dKy-6Il4klC0LqOOR7Dhwq4yRSiA,38196
|
99
106
|
monarch/simulator/task.py,sha256=JhOyEdLd7u4uHzGR5ejBCyJJoD_Xn9TwQEQc37fL_RU,8600
|
100
107
|
monarch/simulator/tensor.py,sha256=1eyQblRI71L9o7tgk465FW91Eqyc3B4sIfz07R4_1eY,13240
|
101
108
|
monarch/simulator/trace.py,sha256=OhB1F3n7zBznP4LxyEHin1G3s02oLPsEBbCs-8wd_bU,11911
|
102
109
|
monarch/simulator/utils.py,sha256=0CoeeK6rWZlgQzyzK8l8gqF2cSmZAjL1EY0PB32e4y8,1217
|
103
110
|
monarch/simulator/worker.py,sha256=2ez0k557jSRMsnzutix0zeOEkAUa26HKwfp1zi_aHQI,14339
|
111
|
+
monarch/tensor_engine/__init__.py,sha256=qVf4EK57lAhtWs3FCf0idolYPJ8tMUf3CYAL1ifTAEM,553
|
104
112
|
monarch/timer/__init__.py,sha256=tdM52Rn0d69HNmtqGwAUE7Py37I3yvLhH0vkUopG3ok,553
|
105
113
|
monarch/timer/example_monarch.py,sha256=pYDx-Dvxxfbk0w7t9DIVikIJBgDIrp7mi8CCdfgV0p0,2338
|
106
114
|
monarch/timer/example_spmd.py,sha256=p8i3_tO1AmpwSkZryiSjgkh7qaEZ6QXp2Fy1qtPpECA,1406
|
107
115
|
monarch/timer/execution_timer.py,sha256=1YsrLIZirdohKOeFAU2H4UcONhQXHuctJbYcoX8I6gY,6985
|
108
116
|
monarch/timer/execution_timer_test.py,sha256=CSxTv44fFZQURJlCBmYvysQI1aS_zEGZs_uxl9SOHak,4486
|
109
117
|
monarch/tools/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
110
|
-
monarch/tools/cli.py,sha256=
|
111
|
-
monarch/tools/commands.py,sha256=
|
112
|
-
monarch/tools/mesh_spec.py,sha256=
|
118
|
+
monarch/tools/cli.py,sha256=b3mKZnK-MwP7JwskTxHI0KcJXxSU6498jEb2ntVr_VM,5001
|
119
|
+
monarch/tools/commands.py,sha256=3xuvHcMwl0t6cWTVUxI_r8EqrJZnay0bkKxOijhlKrw,12126
|
120
|
+
monarch/tools/mesh_spec.py,sha256=in6txNRmA-UvveVSMHCjX6mGpofd3K8vl2Plz1eD6rg,7935
|
113
121
|
monarch/tools/network.py,sha256=mN8Fx9mervxM3VdFHRn4ZXt4z7yWxZp52BTxx2tfpus,2455
|
122
|
+
monarch/tools/utils.py,sha256=2GGUQQE0dLtzoKy40_tAsOfbSxE6krnL0WvwMgUBgmw,1213
|
114
123
|
monarch/tools/components/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
115
|
-
monarch/tools/components/hyperactor.py,sha256=
|
116
|
-
monarch/tools/config/__init__.py,sha256=
|
117
|
-
monarch/tools/config/defaults.py,sha256=
|
124
|
+
monarch/tools/components/hyperactor.py,sha256=gYZS8AcmoTuq48mRrZWWnyxQqaiwTNHv8YqHhHi799U,2169
|
125
|
+
monarch/tools/config/__init__.py,sha256=MLa6uvVJssN_zTciCvCMeCURWglchCuqE3zdqA-gh4U,869
|
126
|
+
monarch/tools/config/defaults.py,sha256=ZymyKr9fNnBIgsV-xz-KrtrXRLkJo3hymTqxjXXnBzs,1910
|
118
127
|
monarch/worker/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
|
119
128
|
monarch/worker/_testing_function.py,sha256=A81cVMKgdlO66XvoYcBCDrxIQIm3o3GgvcH_c8M9OmI,13480
|
120
129
|
monarch/worker/compiled_block.py,sha256=hYx1F6PAu0_BnpKAprP_nV9qJtk5XWO7mcwH3JPDioU,10114
|
121
130
|
monarch/worker/debugger.py,sha256=MeRiVFe6zY5ouWZEVxH2QADq-1nB7hN-HgD-_yx7Q-o,3729
|
122
131
|
monarch/worker/lines.py,sha256=8lIf1mRaU8jpzNxwLXg-jaxrsqBXCERK7lRdQvF0C5M,1347
|
123
132
|
monarch/worker/monitor.py,sha256=uVEVHtTj9Dg-V1np-TEY-mDYKy3c6j3hU2n7OhOzfgA,1664
|
124
|
-
monarch/worker/worker.py,sha256=
|
133
|
+
monarch/worker/worker.py,sha256=XcmQdvQPFmRYhj1DdwjPrkLs7HafzdS1aczv2J1ZzGw,42618
|
125
134
|
monarch_supervisor/__init__.py,sha256=4_eZvVfwoM-3yhQdAMzae7I87ITEjJMvjeBy4ROc2rs,37869
|
126
135
|
monarch_supervisor/_testing.py,sha256=T5LwhusP26UG0Wxyq0M_ng_pcS5DfzEPapKDsohWmaY,832
|
127
136
|
monarch_supervisor/function_call.py,sha256=munXfLO7Qoriz9HMwdmUNlP2-peznQ8ZcUBQfRaTKr4,1026
|
@@ -129,37 +138,40 @@ monarch_supervisor/host.py,sha256=ca7C8NtaGkt4FubKpQCXElN-6H3chprWanyfnDpdQZE,13
|
|
129
138
|
monarch_supervisor/launchers.py,sha256=nITh9eLg8WrNogS9pXKV4j3R5fijvvTrDpjw2gpQeEU,5313
|
130
139
|
monarch_supervisor/log_pstree.py,sha256=P5k0LBobXnc3NvoumxVyZOtHWS7jbhyHf5oQ4rHvRO0,1341
|
131
140
|
monarch_supervisor/logging.py,sha256=15IuHENvEXDhSuMwSQWzw4sC_1Qzww2S1X_KYjzh_O0,3318
|
132
|
-
monarch_supervisor/python_executable.py,sha256=
|
141
|
+
monarch_supervisor/python_executable.py,sha256=67jIBelUVgLr07aSRMbw5EOhiY9_bxfLB5aHHFItYZQ,1781
|
133
142
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
134
143
|
tests/dispatch_bench.py,sha256=sU_m-8KAjQgYTsxI5khV664NdgLLutidni69Rtowk98,3933
|
135
144
|
tests/dispatch_bench_helper.py,sha256=1ORgAMrRgjAjmmWeCHLLQd_bda9mJk0rS2ucEbRu28s,633
|
136
|
-
tests/error_test_binary.py,sha256=
|
145
|
+
tests/error_test_binary.py,sha256=cgdrnVI3SIzAFSRXTvASfiR8eKSMrZ7N3tSCLVkJo44,7880
|
137
146
|
tests/sleep_binary.py,sha256=XfLYaAfwm9xgzM-svs8fhAeFhwYIg6SyVEnx4e6wbUw,1009
|
138
|
-
tests/test_actor_error.py,sha256=
|
139
|
-
tests/test_alloc.py,sha256=
|
140
|
-
tests/test_allocator.py,sha256=
|
147
|
+
tests/test_actor_error.py,sha256=kEfj1XW_WPk2mplucFBuzCWU3UrvzwkKoHSLIZfyQr0,20945
|
148
|
+
tests/test_alloc.py,sha256=IW7yJSaKxhOYc8SJtFyREakDUwiKWq9M0CGgYyBYHoc,743
|
149
|
+
tests/test_allocator.py,sha256=4LcUB4QRNGDp0qBWAyLM6ektmoxpO922f-NcHZziJ_w,28762
|
141
150
|
tests/test_coalescing.py,sha256=JZ4YgQNlWWs7N-Z8KCCXQPANcuyyXEKjeHIXYbPnQhk,15606
|
142
|
-
tests/test_controller.py,sha256=
|
151
|
+
tests/test_controller.py,sha256=CIMb-ApmBcBj1eCqccDUAbVyyJWMGooAha5gQk0AoeY,31452
|
152
|
+
tests/test_debugger.py,sha256=mtd_no7dAooBePoQ_TZOxtgzwd1-x6xkpzAFK1_Y8B8,13703
|
143
153
|
tests/test_device_mesh.py,sha256=DrbezYOM0thfP9MgLXb5-F0VoLOmSz5GR0GwjR_3bE4,5290
|
154
|
+
tests/test_env_before_cuda.py,sha256=K5zdpXNRZB8hXQJaTN_CftcGHb3vzzdKasu8KFUoiCg,5440
|
144
155
|
tests/test_fault_tolerance.py,sha256=u4wmG1z5MZ6PY6us5zUZHJh2pUC3L7i0wsUfRDNHmxA,14144
|
145
156
|
tests/test_future.py,sha256=cXzaNi2YDwVyjR541ScXmgktX1YFsKzbl8wep0DMVbk,3032
|
146
157
|
tests/test_grad_generator.py,sha256=p4Pm4kMEeGldt2jUVAkGKCB0mLccKI28pltH6OTGbQA,3412
|
147
158
|
tests/test_mock_cuda.py,sha256=5hisElxeLJ5MHw3KM9gwxBiXiMaG-Rm382u3AsQcDOI,3068
|
148
159
|
tests/test_pdb_actor.py,sha256=5KJhuhcZDPWMdjC6eAtDdwnz1W7jNFXvIrMSFaCWaPw,3858
|
149
|
-
tests/test_python_actors.py,sha256=
|
150
|
-
tests/
|
151
|
-
tests/
|
160
|
+
tests/test_python_actors.py,sha256=fts3dfdld-zkpRaKyxEAOW6JFnG_jQspu1WehenwyI0,15220
|
161
|
+
tests/test_rdma.py,sha256=vgeCCsfOjRjlGoGR0SYRuTP_Sx5RlEUUKfO9ATK0d4E,6125
|
162
|
+
tests/test_remote_functions.py,sha256=VT65W7htp1jCsP9-AsiO1dofhx-4OebWEOssDEgi3GM,51054
|
163
|
+
tests/test_rust_backend.py,sha256=3TLu8dSVEqyLhjHED2DoAEQHTpbBQcr3WI6K2eGZLZw,7861
|
152
164
|
tests/test_signal_safe_block_on.py,sha256=bmal0XgzJowZXJV6T1Blow5a-vZluYWusCThLMGxyTE,3336
|
153
|
-
tests/test_sim_backend.py,sha256=
|
154
|
-
tests/test_tensor_engine.py,sha256=
|
165
|
+
tests/test_sim_backend.py,sha256=kT7MnPo5O9xxX8f7uZOpR9Tkuz5brjaOyK1g1NqHRlo,1398
|
166
|
+
tests/test_tensor_engine.py,sha256=_F70SQiUCRVZcbq5JcP5XkGJFnul57pqBpu1rF9kipE,3591
|
155
167
|
tests/simulator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
156
168
|
tests/simulator/test_profiling.py,sha256=TGYCfzTLdkpIwnOuO6KApprmrgPIRQe60KRX3wkB0sg,4565
|
157
169
|
tests/simulator/test_simulator.py,sha256=LO8lA0ssY-OGEBL5ipEu74f97Y765TEwfUOv-DtIptM,14568
|
158
170
|
tests/simulator/test_task.py,sha256=ipqBDuDAysuo1xOB9S5psaFvwe6VATD43IovCTSs0t4,2327
|
159
171
|
tests/simulator/test_worker.py,sha256=QrWWIJ3HDgDLkBPRc2mwYPlOQoXQcj1qRfc0WUfKkFY,3507
|
160
|
-
torchmonarch_nightly-2025.7.
|
161
|
-
torchmonarch_nightly-2025.7.
|
162
|
-
torchmonarch_nightly-2025.7.
|
163
|
-
torchmonarch_nightly-2025.7.
|
164
|
-
torchmonarch_nightly-2025.7.
|
165
|
-
torchmonarch_nightly-2025.7.
|
172
|
+
torchmonarch_nightly-2025.7.26.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
|
173
|
+
torchmonarch_nightly-2025.7.26.dist-info/METADATA,sha256=rJuOVCi7kVf2R9tHhtMSlaWv80ybWS5g9MvMzmso5M8,3852
|
174
|
+
torchmonarch_nightly-2025.7.26.dist-info/WHEEL,sha256=lduYNUEDASmtUEDemd8SmeX1qOMvvA6YKAbAo1Qbwk8,104
|
175
|
+
torchmonarch_nightly-2025.7.26.dist-info/entry_points.txt,sha256=60QVSpYVzkzS4iDOiLp0fsLxVp47X3J2l3v7W-59LMo,117
|
176
|
+
torchmonarch_nightly-2025.7.26.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
|
177
|
+
torchmonarch_nightly-2025.7.26.dist-info/RECORD,,
|
@@ -1,58 +0,0 @@
|
|
1
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2
|
-
# All rights reserved.
|
3
|
-
#
|
4
|
-
# This source code is licensed under the BSD-style license found in the
|
5
|
-
# LICENSE file in the root directory of this source tree.
|
6
|
-
|
7
|
-
# pyre-strict
|
8
|
-
|
9
|
-
from monarch._rust_bindings.monarch_hyperactor.actor import PythonMessage
|
10
|
-
|
11
|
-
from monarch._rust_bindings.monarch_hyperactor.alloc import ( # @manual=//monarch/monarch_extension:monarch_extension
|
12
|
-
LocalAllocatorBase,
|
13
|
-
)
|
14
|
-
|
15
|
-
from monarch._rust_bindings.monarch_hyperactor.mailbox import Mailbox, PortId
|
16
|
-
|
17
|
-
from monarch._rust_bindings.monarch_hyperactor.proc import ( # @manual=//monarch/monarch_extension:monarch_extension
|
18
|
-
ActorId,
|
19
|
-
Alloc,
|
20
|
-
AllocConstraints,
|
21
|
-
AllocSpec,
|
22
|
-
init_proc,
|
23
|
-
Proc,
|
24
|
-
Serialized,
|
25
|
-
)
|
26
|
-
|
27
|
-
from monarch._rust_bindings.monarch_hyperactor.shape import ( # @manual=//monarch/monarch_extension:monarch_extension
|
28
|
-
Shape,
|
29
|
-
)
|
30
|
-
|
31
|
-
__all__ = [
|
32
|
-
"init_proc",
|
33
|
-
"Actor",
|
34
|
-
"ActorId",
|
35
|
-
"ActorHandle",
|
36
|
-
"Alloc",
|
37
|
-
"AllocSpec",
|
38
|
-
"PortId",
|
39
|
-
"Proc",
|
40
|
-
"Serialized",
|
41
|
-
"PickledMessage",
|
42
|
-
"PickledMessageClientActor",
|
43
|
-
"PythonMessage",
|
44
|
-
"Mailbox",
|
45
|
-
"PortHandle",
|
46
|
-
"PortReceiver",
|
47
|
-
"OncePortHandle",
|
48
|
-
"OncePortReceiver",
|
49
|
-
"Alloc",
|
50
|
-
"AllocSpec",
|
51
|
-
"AllocConstraints",
|
52
|
-
"ProcMesh",
|
53
|
-
"PythonActorMesh",
|
54
|
-
"ProcessAllocatorBase",
|
55
|
-
"Shape",
|
56
|
-
"Selection",
|
57
|
-
"LocalAllocatorBase",
|
58
|
-
]
|
@@ -1,117 +0,0 @@
|
|
1
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2
|
-
# All rights reserved.
|
3
|
-
#
|
4
|
-
# This source code is licensed under the BSD-style license found in the
|
5
|
-
# LICENSE file in the root directory of this source tree.
|
6
|
-
|
7
|
-
# pyre-unsafe
|
8
|
-
|
9
|
-
import bdb
|
10
|
-
import io
|
11
|
-
import logging
|
12
|
-
import pdb # noqa
|
13
|
-
import sys
|
14
|
-
from typing import cast, Optional
|
15
|
-
|
16
|
-
from monarch._rust_bindings.monarch_extension import debugger
|
17
|
-
from monarch._rust_bindings.monarch_messages.debugger import DebuggerAction
|
18
|
-
|
19
|
-
logger = logging.getLogger(__name__)
|
20
|
-
|
21
|
-
|
22
|
-
def _set_trace(*, header=None):
|
23
|
-
ds = PdbWrapper(header)
|
24
|
-
ds.set_trace()
|
25
|
-
|
26
|
-
|
27
|
-
class PdbWrapper(pdb.Pdb):
|
28
|
-
def __init__(self, header: Optional[str]):
|
29
|
-
self._actor = debugger.PdbActor()
|
30
|
-
self.header = header
|
31
|
-
super().__init__(
|
32
|
-
# pyre-ignore
|
33
|
-
stdout=WriteWrapper(self._actor),
|
34
|
-
stdin=ReadWrapper.create(self._actor),
|
35
|
-
)
|
36
|
-
self._first = True
|
37
|
-
|
38
|
-
def setup(self, *args, **kwargs):
|
39
|
-
r = super().setup(*args, **kwargs)
|
40
|
-
if self._first:
|
41
|
-
self._first = False
|
42
|
-
# when we enter the debugger, we want to present the user's stack frame
|
43
|
-
# not the nested one inside session.run. This means that the local
|
44
|
-
# variables are what gets printed, etc. To do this
|
45
|
-
# we first execute up 2 to get to that frame.
|
46
|
-
self.do_up(2)
|
47
|
-
return r
|
48
|
-
|
49
|
-
def set_continue(self) -> None:
|
50
|
-
r = super().set_continue()
|
51
|
-
if not self.breaks:
|
52
|
-
# no more breakpoints so this debugger will not
|
53
|
-
# be used again, and we detach from the controller io.
|
54
|
-
self._actor.send(DebuggerAction.Detach())
|
55
|
-
self._actor.drain_and_stop()
|
56
|
-
# break cycle with itself before we exit
|
57
|
-
self.stdin = sys.stdin
|
58
|
-
self.stdout = sys.stdout
|
59
|
-
return r
|
60
|
-
|
61
|
-
def set_trace(self):
|
62
|
-
self._actor.send(DebuggerAction.Paused())
|
63
|
-
message = self._actor.receive()
|
64
|
-
# we give the controller the option to ignore this request to debug
|
65
|
-
# by issuing a "detach" message immediately.
|
66
|
-
if isinstance(message, DebuggerAction.Detach):
|
67
|
-
return
|
68
|
-
elif isinstance(message, DebuggerAction.Attach):
|
69
|
-
pass
|
70
|
-
else:
|
71
|
-
raise RuntimeError(f"unexpected debugger message {message}")
|
72
|
-
if self.header:
|
73
|
-
self.message(self.header)
|
74
|
-
super().set_trace()
|
75
|
-
|
76
|
-
def set_quit(self):
|
77
|
-
self._actor.send(DebuggerAction.Detach())
|
78
|
-
self._actor.drain_and_stop()
|
79
|
-
super().set_quit()
|
80
|
-
|
81
|
-
|
82
|
-
class ReadWrapper(io.RawIOBase):
|
83
|
-
def __init__(self, actor: debugger.PdbActor):
|
84
|
-
self._actor = actor
|
85
|
-
|
86
|
-
def readinto(self, b):
|
87
|
-
self._actor.send(DebuggerAction.Read(len(b)))
|
88
|
-
response = self._actor.receive()
|
89
|
-
if isinstance(response, DebuggerAction.Detach):
|
90
|
-
raise bdb.BdbQuit
|
91
|
-
assert isinstance(response, DebuggerAction.Write)
|
92
|
-
response = cast(DebuggerAction.Write, response)
|
93
|
-
payload = debugger.get_bytes_from_write_action(response)
|
94
|
-
assert len(payload) <= len(b)
|
95
|
-
b[: len(payload)] = payload
|
96
|
-
return len(payload)
|
97
|
-
|
98
|
-
def readable(self) -> bool:
|
99
|
-
return True
|
100
|
-
|
101
|
-
@classmethod
|
102
|
-
def create(cls, actor: debugger.PdbActor):
|
103
|
-
return io.TextIOWrapper(io.BufferedReader(cls(actor)))
|
104
|
-
|
105
|
-
|
106
|
-
class WriteWrapper:
|
107
|
-
def __init__(self, actor: debugger.PdbActor):
|
108
|
-
self._actor = actor
|
109
|
-
|
110
|
-
def writable(self) -> bool:
|
111
|
-
return True
|
112
|
-
|
113
|
-
def write(self, s: str):
|
114
|
-
self._actor.send(DebuggerAction.Write(s.encode()))
|
115
|
-
|
116
|
-
def flush(self):
|
117
|
-
pass
|
@@ -1,107 +0,0 @@
|
|
1
|
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2
|
-
# All rights reserved.
|
3
|
-
#
|
4
|
-
# This source code is licensed under the BSD-style license found in the
|
5
|
-
# LICENSE file in the root directory of this source tree.
|
6
|
-
|
7
|
-
# pyre-unsafe
|
8
|
-
import logging
|
9
|
-
import os
|
10
|
-
import socket
|
11
|
-
import sys
|
12
|
-
from pathlib import Path
|
13
|
-
|
14
|
-
logger = logging.getLogger(__name__)
|
15
|
-
|
16
|
-
## NOTE THIS FILE IS A DIRECT COPY OF ~/fbsource/fbcode/monarch/python/monarch_supervisor/logging.py
|
17
|
-
## It is copied here at this time to avoid pulling in the monarch python supervisor as dependency since there's
|
18
|
-
## an expectation that the other one will be removed soon.
|
19
|
-
|
20
|
-
|
21
|
-
def _handle_unhandled_exception(*args):
|
22
|
-
logger.error("Uncaught exception", exc_info=args)
|
23
|
-
|
24
|
-
|
25
|
-
_glog_level_to_abbr = {
|
26
|
-
"DEBUG": "V", # V is for VERBOSE in glog
|
27
|
-
"INFO": "I",
|
28
|
-
"WARNING": "W",
|
29
|
-
"ERROR": "E",
|
30
|
-
"CRITICAL": "C",
|
31
|
-
}
|
32
|
-
|
33
|
-
|
34
|
-
def fix_exception_lines(tb_lines):
|
35
|
-
formatted_lines = []
|
36
|
-
for line in tb_lines:
|
37
|
-
# Replace the standard file and line format with the custom format
|
38
|
-
if line.startswith(" File"):
|
39
|
-
# Extract the filename and line number
|
40
|
-
parts = line.split(",")
|
41
|
-
file_info = parts[0].strip()[6:-1] # Remove ' File "' and '"'
|
42
|
-
line_info = parts[1].strip()[5:] # Remove 'line '
|
43
|
-
new_line = f" File {file_info}:{line_info}"
|
44
|
-
if len(parts) > 2:
|
45
|
-
new_line += ", " + ",".join(parts[2:]).strip()
|
46
|
-
formatted_lines.append(new_line)
|
47
|
-
else:
|
48
|
-
formatted_lines.append(line.strip())
|
49
|
-
return formatted_lines
|
50
|
-
|
51
|
-
|
52
|
-
class _Formatter(logging.Formatter):
|
53
|
-
def __init__(self, suffix):
|
54
|
-
self.suffix = suffix
|
55
|
-
|
56
|
-
def format(self, record):
|
57
|
-
message = record.getMessage()
|
58
|
-
asctime = self.formatTime(record, "%m%d %H:%M:%S")
|
59
|
-
|
60
|
-
lines = message.strip().split("\n")
|
61
|
-
if record.exc_info:
|
62
|
-
exc_info = fix_exception_lines(
|
63
|
-
self.formatException(record.exc_info).split("\n")
|
64
|
-
)
|
65
|
-
lines.extend(exc_info)
|
66
|
-
if record.stack_info:
|
67
|
-
stack_info = self.formatStack(record.stack_info)
|
68
|
-
lines.extend(stack_info.strip().split("\n"))
|
69
|
-
|
70
|
-
shortlevel = _glog_level_to_abbr.get(record.levelname, record.levelname[0])
|
71
|
-
|
72
|
-
prefix = (
|
73
|
-
f"{shortlevel}{asctime}.{int(record.msecs*1000):06d} "
|
74
|
-
f"{record.filename}:"
|
75
|
-
f"{record.lineno}]{self.suffix}"
|
76
|
-
)
|
77
|
-
return "\n".join(f"{prefix} {line}" for line in lines)
|
78
|
-
|
79
|
-
|
80
|
-
def initialize_logging(process_name=None):
|
81
|
-
log_folder = os.environ.get("TORCH_MONARCH_LOG_FOLDER")
|
82
|
-
log_level = os.environ.get("TORCH_MONARCH_LOG_LEVEL", "INFO")
|
83
|
-
suffix = "" if process_name is None else f" {process_name}:"
|
84
|
-
handler = None
|
85
|
-
if log_folder is not None:
|
86
|
-
log_folder_path = Path(log_folder)
|
87
|
-
log_folder_path.mkdir(parents=True, exist_ok=True)
|
88
|
-
safe_process_name = (
|
89
|
-
process_name.replace("/", "_") if process_name else "logfile.log"
|
90
|
-
)
|
91
|
-
log_file_name = f"{safe_process_name}.log"
|
92
|
-
log_file_path = log_folder_path / log_file_name
|
93
|
-
handler = logging.FileHandler(log_file_path)
|
94
|
-
else:
|
95
|
-
handler = logging.StreamHandler()
|
96
|
-
handler.setFormatter(_Formatter(suffix))
|
97
|
-
handler.setLevel(log_level)
|
98
|
-
logging.root.setLevel(log_level)
|
99
|
-
logging.root.addHandler(handler)
|
100
|
-
sys.excepthook = _handle_unhandled_exception
|
101
|
-
|
102
|
-
|
103
|
-
def gethostname():
|
104
|
-
"""Get the hostname of the machine."""
|
105
|
-
hostname = socket.gethostname()
|
106
|
-
hostname = hostname.replace(".facebook.com", "")
|
107
|
-
return hostname
|