torchmonarch-nightly 2025.7.30__cp313-cp313-manylinux2014_x86_64.whl → 2025.8.1__cp313-cp313-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
monarch/_rust_bindings.so CHANGED
Binary file
@@ -65,15 +65,19 @@ from monarch._src.actor.shape import MeshTrait
65
65
 
66
66
  HAS_TENSOR_ENGINE = False
67
67
  try:
68
+ # Torch is needed for tensor engine
69
+ import torch # @manual
70
+
71
+ # Confirm that rust bindings were built with tensor engine enabled
68
72
  from monarch._rust_bindings.rdma import ( # type: ignore[import]
69
73
  _RdmaBuffer,
70
74
  _RdmaManager,
71
75
  )
72
76
 
73
77
  # type: ignore[16]
74
- HAS_TENSOR_ENGINE = _RdmaBuffer.rdma_supported()
78
+ HAS_TENSOR_ENGINE = torch.cuda.is_available()
75
79
  except ImportError:
76
- logging.warning("RDMA is not available on this platform")
80
+ logging.warning("Tensor engine is not available on this platform")
77
81
 
78
82
 
79
83
  if TYPE_CHECKING:
@@ -133,9 +137,10 @@ class ProcMesh(MeshTrait):
133
137
  setup: Callable[[], None] | None = None,
134
138
  ) -> "ProcMesh":
135
139
  _rdma_manager = (
136
- # pyre-ignore
140
+ # type: ignore[16]
137
141
  await _RdmaManager.create_rdma_manager_nonblocking(self._proc_mesh)
138
- if HAS_TENSOR_ENGINE
142
+ # type: ignore[16]
143
+ if HAS_TENSOR_ENGINE and _RdmaBuffer.rdma_supported()
139
144
  else None
140
145
  )
141
146
 
monarch/common/_C.so CHANGED
Binary file
Binary file
Binary file
monarch/tools/utils.py CHANGED
@@ -53,13 +53,10 @@ class conda:
53
53
  Returns the currently active conda environment name.
54
54
  `None` if run outside of a conda environment.
55
55
  """
56
- env_name = os.getenv("CONDA_DEFAULT_ENV")
57
-
58
- if not env_name:
59
- # conda envs activated with metaconda doesn't set CODNA_DEFAULT_ENV so
60
- # fallback to CONDA_PREFIX which points to the path of the currently active conda environment
61
- # e.g./home/$USER/.conda/envs/{env_name}
62
- if env_dir := conda.active_env_dir():
63
- env_name = os.path.basename(env_dir)
56
+ # we do not check CONDA_DEFAULT_ENV as CONDA_PREFIX is a preferred way
57
+ # to get the active conda environment, e.g. /home/$USER/.conda/envs/{env_name}
58
+ env_name: Optional[str] = None
59
+ if env_dir := conda.active_env_dir():
60
+ env_name = os.path.basename(env_dir)
64
61
 
65
62
  return env_name
tests/test_allocator.py CHANGED
@@ -104,7 +104,9 @@ class TestActor(Actor):
104
104
 
105
105
  @contextlib.contextmanager
106
106
  def remote_process_allocator(
107
- addr: Optional[str] = None, timeout: Optional[int] = None
107
+ addr: Optional[str] = None,
108
+ timeout: Optional[int] = None,
109
+ envs: Optional[dict[str, str]] = None,
108
110
  ) -> Generator[str, None, None]:
109
111
  """Start a remote process allocator on addr. If timeout is not None, have it
110
112
  timeout after that many seconds if no messages come in"""
@@ -120,16 +122,19 @@ def remote_process_allocator(
120
122
  if timeout is not None:
121
123
  args.append(f"--timeout-sec={timeout}")
122
124
 
125
+ env = {
126
+ # prefix PATH with this test module's directory to
127
+ # give 'process_allocator' and 'monarch_bootstrap' binary resources
128
+ # in this test module's directory precedence over the installed ones
129
+ # useful in BUCK where these binaries are added as 'resources' of this test target
130
+ "PATH": f"{package_path}:{os.getenv('PATH', '')}",
131
+ "RUST_LOG": "debug",
132
+ }
133
+ if envs:
134
+ env.update(envs)
123
135
  process_allocator = subprocess.Popen(
124
136
  args=args,
125
- env={
126
- # prefix PATH with this test module's directory to
127
- # give 'process_allocator' and 'monarch_bootstrap' binary resources
128
- # in this test module's directory precedence over the installed ones
129
- # useful in BUCK where these binaries are added as 'resources' of this test target
130
- "PATH": f"{package_path}:{os.getenv('PATH', '')}",
131
- "RUST_LOG": "debug",
132
- },
137
+ env=env,
133
138
  )
134
139
  try:
135
140
  yield addr
@@ -233,6 +238,26 @@ class TestRemoteAllocator(unittest.IsolatedAsyncioTestCase):
233
238
  computed_world_sizes = {p.rank: v for p, v in list(computed.flatten("rank"))}
234
239
  self.assertDictEqual(expected_world_sizes, computed_world_sizes)
235
240
 
241
+ async def test_allocate_failure_message(self) -> None:
242
+ spec = AllocSpec(AllocConstraints(), host=2, gpu=4)
243
+
244
+ with self.assertRaisesRegex(
245
+ Exception,
246
+ r"exited with code 1: Traceback \(most recent call last\).*",
247
+ ):
248
+ with remote_process_allocator(
249
+ envs={"MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING": "1"}
250
+ ) as host1, remote_process_allocator(
251
+ envs={"MONARCH_ERROR_DURING_BOOTSTRAP_FOR_TESTING": "1"}
252
+ ) as host2:
253
+ allocator = RemoteAllocator(
254
+ world_id="test_remote_allocator",
255
+ initializer=StaticRemoteAllocInitializer(host1, host2),
256
+ heartbeat_interval=_100_MILLISECONDS,
257
+ )
258
+ alloc = await allocator.allocate(spec)
259
+ await ProcMesh.from_alloc(alloc)
260
+
236
261
  async def test_call_allocate_twice(self) -> None:
237
262
  class DeletingAllocInitializer(StaticRemoteAllocInitializer):
238
263
  """test initializer that removes the last address from the list each time initialize_alloc() is called
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: torchmonarch-nightly
3
- Version: 2025.7.30
3
+ Version: 2025.8.1
4
4
  Summary: Monarch: Single controller library
5
5
  Author: Meta
6
6
  Author-email: oncall+monarch@xmail.facebook.com
@@ -1,5 +1,5 @@
1
1
  monarch/__init__.py,sha256=mgKiyD1kxky-1pvhMlNfF4VmxWnhi-FSYZNFzkW1BEM,7052
2
- monarch/_rust_bindings.so,sha256=r7qvxPiMHRlXvwxCM--hf3BgB0JYISYCRDpi64v5vO0,50331040
2
+ monarch/_rust_bindings.so,sha256=prrqR30Gcgdga3OZoUTOxaZyt7r7ozzwXoTSeizn3JM,50340744
3
3
  monarch/_testing.py,sha256=_3MYNMq-_0T1qXCj2vxrW13GlWGdUuVFMskQF2Gsw_o,7877
4
4
  monarch/actor_mesh.py,sha256=VtPU9syi_vUdwDSJJ639Z4Y_EcWZUScyoj0lQ88RQPs,421
5
5
  monarch/bootstrap_main.py,sha256=39OZpNMrfvvNJf-iwuNzgslzYA_ItaRPHfXGn_V74N0,524
@@ -8,7 +8,7 @@ monarch/fetch.py,sha256=JMxC8HdWMvpik0T4E6e-gfxvmNnOkA0ul4eo4R3Jg_o,1712
8
8
  monarch/gradient_generator.py,sha256=Rl3dmXGceTdCc1mYBg2JciR88ywGPnW7TVkL86KwqEA,6366
9
9
  monarch/memory.py,sha256=ol86dBhFAJqg78iF25-BuK0wuwj1onR8FIioZ_B0gjw,1377
10
10
  monarch/mesh_controller.py,sha256=R9ZnVV89wYva0QTAwOgHi_PkjYPEj_7_yF9810NHPak,14675
11
- monarch/monarch_controller,sha256=Fzbm8eLjh8L_s83_DJN9vM0iaK0C1kaHF7jWBRANgcs,24036856
11
+ monarch/monarch_controller,sha256=c5-zlhoJK5vpEVxY6DMwRXEmHSmBy0Qgv55Tbu-8vWI,24034032
12
12
  monarch/notebook.py,sha256=zu9MKDFKf1-rCM2TqFSRJjMBeiWuKcJSyUFLvoZRQzs,25949
13
13
  monarch/opaque_module.py,sha256=jCcg0DjbcEVXA9WNG0NhUzGteLHOJLTZEBvrIYJIAns,10436
14
14
  monarch/opaque_object.py,sha256=x1LoX6RIMGh4ux52xIfhPgoh6PhZHdkf9bMccHW3DW0,2808
@@ -35,7 +35,7 @@ monarch/_src/actor/event_loop.py,sha256=2i4fKIkemBzua_t47BqVa2roZ6fWB6sbmMFPNx2z
35
35
  monarch/_src/actor/future.py,sha256=7QDiPu6-CnTw7cN_GWomQa9qGxDo5yXqCSqgyCJ7roU,5195
36
36
  monarch/_src/actor/pdb_wrapper.py,sha256=3pjk-eTSc7_rctDiZl-vilqTXQoaERGqyi1LueyoQGg,4342
37
37
  monarch/_src/actor/pickle.py,sha256=jD_3E07OJmMIlcMOOrNFnIuRKZU2F_Q_BP-njDFXUNM,2044
38
- monarch/_src/actor/proc_mesh.py,sha256=FNBRl-F1HdvdLWwHt0YrWYQhzXK4y1y-nSKnfwkznwA,15531
38
+ monarch/_src/actor/proc_mesh.py,sha256=5AKgjsskVe7blJjwROom31p1EXzx9ANg3ADzvcLcU5s,15746
39
39
  monarch/_src/actor/shape.py,sha256=E9kxf1RNym1LNJNXF18gNDmnAHR7SDcl3W4nXR65BPY,8293
40
40
  monarch/_src/actor/sync_state.py,sha256=GB6bTAGztkcN8fZ9K7zXklOzjYzv6cvkJeBje20xFkE,471
41
41
  monarch/_src/actor/tensor_engine_shim.py,sha256=hupavQ2rjPwECaTlDAhY-eeiEY18Wyyx59MZHcSEcYM,1622
@@ -50,7 +50,7 @@ monarch/builtins/__init__.py,sha256=QcfnHZGbc2qktBg7DyZt2ruE6VahnIt4S8lEZLHdJqU,
50
50
  monarch/builtins/log.py,sha256=H1QkuVzwxyi36Zyv-XR0VN0QsNimBWwxE1__fjs0_2o,554
51
51
  monarch/builtins/random.py,sha256=wPbvscg7u53EXpMFo885fO2XOlsyjrNAJ4rBxLzfxdg,1839
52
52
  monarch/common/_C.pyi,sha256=kHY2G3ksMAjQJ6IcPb4F1bBh5knzw5RVVNhhBlEmwFU,314
53
- monarch/common/_C.so,sha256=i0Mqno03fQrMtkAnBwsenC4EzrU-NQy7fUk3-cGfOcw,719904
53
+ monarch/common/_C.so,sha256=WkoR6BuS6k6tfia5sGSmfQVsBNmdKqRXXQ8o9pqfpy0,719808
54
54
  monarch/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
55
  monarch/common/_coalescing.py,sha256=HXf5cXAPSU_tpw9jFkzs2muytG_6sTZJSqSKV0XuFZE,10925
56
56
  monarch/common/_tensor_to_table.py,sha256=yRjCNwvtl188Z1Dwkx3ZU-Bh2mwYnQ0Lnue2RAztwvc,5753
@@ -89,7 +89,7 @@ monarch/controller/rust_backend/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTY
89
89
  monarch/controller/rust_backend/controller.py,sha256=8IYnVUiqEVKO9rGL3vKqcCSAhWJG1bYYQ0MoaMqsp78,9521
90
90
  monarch/gradient/__init__.py,sha256=kqmzwt16mMpk0M3GhpgP_f7da4DGnaV9chDzbt66k4Q,308
91
91
  monarch/gradient/_gradient_generator.pyi,sha256=6cX0UxaDt9NAlwgIhTgnweqGOf6qRhHiGnUzSWNCxdU,630
92
- monarch/gradient/_gradient_generator.so,sha256=Jw4MAcg1zcjYfm51N6EZWxTA8bedXaTcGMj3g45o9jQ,11534720
92
+ monarch/gradient/_gradient_generator.so,sha256=kSqzNPJx7AeQj9mtAjqdg1U7gQ7Zc2_QCkOSOYqY3Gs,11755672
93
93
  monarch/parallel/__init__.py,sha256=6920kIkhiX7AiyjYvyc1ad8ccP-bStJJ1sS5KkeN2P0,352
94
94
  monarch/parallel/pipelining/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
95
95
  monarch/parallel/pipelining/runtime.py,sha256=KK8TG1gUYEzSsquiZoPTWGSIC74mlncD7cYknKxfb3c,32470
@@ -119,7 +119,7 @@ monarch/tools/cli.py,sha256=b3mKZnK-MwP7JwskTxHI0KcJXxSU6498jEb2ntVr_VM,5001
119
119
  monarch/tools/commands.py,sha256=3xuvHcMwl0t6cWTVUxI_r8EqrJZnay0bkKxOijhlKrw,12126
120
120
  monarch/tools/mesh_spec.py,sha256=in6txNRmA-UvveVSMHCjX6mGpofd3K8vl2Plz1eD6rg,7935
121
121
  monarch/tools/network.py,sha256=mN8Fx9mervxM3VdFHRn4ZXt4z7yWxZp52BTxx2tfpus,2455
122
- monarch/tools/utils.py,sha256=p9zunV_OP_eYC2GQFZ_NKFj5Xl1tf7Tin1ZxjZqp3TY,2010
122
+ monarch/tools/utils.py,sha256=gcZyalfoBC6Y3v65h-QMngwXsn24ejXh2TH8RxlgXkA,1888
123
123
  monarch/tools/components/__init__.py,sha256=J8qjUOysmcMAek2KFN13mViOXZxTYc5vCrF02t3VuFU,223
124
124
  monarch/tools/components/hyperactor.py,sha256=jbfC5J9oRzzMQFO2eIx9acRS8RPHp3GtKyJjYblcJFM,2169
125
125
  monarch/tools/config/__init__.py,sha256=HZjmRSC_R28WMDSvNAqNRlqhH7lMHFWPIrztcFFt8Us,890
@@ -147,7 +147,7 @@ tests/sleep_binary.py,sha256=XfLYaAfwm9xgzM-svs8fhAeFhwYIg6SyVEnx4e6wbUw,1009
147
147
  tests/test_actor_error.py,sha256=U7QL1jRn-YpS-o62imt7HFLPtaSbwMBu9xpD09Mb-Bc,20875
148
148
  tests/test_actor_shape.py,sha256=ph-RC9sMNHWptZOCwQqMfG4lIUEzhp_pEnfhITeYdHM,4533
149
149
  tests/test_alloc.py,sha256=IW7yJSaKxhOYc8SJtFyREakDUwiKWq9M0CGgYyBYHoc,743
150
- tests/test_allocator.py,sha256=4LcUB4QRNGDp0qBWAyLM6ektmoxpO922f-NcHZziJ_w,28762
150
+ tests/test_allocator.py,sha256=DpQhygQ4jB19g-aY-BFR61J-gYbG-hDlTNPVuQ4Fmn0,29730
151
151
  tests/test_coalescing.py,sha256=JZ4YgQNlWWs7N-Z8KCCXQPANcuyyXEKjeHIXYbPnQhk,15606
152
152
  tests/test_controller.py,sha256=CIMb-ApmBcBj1eCqccDUAbVyyJWMGooAha5gQk0AoeY,31452
153
153
  tests/test_debugger.py,sha256=9opgQXCBuZ1Z-7uOKI-FuGB0jLbLLilmWQKq0sE-dgQ,21950
@@ -170,9 +170,9 @@ tests/simulator/test_profiling.py,sha256=TGYCfzTLdkpIwnOuO6KApprmrgPIRQe60KRX3wk
170
170
  tests/simulator/test_simulator.py,sha256=LO8lA0ssY-OGEBL5ipEu74f97Y765TEwfUOv-DtIptM,14568
171
171
  tests/simulator/test_task.py,sha256=ipqBDuDAysuo1xOB9S5psaFvwe6VATD43IovCTSs0t4,2327
172
172
  tests/simulator/test_worker.py,sha256=QrWWIJ3HDgDLkBPRc2mwYPlOQoXQcj1qRfc0WUfKkFY,3507
173
- torchmonarch_nightly-2025.7.30.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
174
- torchmonarch_nightly-2025.7.30.dist-info/METADATA,sha256=TLI6C3RGzWoCq2Ual0X4CEz_oUaMFvHdAxIsAI6yxaI,3852
175
- torchmonarch_nightly-2025.7.30.dist-info/WHEEL,sha256=OlISbtpDcfagPrLwG7WtpcZbPTUnoKPnwphA_26fNqE,104
176
- torchmonarch_nightly-2025.7.30.dist-info/entry_points.txt,sha256=60QVSpYVzkzS4iDOiLp0fsLxVp47X3J2l3v7W-59LMo,117
177
- torchmonarch_nightly-2025.7.30.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
178
- torchmonarch_nightly-2025.7.30.dist-info/RECORD,,
173
+ torchmonarch_nightly-2025.8.1.dist-info/licenses/LICENSE,sha256=e0Eotbf_rHOYPuEUlppIbvwy4SN98CZnl_hqwvbDA4Q,1530
174
+ torchmonarch_nightly-2025.8.1.dist-info/METADATA,sha256=gnu970IbH-M9RmjioXTmMbk_MXOCcf6iTTGa226YHdI,3851
175
+ torchmonarch_nightly-2025.8.1.dist-info/WHEEL,sha256=OlISbtpDcfagPrLwG7WtpcZbPTUnoKPnwphA_26fNqE,104
176
+ torchmonarch_nightly-2025.8.1.dist-info/entry_points.txt,sha256=60QVSpYVzkzS4iDOiLp0fsLxVp47X3J2l3v7W-59LMo,117
177
+ torchmonarch_nightly-2025.8.1.dist-info/top_level.txt,sha256=E-ZssZzyM17glpVrh-S9--qJ-w9p2EjuYOuNw9tQ4Eg,33
178
+ torchmonarch_nightly-2025.8.1.dist-info/RECORD,,