xmanager-slurm 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xmanager-slurm might be problematic. Click here for more details.
- xm_slurm/api/web/client.py +1 -1
- xm_slurm/config.py +2 -1
- xm_slurm/execution.py +47 -17
- xm_slurm/templates/slurm/runtimes/podman.bash.j2 +0 -1
- xm_slurm/utils.py +30 -0
- {xmanager_slurm-0.4.6.dist-info → xmanager_slurm-0.4.8.dist-info}/METADATA +1 -1
- {xmanager_slurm-0.4.6.dist-info → xmanager_slurm-0.4.8.dist-info}/RECORD +10 -10
- {xmanager_slurm-0.4.6.dist-info → xmanager_slurm-0.4.8.dist-info}/WHEEL +0 -0
- {xmanager_slurm-0.4.6.dist-info → xmanager_slurm-0.4.8.dist-info}/entry_points.txt +0 -0
- {xmanager_slurm-0.4.6.dist-info → xmanager_slurm-0.4.8.dist-info}/licenses/LICENSE.md +0 -0
xm_slurm/api/web/client.py
CHANGED
|
@@ -33,7 +33,7 @@ class XManagerWebAPI(XManagerAPI):
|
|
|
33
33
|
self.client = httpx.Client(headers={"Authorization": f"Bearer {token}"}, verify=False)
|
|
34
34
|
|
|
35
35
|
def _make_url(self, path: str) -> str:
|
|
36
|
-
return f"{self.base_url}
|
|
36
|
+
return f"{self.base_url}{path}"
|
|
37
37
|
|
|
38
38
|
@with_backoff
|
|
39
39
|
def get_experiment(self, xid: int) -> models.Experiment:
|
xm_slurm/config.py
CHANGED
|
@@ -119,8 +119,9 @@ class SlurmSSHConfig:
|
|
|
119
119
|
config=None,
|
|
120
120
|
kbdint_auth=False,
|
|
121
121
|
disable_trivial_auth=True,
|
|
122
|
+
known_hosts=self.known_hosts,
|
|
122
123
|
)
|
|
123
|
-
options.prepare(last_config=self.config
|
|
124
|
+
options.prepare(last_config=self.config)
|
|
124
125
|
return options
|
|
125
126
|
|
|
126
127
|
def serialize(self):
|
xm_slurm/execution.py
CHANGED
|
@@ -235,11 +235,10 @@ class SlurmHandle(_BatchedSlurmHandle, tp.Generic[SlurmJobT]):
|
|
|
235
235
|
return await self._batched_get_state(self.ssh, self.slurm_job)
|
|
236
236
|
|
|
237
237
|
async def logs(
|
|
238
|
-
self, *, num_lines: int, block_size: int, wait: bool, follow: bool
|
|
239
|
-
) -> tp.AsyncGenerator[ConsoleRenderable, None]:
|
|
240
|
-
|
|
241
|
-
file =
|
|
242
|
-
|
|
238
|
+
self, *, num_lines: int, block_size: int, wait: bool, follow: bool, raw: bool = False
|
|
239
|
+
) -> tp.AsyncGenerator[tp.Union[str, ConsoleRenderable], None]:
|
|
240
|
+
experiment_dir = await get_client().experiment_dir(self.ssh, self.experiment_id)
|
|
241
|
+
file = experiment_dir / f"slurm-{self.slurm_job.job_id}.out"
|
|
243
242
|
fs = await get_client().fs(self.ssh)
|
|
244
243
|
|
|
245
244
|
if wait:
|
|
@@ -249,7 +248,7 @@ class SlurmHandle(_BatchedSlurmHandle, tp.Generic[SlurmJobT]):
|
|
|
249
248
|
file_size = await fs.size(file)
|
|
250
249
|
assert file_size is not None
|
|
251
250
|
|
|
252
|
-
async with await fs.open(file, "rb") as remote_file:
|
|
251
|
+
async with await fs.open(file, "rb") as remote_file:
|
|
253
252
|
data = b""
|
|
254
253
|
lines = []
|
|
255
254
|
position = file_size
|
|
@@ -257,27 +256,40 @@ class SlurmHandle(_BatchedSlurmHandle, tp.Generic[SlurmJobT]):
|
|
|
257
256
|
while len(lines) <= num_lines and position > 0:
|
|
258
257
|
read_size = min(block_size, position)
|
|
259
258
|
position -= read_size
|
|
260
|
-
await remote_file.seek(position)
|
|
259
|
+
await remote_file.seek(position)
|
|
261
260
|
chunk = await remote_file.read(read_size)
|
|
262
261
|
data = chunk + data
|
|
263
262
|
lines = data.splitlines()
|
|
264
263
|
|
|
265
264
|
if position <= 0:
|
|
266
|
-
|
|
265
|
+
if raw:
|
|
266
|
+
yield "\033[31mBEGINNING OF FILE\033[0m\n"
|
|
267
|
+
else:
|
|
268
|
+
yield Rule("[bold red]BEGINNING OF FILE[/bold red]")
|
|
267
269
|
for line in lines[-num_lines:]:
|
|
268
|
-
|
|
270
|
+
if raw:
|
|
271
|
+
yield line.decode("utf-8", errors="replace") + "\n"
|
|
272
|
+
else:
|
|
273
|
+
yield Text.from_ansi(line.decode("utf-8", errors="replace"))
|
|
269
274
|
|
|
270
275
|
if (await self.get_state()) not in status.SlurmActiveJobStates:
|
|
271
|
-
|
|
272
|
-
|
|
276
|
+
if raw:
|
|
277
|
+
yield "\033[31mEND OF FILE\033[0m\n"
|
|
278
|
+
return
|
|
279
|
+
else:
|
|
280
|
+
yield Rule("[bold red]END OF FILE[/bold red]")
|
|
281
|
+
return
|
|
273
282
|
|
|
274
283
|
if not follow:
|
|
275
284
|
return
|
|
276
285
|
|
|
277
|
-
await remote_file.seek(file_size)
|
|
286
|
+
await remote_file.seek(file_size)
|
|
278
287
|
while True:
|
|
279
288
|
if new_data := (await remote_file.read(block_size)):
|
|
280
|
-
|
|
289
|
+
if raw:
|
|
290
|
+
yield new_data.decode("utf-8", errors="replace")
|
|
291
|
+
else:
|
|
292
|
+
yield Text.from_ansi(new_data.decode("utf-8", errors="replace"))
|
|
281
293
|
else:
|
|
282
294
|
await asyncio.sleep(0.25)
|
|
283
295
|
|
|
@@ -454,10 +466,28 @@ class SlurmExecutionClient:
|
|
|
454
466
|
return False
|
|
455
467
|
|
|
456
468
|
@functools.cache
|
|
469
|
+
@utils.reawaitable
|
|
457
470
|
async def _state_dir(self, ssh_config: SlurmSSHConfig) -> pathlib.Path:
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
471
|
+
state_dirs = [
|
|
472
|
+
("XM_SLURM_STATE_DIR", ""),
|
|
473
|
+
("XDG_STATE_HOME", "xm-slurm"),
|
|
474
|
+
("HOME", ".local/state/xm-slurm"),
|
|
475
|
+
]
|
|
476
|
+
|
|
477
|
+
for env_var, subpath in state_dirs:
|
|
478
|
+
cmd = await self.run(ssh_config, f"printenv {env_var}", check=False)
|
|
479
|
+
assert isinstance(cmd.stdout, str)
|
|
480
|
+
if cmd.returncode == 0:
|
|
481
|
+
return pathlib.Path(cmd.stdout.strip()) / subpath
|
|
482
|
+
|
|
483
|
+
raise SlurmExecutionError(
|
|
484
|
+
"Failed to find a valid state directory for XManager. "
|
|
485
|
+
"We weren't able to resolve any of the following paths: "
|
|
486
|
+
f"{', '.join(env_var + ('/' + subpath if subpath else '') for env_var, subpath in state_dirs)}."
|
|
487
|
+
)
|
|
488
|
+
|
|
489
|
+
async def experiment_dir(self, ssh_config: SlurmSSHConfig, experiment_id: int) -> pathlib.Path:
|
|
490
|
+
return (await self._state_dir(ssh_config)) / f"{experiment_id:08d}"
|
|
461
491
|
|
|
462
492
|
async def run(
|
|
463
493
|
self,
|
|
@@ -646,7 +676,7 @@ class SlurmExecutionClient:
|
|
|
646
676
|
|
|
647
677
|
fs = await self.fs(cluster.ssh)
|
|
648
678
|
|
|
649
|
-
template_dir =
|
|
679
|
+
template_dir = await self.experiment_dir(cluster.ssh, experiment_id)
|
|
650
680
|
|
|
651
681
|
await fs.makedirs(template_dir, exist_ok=True)
|
|
652
682
|
await fs.write(template_dir / submission_script_path, submission_script.encode())
|
|
xm_slurm/templates/slurm/runtimes/podman.bash.j2
CHANGED
|
@@ -3,7 +3,6 @@ time podman pull \
|
|
|
3
3
|
{% if job.executable.credentials %}
|
|
4
4
|
--creds {{ job.executable.credentials.username }}:{{ job.executable.credentials.password }} \
|
|
5
5
|
{% endif %}
|
|
6
|
-
--retry 3 \
|
|
7
6
|
{{ job.executable.image }}
|
|
8
7
|
|
|
9
8
|
cat << 'ENTRYPOINT_EOF' > "$SLURM_TMPDIR"/xm-slurm-entrypoint.sh
|
xm_slurm/utils.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import functools
|
|
2
3
|
import logging
|
|
3
4
|
import os
|
|
@@ -13,10 +14,39 @@ import typing as tp
|
|
|
13
14
|
from xmanager import xm
|
|
14
15
|
|
|
15
16
|
T = tp.TypeVar("T")
|
|
17
|
+
P = tp.ParamSpec("P")
|
|
16
18
|
|
|
17
19
|
logger = logging.getLogger(__name__)
|
|
18
20
|
|
|
19
21
|
|
|
22
|
+
class CachedAwaitable(tp.Awaitable[T]):
|
|
23
|
+
def __init__(self, awaitable: tp.Awaitable[T]):
|
|
24
|
+
self.awaitable = awaitable
|
|
25
|
+
self.result: asyncio.Future[T] | None = None
|
|
26
|
+
|
|
27
|
+
def __await__(self):
|
|
28
|
+
if not self.result:
|
|
29
|
+
future = asyncio.get_event_loop().create_future()
|
|
30
|
+
self.result = future
|
|
31
|
+
try:
|
|
32
|
+
result = yield from self.awaitable.__await__()
|
|
33
|
+
future.set_result(result)
|
|
34
|
+
except Exception as e:
|
|
35
|
+
future.set_exception(e)
|
|
36
|
+
|
|
37
|
+
if not self.result.done():
|
|
38
|
+
yield from self.result
|
|
39
|
+
return self.result.result()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def reawaitable(f: tp.Callable[P, tp.Awaitable[T]]) -> tp.Callable[P, CachedAwaitable[T]]:
|
|
43
|
+
@functools.wraps(f)
|
|
44
|
+
def wrapper(*args: P.args, **kwargs: P.kwargs) -> CachedAwaitable[T]:
|
|
45
|
+
return CachedAwaitable(f(*args, **kwargs))
|
|
46
|
+
|
|
47
|
+
return wrapper
|
|
48
|
+
|
|
49
|
+
|
|
20
50
|
@functools.cache
|
|
21
51
|
def find_project_root() -> pathlib.Path:
|
|
22
52
|
launch_script_path: pathlib.Path | None = None
|
|
{xmanager_slurm-0.4.6.dist-info → xmanager_slurm-0.4.8.dist-info}/RECORD
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
xm_slurm/__init__.py,sha256=WgRn9HDYa5H3sfIH-HZu33liBOh98jM4GqcR349RaSY,1086
|
|
2
2
|
xm_slurm/batching.py,sha256=GbKBsNz9w8gIc2fHLZpslC0e4K9YUfLXFHmjduRRCfQ,4385
|
|
3
|
-
xm_slurm/config.py,sha256=
|
|
3
|
+
xm_slurm/config.py,sha256=PvdLG6fSNfbABbtwELbnytx85vWLPtyVfEJtoQsLf94,7084
|
|
4
4
|
xm_slurm/console.py,sha256=UpMqeJ0C8i0pkue1AHnnyyX0bFJ9zZeJ7HBR6yhuA8A,54
|
|
5
5
|
xm_slurm/constants.py,sha256=zefVtlFdflgSolie5g_rVxWV-Zpydxapchm3y0a2FDc,999
|
|
6
6
|
xm_slurm/dependencies.py,sha256=-5gN_tpfs3dOA7H5_MIHO2ratb7F5Pm_yjkR5rZcgI8,6421
|
|
7
7
|
xm_slurm/executables.py,sha256=fGmrFBl-258bMn6ip5adYeM7xxUHAeIbDN9zD2FDGtY,6373
|
|
8
|
-
xm_slurm/execution.py,sha256=
|
|
8
|
+
xm_slurm/execution.py,sha256=c0aV1h2tKQFyAGM6JLd16MWFgpRLKAbcutZz17xPUSw,31400
|
|
9
9
|
xm_slurm/executors.py,sha256=fMtxGUCi4vEKmb_p4JEpqPUTh7L_f1LcR_TamMLAWNg,4667
|
|
10
10
|
xm_slurm/experiment.py,sha256=94r0mhtUPUzw4eaUEz0kpsufC25wEGqlDhV4Fcr1ukY,39883
|
|
11
11
|
xm_slurm/filesystem.py,sha256=4rKtq3t-KDgxJbSGt6JVyRJT_3lCN_vIKTcwKHpTo3I,4389
|
|
@@ -15,12 +15,12 @@ xm_slurm/packageables.py,sha256=fPUvqF2IvJ2Hn6hodDdQwtx1Ze3sJ8U-BUbxDHauW-g,1239
|
|
|
15
15
|
xm_slurm/resources.py,sha256=tET3TPOQ8nXYE_SxAs2fiHt9UKJsCLW1vFktJTH0xG4,5722
|
|
16
16
|
xm_slurm/status.py,sha256=WTWiDHi-ZHtwHRnDP0cGa-27zTSm6LkA-GCKsN-zBgg,6916
|
|
17
17
|
xm_slurm/types.py,sha256=TsVykDm-LazVkrjeJrTwCMs4Q8APKhy7BTk0yKIhFNg,805
|
|
18
|
-
xm_slurm/utils.py,sha256=
|
|
18
|
+
xm_slurm/utils.py,sha256=xtFvktaxr0z65sTdu6HhOVfyo0OAB9t-EYXWcYrQQEU,5958
|
|
19
19
|
xm_slurm/api/__init__.py,sha256=cyao3LZ3uLftu1wIv1aN7Qvsl6gYzYpkxeehTHZ0fA8,1089
|
|
20
20
|
xm_slurm/api/abc.py,sha256=-lS2OndnOuEiwNdr8ccQKkwMd1iDmKMmkBOSTvo5H5w,1816
|
|
21
21
|
xm_slurm/api/models.py,sha256=_INVh0j-4-rRs0WASyg4fNB6NF1L1nUeGgQ6-XnbwsM,1610
|
|
22
22
|
xm_slurm/api/sqlite/client.py,sha256=WykSIO7b14rRLy9qebbkiLKXy7EHU61jtoebLX17HMM,14124
|
|
23
|
-
xm_slurm/api/web/client.py,sha256=
|
|
23
|
+
xm_slurm/api/web/client.py,sha256=uO67Y7fnQ-w__Vm_A5BEuy7Qi8wQcWk3vIsBGEBkyfk,6261
|
|
24
24
|
xm_slurm/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
25
|
xm_slurm/contrib/clusters/__init__.py,sha256=XFCVnkThiU3_8uA_tUgDByOBanXNHrxDvfmuptmQ2KE,2214
|
|
26
26
|
xm_slurm/contrib/clusters/drac.py,sha256=ViLYerYBMSuZXnWVbz9RDIPPV7JA8BgBpgTfj1wPP28,5881
|
|
@@ -43,9 +43,9 @@ xm_slurm/templates/slurm/job.bash.j2,sha256=d35VYHdAKkgVK8s4XnUDJwQR0gLnDWRJu-Ld
|
|
|
43
43
|
xm_slurm/templates/slurm/fragments/monitor.bash.j2,sha256=HYqYhXsTv8TCed5UaGCZVGIYsqxSKHcnPyNNTHWNvxc,1279
|
|
44
44
|
xm_slurm/templates/slurm/fragments/proxy.bash.j2,sha256=VJLglZo-Nvx9R-qe3rHTxr07CylTQ6Z9NwBzvIpAZrA,814
|
|
45
45
|
xm_slurm/templates/slurm/runtimes/apptainer.bash.j2,sha256=lE2EWVCK2O-n08RL4_MJYIikVTvODjcYKuv7Eh73Q2w,1932
|
|
46
|
-
xm_slurm/templates/slurm/runtimes/podman.bash.j2,sha256=
|
|
47
|
-
xmanager_slurm-0.4.
|
|
48
|
-
xmanager_slurm-0.4.
|
|
49
|
-
xmanager_slurm-0.4.
|
|
50
|
-
xmanager_slurm-0.4.
|
|
51
|
-
xmanager_slurm-0.4.
|
|
46
|
+
xm_slurm/templates/slurm/runtimes/podman.bash.j2,sha256=3j7K5eyXt_WhXK0EoMlxnhlmFVJ2JyxRKbsMRaDqzSs,1148
|
|
47
|
+
xmanager_slurm-0.4.8.dist-info/METADATA,sha256=8QQ9xbptuObTCHB8WaYr0rSKOgxK5ojCaeD7mN9Qvl0,1042
|
|
48
|
+
xmanager_slurm-0.4.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
49
|
+
xmanager_slurm-0.4.8.dist-info/entry_points.txt,sha256=_HLGmLgxuQLOPmF2gOFYDVq2HqtMVD_SzigHvUh8TCY,49
|
|
50
|
+
xmanager_slurm-0.4.8.dist-info/licenses/LICENSE.md,sha256=IxstXr3MPHwTJ5jMrByHrQsR1ZAGQ2U_uz_4qzI_15Y,11756
|
|
51
|
+
xmanager_slurm-0.4.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|