xmanager-slurm 0.4.5__py3-none-any.whl → 0.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xmanager-slurm might be problematic. Click here for more details.
- xm_slurm/__init__.py +0 -2
- xm_slurm/api/__init__.py +33 -0
- xm_slurm/api/abc.py +65 -0
- xm_slurm/api/models.py +70 -0
- xm_slurm/api/sqlite/client.py +358 -0
- xm_slurm/api/web/client.py +173 -0
- xm_slurm/config.py +11 -3
- xm_slurm/contrib/clusters/__init__.py +3 -6
- xm_slurm/contrib/clusters/drac.py +4 -3
- xm_slurm/executables.py +4 -7
- xm_slurm/execution.py +273 -159
- xm_slurm/experiment.py +26 -180
- xm_slurm/filesystem.py +129 -0
- xm_slurm/metadata_context.py +253 -0
- xm_slurm/packageables.py +0 -9
- xm_slurm/packaging/docker.py +72 -22
- xm_slurm/packaging/utils.py +0 -108
- xm_slurm/scripts/cli.py +9 -2
- xm_slurm/templates/docker/uv.Dockerfile +6 -3
- xm_slurm/templates/slurm/entrypoint.bash.j2 +27 -0
- xm_slurm/templates/slurm/job-array.bash.j2 +4 -4
- xm_slurm/templates/slurm/job-group.bash.j2 +2 -2
- xm_slurm/templates/slurm/job.bash.j2 +5 -4
- xm_slurm/templates/slurm/runtimes/apptainer.bash.j2 +18 -54
- xm_slurm/templates/slurm/runtimes/podman.bash.j2 +10 -24
- xm_slurm/utils.py +122 -41
- {xmanager_slurm-0.4.5.dist-info → xmanager_slurm-0.4.6.dist-info}/METADATA +7 -3
- xmanager_slurm-0.4.6.dist-info/RECORD +51 -0
- {xmanager_slurm-0.4.5.dist-info → xmanager_slurm-0.4.6.dist-info}/WHEEL +1 -1
- xm_slurm/api.py +0 -528
- xmanager_slurm-0.4.5.dist-info/RECORD +0 -44
- {xmanager_slurm-0.4.5.dist-info → xmanager_slurm-0.4.6.dist-info}/entry_points.txt +0 -0
- {xmanager_slurm-0.4.5.dist-info → xmanager_slurm-0.4.6.dist-info}/licenses/LICENSE.md +0 -0
xm_slurm/utils.py
CHANGED
|
@@ -1,51 +1,20 @@
|
|
|
1
1
|
import functools
|
|
2
|
+
import logging
|
|
2
3
|
import os
|
|
3
4
|
import pathlib
|
|
5
|
+
import pty
|
|
6
|
+
import re
|
|
7
|
+
import select
|
|
8
|
+
import shutil
|
|
9
|
+
import subprocess
|
|
4
10
|
import sys
|
|
5
|
-
|
|
11
|
+
import typing as tp
|
|
6
12
|
|
|
7
|
-
|
|
13
|
+
from xmanager import xm
|
|
8
14
|
|
|
15
|
+
T = tp.TypeVar("T")
|
|
9
16
|
|
|
10
|
-
|
|
11
|
-
__hash__ = MutableSet._hash
|
|
12
|
-
|
|
13
|
-
def __init__(
|
|
14
|
-
self,
|
|
15
|
-
iterable: Iterable[T] = (),
|
|
16
|
-
/,
|
|
17
|
-
on_add: Callable[[T], None] = lambda x: None,
|
|
18
|
-
on_remove: Callable[[T], None] = lambda x: None,
|
|
19
|
-
on_discard: Callable[[T], None] = lambda x: None,
|
|
20
|
-
):
|
|
21
|
-
self.data = set(iterable)
|
|
22
|
-
self._on_add = on_add
|
|
23
|
-
self._on_remove = on_remove
|
|
24
|
-
self._on_discard = on_discard
|
|
25
|
-
|
|
26
|
-
def __contains__(self, value):
|
|
27
|
-
return value in self.data
|
|
28
|
-
|
|
29
|
-
def __iter__(self):
|
|
30
|
-
return iter(self.data)
|
|
31
|
-
|
|
32
|
-
def __len__(self):
|
|
33
|
-
return len(self.data)
|
|
34
|
-
|
|
35
|
-
def __repr__(self):
|
|
36
|
-
return repr(self.data)
|
|
37
|
-
|
|
38
|
-
def add(self, value: T):
|
|
39
|
-
self.data.add(value)
|
|
40
|
-
self._on_add(value)
|
|
41
|
-
|
|
42
|
-
def remove(self, value: T):
|
|
43
|
-
self.data.remove(value)
|
|
44
|
-
self._on_remove(value)
|
|
45
|
-
|
|
46
|
-
def discard(self, value: T):
|
|
47
|
-
self.data.discard(value)
|
|
48
|
-
self._on_discard(value)
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
49
18
|
|
|
50
19
|
|
|
51
20
|
@functools.cache
|
|
@@ -75,3 +44,115 @@ def find_project_root() -> pathlib.Path:
|
|
|
75
44
|
pdir = pdir.parent
|
|
76
45
|
|
|
77
46
|
raise RuntimeError(f"Could not find project root from {sys.argv[0]}. Please specify `context`.")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Cursor commands to filter out from the command data stream.
#
# Only cursor visibility/positioning and screen/line clearing sequences are
# matched; anything else (for example SGR colour codes such as ``ESC[31m``)
# is left in the stream. The pattern operates on raw bytes because it is
# applied to data read straight from a file descriptor.
_CURSOR_ESCAPE_SEQUENCES_REGEX = re.compile(
    rb"\x1b\[\?25[hl]"  # Matches cursor show/hide commands (CSI ?25h and CSI ?25l)
    rb"|\x1b\[[0-9;]*[Hf]"  # Matches cursor position commands (CSI n;mH and CSI n;mf)
    rb"|\x1b\[s"  # Matches cursor save position (CSI s)
    rb"|\x1b\[u"  # Matches cursor restore position (CSI u)
    rb"|\x1b\[2J"  # Matches clear screen (CSI 2J)
    rb"|\x1b\[K"  # Matches clear line (CSI K)
)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def run_command(
    args: tp.Sequence[str] | xm.SequentialArgs,
    env: dict[str, str] | None = None,
    tty: bool = False,
    cwd: str | os.PathLike[str] | None = None,
    stdin: tp.IO[tp.AnyStr] | str | None = None,
    check: bool = False,
    return_stdout: bool = False,
    return_stderr: bool = False,
) -> subprocess.CompletedProcess[str]:
    """Run a command with its stdout/stderr attached to pseudo-terminals.

    The child's stdout and stderr are each connected to their own pty, the
    output is read back chunk by chunk, cursor-control escape sequences are
    stripped, and the result is optionally captured and/or echoed to this
    process's own stdout/stderr.

    Args:
        args: Command and arguments. ``args[0]`` is resolved with
            ``shutil.which``.
        env: Extra environment variables layered on top of ``os.environ``.
        tty: When true, echo the (filtered) child output to file descriptors
            ``pty.STDOUT_FILENO`` / ``pty.STDERR_FILENO`` as it arrives.
        cwd: Working directory for the child process.
        stdin: Text (or a readable stream) to feed to the child's stdin. A
            falsy value means no pipe is created.
        check: Raise ``subprocess.CalledProcessError`` on a non-zero exit.
        return_stdout: Capture stdout into the returned object.
        return_stderr: Capture stderr into the returned object.

    Returns:
        A ``subprocess.CompletedProcess`` whose ``stdout`` / ``stderr`` hold
        the captured text, or ``""`` for a stream that was not captured.

    Raises:
        ValueError: If ``args`` is empty.
        RuntimeError: If the executable cannot be found on ``PATH``.
        subprocess.CalledProcessError: If ``check`` is true and the command
            exits with a non-zero status.
    """
    if isinstance(args, xm.SequentialArgs):
        args = args.to_list()
    args = list(args)
    if not args:
        raise ValueError("Cannot run an empty command")

    executable = shutil.which(args[0])
    if not executable:
        raise RuntimeError(f"Couldn't find executable {args[0]}")
    executable = pathlib.Path(executable)

    subprocess_env = os.environ.copy() | (env if env else {})
    # Slice instead of indexing so a bare `docker` invocation does not raise.
    if executable.name == "docker" and args[1:2] == ["buildx"]:
        subprocess_env |= {"DOCKER_CLI_EXPERIMENTAL": "enabled"}

    logger.debug("command: %s", " ".join(args))

    # Every pty descriptor we currently own; whatever is still in here when we
    # leave (normally or via an exception) gets closed in the `finally` below.
    open_fds: set[int] = set()
    stdout_chunks: list[bytes] = []
    stderr_chunks: list[bytes] = []
    try:
        stdout_master, stdout_slave = pty.openpty()
        open_fds.update((stdout_master, stdout_slave))
        stderr_master, stderr_slave = pty.openpty()
        open_fds.update((stderr_master, stderr_slave))

        with subprocess.Popen(
            executable=executable,
            args=args,
            shell=False,
            text=True,
            bufsize=0,
            stdin=subprocess.PIPE if stdin else None,
            stdout=stdout_slave,
            stderr=stderr_slave,
            start_new_session=True,
            close_fds=True,
            cwd=cwd,
            env=subprocess_env,
        ) as process:
            # The child holds its own copies of the slave ends; drop ours so
            # the masters report end-of-stream once the child is done.
            for slave_fd in (stdout_slave, stderr_slave):
                open_fds.discard(slave_fd)
                os.close(slave_fd)

            if stdin and process.stdin:
                payload = stdin if isinstance(stdin, str) else stdin.read()
                if isinstance(payload, bytes):
                    # The pipe is opened in text mode; accept binary streams too.
                    payload = payload.decode()
                process.stdin.write(payload)
                process.stdin.close()

            # master fd -> (capture buffer or None, fd to echo to when `tty`)
            streams = {
                stdout_master: (stdout_chunks if return_stdout else None, pty.STDOUT_FILENO),
                stderr_master: (stderr_chunks if return_stderr else None, pty.STDERR_FILENO),
            }
            readable = [stdout_master, stderr_master]
            while readable:
                ready, _, _ = select.select(readable, [], [])
                for fd in ready:
                    try:
                        chunk = os.read(fd, 1024)
                    except OSError:
                        # Reading a pty master whose slave side is closed
                        # typically fails instead of returning b""; treat it
                        # as end-of-stream.
                        chunk = b""

                    if not chunk:
                        readable.remove(fd)
                        open_fds.discard(fd)
                        os.close(fd)
                        continue

                    # NOTE: filtering is per chunk, so an escape sequence that
                    # straddles two reads is not removed.
                    chunk = _CURSOR_ESCAPE_SEQUENCES_REGEX.sub(b"", chunk)

                    capture, echo_fd = streams[fd]
                    if capture is not None:
                        capture.append(chunk)
                    if tty:
                        os.write(echo_fd, chunk)

            # Both streams are drained; reap the child so the exit status is
            # actually available (poll() could still return None here).
            retcode = process.wait()
    finally:
        for fd in open_fds:
            try:
                os.close(fd)
            except OSError:
                # Best-effort cleanup; never mask the exception in flight.
                pass

    stdout = b"".join(stdout_chunks).decode(errors="replace")
    stderr = b"".join(stderr_chunks).decode(errors="replace")

    logger.debug("return code: %s", retcode)
    if stdout:
        logger.debug("stdout: %s", stdout)
    if stderr:
        logger.debug("stderr: %s", stderr)

    if check and retcode:
        raise subprocess.CalledProcessError(
            retcode, process.args, output=stdout, stderr=stderr
        )
    return subprocess.CompletedProcess(
        process.args,
        retcode,
        stdout=stdout,
        stderr=stderr,
    )
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: xmanager-slurm
|
|
3
|
-
Version: 0.4.5
|
|
3
|
+
Version: 0.4.6
|
|
4
4
|
Summary: Slurm backend for XManager.
|
|
5
5
|
Project-URL: GitHub, https://github.com/jessefarebro/xm-slurm
|
|
6
6
|
Author-email: Jesse Farebrother <jfarebro@cs.mcgill.ca>
|
|
7
7
|
License: MIT
|
|
8
|
+
License-File: LICENSE.md
|
|
8
9
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
9
10
|
Classifier: License :: OSI Approved :: MIT License
|
|
10
11
|
Classifier: Operating System :: OS Independent
|
|
@@ -13,13 +14,16 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
13
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
14
15
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
16
|
Requires-Python: >=3.10
|
|
16
|
-
Requires-Dist:
|
|
17
|
+
Requires-Dist: aiofile>=3.9.0
|
|
18
|
+
Requires-Dist: asyncssh>=2.19.0
|
|
17
19
|
Requires-Dist: backoff>=2.2.1
|
|
18
20
|
Requires-Dist: cloudpickle>=3.0.0
|
|
21
|
+
Requires-Dist: httpx>=0.28.1
|
|
19
22
|
Requires-Dist: humanize>=4.8.0
|
|
20
23
|
Requires-Dist: immutabledict>=3.0.0
|
|
21
24
|
Requires-Dist: jinja2>=3.1.2
|
|
22
25
|
Requires-Dist: more-itertools>=10.2.0
|
|
23
26
|
Requires-Dist: rich>=13.5.2
|
|
24
27
|
Requires-Dist: toml>=0.10.2
|
|
28
|
+
Requires-Dist: wrapt>=1.16.0
|
|
25
29
|
Requires-Dist: xmanager>=0.5.0
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
xm_slurm/__init__.py,sha256=WgRn9HDYa5H3sfIH-HZu33liBOh98jM4GqcR349RaSY,1086
|
|
2
|
+
xm_slurm/batching.py,sha256=GbKBsNz9w8gIc2fHLZpslC0e4K9YUfLXFHmjduRRCfQ,4385
|
|
3
|
+
xm_slurm/config.py,sha256=NxlObwlhpjCqafs2CZuw1P5WFwhqayfblGHo6NGFi3Y,7072
|
|
4
|
+
xm_slurm/console.py,sha256=UpMqeJ0C8i0pkue1AHnnyyX0bFJ9zZeJ7HBR6yhuA8A,54
|
|
5
|
+
xm_slurm/constants.py,sha256=zefVtlFdflgSolie5g_rVxWV-Zpydxapchm3y0a2FDc,999
|
|
6
|
+
xm_slurm/dependencies.py,sha256=-5gN_tpfs3dOA7H5_MIHO2ratb7F5Pm_yjkR5rZcgI8,6421
|
|
7
|
+
xm_slurm/executables.py,sha256=fGmrFBl-258bMn6ip5adYeM7xxUHAeIbDN9zD2FDGtY,6373
|
|
8
|
+
xm_slurm/execution.py,sha256=MRv7MjFVDhGbUrq28kpW82kOYvKdb0tS7tV5jh8g_I0,30163
|
|
9
|
+
xm_slurm/executors.py,sha256=fMtxGUCi4vEKmb_p4JEpqPUTh7L_f1LcR_TamMLAWNg,4667
|
|
10
|
+
xm_slurm/experiment.py,sha256=94r0mhtUPUzw4eaUEz0kpsufC25wEGqlDhV4Fcr1ukY,39883
|
|
11
|
+
xm_slurm/filesystem.py,sha256=4rKtq3t-KDgxJbSGt6JVyRJT_3lCN_vIKTcwKHpTo3I,4389
|
|
12
|
+
xm_slurm/job_blocks.py,sha256=_F8CKCs5BQFj40a2-mjG71HfacvWoBXBDPDKEaKTbXc,616
|
|
13
|
+
xm_slurm/metadata_context.py,sha256=mksVRbVUuistL1uE7TC-fkW-Y69On52jN_svP1e1kiQ,7841
|
|
14
|
+
xm_slurm/packageables.py,sha256=fPUvqF2IvJ2Hn6hodDdQwtx1Ze3sJ8U-BUbxDHauW-g,12398
|
|
15
|
+
xm_slurm/resources.py,sha256=tET3TPOQ8nXYE_SxAs2fiHt9UKJsCLW1vFktJTH0xG4,5722
|
|
16
|
+
xm_slurm/status.py,sha256=WTWiDHi-ZHtwHRnDP0cGa-27zTSm6LkA-GCKsN-zBgg,6916
|
|
17
|
+
xm_slurm/types.py,sha256=TsVykDm-LazVkrjeJrTwCMs4Q8APKhy7BTk0yKIhFNg,805
|
|
18
|
+
xm_slurm/utils.py,sha256=ftV7kyyr4EUcIjon0godTDhhSzdVjyDkF2sVHA8zyls,5024
|
|
19
|
+
xm_slurm/api/__init__.py,sha256=cyao3LZ3uLftu1wIv1aN7Qvsl6gYzYpkxeehTHZ0fA8,1089
|
|
20
|
+
xm_slurm/api/abc.py,sha256=-lS2OndnOuEiwNdr8ccQKkwMd1iDmKMmkBOSTvo5H5w,1816
|
|
21
|
+
xm_slurm/api/models.py,sha256=_INVh0j-4-rRs0WASyg4fNB6NF1L1nUeGgQ6-XnbwsM,1610
|
|
22
|
+
xm_slurm/api/sqlite/client.py,sha256=WykSIO7b14rRLy9qebbkiLKXy7EHU61jtoebLX17HMM,14124
|
|
23
|
+
xm_slurm/api/web/client.py,sha256=GkQIaOAJc1MIz0mYVWUA97ZvhAgz_o4xXHvVdfs2ErA,6265
|
|
24
|
+
xm_slurm/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
+
xm_slurm/contrib/clusters/__init__.py,sha256=XFCVnkThiU3_8uA_tUgDByOBanXNHrxDvfmuptmQ2KE,2214
|
|
26
|
+
xm_slurm/contrib/clusters/drac.py,sha256=ViLYerYBMSuZXnWVbz9RDIPPV7JA8BgBpgTfj1wPP28,5881
|
|
27
|
+
xm_slurm/experimental/parameter_controller.py,sha256=b5LfglHV307F6QcPrHeZX5GJBtyOK9aQydke_SZ3Wto,8457
|
|
28
|
+
xm_slurm/packaging/__init__.py,sha256=dh307yLpUT9KN7rJ1e9fYC6hegGKfZcGboUq9nGpDVQ,233
|
|
29
|
+
xm_slurm/packaging/docker.py,sha256=ZNlTWyBQVJQ1kzOhWNqva5g8F31gAbmWL2-DFY9FJ-w,13677
|
|
30
|
+
xm_slurm/packaging/registry.py,sha256=GrdmQg9MgSo38OiqOzMKWSkQyBuyryOfc3zcdgZ4CUE,1148
|
|
31
|
+
xm_slurm/packaging/router.py,sha256=yPbdA9clrhly97cLgDsSRZG2LZRKE-oz8Hhdb7WtYqk,2070
|
|
32
|
+
xm_slurm/packaging/utils.py,sha256=6EAb17zKQQeuyNY2EV9AoW1RvnDGrQwmIT9wtQEsC4c,632
|
|
33
|
+
xm_slurm/scripts/_cloudpickle.py,sha256=dlJYf2SceOuUn8wi-ozuoYAQg71wqD2MUVOUCyOwWIY,647
|
|
34
|
+
xm_slurm/scripts/cli.py,sha256=nnfNF2FAVLrm-4KuUooFfSWxX7hOjmXpI7k5f3Sn5us,2349
|
|
35
|
+
xm_slurm/templates/docker/docker-bake.hcl.j2,sha256=7qSJl2VN5poz-Hh8Gjo7--qR-k3lmfGtBu2mNbfG2uA,1499
|
|
36
|
+
xm_slurm/templates/docker/mamba.Dockerfile,sha256=Sgxr5IA5T-pT1Shumb5k3JngoG4pgCdBXjzqslFJdZI,753
|
|
37
|
+
xm_slurm/templates/docker/python.Dockerfile,sha256=U4b4QVkopckQ0o9jJIE7d_M6TvExEYlYDirNwCoZ7W4,865
|
|
38
|
+
xm_slurm/templates/docker/uv.Dockerfile,sha256=L2UJMX2c8waMdrRhiqPytQe3pTBu6u5PpMhJYsKkbEg,1040
|
|
39
|
+
xm_slurm/templates/slurm/entrypoint.bash.j2,sha256=MRdSVwgGrgQdpEhqfkP35IidgsblrtVXB1YWzvE9hkk,666
|
|
40
|
+
xm_slurm/templates/slurm/job-array.bash.j2,sha256=smxmSSzBEUHm6MJF-nYPVVjK6CLKrb1fRxF_tfrzAX8,552
|
|
41
|
+
xm_slurm/templates/slurm/job-group.bash.j2,sha256=Cp8YhNOxYqaOkl4MFjQlcaLMGZwdDh97m8OGT5RWbAo,1101
|
|
42
|
+
xm_slurm/templates/slurm/job.bash.j2,sha256=d35VYHdAKkgVK8s4XnUDJwQR0gLnDWRJu-Ldz-qALmQ,1914
|
|
43
|
+
xm_slurm/templates/slurm/fragments/monitor.bash.j2,sha256=HYqYhXsTv8TCed5UaGCZVGIYsqxSKHcnPyNNTHWNvxc,1279
|
|
44
|
+
xm_slurm/templates/slurm/fragments/proxy.bash.j2,sha256=VJLglZo-Nvx9R-qe3rHTxr07CylTQ6Z9NwBzvIpAZrA,814
|
|
45
|
+
xm_slurm/templates/slurm/runtimes/apptainer.bash.j2,sha256=lE2EWVCK2O-n08RL4_MJYIikVTvODjcYKuv7Eh73Q2w,1932
|
|
46
|
+
xm_slurm/templates/slurm/runtimes/podman.bash.j2,sha256=yUOUTzGNyVy1IQSgvY9wn9pE-reB1SYNJmuV2ji8l6w,1162
|
|
47
|
+
xmanager_slurm-0.4.6.dist-info/METADATA,sha256=Zo382SdJ2QWrzEVwNYQX7DXRhhcv9HZA8SPRuWtT0W4,1042
|
|
48
|
+
xmanager_slurm-0.4.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
49
|
+
xmanager_slurm-0.4.6.dist-info/entry_points.txt,sha256=_HLGmLgxuQLOPmF2gOFYDVq2HqtMVD_SzigHvUh8TCY,49
|
|
50
|
+
xmanager_slurm-0.4.6.dist-info/licenses/LICENSE.md,sha256=IxstXr3MPHwTJ5jMrByHrQsR1ZAGQ2U_uz_4qzI_15Y,11756
|
|
51
|
+
xmanager_slurm-0.4.6.dist-info/RECORD,,
|