xmanager-slurm 0.4.5__py3-none-any.whl → 0.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xmanager-slurm might be problematic. Click here for more details.
- xm_slurm/__init__.py +0 -2
- xm_slurm/api/__init__.py +33 -0
- xm_slurm/api/abc.py +65 -0
- xm_slurm/api/models.py +70 -0
- xm_slurm/api/sqlite/client.py +358 -0
- xm_slurm/api/web/client.py +173 -0
- xm_slurm/config.py +11 -3
- xm_slurm/contrib/clusters/__init__.py +3 -6
- xm_slurm/contrib/clusters/drac.py +4 -3
- xm_slurm/executables.py +4 -7
- xm_slurm/execution.py +273 -159
- xm_slurm/experiment.py +26 -180
- xm_slurm/filesystem.py +129 -0
- xm_slurm/metadata_context.py +253 -0
- xm_slurm/packageables.py +0 -9
- xm_slurm/packaging/docker.py +72 -22
- xm_slurm/packaging/utils.py +0 -108
- xm_slurm/scripts/cli.py +9 -2
- xm_slurm/templates/docker/uv.Dockerfile +6 -3
- xm_slurm/templates/slurm/entrypoint.bash.j2 +27 -0
- xm_slurm/templates/slurm/job-array.bash.j2 +4 -4
- xm_slurm/templates/slurm/job-group.bash.j2 +2 -2
- xm_slurm/templates/slurm/job.bash.j2 +5 -4
- xm_slurm/templates/slurm/runtimes/apptainer.bash.j2 +18 -54
- xm_slurm/templates/slurm/runtimes/podman.bash.j2 +10 -24
- xm_slurm/utils.py +122 -41
- {xmanager_slurm-0.4.5.dist-info → xmanager_slurm-0.4.6.dist-info}/METADATA +7 -3
- xmanager_slurm-0.4.6.dist-info/RECORD +51 -0
- {xmanager_slurm-0.4.5.dist-info → xmanager_slurm-0.4.6.dist-info}/WHEEL +1 -1
- xm_slurm/api.py +0 -528
- xmanager_slurm-0.4.5.dist-info/RECORD +0 -44
- {xmanager_slurm-0.4.5.dist-info → xmanager_slurm-0.4.6.dist-info}/entry_points.txt +0 -0
- {xmanager_slurm-0.4.5.dist-info → xmanager_slurm-0.4.6.dist-info}/licenses/LICENSE.md +0 -0
xm_slurm/api.py
DELETED
|
@@ -1,528 +0,0 @@
|
|
|
1
|
-
import dataclasses
|
|
2
|
-
import enum
|
|
3
|
-
import functools
|
|
4
|
-
import importlib.util
|
|
5
|
-
import logging
|
|
6
|
-
import os
|
|
7
|
-
import typing
|
|
8
|
-
from abc import ABC, abstractmethod
|
|
9
|
-
from contextlib import contextmanager
|
|
10
|
-
from pathlib import Path
|
|
11
|
-
from typing import Any
|
|
12
|
-
|
|
13
|
-
from sqlalchemy import Column, ForeignKey, Integer, String, create_engine
|
|
14
|
-
from sqlalchemy.ext.declarative import declarative_base
|
|
15
|
-
from sqlalchemy.orm import relationship, sessionmaker
|
|
16
|
-
|
|
17
|
-
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class ExperimentUnitRole(enum.Enum):
    """Enumeration of the roles an experiment unit can have."""

    # ``enum.auto()`` numbers the members in declaration order, starting at 1.
    WORK_UNIT = enum.auto()
    AUX_UNIT = enum.auto()
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
@dataclasses.dataclass(kw_only=True, frozen=True)
|
|
26
|
-
class ExperimentPatchModel:
|
|
27
|
-
title: str | None = None
|
|
28
|
-
description: str | None = None
|
|
29
|
-
note: str | None = None
|
|
30
|
-
tags: list[str] | None = None
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
@dataclasses.dataclass(kw_only=True, frozen=True)
|
|
34
|
-
class SlurmJobModel:
|
|
35
|
-
name: str
|
|
36
|
-
slurm_job_id: int
|
|
37
|
-
slurm_ssh_config: str
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
@dataclasses.dataclass(kw_only=True, frozen=True)
|
|
41
|
-
class ArtifactModel:
|
|
42
|
-
name: str
|
|
43
|
-
uri: str
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
@dataclasses.dataclass(frozen=True, kw_only=True)
class ExperimentUnitModel:
    """Immutable, keyword-only description of an experiment unit.

    ``identity`` is required; ``args`` defaults to ``None`` and ``jobs`` to a
    fresh empty list per instance.
    """

    identity: str
    args: str | None = None
    # ``default_factory`` gives every instance its own list object.
    jobs: list[SlurmJobModel] = dataclasses.field(default_factory=list)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
@dataclasses.dataclass(kw_only=True, frozen=True)
|
|
54
|
-
class ExperimentUnitPatchModel:
|
|
55
|
-
identity: str | None
|
|
56
|
-
args: str | None = None
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
@dataclasses.dataclass(frozen=True, kw_only=True)
class WorkUnitModel(ExperimentUnitModel):
    """An :class:`ExperimentUnitModel` extended with a work unit id and artifacts.

    ``wid`` is required; ``artifacts`` defaults to a fresh empty list.
    """

    wid: int
    # ``default_factory`` gives every instance its own list object.
    artifacts: list[ArtifactModel] = dataclasses.field(default_factory=list)
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
@dataclasses.dataclass(frozen=True, kw_only=True)
class ExperimentModel:
    """Immutable, keyword-only snapshot of an experiment.

    None of the fields has a default, so all of them must be supplied
    (the nullable ones may be passed as ``None``).
    """

    # Descriptive fields.
    title: str
    description: str | None
    note: str | None
    tags: list[str] | None

    # Nested records.
    work_units: list[WorkUnitModel]
    artifacts: list[ArtifactModel]
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
class XManagerAPI(ABC):
    """Abstract interface for storing and retrieving experiment records.

    Every method is abstract; a concrete subclass must override all of them
    before it can be instantiated.
    """

    @abstractmethod
    def get_experiment(self, xid: int) -> ExperimentModel:
        """Return the experiment identified by ``xid``."""

    @abstractmethod
    def delete_experiment(self, experiment_id: int) -> None:
        """Delete the experiment identified by ``experiment_id``."""

    @abstractmethod
    def insert_experiment(self, experiment: ExperimentPatchModel) -> int:
        """Store a new experiment and return an integer for it."""

    @abstractmethod
    def update_experiment(self, experiment_id: int, experiment_patch: ExperimentPatchModel) -> None:
        """Apply ``experiment_patch`` to the experiment ``experiment_id``."""

    @abstractmethod
    def insert_job(self, experiment_id: int, work_unit_id: int, job: SlurmJobModel) -> None:
        """Record ``job`` for a work unit of an experiment."""

    @abstractmethod
    def insert_work_unit(self, experiment_id: int, work_unit: WorkUnitModel) -> None:
        """Record ``work_unit`` for the experiment ``experiment_id``."""

    @abstractmethod
    def update_work_unit(
        self, experiment_id: int, work_unit_id: int, patch: ExperimentUnitPatchModel
    ) -> None:
        """Apply ``patch`` to a work unit of an experiment."""

    @abstractmethod
    def delete_work_unit_artifact(self, experiment_id: int, work_unit_id: int, name: str) -> None:
        """Delete the artifact called ``name`` from a work unit."""

    @abstractmethod
    def insert_work_unit_artifact(
        self, experiment_id: int, work_unit_id: int, artifact: ArtifactModel
    ) -> None:
        """Record ``artifact`` for a work unit of an experiment."""

    @abstractmethod
    def delete_experiment_artifact(self, experiment_id: int, name: str) -> None:
        """Delete the artifact called ``name`` from an experiment."""

    @abstractmethod
    def insert_experiment_artifact(self, experiment_id: int, artifact: ArtifactModel) -> None:
        """Record ``artifact`` for the experiment ``experiment_id``."""
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
class XManagerWebAPI(XManagerAPI):
    """:class:`XManagerAPI` implementation that calls ``xm_slurm_api_client``.

    The client package is imported lazily inside each method, so this module
    can be imported without it being installed.
    """

    def __init__(self, base_url: str, token: str):
        """Create an authenticated client for the API at ``base_url``.

        Args:
            base_url: Base URL handed to ``AuthenticatedClient``.
            token: Token handed to ``AuthenticatedClient``.

        Raises:
            ImportError: If ``xm_slurm_api_client`` cannot be found.
        """
        if importlib.util.find_spec("xm_slurm_api_client") is None:
            raise ImportError("xm_slurm_api_client not found.")

        from xm_slurm_api_client import AuthenticatedClient  # type: ignore
        from xm_slurm_api_client import models as _models  # type: ignore

        self.models = _models
        # NOTE(review): ``verify_ssl=False`` turns off certificate verification
        # while a token is being sent. Kept as-is for compatibility; confirm
        # this is intentional.
        self.client = AuthenticatedClient(
            base_url,
            token=token,
            raise_on_unexpected_status=True,
            verify_ssl=False,
        )

    def get_experiment(self, xid: int) -> ExperimentModel:
        """Fetch experiment ``xid`` and convert the response to local models."""
        from xm_slurm_api_client.api.experiment import (  # type: ignore
            get_experiment as _get_experiment,
        )

        experiment: Any = _get_experiment.sync(xid, client=self.client)  # type: ignore
        work_units = [
            WorkUnitModel(
                wid=wu.wid,
                identity=wu.identity,
                args=wu.args,
                jobs=[SlurmJobModel(**job.dict()) for job in wu.jobs],
                artifacts=[ArtifactModel(**artifact.dict()) for artifact in wu.artifacts],
            )
            for wu in experiment.work_units
        ]
        artifacts = [ArtifactModel(**artifact.dict()) for artifact in experiment.artifacts]

        return ExperimentModel(
            title=experiment.title,
            description=experiment.description,
            note=experiment.note,
            tags=experiment.tags,
            work_units=work_units,
            artifacts=artifacts,
        )

    def delete_experiment(self, experiment_id: int) -> None:
        """Delete experiment ``experiment_id`` through the client."""
        from xm_slurm_api_client.api.experiment import (  # type: ignore
            delete_experiment as _delete_experiment,
        )

        _delete_experiment.sync(experiment_id, client=self.client)

    def insert_experiment(self, experiment: ExperimentPatchModel) -> int:
        """Create an experiment from ``experiment.title`` and return its ``xid``.

        Only ``title`` may be set on ``experiment``; the other fields must be
        ``None`` (checked with ``assert``).
        """
        from xm_slurm_api_client.api.experiment import (  # type: ignore
            insert_experiment as _insert_experiment,
        )

        assert experiment.title is not None, "Title must be set in the experiment model."
        assert (
            experiment.description is None and experiment.note is None and experiment.tags is None
        ), "Only title should be set in the experiment model."
        experiment_response = _insert_experiment.sync(
            client=self.client,
            body=self.models.Experiment(title=experiment.title),
        )
        return typing.cast(int, experiment_response["xid"])  # type: ignore

    def update_experiment(self, experiment_id: int, experiment_patch: ExperimentPatchModel) -> None:
        """Send ``experiment_patch`` for experiment ``experiment_id``."""
        from xm_slurm_api_client.api.experiment import (  # type: ignore
            update_experiment as _update_experiment,
        )

        _update_experiment.sync(
            experiment_id,
            client=self.client,
            body=self.models.ExperimentPatch(**dataclasses.asdict(experiment_patch)),
        )

    def insert_job(self, experiment_id: int, work_unit_id: int, job: SlurmJobModel) -> None:
        """Send ``job`` for work unit ``work_unit_id`` of ``experiment_id``."""
        from xm_slurm_api_client.api.job import insert_job as _insert_job  # type: ignore

        _insert_job.sync(
            experiment_id,
            work_unit_id,
            client=self.client,
            body=self.models.SlurmJob(**dataclasses.asdict(job)),
        )

    def insert_work_unit(self, experiment_id: int, work_unit: WorkUnitModel) -> None:
        """Send ``work_unit`` for experiment ``experiment_id``."""
        from xm_slurm_api_client.api.work_unit import (  # type: ignore
            insert_work_unit as _insert_work_unit,
        )

        _insert_work_unit.sync(
            experiment_id,
            client=self.client,
            body=self.models.WorkUnit(**dataclasses.asdict(work_unit)),
        )

    def update_work_unit(
        self, experiment_id: int, work_unit_id: int, patch: ExperimentUnitPatchModel
    ) -> None:
        """Not supported by this client.

        The override exists because ``update_work_unit`` is abstract on
        :class:`XManagerAPI`; without it the class could not be instantiated.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("XManagerWebAPI does not support updating work units.")

    def delete_work_unit_artifact(self, experiment_id: int, work_unit_id: int, name: str) -> None:
        """Delete artifact ``name`` of a work unit through the client."""
        from xm_slurm_api_client.api.artifact import (  # type: ignore
            delete_work_unit_artifact as _delete_work_unit_artifact,
        )

        _delete_work_unit_artifact.sync(experiment_id, work_unit_id, name, client=self.client)

    def insert_work_unit_artifact(
        self, experiment_id: int, work_unit_id: int, artifact: ArtifactModel
    ) -> None:
        """Send ``artifact`` for work unit ``work_unit_id`` of ``experiment_id``."""
        from xm_slurm_api_client.api.artifact import (  # type: ignore
            insert_work_unit_artifact as _insert_work_unit_artifact,
        )

        _insert_work_unit_artifact.sync(
            experiment_id,
            work_unit_id,
            client=self.client,
            body=self.models.Artifact(**dataclasses.asdict(artifact)),
        )

    def delete_experiment_artifact(self, experiment_id: int, name: str) -> None:
        """Do nothing.

        NOTE(review): this has always been an empty stub, so experiment
        artifacts are never deleted through this client. Confirm whether a
        client endpoint should be called here.
        """

    def insert_experiment_artifact(self, experiment_id: int, artifact: ArtifactModel) -> None:
        """Send ``artifact`` for experiment ``experiment_id``."""
        from xm_slurm_api_client.api.artifact import (  # type: ignore
            insert_experiment_artifact as _insert_experiment_artifact,
        )

        _insert_experiment_artifact.sync(
            experiment_id,
            client=self.client,
            body=self.models.Artifact(**dataclasses.asdict(artifact)),
        )
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
# Declarative base class that the ORM model classes in this module subclass.
Base = declarative_base()
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
class Experiment(Base):
    """ORM mapping for the ``experiments`` table."""

    __tablename__ = "experiments"

    # Columns.
    id = Column(Integer, primary_key=True)
    title = Column(String)
    description = Column(String)
    note = Column(String)
    # A single string column; XManagerSqliteAPI stores tags joined with ",".
    tags = Column(String)

    # Relationships, paired with the ``experiment`` attribute on the other side.
    work_units = relationship("WorkUnit", back_populates="experiment")
    artifacts = relationship("Artifact", back_populates="experiment")
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
class WorkUnit(Base):
    """ORM mapping for the ``work_units`` table."""

    __tablename__ = "work_units"

    # Columns. ``id`` is the row's primary key; ``wid`` is a separate integer
    # column that XManagerSqliteAPI looks up together with ``experiment_id``.
    id = Column(Integer, primary_key=True)
    experiment_id = Column(Integer, ForeignKey("experiments.id"))
    wid = Column(Integer)
    identity = Column(String)
    args = Column(String)

    # Relationships.
    experiment = relationship("Experiment", back_populates="work_units")
    jobs = relationship("SlurmJob", back_populates="work_unit")
    artifacts = relationship("Artifact", back_populates="work_unit")
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
class SlurmJob(Base):
    """ORM mapping for the ``slurm_jobs`` table."""

    __tablename__ = "slurm_jobs"

    # Columns. ``work_unit_id`` references the ``work_units.id`` primary key.
    id = Column(Integer, primary_key=True)
    work_unit_id = Column(Integer, ForeignKey("work_units.id"))
    name = Column(String)
    slurm_job_id = Column(Integer)
    slurm_ssh_config = Column(String)

    # Relationship, paired with ``WorkUnit.jobs``.
    work_unit = relationship("WorkUnit", back_populates="jobs")
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
class Artifact(Base):
    """ORM mapping for the ``artifacts`` table.

    A row can reference an experiment, a work unit, or both, through two
    separate foreign-key columns.
    """

    __tablename__ = "artifacts"

    # Columns.
    id = Column(Integer, primary_key=True)
    experiment_id = Column(Integer, ForeignKey("experiments.id"))
    work_unit_id = Column(Integer, ForeignKey("work_units.id"))
    name = Column(String)
    uri = Column(String)

    # Relationships, paired with the ``artifacts`` attribute on each parent.
    experiment = relationship("Experiment", back_populates="artifacts")
    work_unit = relationship("WorkUnit", back_populates="artifacts")
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
class XManagerSqliteAPI(XManagerAPI):
    """:class:`XManagerAPI` implementation stored in a local SQLite file.

    Throughout this class the ``work_unit_id`` argument is the work unit's
    ``wid`` within an experiment, not the ``work_units.id`` primary key; the
    row is always resolved from ``(experiment_id, wid)``.
    """

    def __init__(self):
        """Open the database, creating the directory and tables if needed.

        The file is ``db.sqlite3`` inside ``$XM_SLURM_STATE_DIR`` when that
        variable is set, otherwise inside ``~/.local/state/xm-slurm``.
        """
        if "XM_SLURM_STATE_DIR" in os.environ:
            db_path = Path(os.environ["XM_SLURM_STATE_DIR"]) / "db.sqlite3"
        else:
            db_path = Path.home() / ".local" / "state" / "xm-slurm" / "db.sqlite3"
        # The path is passed as a lazy %-argument; the message needs the
        # placeholder or logging reports a formatting error.
        logger.debug("Looking for db at: %s", db_path)
        db_path.parent.mkdir(parents=True, exist_ok=True)
        engine = create_engine(f"sqlite:///{db_path}")
        Base.metadata.create_all(engine)
        self.Session = sessionmaker(bind=engine)

    @contextmanager
    def session_scope(self):
        """Yield a session that commits on success and rolls back on error.

        The session is always closed afterwards, and any exception raised
        inside the ``with`` body is re-raised after the rollback.
        """
        session = self.Session()
        try:
            yield session
            session.commit()
        except BaseException:
            # Roll back on anything, including KeyboardInterrupt, then re-raise.
            session.rollback()
            raise
        finally:
            session.close()

    @staticmethod
    def _find_work_unit(session, experiment_id: int, work_unit_id: int):
        """Return the WorkUnit row for ``(experiment_id, wid)`` or ``None``."""
        return (
            session.query(WorkUnit)
            .filter(WorkUnit.experiment_id == experiment_id, WorkUnit.wid == work_unit_id)
            .first()
        )

    def get_experiment(self, xid: int) -> ExperimentModel:
        """Load experiment ``xid`` with its work units, jobs and artifacts.

        Raises:
            ValueError: If no experiment has id ``xid``.
        """
        with self.session_scope() as session:
            experiment = session.query(Experiment).filter(Experiment.id == xid).first()
            if not experiment:
                raise ValueError(f"Experiment with id {xid} not found")

            work_units = [
                WorkUnitModel(
                    wid=wu.wid,
                    identity=wu.identity,
                    args=wu.args,
                    jobs=[
                        SlurmJobModel(
                            name=job.name,
                            slurm_job_id=job.slurm_job_id,
                            slurm_ssh_config=job.slurm_ssh_config,
                        )
                        for job in wu.jobs
                    ],
                    artifacts=[
                        ArtifactModel(name=artifact.name, uri=artifact.uri)
                        for artifact in wu.artifacts
                    ],
                )
                for wu in experiment.work_units
            ]
            artifacts = [
                ArtifactModel(name=artifact.name, uri=artifact.uri)
                for artifact in experiment.artifacts
            ]

            return ExperimentModel(
                title=experiment.title,
                description=experiment.description,
                note=experiment.note,
                # Tags are stored as one comma-joined string.
                tags=experiment.tags.split(",") if experiment.tags else None,
                work_units=work_units,
                artifacts=artifacts,
            )

    def delete_experiment(self, experiment_id: int) -> None:
        """Delete experiment ``experiment_id`` and the rows that belong to it.

        Does nothing when the experiment does not exist.
        """
        with self.session_scope() as session:
            experiment = session.query(Experiment).filter(Experiment.id == experiment_id).first()
            if not experiment:
                return
            # The relationships declare no delete cascade, so child rows are
            # removed explicitly instead of being left behind as orphans.
            for work_unit in experiment.work_units:
                for job in work_unit.jobs:
                    session.delete(job)
                for artifact in work_unit.artifacts:
                    session.delete(artifact)
                session.delete(work_unit)
            for artifact in experiment.artifacts:
                session.delete(artifact)
            session.delete(experiment)

    def insert_experiment(self, experiment: ExperimentPatchModel) -> int:
        """Insert a new experiment row and return its primary key."""
        with self.session_scope() as session:
            new_experiment = Experiment(
                title=experiment.title,
                description=experiment.description,
                note=experiment.note,
                tags=",".join(experiment.tags) if experiment.tags else None,
            )
            session.add(new_experiment)
            # Flush so the primary key is assigned before it is returned.
            session.flush()
            return new_experiment.id

    def update_experiment(self, experiment_id: int, experiment_patch: ExperimentPatchModel) -> None:
        """Copy the non-``None`` fields of ``experiment_patch`` onto the row.

        Does nothing when the experiment does not exist.
        """
        with self.session_scope() as session:
            experiment = session.query(Experiment).filter(Experiment.id == experiment_id).first()
            if not experiment:
                return
            if experiment_patch.title is not None:
                experiment.title = experiment_patch.title
            if experiment_patch.description is not None:
                experiment.description = experiment_patch.description
            if experiment_patch.note is not None:
                experiment.note = experiment_patch.note
            if experiment_patch.tags is not None:
                experiment.tags = ",".join(experiment_patch.tags)

    def insert_job(self, experiment_id: int, work_unit_id: int, job: SlurmJobModel) -> None:
        """Insert ``job`` for the work unit ``work_unit_id`` of ``experiment_id``.

        Raises:
            ValueError: If the work unit does not exist.
        """
        with self.session_scope() as session:
            work_unit = self._find_work_unit(session, experiment_id, work_unit_id)
            if not work_unit:
                raise ValueError(
                    f"Work unit with id {work_unit_id} not found in experiment {experiment_id}"
                )
            session.add(
                SlurmJob(
                    work_unit_id=work_unit.id,
                    name=job.name,
                    slurm_job_id=job.slurm_job_id,
                    slurm_ssh_config=job.slurm_ssh_config,
                )
            )

    def insert_work_unit(self, experiment_id: int, work_unit: WorkUnitModel) -> None:
        """Insert ``work_unit`` together with its jobs and artifacts."""
        with self.session_scope() as session:
            new_work_unit = WorkUnit(
                experiment_id=experiment_id,
                wid=work_unit.wid,
                identity=work_unit.identity,
                args=work_unit.args,
            )
            session.add(new_work_unit)
            # Flush so ``new_work_unit.id`` is assigned; without it the child
            # rows below would be written with a NULL ``work_unit_id``.
            session.flush()
            for job in work_unit.jobs:
                session.add(
                    SlurmJob(
                        work_unit_id=new_work_unit.id,
                        name=job.name,
                        slurm_job_id=job.slurm_job_id,
                        slurm_ssh_config=job.slurm_ssh_config,
                    )
                )
            for artifact in work_unit.artifacts:
                session.add(
                    Artifact(work_unit_id=new_work_unit.id, name=artifact.name, uri=artifact.uri)
                )

    def update_work_unit(
        self, experiment_id: int, work_unit_id: int, patch: ExperimentUnitPatchModel
    ) -> None:
        """Copy the non-``None`` fields of ``patch`` onto the work unit.

        Raises:
            ValueError: If the work unit does not exist.
        """
        with self.session_scope() as session:
            work_unit = self._find_work_unit(session, experiment_id, work_unit_id)
            if not work_unit:
                raise ValueError(
                    f"Work unit with id {work_unit_id} not found in experiment {experiment_id}"
                )
            if patch.identity is not None:
                work_unit.identity = patch.identity
            if patch.args is not None:
                work_unit.args = patch.args

    def delete_work_unit_artifact(self, experiment_id: int, work_unit_id: int, name: str) -> None:
        """Delete the artifact ``name`` of a work unit, if it exists.

        Does nothing when either the work unit or the artifact is missing.
        """
        with self.session_scope() as session:
            work_unit = self._find_work_unit(session, experiment_id, work_unit_id)
            if not work_unit:
                return
            # Match on the row's primary key, which is what ``insert_work_unit``
            # stores in ``Artifact.work_unit_id``.
            artifact = (
                session.query(Artifact)
                .filter(Artifact.work_unit_id == work_unit.id, Artifact.name == name)
                .first()
            )
            if artifact:
                session.delete(artifact)

    def insert_work_unit_artifact(
        self, experiment_id: int, work_unit_id: int, artifact: ArtifactModel
    ) -> None:
        """Insert ``artifact`` for the work unit ``work_unit_id`` of ``experiment_id``.

        Raises:
            ValueError: If the work unit does not exist.
        """
        with self.session_scope() as session:
            work_unit = self._find_work_unit(session, experiment_id, work_unit_id)
            if not work_unit:
                raise ValueError(
                    f"Work unit with id {work_unit_id} not found in experiment {experiment_id}"
                )
            session.add(Artifact(work_unit_id=work_unit.id, name=artifact.name, uri=artifact.uri))

    def delete_experiment_artifact(self, experiment_id: int, name: str) -> None:
        """Delete the artifact ``name`` of an experiment, if it exists."""
        with self.session_scope() as session:
            artifact = (
                session.query(Artifact)
                .filter(Artifact.experiment_id == experiment_id, Artifact.name == name)
                .first()
            )
            if artifact:
                session.delete(artifact)

    def insert_experiment_artifact(self, experiment_id: int, artifact: ArtifactModel) -> None:
        """Insert ``artifact`` for the experiment ``experiment_id``."""
        with self.session_scope() as session:
            session.add(
                Artifact(experiment_id=experiment_id, name=artifact.name, uri=artifact.uri)
            )
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
@functools.cache
def client() -> XManagerAPI:
    """Return the process-wide :class:`XManagerAPI` client.

    The result is cached, so the same instance is returned on every call.
    When ``xm_slurm_api_client`` is installed and both ``XM_SLURM_API_BASE_URL``
    and ``XM_SLURM_API_TOKEN`` are set, an :class:`XManagerWebAPI` is returned.
    If the package is installed but either variable is missing, a warning is
    logged. In every other case an :class:`XManagerSqliteAPI` is returned.
    """
    if importlib.util.find_spec("xm_slurm_api_client") is not None:
        base_url = os.environ.get("XM_SLURM_API_BASE_URL")
        token = os.environ.get("XM_SLURM_API_TOKEN")
        if base_url is not None and token is not None:
            return XManagerWebAPI(base_url=base_url, token=token)
        # ``Logger.warn`` is deprecated and absent from Python 3.13.
        logger.warning(
            "XM_SLURM_API_BASE_URL and XM_SLURM_API_TOKEN not set. "
            "Disabling XManager API client."
        )

    return XManagerSqliteAPI()
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
xm_slurm/__init__.py,sha256=Ld2w7ofLlTieWOHP_Jb3f48-qtVQBjFXynxUm9WF8mc,1116
|
|
2
|
-
xm_slurm/api.py,sha256=LeGgHz82t8Oay0Z1Ourv9-r-DBur3lhCUTnmmGhGFY4,18502
|
|
3
|
-
xm_slurm/batching.py,sha256=GbKBsNz9w8gIc2fHLZpslC0e4K9YUfLXFHmjduRRCfQ,4385
|
|
4
|
-
xm_slurm/config.py,sha256=GLLEkRLJxQW0urmHCLmwq_4ECmimEBQFl8Nz62SIo78,6787
|
|
5
|
-
xm_slurm/console.py,sha256=UpMqeJ0C8i0pkue1AHnnyyX0bFJ9zZeJ7HBR6yhuA8A,54
|
|
6
|
-
xm_slurm/constants.py,sha256=zefVtlFdflgSolie5g_rVxWV-Zpydxapchm3y0a2FDc,999
|
|
7
|
-
xm_slurm/dependencies.py,sha256=-5gN_tpfs3dOA7H5_MIHO2ratb7F5Pm_yjkR5rZcgI8,6421
|
|
8
|
-
xm_slurm/executables.py,sha256=S3z8jSDL6AdyGYpzy_cCs03Mj0vgA4ZTqIe8APYor3E,6469
|
|
9
|
-
xm_slurm/execution.py,sha256=i2oYH5RS-mHsHPwFDFZvo5qCudbgqBML-Hzq6DPNItw,25721
|
|
10
|
-
xm_slurm/executors.py,sha256=fMtxGUCi4vEKmb_p4JEpqPUTh7L_f1LcR_TamMLAWNg,4667
|
|
11
|
-
xm_slurm/experiment.py,sha256=trHapcYxPNKofzSqu7KZawML59tZ8FVjoEZYe2Wal7w,44521
|
|
12
|
-
xm_slurm/job_blocks.py,sha256=_F8CKCs5BQFj40a2-mjG71HfacvWoBXBDPDKEaKTbXc,616
|
|
13
|
-
xm_slurm/packageables.py,sha256=YZFTL6UWx9A_zyztTy1THUlj3pW1rA0cBPHJxD1LOJk,12884
|
|
14
|
-
xm_slurm/resources.py,sha256=tET3TPOQ8nXYE_SxAs2fiHt9UKJsCLW1vFktJTH0xG4,5722
|
|
15
|
-
xm_slurm/status.py,sha256=WTWiDHi-ZHtwHRnDP0cGa-27zTSm6LkA-GCKsN-zBgg,6916
|
|
16
|
-
xm_slurm/types.py,sha256=TsVykDm-LazVkrjeJrTwCMs4Q8APKhy7BTk0yKIhFNg,805
|
|
17
|
-
xm_slurm/utils.py,sha256=ESjOkGT7bRSzIeZrUtZplSHP4oaH6VZ92y2woYdcyKM,2239
|
|
18
|
-
xm_slurm/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
-
xm_slurm/contrib/clusters/__init__.py,sha256=JI_zTMfozanpfbBcNjPpgGi5Ppc6jyjM05X3rvFODLs,2321
|
|
20
|
-
xm_slurm/contrib/clusters/drac.py,sha256=tJeQFWFIpeZ1gD3j6AAJssNoLSiDkB-3lz1_ObnkRhc,5905
|
|
21
|
-
xm_slurm/experimental/parameter_controller.py,sha256=b5LfglHV307F6QcPrHeZX5GJBtyOK9aQydke_SZ3Wto,8457
|
|
22
|
-
xm_slurm/packaging/__init__.py,sha256=dh307yLpUT9KN7rJ1e9fYC6hegGKfZcGboUq9nGpDVQ,233
|
|
23
|
-
xm_slurm/packaging/docker.py,sha256=qvtVK7P3_b63Cwf4Y-rX0LltnZaxKPmIMLXAI8jBAOI,11519
|
|
24
|
-
xm_slurm/packaging/registry.py,sha256=GrdmQg9MgSo38OiqOzMKWSkQyBuyryOfc3zcdgZ4CUE,1148
|
|
25
|
-
xm_slurm/packaging/router.py,sha256=yPbdA9clrhly97cLgDsSRZG2LZRKE-oz8Hhdb7WtYqk,2070
|
|
26
|
-
xm_slurm/packaging/utils.py,sha256=KI5s32rNTCfgwzY_7Ghck27jHKvKg5sl5_NEEqJbJqI,3999
|
|
27
|
-
xm_slurm/scripts/_cloudpickle.py,sha256=dlJYf2SceOuUn8wi-ozuoYAQg71wqD2MUVOUCyOwWIY,647
|
|
28
|
-
xm_slurm/scripts/cli.py,sha256=xA4SqcMtX_NXXdUDgJ47qNHw2uGvmn_JA3XiDXk-jFA,2152
|
|
29
|
-
xm_slurm/templates/docker/docker-bake.hcl.j2,sha256=7qSJl2VN5poz-Hh8Gjo7--qR-k3lmfGtBu2mNbfG2uA,1499
|
|
30
|
-
xm_slurm/templates/docker/mamba.Dockerfile,sha256=Sgxr5IA5T-pT1Shumb5k3JngoG4pgCdBXjzqslFJdZI,753
|
|
31
|
-
xm_slurm/templates/docker/python.Dockerfile,sha256=U4b4QVkopckQ0o9jJIE7d_M6TvExEYlYDirNwCoZ7W4,865
|
|
32
|
-
xm_slurm/templates/docker/uv.Dockerfile,sha256=YB4LTs42ycDw8EHyz3U0_fR3lRAjmjrnXGlfV1Um394,956
|
|
33
|
-
xm_slurm/templates/slurm/job-array.bash.j2,sha256=iYtGMRDXgwwc2_8E3v4a30f3fKuq4zWgZHkxCXJ9iXc,567
|
|
34
|
-
xm_slurm/templates/slurm/job-group.bash.j2,sha256=UkjfBE7jg9mepcUWaHZEAjkiXsIM1j_sLxLzxkteD-Y,1120
|
|
35
|
-
xm_slurm/templates/slurm/job.bash.j2,sha256=v0xGYzagDdWW6Tg44qobGJLNSUP1Cf4CcekrPibYdrE,1864
|
|
36
|
-
xm_slurm/templates/slurm/fragments/monitor.bash.j2,sha256=HYqYhXsTv8TCed5UaGCZVGIYsqxSKHcnPyNNTHWNvxc,1279
|
|
37
|
-
xm_slurm/templates/slurm/fragments/proxy.bash.j2,sha256=VJLglZo-Nvx9R-qe3rHTxr07CylTQ6Z9NwBzvIpAZrA,814
|
|
38
|
-
xm_slurm/templates/slurm/runtimes/apptainer.bash.j2,sha256=ggSsAxv-2_Ct3hSxFJgwgwa3Wu8xH3JqLxWtJOYYrsA,3253
|
|
39
|
-
xm_slurm/templates/slurm/runtimes/podman.bash.j2,sha256=xKXYFvQvazMx0PgvmlRXR6eecoiBUl8y52dIzQtWkBE,1469
|
|
40
|
-
xmanager_slurm-0.4.5.dist-info/METADATA,sha256=jWwWAlHsDdppNdgPOhFCnPUe7aum7xwQ1e6gEJSHoxQ,929
|
|
41
|
-
xmanager_slurm-0.4.5.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
|
42
|
-
xmanager_slurm-0.4.5.dist-info/entry_points.txt,sha256=_HLGmLgxuQLOPmF2gOFYDVq2HqtMVD_SzigHvUh8TCY,49
|
|
43
|
-
xmanager_slurm-0.4.5.dist-info/licenses/LICENSE.md,sha256=IxstXr3MPHwTJ5jMrByHrQsR1ZAGQ2U_uz_4qzI_15Y,11756
|
|
44
|
-
xmanager_slurm-0.4.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|