snakemake-executor-plugin-slurm 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snakemake-executor-plugin-slurm might be problematic. Click here for more details.
- snakemake_executor_plugin_slurm/__init__.py +95 -17
- snakemake_executor_plugin_slurm/utils.py +26 -0
- {snakemake_executor_plugin_slurm-0.11.2.dist-info → snakemake_executor_plugin_slurm-0.12.0.dist-info}/METADATA +2 -2
- snakemake_executor_plugin_slurm-0.12.0.dist-info/RECORD +6 -0
- {snakemake_executor_plugin_slurm-0.11.2.dist-info → snakemake_executor_plugin_slurm-0.12.0.dist-info}/WHEEL +1 -1
- snakemake_executor_plugin_slurm-0.11.2.dist-info/RECORD +0 -6
- {snakemake_executor_plugin_slurm-0.11.2.dist-info → snakemake_executor_plugin_slurm-0.12.0.dist-info}/LICENSE +0 -0
|
@@ -3,9 +3,11 @@ __copyright__ = "Copyright 2023, David Lähnemann, Johannes Köster, Christian M
|
|
|
3
3
|
__email__ = "johannes.koester@uni-due.de"
|
|
4
4
|
__license__ = "MIT"
|
|
5
5
|
|
|
6
|
+
import atexit
|
|
6
7
|
import csv
|
|
7
8
|
from io import StringIO
|
|
8
9
|
import os
|
|
10
|
+
from pathlib import Path
|
|
9
11
|
import re
|
|
10
12
|
import shlex
|
|
11
13
|
import subprocess
|
|
@@ -26,18 +28,48 @@ from snakemake_interface_executor_plugins.jobs import (
|
|
|
26
28
|
from snakemake_interface_common.exceptions import WorkflowError
|
|
27
29
|
from snakemake_executor_plugin_slurm_jobstep import get_cpus_per_task
|
|
28
30
|
|
|
29
|
-
from .utils import delete_slurm_environment
|
|
31
|
+
from .utils import delete_slurm_environment, delete_empty_dirs
|
|
30
32
|
|
|
31
33
|
|
|
32
34
|
@dataclass
|
|
33
35
|
class ExecutorSettings(ExecutorSettingsBase):
|
|
36
|
+
logdir: Optional[Path] = field(
|
|
37
|
+
default=None,
|
|
38
|
+
metadata={
|
|
39
|
+
"help": "Per default the SLURM log directory is relative to "
|
|
40
|
+
"the working directory."
|
|
41
|
+
"This flag allows to set an alternative directory.",
|
|
42
|
+
"env_var": False,
|
|
43
|
+
"required": False,
|
|
44
|
+
},
|
|
45
|
+
)
|
|
46
|
+
keep_successful_logs: bool = field(
|
|
47
|
+
default=False,
|
|
48
|
+
metadata={
|
|
49
|
+
"help": "Per default SLURM log files will be deleted upon sucessful "
|
|
50
|
+
"completion of a job. Whenever a SLURM job fails, its log "
|
|
51
|
+
"file will be preserved. "
|
|
52
|
+
"This flag allows to keep all SLURM log files, even those "
|
|
53
|
+
"of successful jobs.",
|
|
54
|
+
"env_var": False,
|
|
55
|
+
"required": False,
|
|
56
|
+
},
|
|
57
|
+
)
|
|
58
|
+
delete_logfiles_older_than: Optional[int] = field(
|
|
59
|
+
default=10,
|
|
60
|
+
metadata={
|
|
61
|
+
"help": "Per default SLURM log files in the SLURM log directory "
|
|
62
|
+
"of a workflow will be deleted after 10 days. For this, "
|
|
63
|
+
"best leave the default log directory unaltered. "
|
|
64
|
+
"Setting this flag allows to change this behaviour. "
|
|
65
|
+
"If set to <=0, no old files will be deleted. ",
|
|
66
|
+
},
|
|
67
|
+
)
|
|
34
68
|
init_seconds_before_status_checks: Optional[int] = field(
|
|
35
69
|
default=40,
|
|
36
70
|
metadata={
|
|
37
|
-
"help": ""
|
|
38
|
-
|
|
39
|
-
check is performed after job submission.
|
|
40
|
-
""",
|
|
71
|
+
"help": "Defines the time in seconds before the first status "
|
|
72
|
+
"check is performed after job submission.",
|
|
41
73
|
"env_var": False,
|
|
42
74
|
"required": False,
|
|
43
75
|
},
|
|
@@ -45,11 +77,10 @@ class ExecutorSettings(ExecutorSettingsBase):
|
|
|
45
77
|
requeue: bool = field(
|
|
46
78
|
default=False,
|
|
47
79
|
metadata={
|
|
48
|
-
"help": ""
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
""",
|
|
80
|
+
"help": "Allow requeuing preempted of failed jobs, "
|
|
81
|
+
"if no cluster default. Results in "
|
|
82
|
+
"`sbatch ... --requeue ...` "
|
|
83
|
+
"This flag has no effect, if not set.",
|
|
53
84
|
"env_var": False,
|
|
54
85
|
"required": False,
|
|
55
86
|
},
|
|
@@ -91,6 +122,32 @@ class Executor(RemoteExecutor):
|
|
|
91
122
|
self._fallback_account_arg = None
|
|
92
123
|
self._fallback_partition = None
|
|
93
124
|
self._preemption_warning = False # no preemption warning has been issued
|
|
125
|
+
self.slurm_logdir = None
|
|
126
|
+
atexit.register(self.clean_old_logs)
|
|
127
|
+
|
|
128
|
+
def clean_old_logs(self) -> None:
    """Delete files older than specified age from the SLURM log directory.

    This method is registered as an ``atexit`` hook, so it has to cope
    with every state the executor can be in at interpreter shutdown and
    must never raise.

    Nothing is done when

    * ``delete_logfiles_older_than`` is unset (``None``) or ``<= 0``,
    * ``keep_successful_logs`` is set, or
    * ``self.slurm_logdir`` is still ``None`` (its initial value) or
      does not point to an existing directory.

    Otherwise every ``*.log`` file below the log directory whose
    modification time is older than the cutoff is removed, followed by
    all directories that have become empty. Failures are reported as
    warnings only.
    """
    # shorthands:
    settings = self.workflow.executor_settings
    age_cutoff = settings.delete_logfiles_older_than
    # the setting is Optional[int]; comparing None with an int would raise
    if age_cutoff is None or age_cutoff <= 0 or settings.keep_successful_logs:
        return

    logdir = self.slurm_logdir
    # slurm_logdir starts out as None and is only assigned later, hence
    # there may be nothing to clean when the hook fires
    if logdir is None or not logdir.is_dir():
        return

    cutoff_secs = age_cutoff * 86400  # days -> seconds
    current_time = time.time()
    self.logger.info(f"Cleaning up log files older than {age_cutoff} day(s)")
    for path in logdir.rglob("*.log"):
        try:
            # is_file()/stat() are inside the try block, because a file
            # may vanish between being listed and being inspected
            if path.is_file() and current_time - path.stat().st_mtime > cutoff_secs:
                path.unlink()
        except OSError as e:
            self.logger.warning(f"Could not delete logfile {path}: {e}")
    # we need a 2nd iteration to remove putatively empty directories
    try:
        delete_empty_dirs(logdir)
    except OSError as e:
        # report the log directory itself; the loop variable above may be
        # unbound or refer to an unrelated file
        self.logger.warning(
            f"Could not delete empty directories in {logdir}: {e}"
        )
|
|
94
151
|
|
|
95
152
|
def warn_on_jobcontext(self, done=None):
|
|
96
153
|
if not done:
|
|
@@ -123,18 +180,22 @@ class Executor(RemoteExecutor):
|
|
|
123
180
|
except AttributeError:
|
|
124
181
|
wildcard_str = ""
|
|
125
182
|
|
|
126
|
-
|
|
127
|
-
|
|
183
|
+
self.slurm_logdir = (
|
|
184
|
+
Path(self.workflow.executor_settings.logdir)
|
|
185
|
+
if self.workflow.executor_settings.logdir
|
|
186
|
+
else Path(".snakemake/slurm_logs").resolve()
|
|
128
187
|
)
|
|
129
|
-
|
|
188
|
+
|
|
189
|
+
self.slurm_logdir.mkdir(parents=True, exist_ok=True)
|
|
190
|
+
slurm_logfile = self.slurm_logdir / group_or_rule / wildcard_str / "%j.log"
|
|
191
|
+
slurm_logfile.parent.mkdir(parents=True, exist_ok=True)
|
|
130
192
|
# this behavior has been fixed in slurm 23.02, but there might be plenty of
|
|
131
193
|
# older versions around, hence we should rather be conservative here.
|
|
132
|
-
assert "%j" not in
|
|
194
|
+
assert "%j" not in str(self.slurm_logdir), (
|
|
133
195
|
"bug: jobid placeholder in parent dir of logfile. This does not work as "
|
|
134
196
|
"we have to create that dir before submission in order to make sbatch "
|
|
135
197
|
"happy. Otherwise we get silent fails without logfiles being created."
|
|
136
198
|
)
|
|
137
|
-
os.makedirs(logdir, exist_ok=True)
|
|
138
199
|
|
|
139
200
|
# generic part of a submission string:
|
|
140
201
|
# we use a run_uuid as the job-name, to allow `--name`-based
|
|
@@ -247,7 +308,9 @@ class Executor(RemoteExecutor):
|
|
|
247
308
|
slurm_jobid = out.strip().split(";")[0]
|
|
248
309
|
if not slurm_jobid:
|
|
249
310
|
raise WorkflowError("Failed to retrieve SLURM job ID from sbatch output.")
|
|
250
|
-
slurm_logfile = slurm_logfile.
|
|
311
|
+
slurm_logfile = slurm_logfile.with_name(
|
|
312
|
+
slurm_logfile.name.replace("%j", slurm_jobid)
|
|
313
|
+
)
|
|
251
314
|
self.logger.info(
|
|
252
315
|
f"Job {job.jobid} has been submitted with SLURM jobid {slurm_jobid} "
|
|
253
316
|
f"(log: {slurm_logfile})."
|
|
@@ -380,6 +443,19 @@ class Executor(RemoteExecutor):
|
|
|
380
443
|
self.report_job_success(j)
|
|
381
444
|
any_finished = True
|
|
382
445
|
active_jobs_seen_by_sacct.remove(j.external_jobid)
|
|
446
|
+
if not self.workflow.executor_settings.keep_successful_logs:
|
|
447
|
+
self.logger.debug(
|
|
448
|
+
"removing log for successful job "
|
|
449
|
+
f"with SLURM ID '{j.external_jobid}'"
|
|
450
|
+
)
|
|
451
|
+
try:
|
|
452
|
+
if j.aux["slurm_logfile"].exists():
|
|
453
|
+
j.aux["slurm_logfile"].unlink()
|
|
454
|
+
except (OSError, FileNotFoundError) as e:
|
|
455
|
+
self.logger.warning(
|
|
456
|
+
"Could not remove log file"
|
|
457
|
+
f" {j.aux['slurm_logfile']}: {e}"
|
|
458
|
+
)
|
|
383
459
|
elif status == "PREEMPTED" and not self._preemption_warning:
|
|
384
460
|
self._preemption_warning = True
|
|
385
461
|
self.logger.warning(
|
|
@@ -404,7 +480,9 @@ We leave it to SLURM to resume your job(s)"""
|
|
|
404
480
|
# with a new sentence
|
|
405
481
|
f"'{status}'. "
|
|
406
482
|
)
|
|
407
|
-
self.report_job_error(
|
|
483
|
+
self.report_job_error(
|
|
484
|
+
j, msg=msg, aux_logs=[j.aux["slurm_logfile"]._str]
|
|
485
|
+
)
|
|
408
486
|
active_jobs_seen_by_sacct.remove(j.external_jobid)
|
|
409
487
|
else: # still running?
|
|
410
488
|
yield j
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# utility functions for the SLURM executor plugin
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
|
+
from pathlib import Path
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
def delete_slurm_environment():
|
|
@@ -14,3 +15,28 @@ def delete_slurm_environment():
|
|
|
14
15
|
for var in os.environ:
|
|
15
16
|
if var.startswith("SLURM_"):
|
|
16
17
|
del os.environ[var]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def delete_empty_dirs(path: Path) -> None:
    """
    Recursively remove all empty directories below and including ``path``.

    This is used to tidy up a directory tree after files in it have been
    deleted. ``shutil.rmtree()`` is not suitable for this purpose, because
    it removes a directory together with all of its contents, whereas
    here only directories without any content may be removed.

    Symbolic links are neither followed nor removed, so directories
    outside of ``path`` are never touched.

    Args:
        path: Root of the directory tree to clean up. If it is not an
            existing directory, the function returns without doing
            anything.

    Raises:
        OSError: If a directory cannot be listed or removed.
    """
    if not path.is_dir():
        return

    # Process subdirectories first (bottom-up)
    for child in path.iterdir():
        # is_dir() follows symlinks; skip links so that we never descend
        # into (and delete from) a tree outside of `path`
        if child.is_dir() and not child.is_symlink():
            delete_empty_dirs(child)

    try:
        # Check if directory is now empty after processing children.
        # rmdir() cannot remove a symlink, so a linked root is left alone.
        if not path.is_symlink() and not any(path.iterdir()):
            path.rmdir()
    except OSError as e:
        # Provide more context in the error message
        raise OSError(f"Failed to remove empty directory {path}: {e}") from e
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: snakemake-executor-plugin-slurm
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.12.0
|
|
4
4
|
Summary: A Snakemake executor plugin for submitting jobs to a SLURM cluster.
|
|
5
5
|
Home-page: https://github.com/snakemake/snakemake-executor-plugin-slurm
|
|
6
6
|
License: MIT
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
snakemake_executor_plugin_slurm/__init__.py,sha256=HhAOwrgUp31fM9ciKOhU1HtY1zfdPj_yQbRu9CKj7vY,29029
|
|
2
|
+
snakemake_executor_plugin_slurm/utils.py,sha256=JOpQaUviGz6SORrMUsVDrSHc0lH6qX_SM0eUjVbWgp0,1282
|
|
3
|
+
snakemake_executor_plugin_slurm-0.12.0.dist-info/LICENSE,sha256=YVc4xTLWMqGfFL36120k7rzXtsT6e4RkJsh68VVn12s,1076
|
|
4
|
+
snakemake_executor_plugin_slurm-0.12.0.dist-info/METADATA,sha256=CkoWIpPni0VLr-EXTxv33UoVe8DQoL_PuLYfGgv5PmA,1432
|
|
5
|
+
snakemake_executor_plugin_slurm-0.12.0.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
|
|
6
|
+
snakemake_executor_plugin_slurm-0.12.0.dist-info/RECORD,,
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
snakemake_executor_plugin_slurm/__init__.py,sha256=Ui5xbATI0VfB93GTyy8RPNu6dSyB0ArwfPV5PJA5b4U,25543
|
|
2
|
-
snakemake_executor_plugin_slurm/utils.py,sha256=DuJdFJsAmvFsrnpyb8kMoqxTEEmTsEVxroDS1t9qOGw,434
|
|
3
|
-
snakemake_executor_plugin_slurm-0.11.2.dist-info/LICENSE,sha256=YVc4xTLWMqGfFL36120k7rzXtsT6e4RkJsh68VVn12s,1076
|
|
4
|
-
snakemake_executor_plugin_slurm-0.11.2.dist-info/METADATA,sha256=fpd5ziYEmFkZkEwLrqYq2pJmGyeXfBDG7Xp-3pqMafA,1432
|
|
5
|
-
snakemake_executor_plugin_slurm-0.11.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
6
|
-
snakemake_executor_plugin_slurm-0.11.2.dist-info/RECORD,,
|
|
File without changes
|