snakemake-executor-plugin-slurm 0.4.2__tar.gz → 0.12.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of snakemake-executor-plugin-slurm might be problematic.

@@ -1,8 +1,7 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.3
 Name: snakemake-executor-plugin-slurm
-Version: 0.4.2
+Version: 0.12.1
 Summary: A Snakemake executor plugin for submitting jobs to a SLURM cluster.
-Home-page: https://github.com/snakemake/snakemake-executor-plugin-slurm
 License: MIT
 Keywords: snakemake,plugin,executor,cluster,slurm
 Author: Christian Meesters
@@ -12,9 +11,10 @@ Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Requires-Dist: snakemake-executor-plugin-slurm-jobstep (>=0.1.10,<0.2.0)
+Classifier: Programming Language :: Python :: 3.13
+Requires-Dist: snakemake-executor-plugin-slurm-jobstep (>=0.2.0,<0.3.0)
 Requires-Dist: snakemake-interface-common (>=1.13.0,<2.0.0)
-Requires-Dist: snakemake-interface-executor-plugins (>=9.0.0,<10.0.0)
+Requires-Dist: snakemake-interface-executor-plugins (>=9.1.1,<10.0.0)
 Requires-Dist: throttler (>=1.2.2,<2.0.0)
 Project-URL: Documentation, https://snakemake.github.io/snakemake-plugin-catalog/plugins/executor/slurm.html
 Project-URL: Repository, https://github.com/snakemake/snakemake-executor-plugin-slurm
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "snakemake-executor-plugin-slurm"
-version = "0.4.2"
+version = "0.12.1"
 description = "A Snakemake executor plugin for submitting jobs to a SLURM cluster."
 authors = [
     "Christian Meesters <meesters@uni-mainz.de>",
@@ -16,8 +16,8 @@ keywords = ["snakemake", "plugin", "executor", "cluster", "slurm"]
 [tool.poetry.dependencies]
 python = "^3.11"
 snakemake-interface-common = "^1.13.0"
-snakemake-interface-executor-plugins = "^9.0.0"
-snakemake-executor-plugin-slurm-jobstep = "^0.1.10"
+snakemake-interface-executor-plugins = "^9.1.1"
+snakemake-executor-plugin-slurm-jobstep = "^0.2.0"
 throttler = "^1.2.2"
 
 [tool.poetry.group.dev.dependencies]
@@ -25,7 +25,7 @@ black = "^23.7.0"
 flake8 = "^6.1.0"
 coverage = "^7.3.1"
 pytest = "^7.4.2"
-snakemake = {git = "https://github.com/snakemake/snakemake.git"}
+snakemake = "^8.20.0"
 
 [tool.coverage.run]
 omit = [".*", "*/site-packages/*", "Snakefile"]
@@ -3,21 +3,88 @@ __copyright__ = "Copyright 2023, David Lähnemann, Johannes Köster, Christian M
 __email__ = "johannes.koester@uni-due.de"
 __license__ = "MIT"
 
+import atexit
 import csv
 from io import StringIO
 import os
+from pathlib import Path
+import re
+import shlex
 import subprocess
 import time
+from dataclasses import dataclass, field
 from datetime import datetime, timedelta
-from typing import List, Generator
+from typing import List, Generator, Optional
 import uuid
 from snakemake_interface_executor_plugins.executors.base import SubmittedJobInfo
 from snakemake_interface_executor_plugins.executors.remote import RemoteExecutor
-from snakemake_interface_executor_plugins.settings import CommonSettings
+from snakemake_interface_executor_plugins.settings import (
+    ExecutorSettingsBase,
+    CommonSettings,
+)
 from snakemake_interface_executor_plugins.jobs import (
     JobExecutorInterface,
 )
 from snakemake_interface_common.exceptions import WorkflowError
+from snakemake_executor_plugin_slurm_jobstep import get_cpus_per_task
+
+from .utils import delete_slurm_environment, delete_empty_dirs
+
+
+@dataclass
+class ExecutorSettings(ExecutorSettingsBase):
+    logdir: Optional[Path] = field(
+        default=None,
+        metadata={
+            "help": "Per default the SLURM log directory is relative to "
+            "the working directory."
+            "This flag allows to set an alternative directory.",
+            "env_var": False,
+            "required": False,
+        },
+    )
+    keep_successful_logs: bool = field(
+        default=False,
+        metadata={
+            "help": "Per default SLURM log files will be deleted upon successful "
+            "completion of a job. Whenever a SLURM job fails, its log "
+            "file will be preserved. "
+            "This flag allows to keep all SLURM log files, even those "
+            "of successful jobs.",
+            "env_var": False,
+            "required": False,
+        },
+    )
+    delete_logfiles_older_than: Optional[int] = field(
+        default=10,
+        metadata={
+            "help": "Per default SLURM log files in the SLURM log directory "
+            "of a workflow will be deleted after 10 days. For this, "
+            "best leave the default log directory unaltered. "
+            "Setting this flag allows to change this behaviour. "
+            "If set to <=0, no old files will be deleted. ",
+        },
+    )
+    init_seconds_before_status_checks: Optional[int] = field(
+        default=40,
+        metadata={
+            "help": "Defines the time in seconds before the first status "
+            "check is performed after job submission.",
+            "env_var": False,
+            "required": False,
+        },
+    )
+    requeue: bool = field(
+        default=False,
+        metadata={
+            "help": "Allow requeuing preempted or failed jobs, "
+            "if no cluster default. Results in "
+            "`sbatch ... --requeue ...` "
+            "This flag has no effect, if not set.",
+            "env_var": False,
+            "required": False,
+        },
+    )
 
 
 # Required:
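For orientation, these new ExecutorSettings fields are read via self.workflow.executor_settings in the executor code further down. A minimal sketch of how the values fit together, with made-up values; the corresponding CLI flag names (e.g. --slurm-logdir) are an assumption based on the usual Snakemake plugin naming convention and are not part of this diff:

    from pathlib import Path

    # hypothetical values, mirroring the defaults declared above
    settings = ExecutorSettings(
        logdir=Path("/scratch/me/slurm_logs"),  # assumed path; default is None (relative to the workdir)
        keep_successful_logs=False,
        delete_logfiles_older_than=10,          # days; <= 0 disables the cleanup
        init_seconds_before_status_checks=40,
        requeue=False,
    )
    cutoff_secs = settings.delete_logfiles_older_than * 86400  # 10 days -> 864000 s, as in clean_old_logs()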
@@ -48,10 +115,51 @@ common_settings = CommonSettings(
 # Implementation of your executor
 class Executor(RemoteExecutor):
     def __post_init__(self):
+        # run check whether we are running in a SLURM job context
+        self.warn_on_jobcontext()
         self.run_uuid = str(uuid.uuid4())
         self.logger.info(f"SLURM run ID: {self.run_uuid}")
         self._fallback_account_arg = None
         self._fallback_partition = None
+        self._preemption_warning = False  # no preemption warning has been issued
+        self.slurm_logdir = None
+        atexit.register(self.clean_old_logs)
+
+    def clean_old_logs(self) -> None:
+        """Delete files older than specified age from the SLURM log directory."""
+        # shorthands:
+        age_cutoff = self.workflow.executor_settings.delete_logfiles_older_than
+        keep_all = self.workflow.executor_settings.keep_successful_logs
+        if age_cutoff <= 0 or keep_all:
+            return
+        cutoff_secs = age_cutoff * 86400
+        current_time = time.time()
+        self.logger.info(f"Cleaning up log files older than {age_cutoff} day(s)")
+        for path in self.slurm_logdir.rglob("*.log"):
+            if path.is_file():
+                try:
+                    file_age = current_time - path.stat().st_mtime
+                    if file_age > cutoff_secs:
+                        path.unlink()
+                except (OSError, FileNotFoundError) as e:
+                    self.logger.warning(f"Could not delete logfile {path}: {e}")
+        # we need a 2nd iteration to remove putatively empty directories
+        try:
+            delete_empty_dirs(self.slurm_logdir)
+        except (OSError, FileNotFoundError) as e:
+            self.logger.warning(f"Could not delete empty directory {path}: {e}")
+
+    def warn_on_jobcontext(self, done=None):
+        if not done:
+            if "SLURM_JOB_ID" in os.environ:
+                self.logger.warning(
+                    "You are running snakemake in a SLURM job context. "
+                    "This is not recommended, as it may lead to unexpected behavior. "
+                    "Please run Snakemake directly on the login node."
+                )
+                time.sleep(5)
+                delete_slurm_environment()
+        done = True
 
     def additional_general_args(self):
         return "--executor slurm-jobstep --jobs 1"
@@ -65,29 +173,55 @@ class Executor(RemoteExecutor):
         # with job_info being of type
         # snakemake_interface_executor_plugins.executors.base.SubmittedJobInfo.
 
-        log_folder = f"group_{job.name}" if job.is_group() else f"rule_{job.name}"
+        group_or_rule = f"group_{job.name}" if job.is_group() else f"rule_{job.name}"
 
         try:
-            wildcard_str = f"_{'_'.join(job.wildcards)}" if job.wildcards else ""
+            wildcard_str = "_".join(job.wildcards) if job.wildcards else ""
         except AttributeError:
             wildcard_str = ""
 
-        slurm_logfile = os.path.abspath(
-            f".snakemake/slurm_logs/{log_folder}/%j{wildcard_str}.log"
+        self.slurm_logdir = (
+            Path(self.workflow.executor_settings.logdir)
+            if self.workflow.executor_settings.logdir
+            else Path(".snakemake/slurm_logs").resolve()
+        )
+
+        self.slurm_logdir.mkdir(parents=True, exist_ok=True)
+        slurm_logfile = self.slurm_logdir / group_or_rule / wildcard_str / "%j.log"
+        slurm_logfile.parent.mkdir(parents=True, exist_ok=True)
+        # this behavior has been fixed in slurm 23.02, but there might be plenty of
+        # older versions around, hence we should rather be conservative here.
+        assert "%j" not in str(self.slurm_logdir), (
+            "bug: jobid placeholder in parent dir of logfile. This does not work as "
+            "we have to create that dir before submission in order to make sbatch "
+            "happy. Otherwise we get silent fails without logfiles being created."
         )
-        os.makedirs(os.path.dirname(slurm_logfile), exist_ok=True)
 
         # generic part of a submission string:
         # we use a run_uuid as the job-name, to allow `--name`-based
         # filtering in the job status checks (`sacct --name` and `squeue --name`)
+        if wildcard_str == "":
+            comment_str = f"rule_{job.name}"
+        else:
+            comment_str = f"rule_{job.name}_wildcards_{wildcard_str}"
         call = (
-            f"sbatch --job-name {self.run_uuid} --output {slurm_logfile} --export=ALL "
-            f"--comment {job.name}"
+            f"sbatch "
+            f"--parsable "
+            f"--job-name {self.run_uuid} "
+            f"--output '{slurm_logfile}' "
+            f"--export=ALL "
+            f"--comment {comment_str}"
         )
 
         call += self.get_account_arg(job)
         call += self.get_partition_arg(job)
 
+        if self.workflow.executor_settings.requeue:
+            call += " --requeue"
+
+        if job.resources.get("clusters"):
+            call += f" --clusters {job.resources.clusters}"
+
         if job.resources.get("runtime"):
            call += f" -t {job.resources.runtime}"
         else:
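To make the rewritten submission string concrete, here is a sketch with invented values for the run UUID, rule name, and wildcards (none of these literals appear in the diff); the resource-dependent options built in the following hunks are appended to the same string:

    run_uuid = "c0ffee00-0000-4000-8000-000000000001"   # hypothetical
    slurm_logfile = ".snakemake/slurm_logs/rule_align/sample1/%j.log"
    call = (
        f"sbatch --parsable --job-name {run_uuid} "
        f"--output '{slurm_logfile}' --export=ALL "
        f"--comment rule_align_wildcards_sample1"
    )
    # later hunks append e.g. " -A 'acct' -p main -t 30 --mem 4000 --ntasks=1 --cpus-per-task=4"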
@@ -99,7 +233,7 @@ class Executor(RemoteExecutor):
             )
 
         if job.resources.get("constraint"):
-            call += f" -C {job.resources.constraint}"
+            call += f" -C '{job.resources.constraint}'"
         if job.resources.get("mem_mb_per_cpu"):
             call += f" --mem-per-cpu {job.resources.mem_mb_per_cpu}"
         elif job.resources.get("mem_mb"):
@@ -110,28 +244,27 @@
                 "- submitting without. This might or might not work on your cluster."
             )
 
-        # MPI job
-        if job.resources.get("mpi", False):
-            if job.resources.get("nodes", False):
-                call += f" --nodes={job.resources.get('nodes', 1)}"
+        if job.resources.get("nodes", False):
+            call += f" --nodes={job.resources.get('nodes', 1)}"
 
-        # fixes #40 - set ntasks regarlless of mpi, because
+        # fixes #40 - set ntasks regardless of mpi, because
         # SLURM v22.05 will require it for all jobs
         call += f" --ntasks={job.resources.get('tasks', 1)}"
-
-        cpus_per_task = job.threads
-        if job.resources.get("cpus_per_task"):
-            if not isinstance(cpus_per_task, int):
-                raise WorkflowError(
-                    f"cpus_per_task must be an integer, but is {cpus_per_task}"
+        # MPI job
+        if job.resources.get("mpi", False):
+            if not job.resources.get("tasks_per_node") and not job.resources.get(
+                "nodes"
+            ):
+                self.logger.warning(
+                    "MPI job detected, but no 'tasks_per_node' or 'nodes' "
+                    "specified. Assuming 'tasks_per_node=1'."
+                    "Probably not what you want."
                 )
-            cpus_per_task = job.resources.cpus_per_task
-        # ensure that at least 1 cpu is requested
-        # because 0 is not allowed by slurm
-        cpus_per_task = max(1, cpus_per_task)
-        call += f" --cpus-per-task={cpus_per_task}"
+
+        call += f" --cpus-per-task={get_cpus_per_task(job)}"
 
         if job.resources.get("slurm_extra"):
+            self.check_slurm_extra(job)
             call += f" {job.resources.slurm_extra}"
 
         exec_job = self.format_job_exec(job)
@@ -145,16 +278,39 @@
 
         self.logger.debug(f"sbatch call: {call}")
         try:
-            out = subprocess.check_output(
-                call, shell=True, text=True, stderr=subprocess.STDOUT
-            ).strip()
+            process = subprocess.Popen(
+                call,
+                shell=True,
+                text=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+            )
+            out, err = process.communicate()
+            if process.returncode != 0:
+                raise subprocess.CalledProcessError(
+                    process.returncode, call, output=err
+                )
         except subprocess.CalledProcessError as e:
             raise WorkflowError(
-                f"SLURM job submission failed. The error message was {e.output}"
+                f"SLURM sbatch failed. The error message was {e.output}"
+            )
+        # any other error message indicating failure?
+        if "submission failed" in err:
+            raise WorkflowError(
+                f"SLURM job submission failed. The error message was {err}"
             )
 
-        slurm_jobid = out.split(" ")[-1]
-        slurm_logfile = slurm_logfile.replace("%j", slurm_jobid)
+        # multicluster submissions yield submission infos like
+        # "Submitted batch job <id> on cluster <name>" by default, but with the
+        # --parsable option it simply yields "<id>;<name>".
+        # To extract the job id we split by semicolon and take the first element
+        # (this also works if no cluster name was provided)
+        slurm_jobid = out.strip().split(";")[0]
+        if not slurm_jobid:
+            raise WorkflowError("Failed to retrieve SLURM job ID from sbatch output.")
+        slurm_logfile = slurm_logfile.with_name(
+            slurm_logfile.name.replace("%j", slurm_jobid)
+        )
         self.logger.info(
             f"Job {job.jobid} has been submitted with SLURM jobid {slurm_jobid} "
             f"(log: {slurm_logfile})."
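Because sbatch now runs with --parsable, its stdout is just the job id, or the job id and cluster name separated by a semicolon. A small sketch of the job-id extraction above, using invented output:

    from pathlib import Path

    out = "4242;cluster_a\n"                  # hypothetical sbatch --parsable output
    slurm_jobid = out.strip().split(";")[0]   # -> "4242"
    slurm_logfile = Path(".snakemake/slurm_logs/rule_align/sample1/%j.log")
    slurm_logfile = slurm_logfile.with_name(slurm_logfile.name.replace("%j", slurm_jobid))
    # -> .snakemake/slurm_logs/rule_align/sample1/4242.log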
@@ -169,7 +325,6 @@
         self, active_jobs: List[SubmittedJobInfo]
     ) -> Generator[SubmittedJobInfo, None, None]:
         # Check the status of active jobs.
-
         # You have to iterate over the given list active_jobs.
         # For jobs that have finished successfully, you have to call
         # self.report_job_success(job).
@@ -189,7 +344,6 @@
             "FAILED",
             "NODE_FAIL",
             "OUT_OF_MEMORY",
-            "PREEMPTED",
             "TIMEOUT",
             "ERROR",
         )
@@ -212,21 +366,28 @@
 
         # We use this sacct syntax for argument 'starttime' to keep it compatible
         # with slurm < 20.11
-        sacct_starttime = f"{datetime.now() - timedelta(days=2):%Y-%m-%dT%H:00}"
+        sacct_starttime = f"{datetime.now() - timedelta(days = 2):%Y-%m-%dT%H:00}"
         # previously we had
         # f"--starttime now-2days --endtime now --name {self.run_uuid}"
         # in line 218 - once v20.11 is definitively not in use any more,
         # the more readable version ought to be re-adapted
 
+        # -X: only show main job, no substeps
+        sacct_command = f"""sacct -X --parsable2 \
+                        --clusters all \
+                        --noheader --format=JobIdRaw,State \
+                        --starttime {sacct_starttime} \
+                        --endtime now --name {self.run_uuid}"""
+
+        # for better readability in verbose output
+        sacct_command = " ".join(shlex.split(sacct_command))
+
         # this code is inspired by the snakemake profile:
         # https://github.com/Snakemake-Profiles/slurm
         for i in range(status_attempts):
             async with self.status_rate_limiter:
                 (status_of_jobs, sacct_query_duration) = await self.job_stati(
-                    # -X: only show main job, no substeps
-                    f"sacct -X --parsable2 --noheader --format=JobIdRaw,State "
-                    f"--starttime {sacct_starttime} "
-                    f"--endtime now --name {self.run_uuid}"
+                    sacct_command
                 )
                 if status_of_jobs is None and sacct_query_duration is None:
                     self.logger.debug(f"could not check status of job {self.run_uuid}")
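The reworked query asks sacct for pipe-separated JobIdRaw,State pairs across all clusters. As an illustration only (output invented; the actual parsing lives in job_stati(), which this hunk does not touch):

    sacct_stdout = "4242|RUNNING\n4243|COMPLETED\n"   # hypothetical sacct output
    status_of_jobs = dict(
        line.split("|", 1) for line in sacct_stdout.strip().splitlines()
    )
    # -> {"4242": "RUNNING", "4243": "COMPLETED"}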
@@ -282,6 +443,30 @@
                 self.report_job_success(j)
                 any_finished = True
                 active_jobs_seen_by_sacct.remove(j.external_jobid)
+                if not self.workflow.executor_settings.keep_successful_logs:
+                    self.logger.debug(
+                        "removing log for successful job "
+                        f"with SLURM ID '{j.external_jobid}'"
+                    )
+                    try:
+                        if j.aux["slurm_logfile"].exists():
+                            j.aux["slurm_logfile"].unlink()
+                    except (OSError, FileNotFoundError) as e:
+                        self.logger.warning(
+                            "Could not remove log file"
+                            f" {j.aux['slurm_logfile']}: {e}"
+                        )
+            elif status == "PREEMPTED" and not self._preemption_warning:
+                self._preemption_warning = True
+                self.logger.warning(
+                    """
+                    ===== A Job preemption occurred! =====
+                    Leave Snakemake running, if possible. Otherwise Snakemake
+                    needs to restart this job upon a Snakemake restart.
+
+                    We leave it to SLURM to resume your job(s)"""
+                )
+                yield j
             elif status == "UNKNOWN":
                 # the job probably does not exist anymore, but 'sacct' did not work
                 # so we assume it is finished
@@ -291,9 +476,13 @@
             elif status in fail_stati:
                 msg = (
                     f"SLURM-job '{j.external_jobid}' failed, SLURM status is: "
-                    f"'{status}'"
+                    # message ends with '. ', because it is followed
+                    # by a new sentence
+                    f"'{status}'. "
+                )
+                self.report_job_error(
+                    j, msg=msg, aux_logs=[j.aux["slurm_logfile"]._str]
                 )
-                self.report_job_error(j, msg=msg, aux_logs=[j.aux["slurm_logfile"]])
                 active_jobs_seen_by_sacct.remove(j.external_jobid)
             else:  # still running?
                 yield j
@@ -316,8 +505,10 @@
                 # about 30 sec, but can be longer in extreme cases.
                 # Under 'normal' circumstances, 'scancel' is executed in
                 # virtually no time.
+                scancel_command = f"scancel {jobids} --clusters=all"
+
                 subprocess.check_output(
-                    f"scancel {jobids}",
+                    scancel_command,
                     text=True,
                     shell=True,
                     timeout=60,
@@ -325,6 +516,14 @@
                 )
             except subprocess.TimeoutExpired:
                 self.logger.warning("Unable to cancel jobs within a minute.")
+            except subprocess.CalledProcessError as e:
+                msg = e.stderr.strip()
+                if msg:
+                    msg = f": {msg}"
+                raise WorkflowError(
+                    "Unable to cancel jobs with scancel "
+                    f"(exit code {e.returncode}){msg}"
+                ) from e
 
     async def job_stati(self, command):
         """Obtain SLURM job status of all submitted jobs with sacct
@@ -371,13 +570,14 @@
             # here, we check whether the given or guessed account is valid
             # if not, a WorkflowError is raised
             self.test_account(job.resources.slurm_account)
-            return f" -A {job.resources.slurm_account}"
+            return f" -A '{job.resources.slurm_account}'"
         else:
             if self._fallback_account_arg is None:
                 self.logger.warning("No SLURM account given, trying to guess.")
                 account = self.get_account()
                 if account:
                     self.logger.warning(f"Guessed SLURM account: {account}")
+                    self.test_account(f"{account}")
                     self._fallback_account_arg = f" -A {account}"
                 else:
                     self.logger.warning(
@@ -415,7 +615,7 @@
             sacct_out = subprocess.check_output(
                 cmd, shell=True, text=True, stderr=subprocess.PIPE
             )
-            return sacct_out.strip()
+            return sacct_out.replace("(null)", "").strip()
         except subprocess.CalledProcessError as e:
             self.logger.warning(
                 f"No account was given, not able to get a SLURM account via sacct: "
@@ -433,12 +633,28 @@
                 cmd, shell=True, text=True, stderr=subprocess.PIPE
             )
         except subprocess.CalledProcessError as e:
-            raise WorkflowError(
-                f"Unable to test the validity of the given or guessed SLURM account "
-                f"'{account}' with sacctmgr: {e.stderr}"
+            sacctmgr_report = (
+                "Unable to test the validity of the given or guessed "
+                f"SLURM account '{account}' with sacctmgr: {e.stderr}."
             )
+            try:
+                cmd = "sshare -U --format Account --noheader"
+                accounts = subprocess.check_output(
+                    cmd, shell=True, text=True, stderr=subprocess.PIPE
+                )
+            except subprocess.CalledProcessError as e2:
+                sshare_report = (
+                    "Unable to test the validity of the given or guessed"
+                    f" SLURM account '{account}' with sshare: {e2.stderr}."
+                )
+                raise WorkflowError(
+                    f"The 'sacctmgr' reported: '{sacctmgr_report}' "
+                    f"and likewise 'sshare' reported: '{sshare_report}'."
+                )
 
-        accounts = accounts.split()
+        # The set() has been introduced during review to eliminate
+        # duplicates. They are not harmful, but disturbing to read.
+        accounts = set(_.strip() for _ in accounts.split("\n") if _)
 
         if account not in accounts:
             raise WorkflowError(
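As a sketch of the new fallback path: the sshare listing is deduplicated into a set before the membership check above (output invented):

    accounts_raw = "projA \nprojB\nprojA \n"   # hypothetical `sshare -U --format Account --noheader` output
    accounts = set(_.strip() for _ in accounts_raw.split("\n") if _)
    # -> {"projA", "projB"}; an account missing from this set raises a WorkflowError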
@@ -473,3 +689,15 @@
                 "'slurm_partition=<your default partition>'."
             )
         return ""
+
+    def check_slurm_extra(self, job):
+        jobname = re.compile(r"--job-name[=?|\s+]|-J\s?")
+        if re.search(jobname, job.resources.slurm_extra):
+            raise WorkflowError(
+                "The --job-name option is not allowed in the 'slurm_extra' "
+                "parameter. The job name is set by snakemake and must not be "
+                "overwritten. It is internally used to check the stati of "
+                "all jobs submitted by this workflow. "
+                "Please consult the documentation if you are unsure how to "
+                "query the status of your jobs."
+            )
@@ -0,0 +1,42 @@
+# utility functions for the SLURM executor plugin
+
+import os
+from pathlib import Path
+
+
+def delete_slurm_environment():
+    """
+    Function to delete all environment variables
+    starting with 'SLURM_'. The parent shell will
+    still have this environment. This is needed to
+    submit within a SLURM job context to avoid
+    conflicting environments.
+    """
+    for var in os.environ:
+        if var.startswith("SLURM_"):
+            del os.environ[var]
+
+
+def delete_empty_dirs(path: Path) -> None:
+    """
+    Function to delete all empty directories in a given path.
+    This is needed to clean up the working directory after
+    a job has successfully finished. This function is needed because
+    the shutil.rmtree() function does not delete empty
+    directories.
+    """
+    if not path.is_dir():
+        return
+
+    # Process subdirectories first (bottom-up)
+    for child in path.iterdir():
+        if child.is_dir():
+            delete_empty_dirs(child)
+
+    try:
+        # Check if directory is now empty after processing children
+        if not any(path.iterdir()):
+            path.rmdir()
+    except (OSError, FileNotFoundError) as e:
+        # Provide more context in the error message
+        raise OSError(f"Failed to remove empty directory {path}: {e}") from e
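A hypothetical standalone use of the new helper; the module path is an assumption based on the relative import `from .utils import ...` in __init__.py, i.e. that the file lands at snakemake_executor_plugin_slurm/utils.py:

    from pathlib import Path
    from snakemake_executor_plugin_slurm.utils import delete_empty_dirs

    # prune empty per-rule subdirectories left behind after log cleanup
    delete_empty_dirs(Path(".snakemake/slurm_logs"))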