snakemake-executor-plugin-slurm 0.9.0__tar.gz → 0.10.1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snakemake-executor-plugin-slurm might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: snakemake-executor-plugin-slurm
3
- Version: 0.9.0
3
+ Version: 0.10.1
4
4
  Summary: A Snakemake executor plugin for submitting jobs to a SLURM cluster.
5
5
  Home-page: https://github.com/snakemake/snakemake-executor-plugin-slurm
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "snakemake-executor-plugin-slurm"
3
- version = "0.9.0"
3
+ version = "0.10.1"
4
4
  description = "A Snakemake executor plugin for submitting jobs to a SLURM cluster."
5
5
  authors = [
6
6
  "Christian Meesters <meesters@uni-mainz.de>",
@@ -25,7 +25,7 @@ black = "^23.7.0"
25
25
  flake8 = "^6.1.0"
26
26
  coverage = "^7.3.1"
27
27
  pytest = "^7.4.2"
28
- snakemake = {git = "https://github.com/snakemake/snakemake.git"}
28
+ snakemake = "^8.20.0"
29
29
 
30
30
  [tool.coverage.run]
31
31
  omit = [".*", "*/site-packages/*", "Snakefile"]
@@ -26,6 +26,8 @@ from snakemake_interface_executor_plugins.jobs import (
26
26
  from snakemake_interface_common.exceptions import WorkflowError
27
27
  from snakemake_executor_plugin_slurm_jobstep import get_cpus_per_task
28
28
 
29
+ from .utils import delete_slurm_environment
30
+
29
31
 
30
32
  @dataclass
31
33
  class ExecutorSettings(ExecutorSettingsBase):
@@ -76,6 +78,7 @@ class Executor(RemoteExecutor):
76
78
  self.logger.info(f"SLURM run ID: {self.run_uuid}")
77
79
  self._fallback_account_arg = None
78
80
  self._fallback_partition = None
81
+ self._preemption_warning = False # no preemption warning has been issued
79
82
  # providing a short-hand, even if subsequent calls seem redundant
80
83
  self.settings: ExecutorSettings = self.workflow.executor_settings
81
84
 
@@ -84,10 +87,11 @@ class Executor(RemoteExecutor):
84
87
  if "SLURM_JOB_ID" in os.environ:
85
88
  self.logger.warning(
86
89
  "You are running snakemake in a SLURM job context. "
87
- "This is not recommended, as it may lead to unexpected behavior."
90
+ "This is not recommended, as it may lead to unexpected behavior. "
88
91
  "Please run Snakemake directly on the login node."
89
92
  )
90
93
  time.sleep(5)
94
+ delete_slurm_environment()
91
95
  done = True
92
96
 
93
97
  def additional_general_args(self):
@@ -130,7 +134,11 @@ class Executor(RemoteExecutor):
130
134
  else:
131
135
  comment_str = f"rule_{job.name}_wildcards_{wildcard_str}"
132
136
  call = (
133
- f"sbatch --job-name {self.run_uuid} --output {slurm_logfile} --export=ALL "
137
+ f"sbatch "
138
+ f"--parsable "
139
+ f"--job-name {self.run_uuid} "
140
+ f"--output '{slurm_logfile}' "
141
+ f"--export=ALL "
134
142
  f"--comment {comment_str}"
135
143
  )
136
144
 
@@ -205,10 +213,11 @@ class Executor(RemoteExecutor):
205
213
  )
206
214
 
207
215
  # multicluster submissions yield submission infos like
208
- # "Submitted batch job <id> on cluster <name>".
209
- # To extract the job id in this case we need to match any number
210
- # in between a string - which might change in future versions of SLURM.
211
- slurm_jobid = re.search(r"\d+", out).group()
216
+ # "Submitted batch job <id> on cluster <name>" by default, but with the
217
+ # --parsable option it simply yields "<id>;<name>".
218
+ # To extract the job id we split by semicolon and take the first element
219
+ # (this also works if no cluster name was provided)
220
+ slurm_jobid = out.split(";")[0]
212
221
  slurm_logfile = slurm_logfile.replace("%j", slurm_jobid)
213
222
  self.logger.info(
214
223
  f"Job {job.jobid} has been submitted with SLURM jobid {slurm_jobid} "
@@ -243,7 +252,6 @@ class Executor(RemoteExecutor):
243
252
  "FAILED",
244
253
  "NODE_FAIL",
245
254
  "OUT_OF_MEMORY",
246
- "PREEMPTED",
247
255
  "TIMEOUT",
248
256
  "ERROR",
249
257
  )
@@ -343,6 +351,16 @@ class Executor(RemoteExecutor):
343
351
  self.report_job_success(j)
344
352
  any_finished = True
345
353
  active_jobs_seen_by_sacct.remove(j.external_jobid)
354
+ elif status == "PREEMPTED" and not self._preemption_warning:
355
+ self._preemption_warning = True
356
+ self.logger.warning(
357
+ """
358
+ ===== A Job preemption occured! =====
359
+ Leave Snakemake running, if possible. Otherwise Snakemake
360
+ needs to restart this job upon a Snakemake restart.
361
+
362
+ We leave it to SLURM to resume your job(s)"""
363
+ )
346
364
  elif status == "UNKNOWN":
347
365
  # the job probably does not exist anymore, but 'sacct' did not work
348
366
  # so we assume it is finished
@@ -390,6 +408,14 @@ class Executor(RemoteExecutor):
390
408
  )
391
409
  except subprocess.TimeoutExpired:
392
410
  self.logger.warning("Unable to cancel jobs within a minute.")
411
+ except subprocess.CalledProcessError as e:
412
+ msg = e.stderr.strip()
413
+ if msg:
414
+ msg = f": {msg}"
415
+ raise WorkflowError(
416
+ "Unable to cancel jobs with scancel "
417
+ f"(exit code {e.returncode}){msg}"
418
+ ) from e
393
419
 
394
420
  async def job_stati(self, command):
395
421
  """Obtain SLURM job status of all submitted jobs with sacct
@@ -0,0 +1,16 @@
1
+ # utility functions for the SLURM executor plugin
2
+
3
+ import os
4
+
5
+
6
def delete_slurm_environment():
    """
    Delete all environment variables starting with 'SLURM_'.

    Only the environment of the current process (``os.environ``) is
    modified; the parent shell will still have these variables. This is
    needed to submit within a SLURM job context to avoid conflicting
    environments.
    """
    # Iterate over a snapshot of the variable names: entries are deleted
    # inside the loop, and a mapping must not be mutated while it is being
    # iterated directly.
    for var in list(os.environ):
        if var.startswith("SLURM_"):
            del os.environ[var]