snakemake-executor-plugin-slurm 0.8.0__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snakemake-executor-plugin-slurm might be problematic. Click here for more details.

@@ -7,6 +7,7 @@ import csv
7
7
  from io import StringIO
8
8
  import os
9
9
  import re
10
+ import shlex
10
11
  import subprocess
11
12
  import time
12
13
  from dataclasses import dataclass, field
@@ -25,6 +26,8 @@ from snakemake_interface_executor_plugins.jobs import (
25
26
  from snakemake_interface_common.exceptions import WorkflowError
26
27
  from snakemake_executor_plugin_slurm_jobstep import get_cpus_per_task
27
28
 
29
+ from .utils import delete_slurm_environment
30
+
28
31
 
29
32
  @dataclass
30
33
  class ExecutorSettings(ExecutorSettingsBase):
@@ -75,6 +78,7 @@ class Executor(RemoteExecutor):
75
78
  self.logger.info(f"SLURM run ID: {self.run_uuid}")
76
79
  self._fallback_account_arg = None
77
80
  self._fallback_partition = None
81
+ self._preemption_warning = False # no preemption warning has been issued
78
82
  # providing a short-hand, even if subsequent calls seem redundant
79
83
  self.settings: ExecutorSettings = self.workflow.executor_settings
80
84
 
@@ -83,10 +87,11 @@ class Executor(RemoteExecutor):
83
87
  if "SLURM_JOB_ID" in os.environ:
84
88
  self.logger.warning(
85
89
  "You are running snakemake in a SLURM job context. "
86
- "This is not recommended, as it may lead to unexpected behavior."
90
+ "This is not recommended, as it may lead to unexpected behavior. "
87
91
  "Please run Snakemake directly on the login node."
88
92
  )
89
93
  time.sleep(5)
94
+ delete_slurm_environment()
90
95
  done = True
91
96
 
92
97
  def additional_general_args(self):
@@ -129,13 +134,20 @@ class Executor(RemoteExecutor):
129
134
  else:
130
135
  comment_str = f"rule_{job.name}_wildcards_{wildcard_str}"
131
136
  call = (
132
- f"sbatch --job-name {self.run_uuid} --output {slurm_logfile} --export=ALL "
137
+ f"sbatch "
138
+ f"--parsable "
139
+ f"--job-name {self.run_uuid} "
140
+ f"--output {slurm_logfile} "
141
+ f"--export=ALL "
133
142
  f"--comment {comment_str}"
134
143
  )
135
144
 
136
145
  call += self.get_account_arg(job)
137
146
  call += self.get_partition_arg(job)
138
147
 
148
+ if job.resources.get("clusters"):
149
+ call += f" --clusters {job.resources.clusters}"
150
+
139
151
  if job.resources.get("runtime"):
140
152
  call += f" -t {job.resources.runtime}"
141
153
  else:
@@ -147,7 +159,7 @@ class Executor(RemoteExecutor):
147
159
  )
148
160
 
149
161
  if job.resources.get("constraint"):
150
- call += f" -C {job.resources.constraint}"
162
+ call += f" -C '{job.resources.constraint}'"
151
163
  if job.resources.get("mem_mb_per_cpu"):
152
164
  call += f" --mem-per-cpu {job.resources.mem_mb_per_cpu}"
153
165
  elif job.resources.get("mem_mb"):
@@ -200,7 +212,12 @@ class Executor(RemoteExecutor):
200
212
  f"SLURM job submission failed. The error message was {e.output}"
201
213
  )
202
214
 
203
- slurm_jobid = out.split(" ")[-1]
215
+ # multicluster submissions yield submission infos like
216
+ # "Submitted batch job <id> on cluster <name>" by default, but with the
217
+ # --parsable option it simply yields "<id>;<name>".
218
+ # To extract the job id we split by semicolon and take the first element
219
+ # (this also works if no cluster name was provided)
220
+ slurm_jobid = out.split(";")[0]
204
221
  slurm_logfile = slurm_logfile.replace("%j", slurm_jobid)
205
222
  self.logger.info(
206
223
  f"Job {job.jobid} has been submitted with SLURM jobid {slurm_jobid} "
@@ -235,7 +252,6 @@ class Executor(RemoteExecutor):
235
252
  "FAILED",
236
253
  "NODE_FAIL",
237
254
  "OUT_OF_MEMORY",
238
- "PREEMPTED",
239
255
  "TIMEOUT",
240
256
  "ERROR",
241
257
  )
@@ -264,15 +280,22 @@ class Executor(RemoteExecutor):
264
280
  # in line 218 - once v20.11 is definitively not in use any more,
265
281
  # the more readable version ought to be re-adapted
266
282
 
283
+ # -X: only show main job, no substeps
284
+ sacct_command = f"""sacct -X --parsable2 \
285
+ --clusters all \
286
+ --noheader --format=JobIdRaw,State \
287
+ --starttime {sacct_starttime} \
288
+ --endtime now --name {self.run_uuid}"""
289
+
290
+ # for better readability in verbose output
291
+ sacct_command = " ".join(shlex.split(sacct_command))
292
+
267
293
  # this code is inspired by the snakemake profile:
268
294
  # https://github.com/Snakemake-Profiles/slurm
269
295
  for i in range(status_attempts):
270
296
  async with self.status_rate_limiter:
271
297
  (status_of_jobs, sacct_query_duration) = await self.job_stati(
272
- # -X: only show main job, no substeps
273
- f"sacct -X --parsable2 --noheader --format=JobIdRaw,State "
274
- f"--starttime {sacct_starttime} "
275
- f"--endtime now --name {self.run_uuid}"
298
+ sacct_command
276
299
  )
277
300
  if status_of_jobs is None and sacct_query_duration is None:
278
301
  self.logger.debug(f"could not check status of job {self.run_uuid}")
@@ -328,6 +351,16 @@ class Executor(RemoteExecutor):
328
351
  self.report_job_success(j)
329
352
  any_finished = True
330
353
  active_jobs_seen_by_sacct.remove(j.external_jobid)
354
+ elif status == "PREEMPTED" and not self._preemption_warning:
355
+ self._preemption_warning = True
356
+ self.logger.warning(
357
+ """
358
+ ===== A Job preemption occured! =====
359
+ Leave Snakemake running, if possible. Otherwise Snakemake
360
+ needs to restart this job upon a Snakemake restart.
361
+
362
+ We leave it to SLURM to resume your job(s)"""
363
+ )
331
364
  elif status == "UNKNOWN":
332
365
  # the job probably does not exist anymore, but 'sacct' did not work
333
366
  # so we assume it is finished
@@ -364,8 +397,10 @@ class Executor(RemoteExecutor):
364
397
  # about 30 sec, but can be longer in extreme cases.
365
398
  # Under 'normal' circumstances, 'scancel' is executed in
366
399
  # virtually no time.
400
+ scancel_command = f"scancel {jobids} --clusters=all"
401
+
367
402
  subprocess.check_output(
368
- f"scancel {jobids}",
403
+ scancel_command,
369
404
  text=True,
370
405
  shell=True,
371
406
  timeout=60,
@@ -0,0 +1,16 @@
1
+ # utility functions for the SLURM executor plugin
2
+
3
+ import os
4
+
5
+
6
+ def delete_slurm_environment():
7
+ """
8
+ Function to delete all environment variables
9
+ starting with 'SLURM_'. The parent shell will
10
+ still have this environment. This is needed to
11
+ submit within a SLURM job context to avoid
12
+ conflicting environments.
13
+ """
14
+ for var in os.environ:
15
+ if var.startswith("SLURM_"):
16
+ del os.environ[var]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: snakemake-executor-plugin-slurm
3
- Version: 0.8.0
3
+ Version: 0.10.0
4
4
  Summary: A Snakemake executor plugin for submitting jobs to a SLURM cluster.
5
5
  Home-page: https://github.com/snakemake/snakemake-executor-plugin-slurm
6
6
  License: MIT
@@ -0,0 +1,6 @@
1
+ snakemake_executor_plugin_slurm/__init__.py,sha256=VMIZpkp-R61GKq2jXoLxElxfSguHwgCsv0zv8-usHQY,24229
2
+ snakemake_executor_plugin_slurm/utils.py,sha256=DuJdFJsAmvFsrnpyb8kMoqxTEEmTsEVxroDS1t9qOGw,434
3
+ snakemake_executor_plugin_slurm-0.10.0.dist-info/LICENSE,sha256=YVc4xTLWMqGfFL36120k7rzXtsT6e4RkJsh68VVn12s,1076
4
+ snakemake_executor_plugin_slurm-0.10.0.dist-info/METADATA,sha256=_88ZsjTcrcyn0m2OY1LryvmW_oJY0fzWQMeBNI8lnjk,1381
5
+ snakemake_executor_plugin_slurm-0.10.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
6
+ snakemake_executor_plugin_slurm-0.10.0.dist-info/RECORD,,
@@ -1,5 +0,0 @@
1
- snakemake_executor_plugin_slurm/__init__.py,sha256=GC5yU3EsnBJBC9Z6gIQdt2GHK3QLdF0sQj5TDI6VDLo,22851
2
- snakemake_executor_plugin_slurm-0.8.0.dist-info/LICENSE,sha256=YVc4xTLWMqGfFL36120k7rzXtsT6e4RkJsh68VVn12s,1076
3
- snakemake_executor_plugin_slurm-0.8.0.dist-info/METADATA,sha256=S2aTNWZg3rDSECsTQISccHHvxkc83YyPLugtIuHKdUk,1380
4
- snakemake_executor_plugin_slurm-0.8.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
5
- snakemake_executor_plugin_slurm-0.8.0.dist-info/RECORD,,