snakemake-executor-plugin-slurm 1.0.1__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snakemake-executor-plugin-slurm might be problematic; see the linked advisory for more details.

@@ -26,9 +26,9 @@ from snakemake_interface_executor_plugins.jobs import (
26
26
  JobExecutorInterface,
27
27
  )
28
28
  from snakemake_interface_common.exceptions import WorkflowError
29
- from snakemake_executor_plugin_slurm_jobstep import get_cpu_setting
30
29
 
31
30
  from .utils import delete_slurm_environment, delete_empty_dirs, set_gres_string
31
+ from .submit_string import get_submit_command
32
32
 
33
33
 
34
34
  @dataclass
@@ -74,6 +74,18 @@ class ExecutorSettings(ExecutorSettingsBase):
74
74
  "required": False,
75
75
  },
76
76
  )
77
+ status_attempts: Optional[int] = field(
78
+ default=5,
79
+ metadata={
80
+ "help": "Defines the number of attempts to query the status of "
81
+ "all active jobs. If the status query fails, the next attempt "
82
+ "will be performed after the next status check interval."
83
+ "The default is 5 status attempts before giving up. The maximum "
84
+ "time between status checks is 180 seconds.",
85
+ "env_var": False,
86
+ "required": False,
87
+ },
88
+ )
77
89
  requeue: bool = field(
78
90
  default=False,
79
91
  metadata={
@@ -123,9 +135,10 @@ common_settings = CommonSettings(
123
135
  # Required:
124
136
  # Implementation of your executor
125
137
  class Executor(RemoteExecutor):
126
- def __post_init__(self):
138
+ def __post_init__(self, test_mode: bool = False):
127
139
  # run check whether we are running in a SLURM job context
128
140
  self.warn_on_jobcontext()
141
+ self.test_mode = test_mode
129
142
  self.run_uuid = str(uuid.uuid4())
130
143
  self.logger.info(f"SLURM run ID: {self.run_uuid}")
131
144
  self._fallback_account_arg = None
@@ -213,31 +226,28 @@ class Executor(RemoteExecutor):
213
226
  comment_str = f"rule_{job.name}"
214
227
  else:
215
228
  comment_str = f"rule_{job.name}_wildcards_{wildcard_str}"
216
- call = (
217
- f"sbatch "
218
- f"--parsable "
219
- f"--job-name {self.run_uuid} "
220
- f"--output '{slurm_logfile}' "
221
- f"--export=ALL "
222
- f"--comment '{comment_str}'"
223
- )
229
+ # check whether the 'slurm_extra' parameter is used correctly
230
+ # prior to putatively setting in the sbatch call
231
+ if job.resources.get("slurm_extra"):
232
+ self.check_slurm_extra(job)
224
233
 
225
- if not self.workflow.executor_settings.no_account:
226
- call += self.get_account_arg(job)
234
+ job_params = {
235
+ "run_uuid": self.run_uuid,
236
+ "slurm_logfile": slurm_logfile,
237
+ "comment_str": comment_str,
238
+ "account": self.get_account_arg(job),
239
+ "partition": self.get_partition_arg(job),
240
+ "workdir": self.workflow.workdir_init,
241
+ }
227
242
 
228
- call += self.get_partition_arg(job)
243
+ call = get_submit_command(job, job_params)
229
244
 
230
245
  if self.workflow.executor_settings.requeue:
231
246
  call += " --requeue"
232
247
 
233
248
  call += set_gres_string(job)
234
249
 
235
- if job.resources.get("clusters"):
236
- call += f" --clusters {job.resources.clusters}"
237
-
238
- if job.resources.get("runtime"):
239
- call += f" -t {job.resources.runtime}"
240
- else:
250
+ if not job.resources.get("runtime"):
241
251
  self.logger.warning(
242
252
  "No wall time information given. This might or might not "
243
253
  "work on your cluster. "
@@ -245,28 +255,12 @@ class Executor(RemoteExecutor):
245
255
  "default via --default-resources."
246
256
  )
247
257
 
248
- if job.resources.get("constraint"):
249
- call += f" -C '{job.resources.constraint}'"
250
- if job.resources.get("mem_mb_per_cpu"):
251
- call += f" --mem-per-cpu {job.resources.mem_mb_per_cpu}"
252
- elif job.resources.get("mem_mb"):
253
- call += f" --mem {job.resources.mem_mb}"
254
- else:
258
+ if not job.resources.get("mem_mb_per_cpu") and not job.resources.get("mem_mb"):
255
259
  self.logger.warning(
256
260
  "No job memory information ('mem_mb' or 'mem_mb_per_cpu') is given "
257
261
  "- submitting without. This might or might not work on your cluster."
258
262
  )
259
263
 
260
- if job.resources.get("nodes", False):
261
- call += f" --nodes={job.resources.get('nodes', 1)}"
262
-
263
- # fixes #40 - set ntasks regardless of mpi, because
264
- # SLURM v22.05 will require it for all jobs
265
- gpu_job = job.resources.get("gpu") or "gpu" in job.resources.get("gres", "")
266
- if gpu_job:
267
- call += f" --ntasks-per-gpu={job.resources.get('tasks', 1)}"
268
- else:
269
- call += f" --ntasks={job.resources.get('tasks', 1)}"
270
264
  # MPI job
271
265
  if job.resources.get("mpi", False):
272
266
  if not job.resources.get("tasks_per_node") and not job.resources.get(
@@ -278,19 +272,8 @@ class Executor(RemoteExecutor):
278
272
  "Probably not what you want."
279
273
  )
280
274
 
281
- # we need to set cpus-per-task OR cpus-per-gpu, the function
282
- # will return a string with the corresponding value
283
- call += f" {get_cpu_setting(job, gpu_job)}"
284
- if job.resources.get("slurm_extra"):
285
- self.check_slurm_extra(job)
286
- call += f" {job.resources.slurm_extra}"
287
-
288
275
  exec_job = self.format_job_exec(job)
289
276
 
290
- # ensure that workdir is set correctly
291
- # use short argument as this is the same in all slurm versions
292
- # (see https://github.com/snakemake/snakemake/issues/2014)
293
- call += f" -D {self.workflow.workdir_init}"
294
277
  # and finally the job to execute with all the snakemake parameters
295
278
  call += f' --wrap="{exec_job}"'
296
279
 
@@ -376,7 +359,11 @@ class Executor(RemoteExecutor):
376
359
 
377
360
  sacct_query_durations = []
378
361
 
379
- status_attempts = 5
362
+ status_attempts = self.workflow.executor_settings.status_attempts
363
+ self.logger.debug(
364
+ f"Checking the status of {len(active_jobs)} active jobs "
365
+ f"with {status_attempts} attempts."
366
+ )
380
367
 
381
368
  active_jobs_ids = {job_info.external_jobid for job_info in active_jobs}
382
369
  active_jobs_seen_by_sacct = set()
@@ -510,7 +497,7 @@ We leave it to SLURM to resume your job(s)"""
510
497
  self.next_seconds_between_status_checks + 10, max_sleep_time
511
498
  )
512
499
  else:
513
- self.next_seconds_between_status_checks = None
500
+ self.next_seconds_between_status_checks = 40
514
501
 
515
502
  def cancel_jobs(self, active_jobs: List[SubmittedJobInfo]):
516
503
  # Cancel all active jobs.
@@ -570,10 +557,22 @@ We leave it to SLURM to resume your job(s)"""
570
557
  for entry in csv.reader(StringIO(command_res), delimiter="|")
571
558
  }
572
559
  except subprocess.CalledProcessError as e:
573
- self.logger.error(
574
- f"The job status query failed with command: {command}\n"
575
- f"Error message: {e.stderr.strip()}\n"
576
- )
560
+ error_message = e.stderr.strip()
561
+ if "slurm_persist_conn_open_without_init" in error_message:
562
+ self.logger.warning(
563
+ "The SLURM database might not be available ... "
564
+ f"Error message: '{error_message}'"
565
+ "This error message indicates that the SLURM database is currently "
566
+ "not available. This is not an error of the Snakemake plugin, "
567
+ "but some kind of server issue. "
568
+ "Please consult with your HPC provider."
569
+ )
570
+ else:
571
+ self.logger.error(
572
+ f"The job status query failed with command '{command}'"
573
+ f"Error message: '{error_message}'"
574
+ "This error message is not expected, please report it back to us."
575
+ )
577
576
  pass
578
577
 
579
578
  return (res, query_duration)
@@ -684,7 +683,7 @@ We leave it to SLURM to resume your job(s)"""
684
683
  )
685
684
  return ""
686
685
 
687
- if account not in accounts:
686
+ if account.lower() not in accounts:
688
687
  raise WorkflowError(
689
688
  f"The given account {account} appears to be invalid. Available "
690
689
  f"accounts:\n{', '.join(accounts)}"
from snakemake_executor_plugin_slurm_jobstep import get_cpu_setting
from types import SimpleNamespace


def get_submit_command(job, params):
    """
    Build the ``sbatch`` command line for submitting *job*.

    Parameters
    ----------
    job:
        Snakemake job object; its ``resources`` mapping supplies the
        SLURM-relevant settings (runtime, memory, tasks, gres, ...).
    params: dict
        Pre-rendered submission parameters with keys ``run_uuid``,
        ``slurm_logfile``, ``comment_str``, ``account``, ``partition``
        and ``workdir``. ``account`` / ``partition`` already contain
        their full flag strings ('-A <name>' / '-p <name>') or are empty.

    Returns
    -------
    str
        The assembled ``sbatch`` invocation; the caller appends the
        ``--wrap`` payload and any further flags.
    """
    # Attribute-style access reads better than dict indexing below.
    params = SimpleNamespace(**params)

    call = (
        f"sbatch "
        f"--parsable "
        f"--job-name {params.run_uuid} "
        f'--output "{params.slurm_logfile}" '
        f"--export=ALL "
        f'--comment "{params.comment_str}"'
    )

    # No account or partition checking is required here;
    # checking is done in the submit function.

    # 'account' already contains '-A <account_name>' (or is empty).
    call += f" {params.account}"
    # 'partition' already contains '-p <partition_name>' (or is empty).
    call += f" {params.partition}"

    if job.resources.get("clusters"):
        call += f" --clusters {job.resources.clusters}"

    if job.resources.get("runtime"):
        call += f" -t {job.resources.runtime}"

    # A constraint/qos value is forwarded even when it is an empty string
    # (a truthiness check alone would drop ""), hence the isinstance test.
    constraint = job.resources.get("constraint")
    if constraint or isinstance(constraint, str):
        call += f" -C '{constraint}'"

    qos = job.resources.get("qos")
    if qos or isinstance(qos, str):
        call += f" --qos='{qos}'"

    if job.resources.get("mem_mb_per_cpu"):
        call += f" --mem-per-cpu {job.resources.mem_mb_per_cpu}"
    elif job.resources.get("mem_mb"):
        call += f" --mem {job.resources.mem_mb}"

    if job.resources.get("nodes", False):
        call += f" --nodes={job.resources.get('nodes', 1)}"

    # fixes #40 - set ntasks regardless of mpi, because
    # SLURM v22.05 will require it for all jobs
    gpu_job = job.resources.get("gpu") or "gpu" in job.resources.get("gres", "")
    if gpu_job:
        call += f" --ntasks-per-gpu={job.resources.get('tasks', 1)}"
    else:
        call += f" --ntasks={job.resources.get('tasks', 1)}"

    # We need to set cpus-per-task OR cpus-per-gpu; the helper returns the
    # string with the corresponding flag and value.
    call += f" {get_cpu_setting(job, gpu_job)}"
    if job.resources.get("slurm_extra"):
        call += f" {job.resources.slurm_extra}"

    # Ensure that workdir is set correctly; use the short argument as this
    # is the same in all slurm versions
    # (see https://github.com/snakemake/snakemake/issues/2014)
    call += f" -D '{params.workdir}'"

    return call
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: snakemake-executor-plugin-slurm
3
- Version: 1.0.1
3
+ Version: 1.2.0
4
4
  Summary: A Snakemake executor plugin for submitting jobs to a SLURM cluster.
5
5
  License: MIT
6
6
  Keywords: snakemake,plugin,executor,cluster,slurm
@@ -0,0 +1,7 @@
1
+ snakemake_executor_plugin_slurm/__init__.py,sha256=EqMKNkKYVFeDfw2pwCnFKYxgKOGJazlDm658wvFvQN0,30942
2
+ snakemake_executor_plugin_slurm/submit_string.py,sha256=sXzMm5SVNQ4upIOcsIZjUqj7khnG-lieo5yJSSus5sc,2483
3
+ snakemake_executor_plugin_slurm/utils.py,sha256=ZzXiXFDVLs15PLJnDP0eq98fNCtzlLbhtT03ec8Ou34,3578
4
+ snakemake_executor_plugin_slurm-1.2.0.dist-info/LICENSE,sha256=YVc4xTLWMqGfFL36120k7rzXtsT6e4RkJsh68VVn12s,1076
5
+ snakemake_executor_plugin_slurm-1.2.0.dist-info/METADATA,sha256=rOpk-4_-aw3w-2X0POSy6rAvFZnPfzArN6MT9CuUxwA,1360
6
+ snakemake_executor_plugin_slurm-1.2.0.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
7
+ snakemake_executor_plugin_slurm-1.2.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.1
2
+ Generator: poetry-core 2.1.2
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,6 +0,0 @@
1
- snakemake_executor_plugin_slurm/__init__.py,sha256=d9aiBqYfhZY54ooqiawCQ67Kv2cFVpUrLCtSAjFvr6c,30722
2
- snakemake_executor_plugin_slurm/utils.py,sha256=ZzXiXFDVLs15PLJnDP0eq98fNCtzlLbhtT03ec8Ou34,3578
3
- snakemake_executor_plugin_slurm-1.0.1.dist-info/LICENSE,sha256=YVc4xTLWMqGfFL36120k7rzXtsT6e4RkJsh68VVn12s,1076
4
- snakemake_executor_plugin_slurm-1.0.1.dist-info/METADATA,sha256=BK6xoB4FHYho7p5mxYUOlsp2T8dipyuUIV21b0sLVOE,1360
5
- snakemake_executor_plugin_slurm-1.0.1.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
6
- snakemake_executor_plugin_slurm-1.0.1.dist-info/RECORD,,