snakemake-executor-plugin-slurm 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snakemake-executor-plugin-slurm might be problematic. Click here for more details.

@@ -74,6 +74,18 @@ class ExecutorSettings(ExecutorSettingsBase):
74
74
  "required": False,
75
75
  },
76
76
  )
77
+ status_attempts: Optional[int] = field(
78
+ default=5,
79
+ metadata={
80
+ "help": "Defines the number of attempts to query the status of "
81
+ "all active jobs. If the status query fails, the next attempt "
82
+ "will be performed after the next status check interval."
83
+ "The default is 5 status attempts before giving up. The maximum "
84
+ "time between status checks is 180 seconds.",
85
+ "env_var": False,
86
+ "required": False,
87
+ },
88
+ )
77
89
  requeue: bool = field(
78
90
  default=False,
79
91
  metadata={
@@ -85,6 +97,15 @@ class ExecutorSettings(ExecutorSettingsBase):
85
97
  "required": False,
86
98
  },
87
99
  )
100
+ no_account: bool = field(
101
+ default=False,
102
+ metadata={
103
+ "help": "Do not use any account for submission. "
104
+ "This flag has no effect, if not set.",
105
+ "env_var": False,
106
+ "required": False,
107
+ },
108
+ )
88
109
 
89
110
 
90
111
  # Required:
@@ -213,7 +234,9 @@ class Executor(RemoteExecutor):
213
234
  f"--comment '{comment_str}'"
214
235
  )
215
236
 
216
- call += self.get_account_arg(job)
237
+ if not self.workflow.executor_settings.no_account:
238
+ call += self.get_account_arg(job)
239
+
217
240
  call += self.get_partition_arg(job)
218
241
 
219
242
  if self.workflow.executor_settings.requeue:
@@ -365,7 +388,11 @@ class Executor(RemoteExecutor):
365
388
 
366
389
  sacct_query_durations = []
367
390
 
368
- status_attempts = 5
391
+ status_attempts = self.workflow.executor_settings.status_attempts
392
+ self.logger.debug(
393
+ f"Checking the status of {len(active_jobs)} active jobs "
394
+ f"with {status_attempts} attempts."
395
+ )
369
396
 
370
397
  active_jobs_ids = {job_info.external_jobid for job_info in active_jobs}
371
398
  active_jobs_seen_by_sacct = set()
@@ -499,7 +526,7 @@ We leave it to SLURM to resume your job(s)"""
499
526
  self.next_seconds_between_status_checks + 10, max_sleep_time
500
527
  )
501
528
  else:
502
- self.next_seconds_between_status_checks = None
529
+ self.next_seconds_between_status_checks = 40
503
530
 
504
531
  def cancel_jobs(self, active_jobs: List[SubmittedJobInfo]):
505
532
  # Cancel all active jobs.
@@ -559,10 +586,22 @@ We leave it to SLURM to resume your job(s)"""
559
586
  for entry in csv.reader(StringIO(command_res), delimiter="|")
560
587
  }
561
588
  except subprocess.CalledProcessError as e:
562
- self.logger.error(
563
- f"The job status query failed with command: {command}\n"
564
- f"Error message: {e.stderr.strip()}\n"
565
- )
589
+ error_message = e.stderr.strip()
590
+ if "slurm_persist_conn_open_without_init" in error_message:
591
+ self.logger.warning(
592
+ "The SLURM database might not be available ... "
593
+ f"Error message: '{error_message}'"
594
+ "This error message indicates that the SLURM database is currently "
595
+ "not available. This is not an error of the Snakemake plugin, "
596
+ "but some kind of server issue. "
597
+ "Please consult with your HPC provider."
598
+ )
599
+ else:
600
+ self.logger.error(
601
+ f"The job status query failed with command '{command}'"
602
+ f"Error message: '{error_message}'"
603
+ "This error message is not expected, please report it back to us."
604
+ )
566
605
  pass
567
606
 
568
607
  return (res, query_duration)
@@ -634,35 +673,45 @@ We leave it to SLURM to resume your job(s)"""
634
673
  """
635
674
  tests whether the given account is registered, raises an error, if not
636
675
  """
637
- cmd = f'sacctmgr -n -s list user "{os.environ["USER"]}" format=account%256'
676
+ cmd = "sshare -U --format Account --noheader"
638
677
  try:
639
678
  accounts = subprocess.check_output(
640
679
  cmd, shell=True, text=True, stderr=subprocess.PIPE
641
680
  )
642
681
  except subprocess.CalledProcessError as e:
643
- sacctmgr_report = (
644
- "Unable to test the validity of the given or guessed "
645
- f"SLURM account '{account}' with sacctmgr: {e.stderr}."
682
+ sshare_report = (
683
+ "Unable to test the validity of the given or guessed"
684
+ f" SLURM account '{account}' with sshare: {e.stderr}."
646
685
  )
686
+ accounts = ""
687
+
688
+ if not accounts.strip():
689
+ cmd = f'sacctmgr -n -s list user "{os.environ["USER"]}" format=account%256'
647
690
  try:
648
- cmd = "sshare -U --format Account --noheader"
649
691
  accounts = subprocess.check_output(
650
692
  cmd, shell=True, text=True, stderr=subprocess.PIPE
651
693
  )
652
- except subprocess.CalledProcessError as e2:
653
- sshare_report = (
654
- "Unable to test the validity of the given or guessed"
655
- f" SLURM account '{account}' with sshare: {e2.stderr}."
694
+ except subprocess.CalledProcessError as e:
695
+ sacctmgr_report = (
696
+ "Unable to test the validity of the given or guessed "
697
+ f"SLURM account '{account}' with sacctmgr: {e.stderr}."
656
698
  )
657
699
  raise WorkflowError(
658
- f"The 'sacctmgr' reported: '{sacctmgr_report}' "
659
- f"and likewise 'sshare' reported: '{sshare_report}'."
700
+ f"The 'sshare' reported: '{sshare_report}' "
701
+ f"and likewise 'sacctmgr' reported: '{sacctmgr_report}'."
660
702
  )
661
703
 
662
704
  # The set() has been introduced during review to eliminate
663
705
  # duplicates. They are not harmful, but disturbing to read.
664
706
  accounts = set(_.strip() for _ in accounts.split("\n") if _)
665
707
 
708
+ if not accounts:
709
+ self.logger.warning(
710
+ f"Both 'sshare' and 'sacctmgr' returned empty results for account "
711
+ f"'{account}'. Proceeding without account validation."
712
+ )
713
+ return ""
714
+
666
715
  if account not in accounts:
667
716
  raise WorkflowError(
668
717
  f"The given account {account} appears to be invalid. Available "
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: snakemake-executor-plugin-slurm
3
- Version: 1.0.0
3
+ Version: 1.1.0
4
4
  Summary: A Snakemake executor plugin for submitting jobs to a SLURM cluster.
5
5
  License: MIT
6
6
  Keywords: snakemake,plugin,executor,cluster,slurm
@@ -0,0 +1,6 @@
1
+ snakemake_executor_plugin_slurm/__init__.py,sha256=yNz8JRS5jDcY4Jyr16Fvk5afGMDyYAEHuoackPcK-MI,32142
2
+ snakemake_executor_plugin_slurm/utils.py,sha256=ZzXiXFDVLs15PLJnDP0eq98fNCtzlLbhtT03ec8Ou34,3578
3
+ snakemake_executor_plugin_slurm-1.1.0.dist-info/LICENSE,sha256=YVc4xTLWMqGfFL36120k7rzXtsT6e4RkJsh68VVn12s,1076
4
+ snakemake_executor_plugin_slurm-1.1.0.dist-info/METADATA,sha256=0h-JOJUxaORswgyMb18PpTcvGlI1lrpUUSXR9h8kBWk,1360
5
+ snakemake_executor_plugin_slurm-1.1.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
6
+ snakemake_executor_plugin_slurm-1.1.0.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- snakemake_executor_plugin_slurm/__init__.py,sha256=Bpro_L6Ca3kjnYXbpjpDKWFYE-fgZCW0sW41claGpp4,30090
2
- snakemake_executor_plugin_slurm/utils.py,sha256=ZzXiXFDVLs15PLJnDP0eq98fNCtzlLbhtT03ec8Ou34,3578
3
- snakemake_executor_plugin_slurm-1.0.0.dist-info/LICENSE,sha256=YVc4xTLWMqGfFL36120k7rzXtsT6e4RkJsh68VVn12s,1076
4
- snakemake_executor_plugin_slurm-1.0.0.dist-info/METADATA,sha256=3wsaiJjVQrZXR5rcgtSqCU-HMWesfEqwtECI8COdeq0,1360
5
- snakemake_executor_plugin_slurm-1.0.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
6
- snakemake_executor_plugin_slurm-1.0.0.dist-info/RECORD,,