snakemake-executor-plugin-slurm 1.4.0__tar.gz → 1.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snakemake-executor-plugin-slurm might be problematic. Click here for more details.
- {snakemake_executor_plugin_slurm-1.4.0 → snakemake_executor_plugin_slurm-1.5.0}/PKG-INFO +3 -1
- {snakemake_executor_plugin_slurm-1.4.0 → snakemake_executor_plugin_slurm-1.5.0}/pyproject.toml +5 -2
- {snakemake_executor_plugin_slurm-1.4.0 → snakemake_executor_plugin_slurm-1.5.0}/snakemake_executor_plugin_slurm/__init__.py +66 -10
- snakemake_executor_plugin_slurm-1.5.0/snakemake_executor_plugin_slurm/efficiency_report.py +185 -0
- {snakemake_executor_plugin_slurm-1.4.0 → snakemake_executor_plugin_slurm-1.5.0}/LICENSE +0 -0
- {snakemake_executor_plugin_slurm-1.4.0 → snakemake_executor_plugin_slurm-1.5.0}/README.md +0 -0
- {snakemake_executor_plugin_slurm-1.4.0 → snakemake_executor_plugin_slurm-1.5.0}/snakemake_executor_plugin_slurm/submit_string.py +0 -0
- {snakemake_executor_plugin_slurm-1.4.0 → snakemake_executor_plugin_slurm-1.5.0}/snakemake_executor_plugin_slurm/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: snakemake-executor-plugin-slurm
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.5.0
|
|
4
4
|
Summary: A Snakemake executor plugin for submitting jobs to a SLURM cluster.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: snakemake,plugin,executor,cluster,slurm
|
|
@@ -12,6 +12,8 @@ Classifier: Programming Language :: Python :: 3
|
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Requires-Dist: numpy (>=1.26.4,<2.0.0)
|
|
16
|
+
Requires-Dist: pandas (>=2.2.3,<3.0.0)
|
|
15
17
|
Requires-Dist: snakemake-executor-plugin-slurm-jobstep (>=0.3.0,<0.4.0)
|
|
16
18
|
Requires-Dist: snakemake-interface-common (>=1.13.0,<2.0.0)
|
|
17
19
|
Requires-Dist: snakemake-interface-executor-plugins (>=9.1.1,<10.0.0)
|
{snakemake_executor_plugin_slurm-1.4.0 → snakemake_executor_plugin_slurm-1.5.0}/pyproject.toml
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "snakemake-executor-plugin-slurm"
|
|
3
|
-
version = "1.
|
|
3
|
+
version = "1.5.0"
|
|
4
4
|
description = "A Snakemake executor plugin for submitting jobs to a SLURM cluster."
|
|
5
5
|
authors = [
|
|
6
6
|
"Christian Meesters <meesters@uni-mainz.de>",
|
|
@@ -18,6 +18,8 @@ python = "^3.11"
|
|
|
18
18
|
snakemake-interface-common = "^1.13.0"
|
|
19
19
|
snakemake-interface-executor-plugins = "^9.1.1"
|
|
20
20
|
snakemake-executor-plugin-slurm-jobstep = "^0.3.0"
|
|
21
|
+
pandas = "^2.2.3"
|
|
22
|
+
numpy = "^1.26.4"
|
|
21
23
|
throttler = "^1.2.2"
|
|
22
24
|
|
|
23
25
|
[tool.poetry.group.dev.dependencies]
|
|
@@ -25,7 +27,8 @@ black = "^23.7.0"
|
|
|
25
27
|
flake8 = "^6.1.0"
|
|
26
28
|
coverage = "^7.3.1"
|
|
27
29
|
pytest = "^8.3.5"
|
|
28
|
-
snakemake = "^9.
|
|
30
|
+
snakemake = "^9.6.0"
|
|
31
|
+
pandas = "^2.2.3"
|
|
29
32
|
|
|
30
33
|
[tool.coverage.run]
|
|
31
34
|
omit = [".*", "*/site-packages/*", "Snakefile"]
|
|
@@ -3,7 +3,6 @@ __copyright__ = "Copyright 2023, David Lähnemann, Johannes Köster, Christian M
|
|
|
3
3
|
__email__ = "johannes.koester@uni-due.de"
|
|
4
4
|
__license__ = "MIT"
|
|
5
5
|
|
|
6
|
-
import atexit
|
|
7
6
|
import csv
|
|
8
7
|
from io import StringIO
|
|
9
8
|
import os
|
|
@@ -16,6 +15,7 @@ from dataclasses import dataclass, field
|
|
|
16
15
|
from datetime import datetime, timedelta
|
|
17
16
|
from typing import List, Generator, Optional
|
|
18
17
|
import uuid
|
|
18
|
+
|
|
19
19
|
from snakemake_interface_executor_plugins.executors.base import SubmittedJobInfo
|
|
20
20
|
from snakemake_interface_executor_plugins.executors.remote import RemoteExecutor
|
|
21
21
|
from snakemake_interface_executor_plugins.settings import (
|
|
@@ -27,7 +27,12 @@ from snakemake_interface_executor_plugins.jobs import (
|
|
|
27
27
|
)
|
|
28
28
|
from snakemake_interface_common.exceptions import WorkflowError
|
|
29
29
|
|
|
30
|
-
from .utils import
|
|
30
|
+
from .utils import (
|
|
31
|
+
delete_slurm_environment,
|
|
32
|
+
delete_empty_dirs,
|
|
33
|
+
set_gres_string,
|
|
34
|
+
)
|
|
35
|
+
from .efficiency_report import create_efficiency_report
|
|
31
36
|
from .submit_string import get_submit_command
|
|
32
37
|
|
|
33
38
|
|
|
@@ -106,6 +111,35 @@ class ExecutorSettings(ExecutorSettingsBase):
|
|
|
106
111
|
"required": False,
|
|
107
112
|
},
|
|
108
113
|
)
|
|
114
|
+
efficiency_report: bool = field(
|
|
115
|
+
default=False,
|
|
116
|
+
metadata={
|
|
117
|
+
"help": "Generate an efficiency report at the end of the workflow. "
|
|
118
|
+
"This flag has no effect, if not set.",
|
|
119
|
+
"env_var": False,
|
|
120
|
+
"required": False,
|
|
121
|
+
},
|
|
122
|
+
)
|
|
123
|
+
efficiency_report_path: Optional[Path] = field(
|
|
124
|
+
default=None,
|
|
125
|
+
metadata={
|
|
126
|
+
"help": "Path to the efficiency report file. "
|
|
127
|
+
"If not set, the report will be written to "
|
|
128
|
+
"the current working directory with the name "
|
|
129
|
+
"'efficiency_report_<run_uuid>.csv'. "
|
|
130
|
+
"This flag has no effect, if not set.",
|
|
131
|
+
"env_var": False,
|
|
132
|
+
"required": False,
|
|
133
|
+
},
|
|
134
|
+
)
|
|
135
|
+
efficiency_threshold: Optional[float] = field(
|
|
136
|
+
default=0.8,
|
|
137
|
+
metadata={
|
|
138
|
+
"help": "The efficiency threshold for the efficiency report. "
|
|
139
|
+
"Jobs with an efficiency below this threshold will be reported. "
|
|
140
|
+
"This flag has no effect, if not set.",
|
|
141
|
+
},
|
|
142
|
+
)
|
|
109
143
|
reservation: Optional[str] = field(
|
|
110
144
|
default=None,
|
|
111
145
|
metadata={
|
|
@@ -157,7 +191,26 @@ class Executor(RemoteExecutor):
|
|
|
157
191
|
if self.workflow.executor_settings.logdir
|
|
158
192
|
else Path(".snakemake/slurm_logs").resolve()
|
|
159
193
|
)
|
|
160
|
-
|
|
194
|
+
|
|
195
|
+
    def shutdown(self) -> None:
        """
        Shutdown the executor.

        This method is overloaded, to include the cleaning of old log files
        and to optionally create an efficiency report.
        """
        # First, we invoke the original shutdown method
        super().shutdown()

        # Next, clean up old log files, unconditionally.
        self.clean_old_logs()
        # If the efficiency report is enabled, create it.
        # NOTE(review): efficiency_threshold defaults to 0.8 while the report
        # computes efficiency as a percentage (0-100) — confirm intended units.
        if self.workflow.executor_settings.efficiency_report:
            create_efficiency_report(
                e_threshold=self.workflow.executor_settings.efficiency_threshold,
                run_uuid=self.run_uuid,
                e_report_path=self.workflow.executor_settings.efficiency_report_path,
                logger=self.logger,
            )
|
|
161
214
|
|
|
162
215
|
def clean_old_logs(self) -> None:
|
|
163
216
|
"""Delete files older than specified age from the SLURM log directory."""
|
|
@@ -168,7 +221,8 @@ class Executor(RemoteExecutor):
|
|
|
168
221
|
return
|
|
169
222
|
cutoff_secs = age_cutoff * 86400
|
|
170
223
|
current_time = time.time()
|
|
171
|
-
self.logger.info(f"Cleaning up log files older than {age_cutoff} day(s)")
|
|
224
|
+
self.logger.info(f"Cleaning up log files older than {age_cutoff} day(s).")
|
|
225
|
+
|
|
172
226
|
for path in self.slurm_logdir.rglob("*.log"):
|
|
173
227
|
if path.is_file():
|
|
174
228
|
try:
|
|
@@ -176,12 +230,14 @@ class Executor(RemoteExecutor):
|
|
|
176
230
|
if file_age > cutoff_secs:
|
|
177
231
|
path.unlink()
|
|
178
232
|
except (OSError, FileNotFoundError) as e:
|
|
179
|
-
self.logger.
|
|
233
|
+
self.logger.error(f"Could not delete logfile {path}: {e}")
|
|
180
234
|
# we need a 2nd iteration to remove putatively empty directories
|
|
181
235
|
try:
|
|
182
236
|
delete_empty_dirs(self.slurm_logdir)
|
|
183
237
|
except (OSError, FileNotFoundError) as e:
|
|
184
|
-
self.logger.
|
|
238
|
+
self.logger.error(
|
|
239
|
+
f"Could not delete empty directories in {self.slurm_logdir}: {e}"
|
|
240
|
+
)
|
|
185
241
|
|
|
186
242
|
def warn_on_jobcontext(self, done=None):
|
|
187
243
|
if not done:
|
|
@@ -741,10 +797,10 @@ We leave it to SLURM to resume your job(s)"""
|
|
|
741
797
|
jobname = re.compile(r"--job-name[=?|\s+]|-J\s?")
|
|
742
798
|
if re.search(jobname, job.resources.slurm_extra):
|
|
743
799
|
raise WorkflowError(
|
|
744
|
-
"The --job-name option is not allowed in the 'slurm_extra' "
|
|
745
|
-
"
|
|
746
|
-
"
|
|
747
|
-
"
|
|
800
|
+
"The --job-name option is not allowed in the 'slurm_extra' parameter. "
|
|
801
|
+
"The job name is set by snakemake and must not be overwritten. "
|
|
802
|
+
"It is internally used to check the stati of the all submitted jobs "
|
|
803
|
+
"by this workflow."
|
|
748
804
|
"Please consult the documentation if you are unsure how to "
|
|
749
805
|
"query the status of your jobs."
|
|
750
806
|
)
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import subprocess
|
|
5
|
+
import shlex
|
|
6
|
+
|
|
7
|
+
import os # only temporarily needed for printf debugging
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def time_to_seconds(time_str):
    """Convert a SLURM time string to seconds.

    Accepts the formats ``sacct`` emits for Elapsed/TotalCPU:
    ``[D-]H:M:S``, ``M:S`` or plain ``S``.  Empty or missing values
    yield 0, as does any unrecognized format.
    """
    if pd.isna(time_str) or time_str.strip() == "":
        return 0
    time_str = time_str.strip()
    days = 0
    # sacct prefixes a day count as "D-" for jobs running 24h or longer;
    # without this, int("1-02") would raise ValueError.
    if "-" in time_str:
        day_part, time_str = time_str.split("-", 1)
        days = int(day_part)
    parts = time_str.split(":")

    if len(parts) == 3:  # H:M:S
        seconds = int(parts[0]) * 3600 + int(parts[1]) * 60 + float(parts[2])
    elif len(parts) == 2:  # M:S
        seconds = int(parts[0]) * 60 + float(parts[1])
    elif len(parts) == 1:  # S
        seconds = float(parts[0])
    else:
        return 0
    return days * 86400 + seconds
+
|
|
25
|
+
|
|
26
|
+
def parse_maxrss(maxrss):
    """Convert a sacct MaxRSS value (e.g. ``123K``, ``4.5G``) to megabytes.

    Missing, empty or zero values map to 0; an unrecognized string also
    maps to 0.
    """
    if pd.isna(maxrss) or maxrss.strip() == "" or maxrss == "0":
        return 0
    matched = re.match(r"(\d+(?:\.\d+)?)([KMG]?)", maxrss)
    if not matched:
        return 0
    amount = float(matched.group(1))
    suffix = matched.group(2)
    # scale factors relative to MB; a bare number is taken as MB
    to_mb = {"K": 1 / 1024, "M": 1, "G": 1024}
    return amount * to_mb.get(suffix, 1)
37
|
+
|
|
38
|
+
|
|
39
|
+
def parse_reqmem(reqmem, number_of_nodes=1):
    """Convert a sacct ReqMem value to megabytes.

    Understands per-CPU (``4Gc``), per-node (``16Gn`` or ``16G/node``)
    and plain (``2.5G``) forms.  Per-node requests are multiplied by
    ``number_of_nodes`` (falling back to 1 when that is NaN); per-CPU
    and plain values are returned as-is for the caller to scale.
    """
    if pd.isna(reqmem) or reqmem.strip() == "":
        return 0
    # e.g. 4Gc (per-CPU) / 16Gn (per-node) / 2.5G
    matched = re.match(r"(\d+(?:\.\d+)?)([KMG])?([cn]|/node)?", reqmem)
    if not matched:
        return 0
    amount, suffix, scope = matched.groups()
    to_mb = {"K": 1 / 1024, "M": 1, "G": 1024}
    mem_mb = float(amount) * to_mb.get(suffix, 1)
    if scope in ("n", "/node"):  # per-node request
        nodes = 1 if pd.isna(number_of_nodes) else number_of_nodes
        return mem_mb * nodes
    # `c` → per-CPU; caller may multiply later.  Default: total.
    return mem_mb
56
|
+
|
|
57
|
+
|
|
58
|
+
def create_efficiency_report(e_threshold, run_uuid, e_report_path, logger):
    """
    Fetch sacct job data for a Snakemake workflow and compute efficiency
    metrics.

    Queries ``sacct`` for all jobs named after ``run_uuid``, computes CPU
    and memory efficiency per job, warns about jobs below ``e_threshold``
    and writes the result to a CSV file.

    Parameters
    ----------
    e_threshold : float
        Jobs whose CPU efficiency is below this value are reported.
        NOTE(review): the efficiency column holds percentages (0-100),
        while the setting's default is 0.8 — confirm the intended unit.
    run_uuid : str
        The workflow run UUID used as the sacct ``--name`` filter.
    e_report_path : str | Path | None
        Directory for the report; defaults to the current working
        directory.
    logger
        A ``logging.Logger``-like object for warnings/errors/info.

    Returns
    -------
    None when no job data could be retrieved; otherwise writes the
    report as a side effect.
    """
    cmd = f"sacct --name={run_uuid} --parsable2 --noheader"
    cmd += (
        " --format=JobID,JobName,Comment,Elapsed,TotalCPU," "NNodes,NCPUS,MaxRSS,ReqMem"
    )

    try:
        result = subprocess.run(
            shlex.split(cmd), capture_output=True, text=True, check=True
        )
        raw = result.stdout.strip()
        if not raw:
            logger.warning(f"No job data found for workflow {run_uuid}.")
            return None
        lines = raw.split("\n")

    # FileNotFoundError: 'sacct' is not installed / not on PATH at all —
    # previously this crashed the shutdown instead of being logged.
    except (subprocess.CalledProcessError, FileNotFoundError):
        logger.error(f"Failed to retrieve job data for workflow {run_uuid}.")
        return None

    # Convert the parsable2 ("|"-separated) output to a DataFrame
    df = pd.DataFrame(
        (line.split("|") for line in lines),
        columns=[
            "JobID",
            "JobName",
            "Comment",
            "Elapsed",
            "TotalCPU",
            "NNodes",
            "NCPUS",
            "MaxRSS",
            "ReqMem",
        ],
    )

    # If the "Comment" column is empty,
    # a) delete the column
    # b) issue a warning
    if df["Comment"].replace("", pd.NA).isna().all():
        logger.warning(
            f"No comments found for workflow {run_uuid}. "
            "This field is used to store the rule name. "
            "Please ensure that the 'comment' field is set for your cluster. "
            "Administrators can set this up in the SLURM configuration."
        )
        df.drop(columns=["Comment"], inplace=True)
        # remember, that the comment column is not available
        nocomment = True
    # else: rename the column to 'RuleName'
    else:
        df.rename(columns={"Comment": "RuleName"}, inplace=True)
        nocomment = False
    # Convert types
    df["NNodes"] = pd.to_numeric(df["NNodes"], errors="coerce")
    df["NCPUS"] = pd.to_numeric(df["NCPUS"], errors="coerce")

    # Convert time fields
    df["Elapsed_sec"] = df["Elapsed"].apply(time_to_seconds)
    df["TotalCPU_sec"] = df["TotalCPU"].apply(time_to_seconds)

    # Compute CPU efficiency in percent; clip() guards against
    # division by zero for sub-second or malformed records.
    df["CPU Efficiency (%)"] = (
        df["TotalCPU_sec"]
        / (df["Elapsed_sec"].clip(lower=1) * df["NCPUS"].clip(lower=1))
    ) * 100
    df.replace([np.inf, -np.inf], 0, inplace=True)

    # Convert MaxRSS
    df["MaxRSS_MB"] = df["MaxRSS"].apply(parse_maxrss)

    # Convert ReqMem and calculate memory efficiency
    df["RequestedMem_MB"] = df.apply(
        lambda row: parse_reqmem(row["ReqMem"], row["NNodes"]), axis=1
    )
    df["Memory Usage (%)"] = df.apply(
        lambda row: (
            (row["MaxRSS_MB"] / row["RequestedMem_MB"] * 100)
            if row["RequestedMem_MB"] > 0
            else 0
        ),
        axis=1,
    )

    df["Memory Usage (%)"] = df["Memory Usage (%)"].fillna(0).round(2)

    # Drop all rows containing "batch" or "extern" as job names
    # (SLURM bookkeeping steps, not actual rule jobs)
    df = df[~df["JobName"].str.contains("batch|extern", na=False)]

    # Log warnings for low efficiency
    for _, row in df.iterrows():
        if row["CPU Efficiency (%)"] < e_threshold:
            if nocomment:
                logger.warning(
                    f"Job {row['JobID']} ({row['JobName']}) "
                    f"has low CPU efficiency: {row['CPU Efficiency (%)']}%."
                )
            else:
                # if the comment column is available, we can use it to
                # identify the rule name
                logger.warning(
                    f"Job {row['JobID']} for rule '{row['RuleName']}' "
                    f"({row['JobName']}) has low CPU efficiency: "
                    f"{row['CPU Efficiency (%)']}%."
                )

    # Save the report to a CSV file, honoring a custom report
    # directory, if specified.
    logfile = f"efficiency_report_{run_uuid}.csv"
    if e_report_path:
        logfile = Path(e_report_path) / logfile
    else:
        # Path.cwd() is a classmethod — no throwaway instance needed
        logfile = Path.cwd() / logfile
    # ensure the directory exists
    logfile.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(logfile)

    # write out the efficiency report at normal verbosity in any case
    logger.info(f"Efficiency report for workflow {run_uuid} saved to {logfile}.")
    # state directory contents for debugging purposes
    logger.debug(
        f"Current directory contents in '{Path.cwd()}': {os.listdir(Path.cwd())}"
    )
|
|
File without changes
|
|
File without changes
|
|
File without changes
|