snakemake-executor-plugin-slurm 1.6.0 → 1.6.1 (py3-none-any.whl)

This diff compares the contents of the two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of snakemake-executor-plugin-slurm has been flagged by the registry as possibly problematic.

snakemake_executor_plugin_slurm/__init__.py
@@ -328,13 +328,6 @@ class Executor(RemoteExecutor):
                 "- submitting without. This might or might not work on your cluster."
             )
 
-        # fixes #40 - set ntasks regardless of mpi, because
-        # SLURM v22.05 introduced the requirement for all jobs
-        gpu_job = job.resources.get("gpu") or "gpu" in job.resources.get("gres", "")
-        if gpu_job:
-            call += f" --ntasks-per-gpu={job.resources.get('tasks', 1)}"
-        else:
-            call += f" --ntasks={job.resources.get('tasks', 1)}"
-
         # MPI job
         if job.resources.get("mpi", False):
             if not job.resources.get("tasks_per_node") and not job.resources.get(
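The deleted block picked between --ntasks-per-gpu and --ntasks with a truthiness test on the gpu and gres resources. The same decision is made in get_submit_command (see the submit_string.py hunk further down, which carries the matching "fixes #40" comment), so removing the Executor copy avoids setting the task flags twice for one submission. A minimal sketch of the removed selection logic, with a plain dict standing in for job.resources and invented values:

    # Sketch only: reproduces the deleted branch, not the plugin's API.
    def task_flag(resources: dict) -> str:
        gpu_job = resources.get("gpu") or "gpu" in resources.get("gres", "")
        if gpu_job:
            return f" --ntasks-per-gpu={resources.get('tasks', 1)}"
        return f" --ntasks={resources.get('tasks', 1)}"

    print(task_flag({"gpu": 2, "tasks": 4}))  # " --ntasks-per-gpu=4"
    print(task_flag({"gres": "gpu:1"}))       # " --ntasks-per-gpu=1"
    print(task_flag({"tasks": 8}))            # " --ntasks=8"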
snakemake_executor_plugin_slurm/efficiency_report.py
@@ -55,15 +55,10 @@ def parse_reqmem(reqmem, number_of_nodes=1):
         return 0
 
 
-def create_efficiency_report(e_threshold, run_uuid, e_report_path, logger):
-    """
-    Fetch sacct job data for a Snakemake workflow
-    and compute efficiency metrics.
-    """
+def get_sacct_data(run_uuid, logger):
+    """Fetch raw sacct data for a workflow."""
     cmd = f"sacct --name={run_uuid} --parsable2 --noheader"
-    cmd += (
-        " --format=JobID,JobName,Comment,Elapsed,TotalCPU," "NNodes,NCPUS,MaxRSS,ReqMem"
-    )
+    cmd += " --format=JobID,JobName,Comment,Elapsed,TotalCPU,NNodes,NCPUS,MaxRSS,ReqMem"
 
     try:
         result = subprocess.run(
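For context: sacct with --parsable2 --noheader emits one pipe-delimited line per job or job step, without a trailing delimiter, in the field order passed to --format; the parsing code below splits each line on "|". An illustrative line with invented values:

    # Invented sample of --parsable2 output mapped onto the nine --format fields.
    line = "1234.0|python|align_reads|00:10:00|00:38:24|1|4|512000K|8G"
    columns = ["JobID", "JobName", "Comment", "Elapsed", "TotalCPU",
               "NNodes", "NCPUS", "MaxRSS", "ReqMem"]
    print(dict(zip(columns, line.split("|"))))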
@@ -74,12 +69,14 @@ def create_efficiency_report(e_threshold, run_uuid, e_report_path, logger):
             logger.warning(f"No job data found for workflow {run_uuid}.")
             return None
         lines = raw.split("\n")
+        return lines
 
     except subprocess.CalledProcessError:
         logger.error(f"Failed to retrieve job data for workflow {run_uuid}.")
         return None
 
-    # Convert to DataFrame
+
+def parse_sacct_data(lines, e_threshold, run_uuid, logger):
     df = pd.DataFrame(
         (line.split("|") for line in lines),
         columns=[
@@ -120,13 +117,6 @@ def create_efficiency_report(e_threshold, run_uuid, e_report_path, logger):
     df["Elapsed_sec"] = df["Elapsed"].apply(time_to_seconds)
     df["TotalCPU_sec"] = df["TotalCPU"].apply(time_to_seconds)
 
-    # Compute CPU efficiency
-    df["CPU Efficiency (%)"] = (
-        df["TotalCPU_sec"]
-        / (df["Elapsed_sec"].clip(lower=1) * df["NCPUS"].clip(lower=1))
-    ) * 100
-    df.replace([np.inf, -np.inf], 0, inplace=True)
-
     # Convert MaxRSS
     df["MaxRSS_MB"] = df["MaxRSS"].apply(parse_maxrss)
 
@@ -134,6 +124,37 @@ def create_efficiency_report(e_threshold, run_uuid, e_report_path, logger):
     df["RequestedMem_MB"] = df.apply(
         lambda row: parse_reqmem(row["ReqMem"], row["NNodes"]), axis=1
     )
+
+    # Drop all rows containing "batch" or "extern" as job names
+    df = df[~df["JobName"].str.contains("batch|extern", na=False)]
+
+    # Extract main job ID for grouping
+    df["MainJobID"] = df["JobID"].str.extract(r"^(\d+)", expand=False)
+
+    # Separate main jobs and job steps
+    main_jobs = df[~df["JobID"].str.contains(r"\.\d+", regex=True)].copy()
+    job_steps = df[df["JobID"].str.contains(r"\.\d+", regex=True)].copy()
+
+    # Create maps from main jobs for inheritance
+    if not nocomment:
+        rule_name_map = main_jobs.set_index("MainJobID")["RuleName"].to_dict()
+    mem_map = main_jobs.set_index("MainJobID")["RequestedMem_MB"].to_dict()
+
+    # Inherit data from main jobs to job steps
+    if not nocomment:
+        job_steps["RuleName"] = job_steps["MainJobID"].map(rule_name_map).fillna("")
+    job_steps["RequestedMem_MB"] = job_steps["MainJobID"].map(mem_map).fillna(0)
+
+    # Use job steps as the final dataset (they have the actual resource usage)
+    df = job_steps.copy()
+
+    # Compute CPU efficiency
+    df["CPU Efficiency (%)"] = (
+        df["TotalCPU_sec"]
+        / (df["Elapsed_sec"].clip(lower=1) * df["NCPUS"].clip(lower=1))
+    ) * 100
+    df.replace([np.inf, -np.inf], 0, inplace=True)
+
     df["Memory Usage (%)"] = df.apply(
         lambda row: (
             (row["MaxRSS_MB"] / row["RequestedMem_MB"] * 100)
@@ -145,9 +166,6 @@ def create_efficiency_report(e_threshold, run_uuid, e_report_path, logger):
 
     df["Memory Usage (%)"] = df["Memory Usage (%)"].fillna(0).round(2)
 
-    # Drop all rows containing "batch" or "extern" as job names
-    df = df[~df["JobName"].str.contains("batch|extern", na=False)]
-
     # Log warnings for low efficiency
     for _, row in df.iterrows():
         if row["CPU Efficiency (%)"] < e_threshold:
@@ -164,6 +182,20 @@ def create_efficiency_report(e_threshold, run_uuid, e_report_path, logger):
                 f"({row['JobName']}) has low CPU efficiency: "
                 f"{row['CPU Efficiency (%)']}%."
             )
+    return df
+
+
+def create_efficiency_report(e_threshold, run_uuid, e_report_path, logger):
+    """
+    Fetch sacct job data for a Snakemake workflow
+    and compute efficiency metrics.
+    """
+    lines = get_sacct_data(run_uuid, logger)
+
+    if lines is None or not lines:
+        return None
+
+    df = parse_sacct_data(lines, e_threshold, run_uuid, logger)
 
     # we construct a path object to allow for a customizable
     # logdir, if specified
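Net effect of the efficiency_report.py hunks: the former monolithic create_efficiency_report is split into get_sacct_data (the sacct call) and parse_sacct_data (the pandas transformation), with create_efficiency_report reduced to glue. One practical upside is that the parsing half can be fed canned lines without a SLURM installation; a sketch under that assumption (values invented, and note that the Comment handling earlier in the function, not shown in this diff, determines how such lines are interpreted):

    import logging
    from snakemake_executor_plugin_slurm.efficiency_report import parse_sacct_data

    # Canned lines in JobID|JobName|Comment|Elapsed|TotalCPU|NNodes|NCPUS|
    # MaxRSS|ReqMem order; no cluster required.
    lines = [
        "1234|snakejob|align_reads|00:10:00|00:30:00|1|4||8G",
        "1234.0|python|align_reads|00:10:00|00:30:00|1|4|512000K|8G",
    ]
    logger = logging.getLogger("efficiency-demo")
    df = parse_sacct_data(lines, e_threshold=80, run_uuid="demo", logger=logger)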
snakemake_executor_plugin_slurm/submit_string.py
@@ -55,15 +55,18 @@ def get_submit_command(job, params):
         # fixes #316 - allow unsetting of tasks per gpu
         # apparently, python's internal process manangement interfers with SLURM
         # e.g. for pytorch
-        ntasks_per_gpu = job.resources.get(
-            "tasks_per_gpu", job.resources.get("tasks", 1)
-        )
+        ntasks_per_gpu = job.resources.get("tasks_per_gpu")
+        if ntasks_per_gpu is None:
+            ntasks_per_gpu = job.resources.get("tasks")
+        if ntasks_per_gpu is None:
+            ntasks_per_gpu = 1
+
         if ntasks_per_gpu >= 1:
             call += f" --ntasks-per-gpu={ntasks_per_gpu}"
         else:
             # fixes #40 - set ntasks regardless of mpi, because
             # SLURM v22.05 will require it for all jobs
-            call += f" --ntasks={job.resources.get('tasks', 1)}"
+            call += f" --ntasks={job.resources.get('tasks') or 1}"
 
         # we need to set cpus-per-task OR cpus-per-gpu, the function
         # will return a string with the corresponding value
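The rewritten lookup matters because a mapping default only applies when a key is absent: a resource stored as an explicit None (the "unsetting" case from #316, assuming job.resources behaves like a mapping that can hold None) is returned as None, and the old nested .get() form would then evaluate None >= 1, a TypeError in Python 3. The explicit None checks, plus the "or 1" fallback for --ntasks, cover both the missing and the set-but-empty cases. A plain-dict demonstration:

    # A .get() default fires only when the key is missing, not when the
    # stored value is None (used here to model an "unset" resource).
    resources = {"tasks_per_gpu": None, "tasks": None}

    old = resources.get("tasks_per_gpu", resources.get("tasks", 1))
    print(old)                           # None -> "None >= 1" would raise

    new = resources.get("tasks_per_gpu")
    if new is None:
        new = resources.get("tasks")
    if new is None:
        new = 1
    print(new)                           # 1

    print(resources.get("tasks") or 1)   # 1, the --ntasks fallback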
METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: snakemake-executor-plugin-slurm
-Version: 1.6.0
+Version: 1.6.1
 Summary: A Snakemake executor plugin for submitting jobs to a SLURM cluster.
 License: MIT
 Keywords: snakemake,plugin,executor,cluster,slurm
@@ -15,8 +15,8 @@ Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: numpy (>=1.26.4,<3)
 Requires-Dist: pandas (>=2.2.3,<3.0.0)
 Requires-Dist: snakemake-executor-plugin-slurm-jobstep (>=0.3.0,<0.4.0)
-Requires-Dist: snakemake-interface-common (>=1.13.0,<2.0.0)
-Requires-Dist: snakemake-interface-executor-plugins (>=9.1.1,<10.0.0)
+Requires-Dist: snakemake-interface-common (>=1.21.0,<2.0.0)
+Requires-Dist: snakemake-interface-executor-plugins (>=9.3.9,<10.0.0)
 Requires-Dist: throttler (>=1.2.2,<2.0.0)
 Project-URL: Documentation, https://snakemake.github.io/snakemake-plugin-catalog/plugins/executor/slurm.html
 Project-URL: Repository, https://github.com/snakemake/snakemake-executor-plugin-slurm
snakemake_executor_plugin_slurm-1.6.1.dist-info/RECORD (added)
@@ -0,0 +1,8 @@
+snakemake_executor_plugin_slurm/__init__.py,sha256=gQA7SwXAcpXAovDC7jlCKczjd6L3gLb-n37rGSm1MY8,33570
+snakemake_executor_plugin_slurm/efficiency_report.py,sha256=crPfJDK4NojfRbu_wEw3ZmC3suMRABr5r-1rO5q3WEo,7429
+snakemake_executor_plugin_slurm/submit_string.py,sha256=Cn9qopyQwBqs1MvZFxSyRR_7mZzCVj8_vO_JNzbiqew,2896
+snakemake_executor_plugin_slurm/utils.py,sha256=7XVXtzu7bg_89wWZisW-Zk7TNQyEgK4v_y4Y3F9uOwc,4491
+snakemake_executor_plugin_slurm-1.6.1.dist-info/LICENSE,sha256=YVc4xTLWMqGfFL36120k7rzXtsT6e4RkJsh68VVn12s,1076
+snakemake_executor_plugin_slurm-1.6.1.dist-info/METADATA,sha256=12IQgnU3tuz2cQ6N6iqhyzSmR7BD_mt9HWfenEse2r4,1434
+snakemake_executor_plugin_slurm-1.6.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+snakemake_executor_plugin_slurm-1.6.1.dist-info/RECORD,,
snakemake_executor_plugin_slurm-1.6.0.dist-info/RECORD (removed)
@@ -1,8 +0,0 @@
-snakemake_executor_plugin_slurm/__init__.py,sha256=VXA5Bn58OCd143Kb3STvv6YxTePUsE5wbfrKYXFIpvE,33978
-snakemake_executor_plugin_slurm/efficiency_report.py,sha256=Lcafd8Zv14NMOmghPzuUd7rxLUS8PQmOISuNosauA4w,6256
-snakemake_executor_plugin_slurm/submit_string.py,sha256=C4-ZIsrHJg9WiJuDW3HqLno0uigBz8IYvgbI6nldJ38,2789
-snakemake_executor_plugin_slurm/utils.py,sha256=7XVXtzu7bg_89wWZisW-Zk7TNQyEgK4v_y4Y3F9uOwc,4491
-snakemake_executor_plugin_slurm-1.6.0.dist-info/LICENSE,sha256=YVc4xTLWMqGfFL36120k7rzXtsT6e4RkJsh68VVn12s,1076
-snakemake_executor_plugin_slurm-1.6.0.dist-info/METADATA,sha256=qWKq4kHlkj-KFSdNk1YINAnL08M299SQTWBhqj566qo,1434
-snakemake_executor_plugin_slurm-1.6.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-snakemake_executor_plugin_slurm-1.6.0.dist-info/RECORD,,