weco 0.2.22__py3-none-any.whl → 0.2.24__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
weco/optimizer.py CHANGED
@@ -20,7 +20,6 @@ from .api import (
 from .auth import handle_authentication
 from .panels import (
     SummaryPanel,
-    PlanPanel,
     Node,
     MetricTreePanel,
     EvaluationOutputPanel,
@@ -37,6 +36,7 @@ from .utils import (
     smooth_update,
     format_number,
 )
+from .constants import DEFAULT_API_TIMEOUT


 # --- Heartbeat Sender Class ---
@@ -63,7 +63,7 @@ class HeartbeatSender(threading.Thread):

             except Exception as e:
                 # Catch any unexpected error in the loop to prevent silent thread death
-                print(f"[ERROR HeartbeatSender] Unhandled exception in run loop for run {self.run_id}: {e}", file=sys.stderr)
+                print(f"[ERROR HeartbeatSender] Unexpected error in heartbeat thread for run {self.run_id}: {e}", file=sys.stderr)
                 traceback.print_exc(file=sys.stderr)
                 # The loop will break due to the exception, and thread will terminate via finally.

@@ -78,6 +78,7 @@ def execute_optimization(
     log_dir: str = ".runs",
     additional_instructions: Optional[str] = None,
     console: Optional[Console] = None,
+    eval_timeout: Optional[int] = None,
 ) -> bool:
     """
     Execute the core optimization logic.
@@ -153,14 +154,13 @@ def execute_optimization(
         "debug_prob": 0.5,
         "max_debug_depth": max(1, math.ceil(0.1 * steps)),
     }
-    timeout = 800
+    api_timeout = DEFAULT_API_TIMEOUT
     processed_additional_instructions = read_additional_instructions(additional_instructions=additional_instructions)
     source_fp = pathlib.Path(source)
     source_code = read_from_path(fp=source_fp, is_json=False)

     # --- Panel Initialization ---
     summary_panel = SummaryPanel(maximize=maximize, metric_name=metric, total_steps=steps, model=model, runs_dir=log_dir)
-    plan_panel = PlanPanel()
     solution_panels = SolutionPanels(metric_name=metric, source_fp=source_fp)
     eval_output_panel = EvaluationOutputPanel()
     tree_panel = MetricTreePanel(maximize=maximize)
@@ -181,9 +181,10 @@ def execute_optimization(
         additional_instructions=processed_additional_instructions,
         api_keys=llm_api_keys,
         auth_headers=auth_headers,
-        timeout=timeout,
+        timeout=api_timeout,
     )
     run_id = run_response["run_id"]
+    run_name = run_response["run_name"]
     current_run_id_for_heartbeat = run_id

     # --- Start Heartbeat Thread ---
@@ -203,12 +204,14 @@ def execute_optimization(
         write_to_path(fp=source_fp, content=run_response["code"])

         # Update the panels with the initial solution
-        summary_panel.set_run_id(run_id=run_id)  # Add run id now that we have it
+        # Add run id and run name now that we have it
+        summary_panel.set_run_id(run_id=run_id)
+        summary_panel.set_run_name(run_name=run_name)
         # Set the step of the progress bar
         summary_panel.set_step(step=0)
         # Update the token counts
         summary_panel.update_token_counts(usage=run_response["usage"])
-        plan_panel.update(plan=run_response["plan"])
+        summary_panel.update_thinking(thinking=run_response["plan"])
         # Build the metric tree
         tree_panel.build_metric_tree(
             nodes=[
@@ -218,7 +221,7 @@ def execute_optimization(
                     "code": run_response["code"],
                     "step": 0,
                     "metric_value": None,
-                    "is_buggy": False,
+                    "is_buggy": None,
                 }
             ]
         )
@@ -227,7 +230,7 @@ def execute_optimization(
         # Update the solution panels with the initial solution and get the panel displays
         solution_panels.update(
             current_node=Node(
-                id=run_response["solution_id"], parent_id=None, code=run_response["code"], metric=None, is_buggy=False
+                id=run_response["solution_id"], parent_id=None, code=run_response["code"], metric=None, is_buggy=None
             ),
             best_node=None,
         )
@@ -238,7 +241,6 @@ def execute_optimization(
             layout=layout,
             sections_to_update=[
                 ("summary", summary_panel.get_display()),
-                ("plan", plan_panel.get_display()),
                 ("tree", tree_panel.get_display(is_done=False)),
                 ("current_solution", current_solution_panel),
                 ("best_solution", best_solution_panel),
@@ -248,7 +250,7 @@ def execute_optimization(
         )

         # Run evaluation on the initial solution
-        term_out = run_evaluation(eval_command=eval_command)
+        term_out = run_evaluation(eval_command=eval_command, timeout=eval_timeout)
         # Update the evaluation output panel
         eval_output_panel.update(output=term_out)
         smooth_update(
@@ -265,7 +267,7 @@ def execute_optimization(
             if run_id:
                 try:
                     current_status_response = get_optimization_run_status(
-                        run_id=run_id, include_history=False, timeout=30, auth_headers=auth_headers
+                        console=console, run_id=run_id, include_history=False, timeout=(10, 30), auth_headers=auth_headers
                     )
                     current_run_status_val = current_status_response.get("status")
                     if current_run_status_val == "stopping":
@@ -273,30 +275,31 @@ def execute_optimization(
                         user_stop_requested_flag = True
                         break
                 except requests.exceptions.RequestException as e:
-                    console.print(f"\n[bold red]Warning: Could not check run status: {e}. Continuing optimization...[/]")
+                    console.print(f"\n[bold red]Warning: Unable to check run status: {e}. Continuing optimization...[/]")
                 except Exception as e:
                     console.print(f"\n[bold red]Warning: Error checking run status: {e}. Continuing optimization...[/]")

             # Send feedback and get next suggestion
             eval_and_next_solution_response = evaluate_feedback_then_suggest_next_solution(
+                console=console,
                 run_id=run_id,
                 execution_output=term_out,
                 additional_instructions=current_additional_instructions,
                 api_keys=llm_api_keys,
                 auth_headers=auth_headers,
-                timeout=timeout,
+                timeout=api_timeout,
             )
             # Save next solution (.runs/<run-id>/step_<step>.<extension>)
             write_to_path(fp=runs_dir / f"step_{step}{source_fp.suffix}", content=eval_and_next_solution_response["code"])
             # Write the next solution to the source file
             write_to_path(fp=source_fp, content=eval_and_next_solution_response["code"])
             status_response = get_optimization_run_status(
-                run_id=run_id, include_history=True, timeout=timeout, auth_headers=auth_headers
+                console=console, run_id=run_id, include_history=True, timeout=api_timeout, auth_headers=auth_headers
             )
             # Update the step of the progress bar, token counts, plan and metric tree
             summary_panel.set_step(step=step)
             summary_panel.update_token_counts(usage=eval_and_next_solution_response["usage"])
-            plan_panel.update(plan=eval_and_next_solution_response["plan"])
+            summary_panel.update_thinking(thinking=eval_and_next_solution_response["plan"])

             nodes_list_from_status = status_response.get("nodes")
             tree_panel.build_metric_tree(nodes=nodes_list_from_status if nodes_list_from_status is not None else [])
@@ -327,7 +330,9 @@ def execute_optimization(
                     is_buggy=node_data["is_buggy"],
                 )
             if current_solution_node is None:
-                raise ValueError("Current solution node not found in nodes list from status response")
+                raise ValueError(
+                    "Current solution node not found in the optimization status response. This may indicate a synchronization issue with the backend."
+                )

             # Update the solution panels with the current and best solution
             solution_panels.update(current_node=current_solution_node, best_node=best_solution_node)
@@ -339,7 +344,6 @@ def execute_optimization(
                 layout=layout,
                 sections_to_update=[
                     ("summary", summary_panel.get_display()),
-                    ("plan", plan_panel.get_display()),
                     ("tree", tree_panel.get_display(is_done=False)),
                     ("current_solution", current_solution_panel),
                     ("best_solution", best_solution_panel),
@@ -347,7 +351,7 @@ def execute_optimization(
                 ],
                 transition_delay=0.08,  # Slightly longer delay for more noticeable transitions
             )
-            term_out = run_evaluation(eval_command=eval_command)
+            term_out = run_evaluation(eval_command=eval_command, timeout=eval_timeout)
             eval_output_panel.update(output=term_out)
             smooth_update(
                 live=live,
@@ -361,17 +365,18 @@ def execute_optimization(
         current_additional_instructions = read_additional_instructions(additional_instructions=additional_instructions)
         # Evaluate the final solution thats been generated
         eval_and_next_solution_response = evaluate_feedback_then_suggest_next_solution(
+            console=console,
             run_id=run_id,
             execution_output=term_out,
             additional_instructions=current_additional_instructions,
             api_keys=llm_api_keys,
-            timeout=timeout,
+            timeout=api_timeout,
             auth_headers=auth_headers,
         )
         summary_panel.set_step(step=steps)
         summary_panel.update_token_counts(usage=eval_and_next_solution_response["usage"])
         status_response = get_optimization_run_status(
-            run_id=run_id, include_history=True, timeout=timeout, auth_headers=auth_headers
+            console=console, run_id=run_id, include_history=True, timeout=api_timeout, auth_headers=auth_headers
         )
         # No need to update the plan panel since we have finished the optimization
         # Get the optimization run status for
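
Note: the single hardcoded timeout = 800 is split into two knobs in 0.2.24: backend API calls use DEFAULT_API_TIMEOUT from the new weco.constants module, while the evaluation subprocess gets its own optional eval_timeout. A minimal sketch of how the two are used (the eval command and the 600-second value below are illustrative, not taken from the package):

from weco.constants import DEFAULT_API_TIMEOUT
from weco.utils import run_evaluation

api_timeout = DEFAULT_API_TIMEOUT   # passed as timeout= to the backend API helpers above
eval_timeout = 600                  # illustrative per-evaluation limit in seconds; None disables it

# The user's eval command now runs under its own subprocess timeout:
term_out = run_evaluation(eval_command="python evaluate.py", timeout=eval_timeout)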
weco/panels.py CHANGED
@@ -7,30 +7,40 @@ from rich.syntax import Syntax
 from rich import box
 from typing import Dict, List, Optional, Union, Tuple
 from .utils import format_number
-import pathlib
+from pathlib import Path
 from .__init__ import __dashboard_url__


 class SummaryPanel:
     """Holds a summary of the optimization run."""

-    def __init__(self, maximize: bool, metric_name: str, total_steps: int, model: str, runs_dir: str, run_id: str = None):
+    def __init__(
+        self,
+        maximize: bool,
+        metric_name: str,
+        total_steps: int,
+        model: str,
+        runs_dir: str,
+        run_id: str = None,
+        run_name: str = None,
+    ):
         self.maximize = maximize
         self.metric_name = metric_name
-        self.goal = ("Maximizing" if self.maximize else "Minimizing") + f" {self.metric_name}..."
         self.total_input_tokens = 0
         self.total_output_tokens = 0
         self.total_steps = total_steps
         self.model = model
         self.runs_dir = runs_dir
         self.run_id = run_id if run_id is not None else "N/A"
+        self.run_name = run_name if run_name is not None else "N/A"
         self.dashboard_url = "N/A"
+        self.thinking_content = ""
         self.progress = Progress(
             TextColumn("[progress.description]{task.description}"),
             BarColumn(bar_width=20),
             TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
             TextColumn("•"),
-            TextColumn("[bold]{task.completed}/{task.total} Steps"),
+            TextColumn("[bold]{task.completed}/{task.total} Steps "),
             expand=False,
         )
         self.task_id = self.progress.add_task("", total=total_steps)
@@ -40,6 +50,10 @@ class SummaryPanel:
         self.run_id = run_id
         self.set_dashboard_url(run_id=run_id)

+    def set_run_name(self, run_name: str):
+        """Set the run name."""
+        self.run_name = run_name
+
     def set_dashboard_url(self, run_id: str):
         """Set the dashboard URL."""
         self.dashboard_url = f"{__dashboard_url__}/runs/{run_id}"
@@ -51,69 +65,100 @@ class SummaryPanel:
     def update_token_counts(self, usage: Dict[str, int]):
         """Update token counts from usage data."""
         if not isinstance(usage, dict) or "input_tokens" not in usage or "output_tokens" not in usage:
-            raise ValueError("Invalid token usage response from API.")
+            raise ValueError("Invalid token usage data received.")
         self.total_input_tokens += usage["input_tokens"]
         self.total_output_tokens += usage["output_tokens"]

+    def update_thinking(self, thinking: str):
+        """Update the thinking content."""
+        self.thinking_content = thinking
+
+    def clear_thinking(self):
+        """Clear the thinking content."""
+        self.thinking_content = ""
+
     def get_display(self, final_message: Optional[str] = None) -> Panel:
-        """Create a summary panel with the relevant information."""
-        layout = Layout(name="summary")
-        summary_table = Table(show_header=False, box=None, padding=(0, 1))
+        """Return a Rich panel summarising the current run."""
+        # ───────────────────── summary grid ──────────────────────
+        summary_table = Table.grid(expand=True, padding=(0, 1))
+        summary_table.add_column(ratio=1)
+        summary_table.add_column(justify="right")
+        summary_table.add_row("")

+        # Dashboard url
+        summary_table.add_row(f" Dashboard: [underline blue]{self.dashboard_url}[/]")
         summary_table.add_row("")
-        # Goal
+
         if final_message is not None:
-            summary_table.add_row(f"[bold cyan]Result:[/] {final_message}")
-        else:
-            summary_table.add_row(f"[bold cyan]Goal:[/] {self.goal}")
-        summary_table.add_row("")
-        # Model used
-        summary_table.add_row(f"[bold cyan]Model:[/] {self.model}")
-        summary_table.add_row("")
-        # Log directory
-        summary_table.add_row(f"[bold cyan]Logs:[/] [blue underline]{self.runs_dir}/{self.run_id}[/]")
-        summary_table.add_row("")
-        # Dashboard link
-        summary_table.add_row(f"[bold cyan]Dashboard:[/] [blue underline]{self.dashboard_url}[/]")
-        summary_table.add_row("")
-        # Token counts
-        summary_table.add_row(
-            f"[bold cyan]Tokens:[/] ↑[yellow]{format_number(self.total_input_tokens)}[/] ↓[yellow]{format_number(self.total_output_tokens)}[/] = [green]{format_number(self.total_input_tokens + self.total_output_tokens)}[/]"
+            # Add the final message
+            summary_table.add_row(f"[bold cyan] Result:[/] {final_message}", "")
+            summary_table.add_row("")
+
+        # Token info
+        token_info = (
+            f"[bold cyan] {self.model}:[/] "
+            f"↑[yellow]{format_number(self.total_input_tokens)}[/] "
+            f"[yellow]{format_number(self.total_output_tokens)}[/] = "
+            f"[green]{format_number(self.total_input_tokens + self.total_output_tokens)} Tokens[/]"
         )
+        summary_table.add_row(token_info)
         summary_table.add_row("")
+
         # Progress bar
         summary_table.add_row(self.progress)
+        summary_table.add_row("")

-        # Update layout
-        layout.update(summary_table)
-
-        return Panel(layout, title="[bold]📊 Summary", border_style="magenta", expand=True, padding=(0, 1))
-
-
-class PlanPanel:
-    """Displays the optimization plan with truncation for long plans."""
-
-    def __init__(self):
-        self.plan = ""
+        # Logs url
+        logs_url = Path(self.runs_dir) / self.run_id
+        summary_table.add_row(f" Logs: [underline blue]{logs_url}[/]")
+        summary_table.add_row("")

-    def update(self, plan: str):
-        """Update the plan text."""
-        self.plan = plan
+        if final_message is not None:
+            # Don't include the thinking section
+            return Panel(
+                summary_table,
+                title=f"[bold]📊 {'Maximizing' if self.maximize else 'Minimizing'} {self.run_name}",
+                border_style="magenta",
+                expand=True,
+                padding=(0, 1),
+            )

-    def clear(self):
-        """Clear the plan text."""
-        self.plan = ""
+        # Include the thinking section
+        layout = Layout(name="summary")
+        layout.split_column(
+            Layout(summary_table, name="main_summary", ratio=1),
+            Layout(
+                Panel(
+                    self.thinking_content or "[dim]No thinking content yet...[/]",
+                    title="[bold]📝 Thinking...",
+                    border_style="cyan",
+                    expand=True,
+                    padding=(0, 1),
+                ),
+                name="thinking_section",
+                ratio=1,
+            ),
+        )

-    def get_display(self) -> Panel:
-        """Create a panel displaying the plan with truncation if needed."""
-        return Panel(self.plan, title="[bold]📝 Thinking...", border_style="cyan", expand=True, padding=(0, 1))
+        return Panel(
+            layout,
+            title=f"[bold]📊 {'Maximizing' if self.maximize else 'Minimizing'} {self.run_name}",
+            border_style="magenta",
+            expand=True,
+            padding=(0, 1),
+        )


 class Node:
     """Represents a node in the solution tree."""

     def __init__(
-        self, id: str, parent_id: Union[str, None], code: Union[str, None], metric: Union[float, None], is_buggy: bool
+        self,
+        id: str,
+        parent_id: Union[str, None],
+        code: Union[str, None],
+        metric: Union[float, None],
+        is_buggy: Union[bool, None],
     ):
         self.id = id
         self.parent_id = parent_id
@@ -144,12 +189,15 @@ class MetricTree:
         # Add node to node's parent's children
         if node.parent_id is not None:
             if node.parent_id not in self.nodes:
-                raise ValueError("Could not construct tree: parent node not found.")
+                raise ValueError("Cannot construct optimization tree.")
             self.nodes[node.parent_id].children.append(node)

-    def get_draft_nodes(self) -> List[Node]:
-        """Get all draft nodes from the tree."""
-        return [node for node in self.nodes.values() if node.parent_id is None]
+    def get_root_node(self) -> Node:
+        """Get the root node from the tree."""
+        nodes = [node for node in self.nodes.values() if node.parent_id is None]
+        if len(nodes) != 1:
+            raise ValueError("Cannot construct optimization tree.")
+        return nodes[0]

     def get_best_node(self) -> Optional[Node]:
         """Get the best node from the tree."""
@@ -157,7 +205,8 @@ class MetricTree:
             node
             for node in self.nodes.values()
             if node.evaluated  # evaluated
-            and not node.is_buggy  # not buggy
+            and node.is_buggy
+            is False  # not buggy => is_buggy can exist in 3 states: None (solution has not yet been evaluated for bugs), True (solution has bug), False (solution does not have a bug)
             and node.metric is not None  # has metric
         ]
         if len(measured_nodes) == 0:
@@ -247,8 +296,8 @@ class MetricTreePanel:
             append_rec(child, subtree)

         tree = Tree("", hide_root=True)
-        for n in self.metric_tree.get_draft_nodes():
-            append_rec(n, tree)
+        root_node = self.metric_tree.get_root_node()
+        append_rec(node=root_node, tree=tree)

         return tree

@@ -286,7 +335,7 @@ class EvaluationOutputPanel:
 class SolutionPanels:
     """Displays the current and best solutions side by side."""

-    def __init__(self, metric_name: str, source_fp: pathlib.Path):
+    def __init__(self, metric_name: str, source_fp: Path):
         # Current solution
         self.current_node = None
         # Best solution
@@ -296,7 +345,7 @@ class SolutionPanels:
         # Determine the lexer for the source file
         self.lexer = self._determine_lexer(source_fp)

-    def _determine_lexer(self, source_fp: pathlib.Path) -> str:
+    def _determine_lexer(self, source_fp: Path) -> str:
         """Determine the lexer for the source file."""
         return Syntax.from_path(source_fp).lexer

@@ -346,10 +395,7 @@ def create_optimization_layout() -> Layout:
     )

     # Split the top section into left and right
-    layout["top_section"].split_row(Layout(name="left_panels", ratio=1), Layout(name="tree", ratio=1))
-
-    # Split the left panels into summary and thinking
-    layout["left_panels"].split_column(Layout(name="summary", ratio=2), Layout(name="plan", ratio=1))
+    layout["top_section"].split_row(Layout(name="summary", ratio=1), Layout(name="tree", ratio=1))

     # Split the middle section into left and right
     layout["middle_section"].split_row(Layout(name="current_solution", ratio=1), Layout(name="best_solution", ratio=1))
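
With PlanPanel removed, the plan text now lives inside SummaryPanel as an embedded "Thinking..." section, alongside the new run_name. A minimal sketch of the new surface (assumes weco 0.2.24 is installed; the metric, model, and text values are illustrative only):

from weco.panels import SummaryPanel

panel = SummaryPanel(maximize=True, metric_name="accuracy", total_steps=10, model="example-model", runs_dir=".runs")
panel.set_run_id(run_id="run-123")
panel.set_run_name(run_name="speed-up-eval")
panel.update_thinking(thinking="Try vectorising the inner loop next.")
display = panel.get_display()  # one Panel: summary grid plus the embedded Thinking section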
weco/utils.py CHANGED
@@ -45,7 +45,7 @@ def determine_default_model(llm_api_keys: Dict[str, Any]) -> str:
         return "gemini-2.5-pro"
     else:
         raise ValueError(
-            "No LLM API keys found in environment. Please set one of the following: OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY."
+            "No LLM API keys found in environment variables. Please set one of the following: OPENAI_API_KEY, ANTHROPIC_API_KEY, or GEMINI_API_KEY based on your model of choice."
         )


@@ -84,7 +84,7 @@ def write_to_path(fp: pathlib.Path, content: Union[str, Dict[str, Any]], is_json
         elif isinstance(content, str):
             f.write(content)
         else:
-            raise TypeError("Content must be str or Dict[str, Any]")
+            raise TypeError("Error writing to file. Please verify the file path and try again.")


 # Visualization helper functions
@@ -124,19 +124,54 @@ def smooth_update(


 # Other helper functions
-def run_evaluation(eval_command: str) -> str:
+DEFAULT_MAX_LINES = 50
+DEFAULT_MAX_CHARS = 5000
+
+
+def truncate_output(output: str, max_lines: int = DEFAULT_MAX_LINES, max_chars: int = DEFAULT_MAX_CHARS) -> str:
+    """Truncate the output to a reasonable size."""
+    lines = output.splitlines()
+
+    # Determine what truncations are needed based on original output
+    lines_truncated = len(lines) > max_lines
+    chars_truncated = len(output) > max_chars
+
+    # Apply truncations to the original output
+    if lines_truncated:
+        output = "\n".join(lines[-max_lines:])
+
+    if chars_truncated:
+        output = output[-max_chars:]
+
+    # Add prefixes for truncations that were applied
+    prefixes = []
+    if lines_truncated:
+        prefixes.append(f"truncated to last {max_lines} lines")
+    if chars_truncated:
+        prefixes.append(f"truncated to last {max_chars} characters")
+
+    if prefixes:
+        prefix_text = ", ".join(prefixes)
+        output = f"... ({prefix_text})\n{output}"
+
+    return output
+
+
+def run_evaluation(eval_command: str, timeout: int | None = None) -> str:
     """Run the evaluation command on the code and return the output."""

     # Run the eval command as is
-    result = subprocess.run(eval_command, shell=True, capture_output=True, text=True, check=False)
-
-    # Combine stdout and stderr for complete output
-    output = result.stderr if result.stderr else ""
-    if result.stdout:
-        if len(output) > 0:
-            output += "\n"
-        output += result.stdout
-    return output
+    try:
+        result = subprocess.run(eval_command, shell=True, capture_output=True, text=True, check=False, timeout=timeout)
+        # Combine stdout and stderr for complete output
+        output = result.stderr if result.stderr else ""
+        if result.stdout:
+            if len(output) > 0:
+                output += "\n"
+            output += result.stdout
+        return truncate_output(output)
+    except subprocess.TimeoutExpired:
+        return f"Evaluation timed out after {'an unspecified duration' if timeout is None else f'{timeout} seconds'}."


 # Update Check Function
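
The new truncate_output helper and the timeout path in run_evaluation change what the optimizer receives as evaluation feedback. A small sketch of the behaviour (assumes weco 0.2.24 is installed and a POSIX shell; the commands are illustrative):

from weco.utils import run_evaluation, truncate_output

# A command that exceeds the timeout no longer raises; the timeout message is
# returned as the evaluation output instead.
print(run_evaluation(eval_command="sleep 5 && echo done", timeout=1))
# -> Evaluation timed out after 1 seconds.

# Long outputs are clipped to the last 50 lines / 5000 characters by default,
# with a prefix describing what was dropped.
long_output = "\n".join(f"line {i}" for i in range(200))
print(truncate_output(long_output).splitlines()[0])
# -> ... (truncated to last 50 lines)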