weco 0.2.22__py3-none-any.whl → 0.2.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- weco/api.py +84 -87
- weco/auth.py +7 -5
- weco/chatbot.py +34 -23
- weco/cli.py +10 -1
- weco/constants.py +7 -0
- weco/optimizer.py +26 -21
- weco/panels.py +105 -59
- weco/utils.py +47 -12
- {weco-0.2.22.dist-info → weco-0.2.24.dist-info}/METADATA +36 -25
- weco-0.2.24.dist-info/RECORD +15 -0
- weco-0.2.22.dist-info/RECORD +0 -14
- {weco-0.2.22.dist-info → weco-0.2.24.dist-info}/WHEEL +0 -0
- {weco-0.2.22.dist-info → weco-0.2.24.dist-info}/entry_points.txt +0 -0
- {weco-0.2.22.dist-info → weco-0.2.24.dist-info}/licenses/LICENSE +0 -0
- {weco-0.2.22.dist-info → weco-0.2.24.dist-info}/top_level.txt +0 -0
weco/optimizer.py
CHANGED
|
@@ -20,7 +20,6 @@ from .api import (
|
|
|
20
20
|
from .auth import handle_authentication
|
|
21
21
|
from .panels import (
|
|
22
22
|
SummaryPanel,
|
|
23
|
-
PlanPanel,
|
|
24
23
|
Node,
|
|
25
24
|
MetricTreePanel,
|
|
26
25
|
EvaluationOutputPanel,
|
|
@@ -37,6 +36,7 @@ from .utils import (
|
|
|
37
36
|
smooth_update,
|
|
38
37
|
format_number,
|
|
39
38
|
)
|
|
39
|
+
from .constants import DEFAULT_API_TIMEOUT
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
# --- Heartbeat Sender Class ---
|
|
@@ -63,7 +63,7 @@ class HeartbeatSender(threading.Thread):
|
|
|
63
63
|
|
|
64
64
|
except Exception as e:
|
|
65
65
|
# Catch any unexpected error in the loop to prevent silent thread death
|
|
66
|
-
print(f"[ERROR HeartbeatSender]
|
|
66
|
+
print(f"[ERROR HeartbeatSender] Unexpected error in heartbeat thread for run {self.run_id}: {e}", file=sys.stderr)
|
|
67
67
|
traceback.print_exc(file=sys.stderr)
|
|
68
68
|
# The loop will break due to the exception, and thread will terminate via finally.
|
|
69
69
|
|
|
@@ -78,6 +78,7 @@ def execute_optimization(
|
|
|
78
78
|
log_dir: str = ".runs",
|
|
79
79
|
additional_instructions: Optional[str] = None,
|
|
80
80
|
console: Optional[Console] = None,
|
|
81
|
+
eval_timeout: Optional[int] = None,
|
|
81
82
|
) -> bool:
|
|
82
83
|
"""
|
|
83
84
|
Execute the core optimization logic.
|
|
@@ -153,14 +154,13 @@ def execute_optimization(
|
|
|
153
154
|
"debug_prob": 0.5,
|
|
154
155
|
"max_debug_depth": max(1, math.ceil(0.1 * steps)),
|
|
155
156
|
}
|
|
156
|
-
|
|
157
|
+
api_timeout = DEFAULT_API_TIMEOUT
|
|
157
158
|
processed_additional_instructions = read_additional_instructions(additional_instructions=additional_instructions)
|
|
158
159
|
source_fp = pathlib.Path(source)
|
|
159
160
|
source_code = read_from_path(fp=source_fp, is_json=False)
|
|
160
161
|
|
|
161
162
|
# --- Panel Initialization ---
|
|
162
163
|
summary_panel = SummaryPanel(maximize=maximize, metric_name=metric, total_steps=steps, model=model, runs_dir=log_dir)
|
|
163
|
-
plan_panel = PlanPanel()
|
|
164
164
|
solution_panels = SolutionPanels(metric_name=metric, source_fp=source_fp)
|
|
165
165
|
eval_output_panel = EvaluationOutputPanel()
|
|
166
166
|
tree_panel = MetricTreePanel(maximize=maximize)
|
|
@@ -181,9 +181,10 @@ def execute_optimization(
|
|
|
181
181
|
additional_instructions=processed_additional_instructions,
|
|
182
182
|
api_keys=llm_api_keys,
|
|
183
183
|
auth_headers=auth_headers,
|
|
184
|
-
timeout=
|
|
184
|
+
timeout=api_timeout,
|
|
185
185
|
)
|
|
186
186
|
run_id = run_response["run_id"]
|
|
187
|
+
run_name = run_response["run_name"]
|
|
187
188
|
current_run_id_for_heartbeat = run_id
|
|
188
189
|
|
|
189
190
|
# --- Start Heartbeat Thread ---
|
|
@@ -203,12 +204,14 @@ def execute_optimization(
|
|
|
203
204
|
write_to_path(fp=source_fp, content=run_response["code"])
|
|
204
205
|
|
|
205
206
|
# Update the panels with the initial solution
|
|
206
|
-
|
|
207
|
+
# Add run id and run name now that we have it
|
|
208
|
+
summary_panel.set_run_id(run_id=run_id)
|
|
209
|
+
summary_panel.set_run_name(run_name=run_name)
|
|
207
210
|
# Set the step of the progress bar
|
|
208
211
|
summary_panel.set_step(step=0)
|
|
209
212
|
# Update the token counts
|
|
210
213
|
summary_panel.update_token_counts(usage=run_response["usage"])
|
|
211
|
-
|
|
214
|
+
summary_panel.update_thinking(thinking=run_response["plan"])
|
|
212
215
|
# Build the metric tree
|
|
213
216
|
tree_panel.build_metric_tree(
|
|
214
217
|
nodes=[
|
|
@@ -218,7 +221,7 @@ def execute_optimization(
|
|
|
218
221
|
"code": run_response["code"],
|
|
219
222
|
"step": 0,
|
|
220
223
|
"metric_value": None,
|
|
221
|
-
"is_buggy":
|
|
224
|
+
"is_buggy": None,
|
|
222
225
|
}
|
|
223
226
|
]
|
|
224
227
|
)
|
|
@@ -227,7 +230,7 @@ def execute_optimization(
|
|
|
227
230
|
# Update the solution panels with the initial solution and get the panel displays
|
|
228
231
|
solution_panels.update(
|
|
229
232
|
current_node=Node(
|
|
230
|
-
id=run_response["solution_id"], parent_id=None, code=run_response["code"], metric=None, is_buggy=
|
|
233
|
+
id=run_response["solution_id"], parent_id=None, code=run_response["code"], metric=None, is_buggy=None
|
|
231
234
|
),
|
|
232
235
|
best_node=None,
|
|
233
236
|
)
|
|
@@ -238,7 +241,6 @@ def execute_optimization(
|
|
|
238
241
|
layout=layout,
|
|
239
242
|
sections_to_update=[
|
|
240
243
|
("summary", summary_panel.get_display()),
|
|
241
|
-
("plan", plan_panel.get_display()),
|
|
242
244
|
("tree", tree_panel.get_display(is_done=False)),
|
|
243
245
|
("current_solution", current_solution_panel),
|
|
244
246
|
("best_solution", best_solution_panel),
|
|
@@ -248,7 +250,7 @@ def execute_optimization(
|
|
|
248
250
|
)
|
|
249
251
|
|
|
250
252
|
# Run evaluation on the initial solution
|
|
251
|
-
term_out = run_evaluation(eval_command=eval_command)
|
|
253
|
+
term_out = run_evaluation(eval_command=eval_command, timeout=eval_timeout)
|
|
252
254
|
# Update the evaluation output panel
|
|
253
255
|
eval_output_panel.update(output=term_out)
|
|
254
256
|
smooth_update(
|
|
@@ -265,7 +267,7 @@ def execute_optimization(
|
|
|
265
267
|
if run_id:
|
|
266
268
|
try:
|
|
267
269
|
current_status_response = get_optimization_run_status(
|
|
268
|
-
run_id=run_id, include_history=False, timeout=30, auth_headers=auth_headers
|
|
270
|
+
console=console, run_id=run_id, include_history=False, timeout=(10, 30), auth_headers=auth_headers
|
|
269
271
|
)
|
|
270
272
|
current_run_status_val = current_status_response.get("status")
|
|
271
273
|
if current_run_status_val == "stopping":
|
|
@@ -273,30 +275,31 @@ def execute_optimization(
|
|
|
273
275
|
user_stop_requested_flag = True
|
|
274
276
|
break
|
|
275
277
|
except requests.exceptions.RequestException as e:
|
|
276
|
-
console.print(f"\n[bold red]Warning:
|
|
278
|
+
console.print(f"\n[bold red]Warning: Unable to check run status: {e}. Continuing optimization...[/]")
|
|
277
279
|
except Exception as e:
|
|
278
280
|
console.print(f"\n[bold red]Warning: Error checking run status: {e}. Continuing optimization...[/]")
|
|
279
281
|
|
|
280
282
|
# Send feedback and get next suggestion
|
|
281
283
|
eval_and_next_solution_response = evaluate_feedback_then_suggest_next_solution(
|
|
284
|
+
console=console,
|
|
282
285
|
run_id=run_id,
|
|
283
286
|
execution_output=term_out,
|
|
284
287
|
additional_instructions=current_additional_instructions,
|
|
285
288
|
api_keys=llm_api_keys,
|
|
286
289
|
auth_headers=auth_headers,
|
|
287
|
-
timeout=
|
|
290
|
+
timeout=api_timeout,
|
|
288
291
|
)
|
|
289
292
|
# Save next solution (.runs/<run-id>/step_<step>.<extension>)
|
|
290
293
|
write_to_path(fp=runs_dir / f"step_{step}{source_fp.suffix}", content=eval_and_next_solution_response["code"])
|
|
291
294
|
# Write the next solution to the source file
|
|
292
295
|
write_to_path(fp=source_fp, content=eval_and_next_solution_response["code"])
|
|
293
296
|
status_response = get_optimization_run_status(
|
|
294
|
-
run_id=run_id, include_history=True, timeout=
|
|
297
|
+
console=console, run_id=run_id, include_history=True, timeout=api_timeout, auth_headers=auth_headers
|
|
295
298
|
)
|
|
296
299
|
# Update the step of the progress bar, token counts, plan and metric tree
|
|
297
300
|
summary_panel.set_step(step=step)
|
|
298
301
|
summary_panel.update_token_counts(usage=eval_and_next_solution_response["usage"])
|
|
299
|
-
|
|
302
|
+
summary_panel.update_thinking(thinking=eval_and_next_solution_response["plan"])
|
|
300
303
|
|
|
301
304
|
nodes_list_from_status = status_response.get("nodes")
|
|
302
305
|
tree_panel.build_metric_tree(nodes=nodes_list_from_status if nodes_list_from_status is not None else [])
|
|
@@ -327,7 +330,9 @@ def execute_optimization(
|
|
|
327
330
|
is_buggy=node_data["is_buggy"],
|
|
328
331
|
)
|
|
329
332
|
if current_solution_node is None:
|
|
330
|
-
raise ValueError(
|
|
333
|
+
raise ValueError(
|
|
334
|
+
"Current solution node not found in the optimization status response. This may indicate a synchronization issue with the backend."
|
|
335
|
+
)
|
|
331
336
|
|
|
332
337
|
# Update the solution panels with the current and best solution
|
|
333
338
|
solution_panels.update(current_node=current_solution_node, best_node=best_solution_node)
|
|
@@ -339,7 +344,6 @@ def execute_optimization(
|
|
|
339
344
|
layout=layout,
|
|
340
345
|
sections_to_update=[
|
|
341
346
|
("summary", summary_panel.get_display()),
|
|
342
|
-
("plan", plan_panel.get_display()),
|
|
343
347
|
("tree", tree_panel.get_display(is_done=False)),
|
|
344
348
|
("current_solution", current_solution_panel),
|
|
345
349
|
("best_solution", best_solution_panel),
|
|
@@ -347,7 +351,7 @@ def execute_optimization(
|
|
|
347
351
|
],
|
|
348
352
|
transition_delay=0.08, # Slightly longer delay for more noticeable transitions
|
|
349
353
|
)
|
|
350
|
-
term_out = run_evaluation(eval_command=eval_command)
|
|
354
|
+
term_out = run_evaluation(eval_command=eval_command, timeout=eval_timeout)
|
|
351
355
|
eval_output_panel.update(output=term_out)
|
|
352
356
|
smooth_update(
|
|
353
357
|
live=live,
|
|
@@ -361,17 +365,18 @@ def execute_optimization(
|
|
|
361
365
|
current_additional_instructions = read_additional_instructions(additional_instructions=additional_instructions)
|
|
362
366
|
# Evaluate the final solution thats been generated
|
|
363
367
|
eval_and_next_solution_response = evaluate_feedback_then_suggest_next_solution(
|
|
368
|
+
console=console,
|
|
364
369
|
run_id=run_id,
|
|
365
370
|
execution_output=term_out,
|
|
366
371
|
additional_instructions=current_additional_instructions,
|
|
367
372
|
api_keys=llm_api_keys,
|
|
368
|
-
timeout=
|
|
373
|
+
timeout=api_timeout,
|
|
369
374
|
auth_headers=auth_headers,
|
|
370
375
|
)
|
|
371
376
|
summary_panel.set_step(step=steps)
|
|
372
377
|
summary_panel.update_token_counts(usage=eval_and_next_solution_response["usage"])
|
|
373
378
|
status_response = get_optimization_run_status(
|
|
374
|
-
run_id=run_id, include_history=True, timeout=
|
|
379
|
+
console=console, run_id=run_id, include_history=True, timeout=api_timeout, auth_headers=auth_headers
|
|
375
380
|
)
|
|
376
381
|
# No need to update the plan panel since we have finished the optimization
|
|
377
382
|
# Get the optimization run status for
|
weco/panels.py
CHANGED
|
@@ -7,30 +7,40 @@ from rich.syntax import Syntax
|
|
|
7
7
|
from rich import box
|
|
8
8
|
from typing import Dict, List, Optional, Union, Tuple
|
|
9
9
|
from .utils import format_number
|
|
10
|
-
import
|
|
10
|
+
from pathlib import Path
|
|
11
11
|
from .__init__ import __dashboard_url__
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class SummaryPanel:
|
|
15
15
|
"""Holds a summary of the optimization run."""
|
|
16
16
|
|
|
17
|
-
def __init__(
|
|
17
|
+
def __init__(
|
|
18
|
+
self,
|
|
19
|
+
maximize: bool,
|
|
20
|
+
metric_name: str,
|
|
21
|
+
total_steps: int,
|
|
22
|
+
model: str,
|
|
23
|
+
runs_dir: str,
|
|
24
|
+
run_id: str = None,
|
|
25
|
+
run_name: str = None,
|
|
26
|
+
):
|
|
18
27
|
self.maximize = maximize
|
|
19
28
|
self.metric_name = metric_name
|
|
20
|
-
self.goal = ("Maximizing" if self.maximize else "Minimizing") + f" {self.metric_name}..."
|
|
21
29
|
self.total_input_tokens = 0
|
|
22
30
|
self.total_output_tokens = 0
|
|
23
31
|
self.total_steps = total_steps
|
|
24
32
|
self.model = model
|
|
25
33
|
self.runs_dir = runs_dir
|
|
26
34
|
self.run_id = run_id if run_id is not None else "N/A"
|
|
35
|
+
self.run_name = run_name if run_name is not None else "N/A"
|
|
27
36
|
self.dashboard_url = "N/A"
|
|
37
|
+
self.thinking_content = ""
|
|
28
38
|
self.progress = Progress(
|
|
29
39
|
TextColumn("[progress.description]{task.description}"),
|
|
30
40
|
BarColumn(bar_width=20),
|
|
31
41
|
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
|
|
32
42
|
TextColumn("•"),
|
|
33
|
-
TextColumn("[bold]{task.completed}/{task.total} Steps"),
|
|
43
|
+
TextColumn("[bold]{task.completed}/{task.total} Steps "),
|
|
34
44
|
expand=False,
|
|
35
45
|
)
|
|
36
46
|
self.task_id = self.progress.add_task("", total=total_steps)
|
|
@@ -40,6 +50,10 @@ class SummaryPanel:
|
|
|
40
50
|
self.run_id = run_id
|
|
41
51
|
self.set_dashboard_url(run_id=run_id)
|
|
42
52
|
|
|
53
|
+
def set_run_name(self, run_name: str):
|
|
54
|
+
"""Set the run name."""
|
|
55
|
+
self.run_name = run_name
|
|
56
|
+
|
|
43
57
|
def set_dashboard_url(self, run_id: str):
|
|
44
58
|
"""Set the dashboard URL."""
|
|
45
59
|
self.dashboard_url = f"{__dashboard_url__}/runs/{run_id}"
|
|
@@ -51,69 +65,100 @@ class SummaryPanel:
|
|
|
51
65
|
def update_token_counts(self, usage: Dict[str, int]):
|
|
52
66
|
"""Update token counts from usage data."""
|
|
53
67
|
if not isinstance(usage, dict) or "input_tokens" not in usage or "output_tokens" not in usage:
|
|
54
|
-
raise ValueError("Invalid token usage
|
|
68
|
+
raise ValueError("Invalid token usage data received.")
|
|
55
69
|
self.total_input_tokens += usage["input_tokens"]
|
|
56
70
|
self.total_output_tokens += usage["output_tokens"]
|
|
57
71
|
|
|
72
|
+
def update_thinking(self, thinking: str):
|
|
73
|
+
"""Update the thinking content."""
|
|
74
|
+
self.thinking_content = thinking
|
|
75
|
+
|
|
76
|
+
def clear_thinking(self):
|
|
77
|
+
"""Clear the thinking content."""
|
|
78
|
+
self.thinking_content = ""
|
|
79
|
+
|
|
58
80
|
def get_display(self, final_message: Optional[str] = None) -> Panel:
|
|
59
|
-
"""
|
|
60
|
-
|
|
61
|
-
summary_table = Table(
|
|
81
|
+
"""Return a Rich panel summarising the current run."""
|
|
82
|
+
# ───────────────────── summary grid ──────────────────────
|
|
83
|
+
summary_table = Table.grid(expand=True, padding=(0, 1))
|
|
84
|
+
summary_table.add_column(ratio=1)
|
|
85
|
+
summary_table.add_column(justify="right")
|
|
86
|
+
summary_table.add_row("")
|
|
62
87
|
|
|
88
|
+
# Dashboard url
|
|
89
|
+
summary_table.add_row(f" Dashboard: [underline blue]{self.dashboard_url}[/]")
|
|
63
90
|
summary_table.add_row("")
|
|
64
|
-
|
|
91
|
+
|
|
65
92
|
if final_message is not None:
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
summary_table.add_row(
|
|
69
|
-
|
|
70
|
-
#
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
# Dashboard link
|
|
77
|
-
summary_table.add_row(f"[bold cyan]Dashboard:[/] [blue underline]{self.dashboard_url}[/]")
|
|
78
|
-
summary_table.add_row("")
|
|
79
|
-
# Token counts
|
|
80
|
-
summary_table.add_row(
|
|
81
|
-
f"[bold cyan]Tokens:[/] ↑[yellow]{format_number(self.total_input_tokens)}[/] ↓[yellow]{format_number(self.total_output_tokens)}[/] = [green]{format_number(self.total_input_tokens + self.total_output_tokens)}[/]"
|
|
93
|
+
# Add the final message
|
|
94
|
+
summary_table.add_row(f"[bold cyan] Result:[/] {final_message}", "")
|
|
95
|
+
summary_table.add_row("")
|
|
96
|
+
|
|
97
|
+
# Token info
|
|
98
|
+
token_info = (
|
|
99
|
+
f"[bold cyan] {self.model}:[/] "
|
|
100
|
+
f"↑[yellow]{format_number(self.total_input_tokens)}[/] "
|
|
101
|
+
f"↓[yellow]{format_number(self.total_output_tokens)}[/] = "
|
|
102
|
+
f"[green]{format_number(self.total_input_tokens + self.total_output_tokens)} Tokens[/]"
|
|
82
103
|
)
|
|
104
|
+
summary_table.add_row(token_info)
|
|
83
105
|
summary_table.add_row("")
|
|
106
|
+
|
|
84
107
|
# Progress bar
|
|
85
108
|
summary_table.add_row(self.progress)
|
|
109
|
+
summary_table.add_row("")
|
|
86
110
|
|
|
87
|
-
#
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
class PlanPanel:
|
|
94
|
-
"""Displays the optimization plan with truncation for long plans."""
|
|
95
|
-
|
|
96
|
-
def __init__(self):
|
|
97
|
-
self.plan = ""
|
|
111
|
+
# Logs url
|
|
112
|
+
logs_url = Path(self.runs_dir) / self.run_id
|
|
113
|
+
summary_table.add_row(f" Logs: [underline blue]{logs_url}[/]")
|
|
114
|
+
summary_table.add_row("")
|
|
98
115
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
116
|
+
if final_message is not None:
|
|
117
|
+
# Don't include the thinking section
|
|
118
|
+
return Panel(
|
|
119
|
+
summary_table,
|
|
120
|
+
title=f"[bold]📊 {'Maximizing' if self.maximize else 'Minimizing'} {self.run_name}",
|
|
121
|
+
border_style="magenta",
|
|
122
|
+
expand=True,
|
|
123
|
+
padding=(0, 1),
|
|
124
|
+
)
|
|
102
125
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
126
|
+
# Include the thinking section
|
|
127
|
+
layout = Layout(name="summary")
|
|
128
|
+
layout.split_column(
|
|
129
|
+
Layout(summary_table, name="main_summary", ratio=1),
|
|
130
|
+
Layout(
|
|
131
|
+
Panel(
|
|
132
|
+
self.thinking_content or "[dim]No thinking content yet...[/]",
|
|
133
|
+
title="[bold]📝 Thinking...",
|
|
134
|
+
border_style="cyan",
|
|
135
|
+
expand=True,
|
|
136
|
+
padding=(0, 1),
|
|
137
|
+
),
|
|
138
|
+
name="thinking_section",
|
|
139
|
+
ratio=1,
|
|
140
|
+
),
|
|
141
|
+
)
|
|
106
142
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
143
|
+
return Panel(
|
|
144
|
+
layout,
|
|
145
|
+
title=f"[bold]📊 {'Maximizing' if self.maximize else 'Minimizing'} {self.run_name}",
|
|
146
|
+
border_style="magenta",
|
|
147
|
+
expand=True,
|
|
148
|
+
padding=(0, 1),
|
|
149
|
+
)
|
|
110
150
|
|
|
111
151
|
|
|
112
152
|
class Node:
|
|
113
153
|
"""Represents a node in the solution tree."""
|
|
114
154
|
|
|
115
155
|
def __init__(
|
|
116
|
-
self,
|
|
156
|
+
self,
|
|
157
|
+
id: str,
|
|
158
|
+
parent_id: Union[str, None],
|
|
159
|
+
code: Union[str, None],
|
|
160
|
+
metric: Union[float, None],
|
|
161
|
+
is_buggy: Union[bool, None],
|
|
117
162
|
):
|
|
118
163
|
self.id = id
|
|
119
164
|
self.parent_id = parent_id
|
|
@@ -144,12 +189,15 @@ class MetricTree:
|
|
|
144
189
|
# Add node to node's parent's children
|
|
145
190
|
if node.parent_id is not None:
|
|
146
191
|
if node.parent_id not in self.nodes:
|
|
147
|
-
raise ValueError("
|
|
192
|
+
raise ValueError("Cannot construct optimization tree.")
|
|
148
193
|
self.nodes[node.parent_id].children.append(node)
|
|
149
194
|
|
|
150
|
-
def
|
|
151
|
-
"""Get
|
|
152
|
-
|
|
195
|
+
def get_root_node(self) -> Node:
|
|
196
|
+
"""Get the root node from the tree."""
|
|
197
|
+
nodes = [node for node in self.nodes.values() if node.parent_id is None]
|
|
198
|
+
if len(nodes) != 1:
|
|
199
|
+
raise ValueError("Cannot construct optimization tree.")
|
|
200
|
+
return nodes[0]
|
|
153
201
|
|
|
154
202
|
def get_best_node(self) -> Optional[Node]:
|
|
155
203
|
"""Get the best node from the tree."""
|
|
@@ -157,7 +205,8 @@ class MetricTree:
|
|
|
157
205
|
node
|
|
158
206
|
for node in self.nodes.values()
|
|
159
207
|
if node.evaluated # evaluated
|
|
160
|
-
and
|
|
208
|
+
and node.is_buggy
|
|
209
|
+
is False # not buggy => is_buggy can exist in 3 states: None (solution has not yet been evaluated for bugs), True (solution has bug), False (solution does not have a bug)
|
|
161
210
|
and node.metric is not None # has metric
|
|
162
211
|
]
|
|
163
212
|
if len(measured_nodes) == 0:
|
|
@@ -247,8 +296,8 @@ class MetricTreePanel:
|
|
|
247
296
|
append_rec(child, subtree)
|
|
248
297
|
|
|
249
298
|
tree = Tree("", hide_root=True)
|
|
250
|
-
|
|
251
|
-
|
|
299
|
+
root_node = self.metric_tree.get_root_node()
|
|
300
|
+
append_rec(node=root_node, tree=tree)
|
|
252
301
|
|
|
253
302
|
return tree
|
|
254
303
|
|
|
@@ -286,7 +335,7 @@ class EvaluationOutputPanel:
|
|
|
286
335
|
class SolutionPanels:
|
|
287
336
|
"""Displays the current and best solutions side by side."""
|
|
288
337
|
|
|
289
|
-
def __init__(self, metric_name: str, source_fp:
|
|
338
|
+
def __init__(self, metric_name: str, source_fp: Path):
|
|
290
339
|
# Current solution
|
|
291
340
|
self.current_node = None
|
|
292
341
|
# Best solution
|
|
@@ -296,7 +345,7 @@ class SolutionPanels:
|
|
|
296
345
|
# Determine the lexer for the source file
|
|
297
346
|
self.lexer = self._determine_lexer(source_fp)
|
|
298
347
|
|
|
299
|
-
def _determine_lexer(self, source_fp:
|
|
348
|
+
def _determine_lexer(self, source_fp: Path) -> str:
|
|
300
349
|
"""Determine the lexer for the source file."""
|
|
301
350
|
return Syntax.from_path(source_fp).lexer
|
|
302
351
|
|
|
@@ -346,10 +395,7 @@ def create_optimization_layout() -> Layout:
|
|
|
346
395
|
)
|
|
347
396
|
|
|
348
397
|
# Split the top section into left and right
|
|
349
|
-
layout["top_section"].split_row(Layout(name="
|
|
350
|
-
|
|
351
|
-
# Split the left panels into summary and thinking
|
|
352
|
-
layout["left_panels"].split_column(Layout(name="summary", ratio=2), Layout(name="plan", ratio=1))
|
|
398
|
+
layout["top_section"].split_row(Layout(name="summary", ratio=1), Layout(name="tree", ratio=1))
|
|
353
399
|
|
|
354
400
|
# Split the middle section into left and right
|
|
355
401
|
layout["middle_section"].split_row(Layout(name="current_solution", ratio=1), Layout(name="best_solution", ratio=1))
|
weco/utils.py
CHANGED
|
@@ -45,7 +45,7 @@ def determine_default_model(llm_api_keys: Dict[str, Any]) -> str:
|
|
|
45
45
|
return "gemini-2.5-pro"
|
|
46
46
|
else:
|
|
47
47
|
raise ValueError(
|
|
48
|
-
"No LLM API keys found in environment. Please set one of the following: OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY."
|
|
48
|
+
"No LLM API keys found in environment variables. Please set one of the following: OPENAI_API_KEY, ANTHROPIC_API_KEY, or GEMINI_API_KEY based on your model of choice."
|
|
49
49
|
)
|
|
50
50
|
|
|
51
51
|
|
|
@@ -84,7 +84,7 @@ def write_to_path(fp: pathlib.Path, content: Union[str, Dict[str, Any]], is_json
|
|
|
84
84
|
elif isinstance(content, str):
|
|
85
85
|
f.write(content)
|
|
86
86
|
else:
|
|
87
|
-
raise TypeError("
|
|
87
|
+
raise TypeError("Error writing to file. Please verify the file path and try again.")
|
|
88
88
|
|
|
89
89
|
|
|
90
90
|
# Visualization helper functions
|
|
@@ -124,19 +124,54 @@ def smooth_update(
|
|
|
124
124
|
|
|
125
125
|
|
|
126
126
|
# Other helper functions
|
|
127
|
-
|
|
127
|
+
DEFAULT_MAX_LINES = 50
|
|
128
|
+
DEFAULT_MAX_CHARS = 5000
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def truncate_output(output: str, max_lines: int = DEFAULT_MAX_LINES, max_chars: int = DEFAULT_MAX_CHARS) -> str:
|
|
132
|
+
"""Truncate the output to a reasonable size."""
|
|
133
|
+
lines = output.splitlines()
|
|
134
|
+
|
|
135
|
+
# Determine what truncations are needed based on original output
|
|
136
|
+
lines_truncated = len(lines) > max_lines
|
|
137
|
+
chars_truncated = len(output) > max_chars
|
|
138
|
+
|
|
139
|
+
# Apply truncations to the original output
|
|
140
|
+
if lines_truncated:
|
|
141
|
+
output = "\n".join(lines[-max_lines:])
|
|
142
|
+
|
|
143
|
+
if chars_truncated:
|
|
144
|
+
output = output[-max_chars:]
|
|
145
|
+
|
|
146
|
+
# Add prefixes for truncations that were applied
|
|
147
|
+
prefixes = []
|
|
148
|
+
if lines_truncated:
|
|
149
|
+
prefixes.append(f"truncated to last {max_lines} lines")
|
|
150
|
+
if chars_truncated:
|
|
151
|
+
prefixes.append(f"truncated to last {max_chars} characters")
|
|
152
|
+
|
|
153
|
+
if prefixes:
|
|
154
|
+
prefix_text = ", ".join(prefixes)
|
|
155
|
+
output = f"... ({prefix_text})\n{output}"
|
|
156
|
+
|
|
157
|
+
return output
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def run_evaluation(eval_command: str, timeout: int | None = None) -> str:
|
|
128
161
|
"""Run the evaluation command on the code and return the output."""
|
|
129
162
|
|
|
130
163
|
# Run the eval command as is
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
164
|
+
try:
|
|
165
|
+
result = subprocess.run(eval_command, shell=True, capture_output=True, text=True, check=False, timeout=timeout)
|
|
166
|
+
# Combine stdout and stderr for complete output
|
|
167
|
+
output = result.stderr if result.stderr else ""
|
|
168
|
+
if result.stdout:
|
|
169
|
+
if len(output) > 0:
|
|
170
|
+
output += "\n"
|
|
171
|
+
output += result.stdout
|
|
172
|
+
return truncate_output(output)
|
|
173
|
+
except subprocess.TimeoutExpired:
|
|
174
|
+
return f"Evaluation timed out after {'an unspecified duration' if timeout is None else f'{timeout} seconds'}."
|
|
140
175
|
|
|
141
176
|
|
|
142
177
|
# Update Check Function
|