weco 0.3.5__tar.gz → 0.3.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {weco-0.3.5 → weco-0.3.7}/PKG-INFO +4 -1
- {weco-0.3.5 → weco-0.3.7}/README.md +2 -0
- {weco-0.3.5 → weco-0.3.7}/examples/extract-line-plot/README.md +26 -6
- {weco-0.3.5 → weco-0.3.7}/examples/extract-line-plot/eval.py +47 -11
- weco-0.3.7/examples/extract-line-plot/optimize.py +97 -0
- {weco-0.3.5 → weco-0.3.7}/pyproject.toml +3 -3
- weco-0.3.7/tests/__init__.py +1 -0
- weco-0.3.7/tests/test_byok.py +192 -0
- weco-0.3.7/tests/test_cli.py +70 -0
- {weco-0.3.5 → weco-0.3.7}/weco/api.py +31 -172
- {weco-0.3.5 → weco-0.3.7}/weco/cli.py +104 -90
- {weco-0.3.5 → weco-0.3.7}/weco/constants.py +3 -3
- {weco-0.3.5 → weco-0.3.7}/weco/optimizer.py +20 -9
- {weco-0.3.5 → weco-0.3.7}/weco/utils.py +33 -5
- {weco-0.3.5 → weco-0.3.7}/weco.egg-info/PKG-INFO +4 -1
- {weco-0.3.5 → weco-0.3.7}/weco.egg-info/SOURCES.txt +3 -1
- {weco-0.3.5 → weco-0.3.7}/weco.egg-info/requires.txt +1 -0
- weco-0.3.5/examples/extract-line-plot/optimize.py +0 -116
- weco-0.3.5/weco/chatbot.py +0 -827
- {weco-0.3.5 → weco-0.3.7}/.github/workflows/lint.yml +0 -0
- {weco-0.3.5 → weco-0.3.7}/.github/workflows/release.yml +0 -0
- {weco-0.3.5 → weco-0.3.7}/.gitignore +0 -0
- {weco-0.3.5 → weco-0.3.7}/LICENSE +0 -0
- {weco-0.3.5 → weco-0.3.7}/assets/example-optimization.gif +0 -0
- {weco-0.3.5 → weco-0.3.7}/assets/weco.svg +0 -0
- {weco-0.3.5 → weco-0.3.7}/contributing.md +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/README.md +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/cuda/README.md +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/cuda/evaluate.py +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/cuda/module.py +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/cuda/requirements.txt +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/extract-line-plot/guide.md +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/extract-line-plot/prepare_data.py +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/extract-line-plot/pyproject.toml +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/hello-world/README.md +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/hello-world/colab_notebook_walkthrough.ipynb +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/hello-world/evaluate.py +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/hello-world/module.py +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/hello-world/requirements.txt +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/prompt/README.md +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/prompt/eval.py +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/prompt/optimize.py +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/prompt/prompt_guide.md +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/spaceship-titanic/README.md +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/spaceship-titanic/competition_description.md +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/spaceship-titanic/data/sample_submission.csv +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/spaceship-titanic/data/test.csv +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/spaceship-titanic/data/train.csv +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/spaceship-titanic/evaluate.py +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/spaceship-titanic/train.py +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/triton/README.md +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/triton/evaluate.py +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/triton/module.py +0 -0
- {weco-0.3.5 → weco-0.3.7}/examples/triton/requirements.txt +0 -0
- {weco-0.3.5 → weco-0.3.7}/setup.cfg +0 -0
- {weco-0.3.5 → weco-0.3.7}/weco/__init__.py +0 -0
- {weco-0.3.5 → weco-0.3.7}/weco/auth.py +0 -0
- {weco-0.3.5 → weco-0.3.7}/weco/credits.py +0 -0
- {weco-0.3.5 → weco-0.3.7}/weco/panels.py +0 -0
- {weco-0.3.5 → weco-0.3.7}/weco.egg-info/dependency_links.txt +0 -0
- {weco-0.3.5 → weco-0.3.7}/weco.egg-info/entry_points.txt +0 -0
- {weco-0.3.5 → weco-0.3.7}/weco.egg-info/top_level.txt +0 -0
{weco-0.3.5 → weco-0.3.7}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: weco
-Version: 0.3.5
+Version: 0.3.7
 Summary: Documentation for `weco`, a CLI for using Weco AI's code optimizer.
 Author-email: Weco AI Team <contact@weco.ai>
 License:
@@ -224,6 +224,7 @@ Provides-Extra: dev
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: build; extra == "dev"
 Requires-Dist: setuptools_scm; extra == "dev"
+Requires-Dist: pytest>=7.0.0; extra == "dev"
 Dynamic: license-file
 
 <div align="center">
@@ -323,6 +324,7 @@ For more advanced examples, including [Triton](/examples/triton/README.md), [CUD
 | `--eval-timeout` | Timeout in seconds for each step in evaluation. | No timeout (unlimited) | `--eval-timeout 3600` |
 | `--save-logs` | Save execution output from each optimization step to disk. Creates timestamped directories with raw output files and a JSONL index for tracking execution history. | `False` | `--save-logs` |
 | `--apply-change` | Automatically apply the best solution to the source file without prompting. | `False` | `--apply-change` |
+| `--api-key` | API keys for LLM providers (BYOK). Format: `provider=key`. Can specify multiple providers. | `None` | `--api-key openai=sk-xxx` |
 
 ---
 
@@ -377,6 +379,7 @@ Arguments for `weco resume`:
 |----------|-------------|---------|
 | `run-id` | The UUID of the run to resume (shown at the start of each run) | `0002e071-1b67-411f-a514-36947f0c4b31` |
 | `--apply-change` | Automatically apply the best solution to the source file without prompting | `--apply-change` |
+| `--api-key` | (Optional) API keys for LLM providers (BYOK). Format: `provider=key` | `--api-key openai=sk-xxx` |
 
 Notes:
 - Works only for interrupted runs (status: `error`, `terminated`, etc.).
````
{weco-0.3.5 → weco-0.3.7}/README.md

````diff
@@ -95,6 +95,7 @@ For more advanced examples, including [Triton](/examples/triton/README.md), [CUD
 | `--eval-timeout` | Timeout in seconds for each step in evaluation. | No timeout (unlimited) | `--eval-timeout 3600` |
 | `--save-logs` | Save execution output from each optimization step to disk. Creates timestamped directories with raw output files and a JSONL index for tracking execution history. | `False` | `--save-logs` |
 | `--apply-change` | Automatically apply the best solution to the source file without prompting. | `False` | `--apply-change` |
+| `--api-key` | API keys for LLM providers (BYOK). Format: `provider=key`. Can specify multiple providers. | `None` | `--api-key openai=sk-xxx` |
 
 ---
 
@@ -149,6 +150,7 @@ Arguments for `weco resume`:
 |----------|-------------|---------|
 | `run-id` | The UUID of the run to resume (shown at the start of each run) | `0002e071-1b67-411f-a514-36947f0c4b31` |
 | `--apply-change` | Automatically apply the best solution to the source file without prompting | `--apply-change` |
+| `--api-key` | (Optional) API keys for LLM providers (BYOK). Format: `provider=key` | `--api-key openai=sk-xxx` |
 
 Notes:
 - Works only for interrupted runs (status: `error`, `terminated`, etc.).
````
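Both tables document the same `provider=key` format. Judging by the tests added in this release (`tests/test_cli.py`, reproduced near the end of this diff), repeated `--api-key` flags merge into one provider→key mapping, with the provider name lowercased and the value split on the first `=` only, so keys that themselves contain `=` survive intact. A minimal sketch of that merge:

```python
# Sketch only: how repeated --api-key flags are expected to combine,
# matching the behavior pinned down by tests/test_cli.py below.
flags = ["OpenAI=sk-xxx", "anthropic=sk-ant-yyy", "openai=sk-zzz"]

keys = {}
for flag in flags:
    provider, _, key = flag.partition("=")  # split on the first "=" only
    keys[provider.strip().lower()] = key.strip()

print(keys)  # {'openai': 'sk-zzz', 'anthropic': 'sk-ant-yyy'}
```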
{weco-0.3.5 → weco-0.3.7}/examples/extract-line-plot/README.md

````diff
@@ -1,6 +1,6 @@
-## Extract Line Plot (Chart → CSV)
+## Extract Line Plot (Chart → CSV): Accuracy/Cost Optimization for Agentic Workflow
 
-This example
+This example demonstrates optimizing an AI feature that turns chart images into CSV tables, showcasing how to use Weco to improve accuracy or reduce cost of a VLM-based extraction workflow.
 
 ### Prerequisites
 
@@ -15,8 +15,9 @@ export OPENAI_API_KEY=your_key_here
 ### Files
 
 - `prepare_data.py`: downloads ChartQA (full) and prepares a 100-sample subset of line charts.
-- `optimize.py`:
+- `optimize.py`: exposes `extract_csv(image_path)` which returns CSV text plus the per-call cost (helpers stay private).
 - `eval.py`: evaluation harness that runs the baseline on images and reports a similarity score as "accuracy".
+- `guide.md`: optional additional instructions you can feed to Weco via `--additional-instructions guide.md`.
 
 Generated artifacts (gitignored):
 - `subset_line_100/` and `subset_line_100.zip`
@@ -47,12 +48,21 @@ Metric definition (summarized):
 - Per-sample score = 0.2 × header match + 0.8 × Jaccard(similarity of content rows).
 - Reported `accuracy` is the mean score over all evaluated samples.
 
+To emit a secondary `cost` metric that Weco can minimize (while enforcing `accuracy > 0.45`), append `--cost-metric`:
+
+```bash
+uv run --with openai python eval.py --max-samples 10 --num-workers 4 --cost-metric
+```
+
+If the final accuracy falls at or below `0.45`, the reported cost is replaced with a large penalty so Weco keeps searching for higher-accuracy solutions.
+You can tighten or relax this constraint with `--cost-accuracy-threshold`, e.g. `--cost-accuracy-threshold 0.50`.
+
 ### 3) Optimize the baseline with Weco
 
 Run Weco to iteratively improve `optimize.py` using 100 examples and many workers:
 
 ```bash
-weco run --source optimize.py --eval-command 'uv run --with openai python eval.py --max-samples 100 --num-workers 50' --metric accuracy --goal maximize --steps 20 --model gpt-5
+weco run --source optimize.py --eval-command 'uv run --with openai python eval.py --max-samples 100 --num-workers 50' --metric accuracy --goal maximize --steps 20 --model gpt-5 --additional-instructions guide.md
 ```
 
 Arguments:
````
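The per-sample metric in the hunk above (0.2 × header match + 0.8 × Jaccard over content rows) is easy to sanity-check by hand. A minimal sketch, assuming an exact-match header test and set-based Jaccard over rows; `eval.py`'s actual canonicalization of cells may differ:

```python
def jaccard(a: set, b: set) -> float:
    # |A ∩ B| / |A ∪ B|; define two empty sets as perfectly similar.
    return len(a & b) / len(a | b) if (a | b) else 1.0


def per_sample_score(gt_header, pred_header, gt_rows, pred_rows) -> float:
    # 0.2 × header match + 0.8 × Jaccard(content rows), per the README.
    header_match = 1.0 if gt_header == pred_header else 0.0
    return 0.2 * header_match + 0.8 * jaccard(set(gt_rows), set(pred_rows))


# Matching header, 3 rows shared out of a union of 5:
# 0.2 * 1.0 + 0.8 * (3 / 5) ≈ 0.68
print(per_sample_score(("Year", "Sales"), ("Year", "Sales"),
                       {"2001,10", "2002,12", "2003,14", "2004,16"},
                       {"2001,10", "2002,12", "2003,14", "2004,99"}))
```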
{weco-0.3.5 → weco-0.3.7}/examples/extract-line-plot/README.md (continued)

````diff
@@ -63,10 +73,20 @@ Arguments:
 - `--steps 20`: number of optimization iterations.
 - `--model gpt-5`: model used by Weco to propose edits (change as desired).
 
+To minimize cost instead (subject to the accuracy constraint), enable the flag in the eval command and switch the optimization target:
+
+```bash
+weco run --source optimize.py --eval-command 'uv run --with openai python eval.py --max-samples 100 --num-workers 50 --cost-metric' --metric cost --goal minimize --steps 20 --model gpt-5 --additional-instructions guide.md
+```
+
+#### Cost optimization workflow
+- Run the evaluation command with `--cost-metric` once to confirm accuracy meets your threshold and note the baseline cost.
+- Adjust `--cost-accuracy-threshold` if you want to tighten or relax the constraint before launching optimization.
+- Kick off Weco with `--metric cost --goal minimize --additional-instructions guide.md` so the optimizer respects the constraint while acting on the extra tips.
+
 ### Tips
 
 - Ensure your OpenAI key has access to a vision-capable model (default: `gpt-4o-mini` in the eval; change via `--model`).
 - Adjust `--num-workers` to balance throughput and rate limits.
 - You can tweak baseline behavior in `optimize.py` (prompt, temperature) — Weco will explore modifications automatically during optimization.
-
-
+- Include `--additional-instructions guide.md` whenever you run Weco so those cost-conscious hints influence the generated proposals.
````
{weco-0.3.5 → weco-0.3.7}/examples/extract-line-plot/eval.py

````diff
@@ -8,7 +8,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple
 
-from optimize import
+from optimize import extract_csv
 
 try:
     import matplotlib
@@ -18,6 +18,9 @@ try:
 except Exception:  # pragma: no cover - optional dependency
     plt = None
 
+COST_ACCURACY_THRESHOLD_DEFAULT = 0.45
+COST_CONSTRAINT_PENALTY = 1_000_000.0
+
 
 def read_index(index_csv_path: Path) -> List[Tuple[str, Path, Path]]:
     rows: List[Tuple[str, Path, Path]] = []
@@ -259,14 +262,14 @@ def evaluate_predictions(gt_csv_path: Path, pred_csv_path: Path) -> float:
 
 
 def process_one(
-
-) -> Tuple[str, float, Path, Path]:
+    base_dir: Path, example_id: str, image_rel: Path, gt_table_rel: Path, output_dir: Path
+) -> Tuple[str, float, Path, Path, float]:
     image_path = base_dir / image_rel
     gt_csv_path = base_dir / gt_table_rel
-    pred_csv_text =
+    pred_csv_text, cost_usd = extract_csv(image_path)
     pred_path = write_csv(output_dir, example_id, pred_csv_text)
     score = evaluate_predictions(gt_csv_path, pred_path)
-    return example_id, score, pred_path, gt_csv_path
+    return example_id, score, pred_path, gt_csv_path, cost_usd
 
 
 def main() -> None:
@@ -276,6 +279,20 @@ def main() -> None:
     parser.add_argument("--out-dir", type=str, default="predictions")
     parser.add_argument("--max-samples", type=int, default=100)
     parser.add_argument("--num-workers", type=int, default=4)
+    parser.add_argument(
+        "--cost-metric",
+        action="store_true",
+        help=(
+            "When set, also report a `cost:` metric suitable for Weco minimization. "
+            "Requires final accuracy to exceed --cost-accuracy-threshold; otherwise a large penalty is reported."
+        ),
+    )
+    parser.add_argument(
+        "--cost-accuracy-threshold",
+        type=float,
+        default=COST_ACCURACY_THRESHOLD_DEFAULT,
+        help="Minimum accuracy required when --cost-metric is set (default: 0.45).",
+    )
     parser.add_argument(
         "--visualize-dir",
         type=str,
@@ -307,7 +324,6 @@ def main() -> None:
         sys.exit(1)
 
     rows = read_index(index_path)[: args.max_samples]
-    extractor = VLMExtractor()
 
     visualize_dir: Optional[Path] = Path(args.visualize_dir) if args.visualize_dir else None
     visualize_max = max(0, args.visualize_max)
@@ -315,22 +331,24 @@ def main() -> None:
         print("[warn] matplotlib not available; skipping visualization.", file=sys.stderr)
         visualize_dir = None
 
-    print(f"[setup] evaluating {len(rows)} samples
+    print(f"[setup] evaluating {len(rows)} samples …", flush=True)
    start = time.time()
    scores: List[float] = []
+    costs: List[float] = []
    saved_visualizations = 0

    with ThreadPoolExecutor(max_workers=max(1, args.num_workers)) as pool:
        futures = [
-            pool.submit(process_one,
+            pool.submit(process_one, base_dir, example_id, image_rel, gt_table_rel, Path(args.out_dir))
            for (example_id, image_rel, gt_table_rel) in rows
        ]

        try:
            for idx, fut in enumerate(as_completed(futures), 1):
                try:
-                    example_id, score, pred_path, gt_csv_path = fut.result()
+                    example_id, score, pred_path, gt_csv_path, cost_usd = fut.result()
                    scores.append(score)
+                    costs.append(cost_usd)
                    if visualize_dir and (visualize_max == 0 or saved_visualizations < visualize_max):
                        out_path = visualize_difference(
                            gt_csv_path,
@@ -346,7 +364,11 @@ def main() -> None:
                    if idx % 5 == 0 or idx == len(rows):
                        elapsed = time.time() - start
                        avg = sum(scores) / len(scores) if scores else 0.0
-
+                        avg_cost = sum(costs) / len(costs) if costs else 0.0
+                        print(
+                            f"[progress] {idx}/{len(rows)} done, avg score: {avg:.4f}, avg cost: ${avg_cost:.4f}, elapsed {elapsed:.1f}s",
+                            flush=True,
+                        )
                except Exception as e:
                    print(f"[error] failed on sample {idx}: {e}", file=sys.stderr)
        except KeyboardInterrupt:
@@ -356,7 +378,7 @@ def main() -> None:
    final_score = sum(scores) / len(scores) if scores else 0.0

    # Apply cost cap: accuracy is zeroed if average cost/query exceeds $0.02
-    avg_cost_per_query = (
+    avg_cost_per_query = (sum(costs) / len(costs)) if costs else 0.0
    if avg_cost_per_query > 0.02:
        print(f"[cost] avg ${avg_cost_per_query:.4f}/query exceeds $0.02 cap; accuracy set to 0.0", flush=True)
        final_score = 0.0
@@ -365,6 +387,20 @@ def main() -> None:
 
     print(f"accuracy: {final_score:.4f}")
 
+    if args.cost_metric:
+        if final_score > args.cost_accuracy_threshold:
+            reported_cost = avg_cost_per_query
+        else:
+            print(
+                (
+                    f"[constraint] accuracy {final_score:.4f} <= "
+                    f"threshold {args.cost_accuracy_threshold:.2f}; reporting penalty ${COST_CONSTRAINT_PENALTY:.1f}"
+                ),
+                flush=True,
+            )
+            reported_cost = COST_CONSTRAINT_PENALTY
+        print(f"cost: {reported_cost:.6f}")
+
 
 if __name__ == "__main__":
     main()
````
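After these changes the harness's stdout contract is a pair of plain `name: value` lines, which is the value the `--metric accuracy` or `--metric cost` flag selects between. With `--cost-metric` set and the constraint satisfied, the tail of a run would look like this (numbers illustrative):

```text
accuracy: 0.4821
cost: 0.003210
```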
weco-0.3.7/examples/extract-line-plot/optimize.py (new file)

````diff
@@ -0,0 +1,97 @@
+"""
+optimize.py
+
+Exposes a single public entry point `extract_csv` that turns a chart image into CSV text.
+All helper utilities remain private to this module.
+"""
+
+import base64
+from pathlib import Path
+from typing import Optional, Tuple
+
+from openai import OpenAI
+
+__all__ = ["extract_csv"]
+
+_DEFAULT_MODEL = "gpt-4o-mini"
+_CLIENT = OpenAI()
+
+
+def _build_prompt() -> str:
+    return (
+        "You are a precise data extraction model. Given a chart image, extract the underlying data table.\n"
+        "Return ONLY the CSV text with a header row and no markdown code fences.\n"
+        "Rules:\n"
+        "- The first column must be the x-axis values with its exact axis label as the header.\n"
+        "- Include one column per data series using the legend labels as headers.\n"
+        "- Preserve the original order of x-axis ticks as they appear.\n"
+        "- Use plain CSV (comma-separated), no explanations, no extra text.\n"
+    )
+
+
+def _image_to_data_uri(image_path: Path) -> str:
+    mime = "image/png" if image_path.suffix.lower() == ".png" else "image/jpeg"
+    data = image_path.read_bytes()
+    b64 = base64.b64encode(data).decode("ascii")
+    return f"data:{mime};base64,{b64}"
+
+
+def _clean_to_csv(text: str) -> str:
+    return text.strip()
+
+
+def _pricing_for_model(model_name: str) -> dict:
+    """Return pricing information for the given model in USD per token."""
+    name = (model_name or "").lower()
+    per_million = {
+        "gpt-5": {"in": 1.250, "in_cached": 0.125, "out": 10.000},
+        "gpt-5-mini": {"in": 0.250, "in_cached": 0.025, "out": 2.000},
+        "gpt-5-nano": {"in": 0.050, "in_cached": 0.005, "out": 0.400},
+    }
+    if name.startswith("gpt-5-nano"):
+        chosen = per_million["gpt-5-nano"]
+    elif name.startswith("gpt-5-mini"):
+        chosen = per_million["gpt-5-mini"]
+    elif name.startswith("gpt-5"):
+        chosen = per_million["gpt-5"]
+    else:
+        chosen = per_million["gpt-5-mini"]
+    return {k: v / 1_000_000.0 for k, v in chosen.items()}
+
+
+def extract_csv(image_path: Path, model: Optional[str] = None) -> Tuple[str, float]:
+    """
+    Extract CSV text from an image and return (csv_text, cost_usd).
+
+    The caller can optionally override the model name; otherwise the default is used.
+    """
+    effective_model = model or _DEFAULT_MODEL
+    prompt = _build_prompt()
+    image_uri = _image_to_data_uri(image_path)
+    response = _CLIENT.chat.completions.create(
+        model=effective_model,
+        messages=[
+            {
+                "role": "user",
+                "content": [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": image_uri}}],
+            }
+        ],
+    )
+
+    usage = getattr(response, "usage", None)
+    cost_usd = 0.0
+    if usage is not None:
+        prompt_tokens = int(getattr(usage, "prompt_tokens", 0) or 0)
+        completion_tokens = int(getattr(usage, "completion_tokens", 0) or 0)
+        details = getattr(usage, "prompt_tokens_details", None)
+        cached_tokens = 0
+        if details is not None:
+            cached_tokens = int(getattr(details, "cached_tokens", 0) or 0)
+        non_cached_prompt_tokens = max(0, prompt_tokens - cached_tokens)
+        rates = _pricing_for_model(effective_model)
+        cost_usd = (
+            non_cached_prompt_tokens * rates["in"] + cached_tokens * rates["in_cached"] + completion_tokens * rates["out"]
+        )
+
+    text = response.choices[0].message.content or ""
+    return _clean_to_csv(text), cost_usd
````
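Two things are worth noting in the pricing helper above: rates are converted from USD per million tokens to USD per token, and any model outside the `gpt-5` family — including the `gpt-4o-mini` default — falls back to the `gpt-5-mini` rates, so the reported cost for the default model is an approximation. A worked example of the arithmetic in `extract_csv`:

```python
# Worked example of the cost formula in extract_csv, at gpt-5-mini rates
# (USD per million tokens: in=0.250, in_cached=0.025, out=2.000).
prompt_tokens, cached_tokens, completion_tokens = 1_200, 200, 300
non_cached = max(0, prompt_tokens - cached_tokens)  # 1,000 billable input tokens

cost_usd = (
    non_cached * (0.250 / 1e6)            # 1000 × $0.00000025  = $0.000250
    + cached_tokens * (0.025 / 1e6)       # 200  × $0.000000025 = $0.000005
    + completion_tokens * (2.000 / 1e6)   # 300  × $0.000002    = $0.000600
)
print(f"${cost_usd:.6f}")  # $0.000855
```

At these rates a single call stays well under the $0.02/query cap that `eval.py` enforces.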
{weco-0.3.5 → weco-0.3.7}/pyproject.toml

````diff
@@ -8,7 +8,7 @@ name = "weco"
 authors = [{ name = "Weco AI Team", email = "contact@weco.ai" }]
 description = "Documentation for `weco`, a CLI for using Weco AI's code optimizer."
 readme = "README.md"
-version = "0.3.5"
+version = "0.3.7"
 license = { file = "LICENSE" }
 requires-python = ">=3.8"
 dependencies = [
@@ -18,7 +18,7 @@ dependencies = [
     "gitingest",
     "fastapi",
     "slowapi",
-    "psutil"
+    "psutil"
 ]
 keywords = ["AI", "Code Optimization", "Code Generation"]
 classifiers = [
@@ -34,7 +34,7 @@ weco = "weco.cli:main"
 Homepage = "https://github.com/WecoAI/weco-cli"
 
 [project.optional-dependencies]
-dev = ["ruff", "build", "setuptools_scm"]
+dev = ["ruff", "build", "setuptools_scm", "pytest>=7.0.0"]
 
 [tool.setuptools]
 packages = ["weco"]
````
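With the new `dev` extra installed (for example via `pip install -e ".[dev]"`), the `tests/` package introduced below runs under plain `pytest`; the `pytest>=7.0.0` pin here matches the `Requires-Dist` line added to PKG-INFO above.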
weco-0.3.7/tests/__init__.py (new file)

````diff
@@ -0,0 +1 @@
+"""Tests for weco CLI."""
````
weco-0.3.7/tests/test_byok.py (new file)

````diff
@@ -0,0 +1,192 @@
+"""Tests to verify API keys are correctly passed through the system and sent to the API."""
+
+import pytest
+from unittest.mock import patch, MagicMock
+from rich.console import Console
+
+from weco.api import start_optimization_run, evaluate_feedback_then_suggest_next_solution
+
+
+class TestApiKeysInStartOptimizationRun:
+    """Test that api_keys are correctly included in start_optimization_run requests."""
+
+    @pytest.fixture
+    def mock_console(self):
+        """Create a mock console for testing."""
+        return MagicMock(spec=Console)
+
+    @pytest.fixture
+    def base_params(self, mock_console):
+        """Base parameters for start_optimization_run."""
+        return {
+            "console": mock_console,
+            "source_code": "print('hello')",
+            "source_path": "test.py",
+            "evaluation_command": "python test.py",
+            "metric_name": "accuracy",
+            "maximize": True,
+            "steps": 10,
+            "code_generator_config": {"model": "o4-mini"},
+            "evaluator_config": {"model": "o4-mini"},
+            "search_policy_config": {"num_drafts": 2},
+        }
+
+    @patch("weco.api.requests.post")
+    def test_api_keys_included_in_request(self, mock_post, base_params):
+        """Test that api_keys are included in the request JSON when provided."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "run_id": "test-run-id",
+            "solution_id": "test-solution-id",
+            "code": "print('hello')",
+            "plan": "test plan",
+        }
+        mock_response.raise_for_status = MagicMock()
+        mock_post.return_value = mock_response
+
+        api_keys = {"openai": "sk-test-key", "anthropic": "sk-ant-test"}
+        start_optimization_run(**base_params, api_keys=api_keys)
+
+        # Verify the request was made with api_keys in the JSON payload
+        mock_post.assert_called_once()
+        call_kwargs = mock_post.call_args
+        request_json = call_kwargs.kwargs["json"]
+        assert "api_keys" in request_json
+        assert request_json["api_keys"] == {"openai": "sk-test-key", "anthropic": "sk-ant-test"}
+
+    @patch("weco.api.requests.post")
+    def test_api_keys_not_included_when_none(self, mock_post, base_params):
+        """Test that api_keys field is not included when api_keys is None."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "run_id": "test-run-id",
+            "solution_id": "test-solution-id",
+            "code": "print('hello')",
+            "plan": "test plan",
+        }
+        mock_response.raise_for_status = MagicMock()
+        mock_post.return_value = mock_response
+
+        start_optimization_run(**base_params, api_keys=None)
+
+        # Verify the request was made without api_keys
+        mock_post.assert_called_once()
+        call_kwargs = mock_post.call_args
+        request_json = call_kwargs.kwargs["json"]
+        assert "api_keys" not in request_json
+
+    @patch("weco.api.requests.post")
+    def test_api_keys_not_included_when_empty_dict(self, mock_post, base_params):
+        """Test that api_keys field is not included when api_keys is an empty dict."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "run_id": "test-run-id",
+            "solution_id": "test-solution-id",
+            "code": "print('hello')",
+            "plan": "test plan",
+        }
+        mock_response.raise_for_status = MagicMock()
+        mock_post.return_value = mock_response
+
+        # Empty dict is falsy, so api_keys should not be included
+        start_optimization_run(**base_params, api_keys={})
+
+        mock_post.assert_called_once()
+        call_kwargs = mock_post.call_args
+        request_json = call_kwargs.kwargs["json"]
+        assert "api_keys" not in request_json
+
+
+class TestApiKeysInEvaluateFeedbackThenSuggest:
+    """Test that api_keys are correctly included in evaluate_feedback_then_suggest_next_solution requests."""
+
+    @pytest.fixture
+    def mock_console(self):
+        """Create a mock console for testing."""
+        return MagicMock(spec=Console)
+
+    @patch("weco.api.requests.post")
+    def test_api_keys_included_in_suggest_request(self, mock_post, mock_console):
+        """Test that api_keys are included in the suggest request JSON when provided."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "run_id": "test-run-id",
+            "solution_id": "new-solution-id",
+            "code": "print('improved')",
+            "plan": "improvement plan",
+            "is_done": False,
+        }
+        mock_response.raise_for_status = MagicMock()
+        mock_post.return_value = mock_response
+
+        api_keys = {"openai": "sk-test-key"}
+        evaluate_feedback_then_suggest_next_solution(
+            console=mock_console,
+            run_id="test-run-id",
+            step=1,
+            execution_output="accuracy: 0.95",
+            auth_headers={"Authorization": "Bearer test-token"},
+            api_keys=api_keys,
+        )
+
+        mock_post.assert_called_once()
+        call_kwargs = mock_post.call_args
+        request_json = call_kwargs.kwargs["json"]
+        assert "api_keys" in request_json
+        assert request_json["api_keys"] == {"openai": "sk-test-key"}
+
+    @patch("weco.api.requests.post")
+    def test_api_keys_not_included_in_suggest_when_none(self, mock_post, mock_console):
+        """Test that api_keys field is not included in suggest request when api_keys is None."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "run_id": "test-run-id",
+            "solution_id": "new-solution-id",
+            "code": "print('improved')",
+            "plan": "improvement plan",
+            "is_done": False,
+        }
+        mock_response.raise_for_status = MagicMock()
+        mock_post.return_value = mock_response
+
+        evaluate_feedback_then_suggest_next_solution(
+            console=mock_console,
+            run_id="test-run-id",
+            step=1,
+            execution_output="accuracy: 0.95",
+            auth_headers={"Authorization": "Bearer test-token"},
+            api_keys=None,
+        )
+
+        mock_post.assert_called_once()
+        call_kwargs = mock_post.call_args
+        request_json = call_kwargs.kwargs["json"]
+        assert "api_keys" not in request_json
+
+    @patch("weco.api.requests.post")
+    def test_api_keys_not_included_in_suggest_when_empty_dict(self, mock_post, mock_console):
+        """Test that api_keys field is not included in suggest request when api_keys is an empty dict."""
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "run_id": "test-run-id",
+            "solution_id": "new-solution-id",
+            "code": "print('improved')",
+            "plan": "improvement plan",
+            "is_done": False,
+        }
+        mock_response.raise_for_status = MagicMock()
+        mock_post.return_value = mock_response
+
+        evaluate_feedback_then_suggest_next_solution(
+            console=mock_console,
+            run_id="test-run-id",
+            step=1,
+            execution_output="accuracy: 0.95",
+            auth_headers={"Authorization": "Bearer test-token"},
+            api_keys={},
+        )
+
+        mock_post.assert_called_once()
+        call_kwargs = mock_post.call_args
+        request_json = call_kwargs.kwargs["json"]
+        assert "api_keys" not in request_json
````
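All six tests assert a single payload rule: `api_keys` appears in the request JSON only when a non-empty mapping is supplied; `None` and `{}` both leave the field out. A minimal client-side sketch that would satisfy them (the URL and surrounding payload fields here are illustrative, not the actual `weco.api` internals):

```python
import requests


def post_with_optional_keys(url, payload, auth_headers, api_keys=None):
    # BYOK keys are attached only when truthy; None and {} are both
    # omitted entirely, which is exactly what the tests above assert.
    if api_keys:
        payload["api_keys"] = api_keys
    response = requests.post(url, json=payload, headers=auth_headers)
    response.raise_for_status()
    return response.json()
```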
weco-0.3.7/tests/test_cli.py (new file)

````diff
@@ -0,0 +1,70 @@
+"""Tests for CLI functions, particularly parse_api_keys."""
+
+import pytest
+from weco.cli import parse_api_keys
+
+
+class TestParseApiKeys:
+    """Test cases for parse_api_keys function."""
+
+    def test_parse_api_keys_none(self):
+        """Test that None input returns empty dict."""
+        result = parse_api_keys(None)
+        assert result == {}
+        assert isinstance(result, dict)
+
+    def test_parse_api_keys_empty_list(self):
+        """Test that empty list returns empty dict."""
+        result = parse_api_keys([])
+        assert result == {}
+        assert isinstance(result, dict)
+
+    def test_parse_api_keys_single_key(self):
+        """Test parsing a single API key."""
+        result = parse_api_keys(["openai=sk-xxx"])
+        assert result == {"openai": "sk-xxx"}
+
+    def test_parse_api_keys_multiple_keys(self):
+        """Test parsing multiple API keys."""
+        result = parse_api_keys(["openai=sk-xxx", "anthropic=sk-ant-yyy"])
+        assert result == {"openai": "sk-xxx", "anthropic": "sk-ant-yyy"}
+
+    def test_parse_api_keys_whitespace_handling(self):
+        """Test that whitespace is stripped from provider and key."""
+        result = parse_api_keys([" openai = sk-xxx ", " anthropic = sk-ant-yyy "])
+        assert result == {"openai": "sk-xxx", "anthropic": "sk-ant-yyy"}
+
+    def test_parse_api_keys_key_contains_equals(self):
+        """Test that keys containing '=' are handled correctly (split on first '=' only)."""
+        result = parse_api_keys(["openai=sk-xxx=extra=more"])
+        assert result == {"openai": "sk-xxx=extra=more"}
+
+    def test_parse_api_keys_no_equals(self):
+        """Test that missing '=' raises ValueError."""
+        with pytest.raises(ValueError, match="Invalid API key format.*Expected format: 'provider=key'"):
+            parse_api_keys(["openai"])
+
+    def test_parse_api_keys_empty_provider(self):
+        """Test that empty provider raises ValueError."""
+        with pytest.raises(ValueError, match="Provider and key must be non-empty"):
+            parse_api_keys(["=sk-xxx"])
+
+    def test_parse_api_keys_empty_key(self):
+        """Test that empty key raises ValueError."""
+        with pytest.raises(ValueError, match="Provider and key must be non-empty"):
+            parse_api_keys(["openai="])
+
+    def test_parse_api_keys_both_empty(self):
+        """Test that both empty provider and key raises ValueError."""
+        with pytest.raises(ValueError, match="Provider and key must be non-empty"):
+            parse_api_keys(["="])
+
+    def test_parse_api_keys_duplicate_provider(self):
+        """Test that duplicate providers overwrite previous value."""
+        result = parse_api_keys(["openai=sk-xxx", "openai=sk-yyy"])
+        assert result == {"openai": "sk-yyy"}
+
+    def test_parse_api_keys_mixed_case_provider(self):
+        """Test that mixed case providers are normalized correctly."""
+        result = parse_api_keys(["OpenAI=sk-xxx", "ANTHROPIC=sk-ant-yyy"])
+        assert result == {"openai": "sk-xxx", "anthropic": "sk-ant-yyy"}
````