weco 0.3.5__tar.gz → 0.3.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {weco-0.3.5 → weco-0.3.7}/PKG-INFO +4 -1
  2. {weco-0.3.5 → weco-0.3.7}/README.md +2 -0
  3. {weco-0.3.5 → weco-0.3.7}/examples/extract-line-plot/README.md +26 -6
  4. {weco-0.3.5 → weco-0.3.7}/examples/extract-line-plot/eval.py +47 -11
  5. weco-0.3.7/examples/extract-line-plot/optimize.py +97 -0
  6. {weco-0.3.5 → weco-0.3.7}/pyproject.toml +3 -3
  7. weco-0.3.7/tests/__init__.py +1 -0
  8. weco-0.3.7/tests/test_byok.py +192 -0
  9. weco-0.3.7/tests/test_cli.py +70 -0
  10. {weco-0.3.5 → weco-0.3.7}/weco/api.py +31 -172
  11. {weco-0.3.5 → weco-0.3.7}/weco/cli.py +104 -90
  12. {weco-0.3.5 → weco-0.3.7}/weco/constants.py +3 -3
  13. {weco-0.3.5 → weco-0.3.7}/weco/optimizer.py +20 -9
  14. {weco-0.3.5 → weco-0.3.7}/weco/utils.py +33 -5
  15. {weco-0.3.5 → weco-0.3.7}/weco.egg-info/PKG-INFO +4 -1
  16. {weco-0.3.5 → weco-0.3.7}/weco.egg-info/SOURCES.txt +3 -1
  17. {weco-0.3.5 → weco-0.3.7}/weco.egg-info/requires.txt +1 -0
  18. weco-0.3.5/examples/extract-line-plot/optimize.py +0 -116
  19. weco-0.3.5/weco/chatbot.py +0 -827
  20. {weco-0.3.5 → weco-0.3.7}/.github/workflows/lint.yml +0 -0
  21. {weco-0.3.5 → weco-0.3.7}/.github/workflows/release.yml +0 -0
  22. {weco-0.3.5 → weco-0.3.7}/.gitignore +0 -0
  23. {weco-0.3.5 → weco-0.3.7}/LICENSE +0 -0
  24. {weco-0.3.5 → weco-0.3.7}/assets/example-optimization.gif +0 -0
  25. {weco-0.3.5 → weco-0.3.7}/assets/weco.svg +0 -0
  26. {weco-0.3.5 → weco-0.3.7}/contributing.md +0 -0
  27. {weco-0.3.5 → weco-0.3.7}/examples/README.md +0 -0
  28. {weco-0.3.5 → weco-0.3.7}/examples/cuda/README.md +0 -0
  29. {weco-0.3.5 → weco-0.3.7}/examples/cuda/evaluate.py +0 -0
  30. {weco-0.3.5 → weco-0.3.7}/examples/cuda/module.py +0 -0
  31. {weco-0.3.5 → weco-0.3.7}/examples/cuda/requirements.txt +0 -0
  32. {weco-0.3.5 → weco-0.3.7}/examples/extract-line-plot/guide.md +0 -0
  33. {weco-0.3.5 → weco-0.3.7}/examples/extract-line-plot/prepare_data.py +0 -0
  34. {weco-0.3.5 → weco-0.3.7}/examples/extract-line-plot/pyproject.toml +0 -0
  35. {weco-0.3.5 → weco-0.3.7}/examples/hello-world/README.md +0 -0
  36. {weco-0.3.5 → weco-0.3.7}/examples/hello-world/colab_notebook_walkthrough.ipynb +0 -0
  37. {weco-0.3.5 → weco-0.3.7}/examples/hello-world/evaluate.py +0 -0
  38. {weco-0.3.5 → weco-0.3.7}/examples/hello-world/module.py +0 -0
  39. {weco-0.3.5 → weco-0.3.7}/examples/hello-world/requirements.txt +0 -0
  40. {weco-0.3.5 → weco-0.3.7}/examples/prompt/README.md +0 -0
  41. {weco-0.3.5 → weco-0.3.7}/examples/prompt/eval.py +0 -0
  42. {weco-0.3.5 → weco-0.3.7}/examples/prompt/optimize.py +0 -0
  43. {weco-0.3.5 → weco-0.3.7}/examples/prompt/prompt_guide.md +0 -0
  44. {weco-0.3.5 → weco-0.3.7}/examples/spaceship-titanic/README.md +0 -0
  45. {weco-0.3.5 → weco-0.3.7}/examples/spaceship-titanic/competition_description.md +0 -0
  46. {weco-0.3.5 → weco-0.3.7}/examples/spaceship-titanic/data/sample_submission.csv +0 -0
  47. {weco-0.3.5 → weco-0.3.7}/examples/spaceship-titanic/data/test.csv +0 -0
  48. {weco-0.3.5 → weco-0.3.7}/examples/spaceship-titanic/data/train.csv +0 -0
  49. {weco-0.3.5 → weco-0.3.7}/examples/spaceship-titanic/evaluate.py +0 -0
  50. {weco-0.3.5 → weco-0.3.7}/examples/spaceship-titanic/train.py +0 -0
  51. {weco-0.3.5 → weco-0.3.7}/examples/triton/README.md +0 -0
  52. {weco-0.3.5 → weco-0.3.7}/examples/triton/evaluate.py +0 -0
  53. {weco-0.3.5 → weco-0.3.7}/examples/triton/module.py +0 -0
  54. {weco-0.3.5 → weco-0.3.7}/examples/triton/requirements.txt +0 -0
  55. {weco-0.3.5 → weco-0.3.7}/setup.cfg +0 -0
  56. {weco-0.3.5 → weco-0.3.7}/weco/__init__.py +0 -0
  57. {weco-0.3.5 → weco-0.3.7}/weco/auth.py +0 -0
  58. {weco-0.3.5 → weco-0.3.7}/weco/credits.py +0 -0
  59. {weco-0.3.5 → weco-0.3.7}/weco/panels.py +0 -0
  60. {weco-0.3.5 → weco-0.3.7}/weco.egg-info/dependency_links.txt +0 -0
  61. {weco-0.3.5 → weco-0.3.7}/weco.egg-info/entry_points.txt +0 -0
  62. {weco-0.3.5 → weco-0.3.7}/weco.egg-info/top_level.txt +0 -0

{weco-0.3.5 → weco-0.3.7}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: weco
- Version: 0.3.5
+ Version: 0.3.7
  Summary: Documentation for `weco`, a CLI for using Weco AI's code optimizer.
  Author-email: Weco AI Team <contact@weco.ai>
  License:
@@ -224,6 +224,7 @@ Provides-Extra: dev
  Requires-Dist: ruff; extra == "dev"
  Requires-Dist: build; extra == "dev"
  Requires-Dist: setuptools_scm; extra == "dev"
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
  Dynamic: license-file
  
  <div align="center">
@@ -323,6 +324,7 @@ For more advanced examples, including [Triton](/examples/triton/README.md), [CUD
  | `--eval-timeout` | Timeout in seconds for each step in evaluation. | No timeout (unlimited) | `--eval-timeout 3600` |
  | `--save-logs` | Save execution output from each optimization step to disk. Creates timestamped directories with raw output files and a JSONL index for tracking execution history. | `False` | `--save-logs` |
  | `--apply-change` | Automatically apply the best solution to the source file without prompting. | `False` | `--apply-change` |
+ | `--api-key` | API keys for LLM providers (BYOK). Format: `provider=key`. Can specify multiple providers. | `None` | `--api-key openai=sk-xxx` |
  
  ---
  
@@ -377,6 +379,7 @@ Arguments for `weco resume`:
  |----------|-------------|---------|
  | `run-id` | The UUID of the run to resume (shown at the start of each run) | `0002e071-1b67-411f-a514-36947f0c4b31` |
  | `--apply-change` | Automatically apply the best solution to the source file without prompting | `--apply-change` |
+ | `--api-key` | (Optional) API keys for LLM providers (BYOK). Format: `provider=key` | `--api-key openai=sk-xxx` |
  
  Notes:
  - Works only for interrupted runs (status: `error`, `terminated`, etc.).

{weco-0.3.5 → weco-0.3.7}/README.md
@@ -95,6 +95,7 @@ For more advanced examples, including [Triton](/examples/triton/README.md), [CUD
  | `--eval-timeout` | Timeout in seconds for each step in evaluation. | No timeout (unlimited) | `--eval-timeout 3600` |
  | `--save-logs` | Save execution output from each optimization step to disk. Creates timestamped directories with raw output files and a JSONL index for tracking execution history. | `False` | `--save-logs` |
  | `--apply-change` | Automatically apply the best solution to the source file without prompting. | `False` | `--apply-change` |
+ | `--api-key` | API keys for LLM providers (BYOK). Format: `provider=key`. Can specify multiple providers. | `None` | `--api-key openai=sk-xxx` |
  
  ---
  
@@ -149,6 +150,7 @@ Arguments for `weco resume`:
  |----------|-------------|---------|
  | `run-id` | The UUID of the run to resume (shown at the start of each run) | `0002e071-1b67-411f-a514-36947f0c4b31` |
  | `--apply-change` | Automatically apply the best solution to the source file without prompting | `--apply-change` |
+ | `--api-key` | (Optional) API keys for LLM providers (BYOK). Format: `provider=key` | `--api-key openai=sk-xxx` |
  
  Notes:
  - Works only for interrupted runs (status: `error`, `terminated`, etc.).

{weco-0.3.5 → weco-0.3.7}/examples/extract-line-plot/README.md
@@ -1,6 +1,6 @@
- ## Extract Line Plot (Chart → CSV) with a VLM
+ ## Extract Line Plot (Chart → CSV): Accuracy/Cost Optimization for Agentic Workflow
  
- This example is about optimizing an AI feature that turns image of chart into a table in csv format.
+ This example demonstrates optimizing an AI feature that turns chart images into CSV tables, showcasing how to use Weco to improve accuracy or reduce cost of a VLM-based extraction workflow.
  
  ### Prerequisites
  
@@ -15,8 +15,9 @@ export OPENAI_API_KEY=your_key_here
  ### Files
  
  - `prepare_data.py`: downloads ChartQA (full) and prepares a 100-sample subset of line charts.
- - `optimize.py`: baseline VLM function (`VLMExtractor.image_to_csv`) to be optimized.
+ - `optimize.py`: exposes `extract_csv(image_path)` which returns CSV text plus the per-call cost (helpers stay private).
  - `eval.py`: evaluation harness that runs the baseline on images and reports a similarity score as "accuracy".
+ - `guide.md`: optional additional instructions you can feed to Weco via `--additional-instructions guide.md`.
  
  Generated artifacts (gitignored):
  - `subset_line_100/` and `subset_line_100.zip`
@@ -47,12 +48,21 @@ Metric definition (summarized):
  - Per-sample score = 0.2 × header match + 0.8 × Jaccard(similarity of content rows).
  - Reported `accuracy` is the mean score over all evaluated samples.
  
+ To emit a secondary `cost` metric that Weco can minimize (while enforcing `accuracy > 0.45`), append `--cost-metric`:
+ 
+ ```bash
+ uv run --with openai python eval.py --max-samples 10 --num-workers 4 --cost-metric
+ ```
+ 
+ If the final accuracy falls at or below `0.45`, the reported cost is replaced with a large penalty so Weco keeps searching for higher-accuracy solutions.
+ You can tighten or relax this constraint with `--cost-accuracy-threshold`, e.g. `--cost-accuracy-threshold 0.50`.
+ 
  ### 3) Optimize the baseline with Weco
  
  Run Weco to iteratively improve `optimize.py` using 100 examples and many workers:
  
  ```bash
- weco run --source optimize.py --eval-command 'uv run --with openai python eval.py --max-samples 100 --num-workers 50' --metric accuracy --goal maximize --steps 20 --model gpt-5
+ weco run --source optimize.py --eval-command 'uv run --with openai python eval.py --max-samples 100 --num-workers 50' --metric accuracy --goal maximize --steps 20 --model gpt-5 --additional-instructions guide.md
  ```
  
  Arguments:
@@ -63,10 +73,20 @@ Arguments:
  - `--steps 20`: number of optimization iterations.
  - `--model gpt-5`: model used by Weco to propose edits (change as desired).
  
+ To minimize cost instead (subject to the accuracy constraint), enable the flag in the eval command and switch the optimization target:
+ 
+ ```bash
+ weco run --source optimize.py --eval-command 'uv run --with openai python eval.py --max-samples 100 --num-workers 50 --cost-metric' --metric cost --goal minimize --steps 20 --model gpt-5 --additional-instructions guide.md
+ ```
+ 
+ #### Cost optimization workflow
+ - Run the evaluation command with `--cost-metric` once to confirm accuracy meets your threshold and note the baseline cost.
+ - Adjust `--cost-accuracy-threshold` if you want to tighten or relax the constraint before launching optimization.
+ - Kick off Weco with `--metric cost --goal minimize --additional-instructions guide.md` so the optimizer respects the constraint while acting on the extra tips.
+ 
  ### Tips
  
  - Ensure your OpenAI key has access to a vision-capable model (default: `gpt-4o-mini` in the eval; change via `--model`).
  - Adjust `--num-workers` to balance throughput and rate limits.
  - You can tweak baseline behavior in `optimize.py` (prompt, temperature) — Weco will explore modifications automatically during optimization.
- 
- 
+ - Include `--additional-instructions guide.md` whenever you run Weco so those cost-conscious hints influence the generated proposals.

{weco-0.3.5 → weco-0.3.7}/examples/extract-line-plot/eval.py
@@ -8,7 +8,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
  from pathlib import Path
  from typing import Dict, List, Optional, Tuple
  
- from optimize import VLMExtractor
+ from optimize import extract_csv
  
  try:
      import matplotlib
@@ -18,6 +18,9 @@ try:
  except Exception: # pragma: no cover - optional dependency
      plt = None
  
+ COST_ACCURACY_THRESHOLD_DEFAULT = 0.45
+ COST_CONSTRAINT_PENALTY = 1_000_000.0
+ 
  
  def read_index(index_csv_path: Path) -> List[Tuple[str, Path, Path]]:
      rows: List[Tuple[str, Path, Path]] = []
@@ -259,14 +262,14 @@ def evaluate_predictions(gt_csv_path: Path, pred_csv_path: Path) -> float:
  
  
  def process_one(
-     extractor: VLMExtractor, base_dir: Path, example_id: str, image_rel: Path, gt_table_rel: Path, output_dir: Path
- ) -> Tuple[str, float, Path, Path]:
+     base_dir: Path, example_id: str, image_rel: Path, gt_table_rel: Path, output_dir: Path
+ ) -> Tuple[str, float, Path, Path, float]:
      image_path = base_dir / image_rel
      gt_csv_path = base_dir / gt_table_rel
-     pred_csv_text = extractor.image_to_csv(image_path)
+     pred_csv_text, cost_usd = extract_csv(image_path)
      pred_path = write_csv(output_dir, example_id, pred_csv_text)
      score = evaluate_predictions(gt_csv_path, pred_path)
-     return example_id, score, pred_path, gt_csv_path
+     return example_id, score, pred_path, gt_csv_path, cost_usd
  
  
  def main() -> None:
@@ -276,6 +279,20 @@ def main() -> None:
      parser.add_argument("--out-dir", type=str, default="predictions")
      parser.add_argument("--max-samples", type=int, default=100)
      parser.add_argument("--num-workers", type=int, default=4)
+     parser.add_argument(
+         "--cost-metric",
+         action="store_true",
+         help=(
+             "When set, also report a `cost:` metric suitable for Weco minimization. "
+             "Requires final accuracy to exceed --cost-accuracy-threshold; otherwise a large penalty is reported."
+         ),
+     )
+     parser.add_argument(
+         "--cost-accuracy-threshold",
+         type=float,
+         default=COST_ACCURACY_THRESHOLD_DEFAULT,
+         help="Minimum accuracy required when --cost-metric is set (default: 0.45).",
+     )
      parser.add_argument(
          "--visualize-dir",
          type=str,
@@ -307,7 +324,6 @@ def main() -> None:
          sys.exit(1)
  
      rows = read_index(index_path)[: args.max_samples]
-     extractor = VLMExtractor()
  
      visualize_dir: Optional[Path] = Path(args.visualize_dir) if args.visualize_dir else None
      visualize_max = max(0, args.visualize_max)
@@ -315,22 +331,24 @@ def main() -> None:
          print("[warn] matplotlib not available; skipping visualization.", file=sys.stderr)
          visualize_dir = None
  
-     print(f"[setup] evaluating {len(rows)} samples using {extractor.model} …", flush=True)
+     print(f"[setup] evaluating {len(rows)} samples …", flush=True)
      start = time.time()
      scores: List[float] = []
+     costs: List[float] = []
      saved_visualizations = 0
  
      with ThreadPoolExecutor(max_workers=max(1, args.num_workers)) as pool:
          futures = [
-             pool.submit(process_one, extractor, base_dir, example_id, image_rel, gt_table_rel, Path(args.out_dir))
+             pool.submit(process_one, base_dir, example_id, image_rel, gt_table_rel, Path(args.out_dir))
              for (example_id, image_rel, gt_table_rel) in rows
          ]
  
          try:
              for idx, fut in enumerate(as_completed(futures), 1):
                  try:
-                     example_id, score, pred_path, gt_csv_path = fut.result()
+                     example_id, score, pred_path, gt_csv_path, cost_usd = fut.result()
                      scores.append(score)
+                     costs.append(cost_usd)
                      if visualize_dir and (visualize_max == 0 or saved_visualizations < visualize_max):
                          out_path = visualize_difference(
                              gt_csv_path,
@@ -346,7 +364,11 @@ def main() -> None:
                      if idx % 5 == 0 or idx == len(rows):
                          elapsed = time.time() - start
                          avg = sum(scores) / len(scores) if scores else 0.0
-                         print(f"[progress] {idx}/{len(rows)} done, avg score: {avg:.4f}, elapsed {elapsed:.1f}s", flush=True)
+                         avg_cost = sum(costs) / len(costs) if costs else 0.0
+                         print(
+                             f"[progress] {idx}/{len(rows)} done, avg score: {avg:.4f}, avg cost: ${avg_cost:.4f}, elapsed {elapsed:.1f}s",
+                             flush=True,
+                         )
                  except Exception as e:
                      print(f"[error] failed on sample {idx}: {e}", file=sys.stderr)
          except KeyboardInterrupt:
@@ -356,7 +378,7 @@ def main() -> None:
      final_score = sum(scores) / len(scores) if scores else 0.0
  
      # Apply cost cap: accuracy is zeroed if average cost/query exceeds $0.02
-     avg_cost_per_query = (extractor.total_cost_usd / extractor.num_queries) if getattr(extractor, "num_queries", 0) else 0.0
+     avg_cost_per_query = (sum(costs) / len(costs)) if costs else 0.0
      if avg_cost_per_query > 0.02:
          print(f"[cost] avg ${avg_cost_per_query:.4f}/query exceeds $0.02 cap; accuracy set to 0.0", flush=True)
          final_score = 0.0
@@ -365,6 +387,20 @@ def main() -> None:
  
      print(f"accuracy: {final_score:.4f}")
  
+     if args.cost_metric:
+         if final_score > args.cost_accuracy_threshold:
+             reported_cost = avg_cost_per_query
+         else:
+             print(
+                 (
+                     f"[constraint] accuracy {final_score:.4f} <= "
+                     f"threshold {args.cost_accuracy_threshold:.2f}; reporting penalty ${COST_CONSTRAINT_PENALTY:.1f}"
+                 ),
+                 flush=True,
+             )
+             reported_cost = COST_CONSTRAINT_PENALTY
+         print(f"cost: {reported_cost:.6f}")
+ 
  
  if __name__ == "__main__":
      main()
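
The evaluation script communicates results to Weco only through the printed `accuracy:` and `cost:` lines shown above. As a rough illustration of that output contract (an assumption about how such lines could be consumed, not Weco's actual parser), a few lines of Python suffice:

```python
# Illustrative only: parse "metric: value" lines like the ones eval.py prints above.
# This mimics the output contract ("accuracy: 0.5123", "cost: 0.004210"); it is not Weco's own parser.
import re
from typing import Dict


def parse_metrics(output: str) -> Dict[str, float]:
    metrics: Dict[str, float] = {}
    # Match lines that consist of a bare metric name, a colon, and a numeric value.
    for name, value in re.findall(r"^(\w+):\s*([0-9.]+)\s*$", output, flags=re.MULTILINE):
        metrics[name] = float(value)
    return metrics


sample = "accuracy: 0.5123\ncost: 0.004210\n"
assert parse_metrics(sample) == {"accuracy": 0.5123, "cost": 0.00421}
```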

weco-0.3.7/examples/extract-line-plot/optimize.py
@@ -0,0 +1,97 @@
+ """
+ optimize.py
+ 
+ Exposes a single public entry point `extract_csv` that turns a chart image into CSV text.
+ All helper utilities remain private to this module.
+ """
+ 
+ import base64
+ from pathlib import Path
+ from typing import Optional, Tuple
+ 
+ from openai import OpenAI
+ 
+ __all__ = ["extract_csv"]
+ 
+ _DEFAULT_MODEL = "gpt-4o-mini"
+ _CLIENT = OpenAI()
+ 
+ 
+ def _build_prompt() -> str:
+     return (
+         "You are a precise data extraction model. Given a chart image, extract the underlying data table.\n"
+         "Return ONLY the CSV text with a header row and no markdown code fences.\n"
+         "Rules:\n"
+         "- The first column must be the x-axis values with its exact axis label as the header.\n"
+         "- Include one column per data series using the legend labels as headers.\n"
+         "- Preserve the original order of x-axis ticks as they appear.\n"
+         "- Use plain CSV (comma-separated), no explanations, no extra text.\n"
+     )
+ 
+ 
+ def _image_to_data_uri(image_path: Path) -> str:
+     mime = "image/png" if image_path.suffix.lower() == ".png" else "image/jpeg"
+     data = image_path.read_bytes()
+     b64 = base64.b64encode(data).decode("ascii")
+     return f"data:{mime};base64,{b64}"
+ 
+ 
+ def _clean_to_csv(text: str) -> str:
+     return text.strip()
+ 
+ 
+ def _pricing_for_model(model_name: str) -> dict:
+     """Return pricing information for the given model in USD per token."""
+     name = (model_name or "").lower()
+     per_million = {
+         "gpt-5": {"in": 1.250, "in_cached": 0.125, "out": 10.000},
+         "gpt-5-mini": {"in": 0.250, "in_cached": 0.025, "out": 2.000},
+         "gpt-5-nano": {"in": 0.050, "in_cached": 0.005, "out": 0.400},
+     }
+     if name.startswith("gpt-5-nano"):
+         chosen = per_million["gpt-5-nano"]
+     elif name.startswith("gpt-5-mini"):
+         chosen = per_million["gpt-5-mini"]
+     elif name.startswith("gpt-5"):
+         chosen = per_million["gpt-5"]
+     else:
+         chosen = per_million["gpt-5-mini"]
+     return {k: v / 1_000_000.0 for k, v in chosen.items()}
+ 
+ 
+ def extract_csv(image_path: Path, model: Optional[str] = None) -> Tuple[str, float]:
+     """
+     Extract CSV text from an image and return (csv_text, cost_usd).
+ 
+     The caller can optionally override the model name; otherwise the default is used.
+     """
+     effective_model = model or _DEFAULT_MODEL
+     prompt = _build_prompt()
+     image_uri = _image_to_data_uri(image_path)
+     response = _CLIENT.chat.completions.create(
+         model=effective_model,
+         messages=[
+             {
+                 "role": "user",
+                 "content": [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": {"url": image_uri}}],
+             }
+         ],
+     )
+ 
+     usage = getattr(response, "usage", None)
+     cost_usd = 0.0
+     if usage is not None:
+         prompt_tokens = int(getattr(usage, "prompt_tokens", 0) or 0)
+         completion_tokens = int(getattr(usage, "completion_tokens", 0) or 0)
+         details = getattr(usage, "prompt_tokens_details", None)
+         cached_tokens = 0
+         if details is not None:
+             cached_tokens = int(getattr(details, "cached_tokens", 0) or 0)
+         non_cached_prompt_tokens = max(0, prompt_tokens - cached_tokens)
+         rates = _pricing_for_model(effective_model)
+         cost_usd = (
+             non_cached_prompt_tokens * rates["in"] + cached_tokens * rates["in_cached"] + completion_tokens * rates["out"]
+         )
+ 
+     text = response.choices[0].message.content or ""
+     return _clean_to_csv(text), cost_usd
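
A minimal usage sketch for the new `extract_csv` entry point, based on the signature above; the image path is hypothetical, and `OPENAI_API_KEY` must be set because the module instantiates an `OpenAI()` client at import time:

```python
# Minimal usage sketch for the new public entry point (illustrative only).
# Assumes OPENAI_API_KEY is set in the environment; "chart.png" is a hypothetical
# sample image, not a file shipped with the package.
from pathlib import Path

from optimize import extract_csv

csv_text, cost_usd = extract_csv(Path("chart.png"))  # returns (CSV string, per-call cost in USD)
print(csv_text)
print(f"cost: {cost_usd:.6f}")
```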

{weco-0.3.5 → weco-0.3.7}/pyproject.toml
@@ -8,7 +8,7 @@ name = "weco"
  authors = [{ name = "Weco AI Team", email = "contact@weco.ai" }]
  description = "Documentation for `weco`, a CLI for using Weco AI's code optimizer."
  readme = "README.md"
- version = "0.3.5"
+ version = "0.3.7"
  license = { file = "LICENSE" }
  requires-python = ">=3.8"
  dependencies = [
@@ -18,7 +18,7 @@ dependencies = [
      "gitingest",
      "fastapi",
      "slowapi",
-     "psutil",
+     "psutil"
  ]
  keywords = ["AI", "Code Optimization", "Code Generation"]
  classifiers = [
@@ -34,7 +34,7 @@ weco = "weco.cli:main"
  Homepage = "https://github.com/WecoAI/weco-cli"
  
  [project.optional-dependencies]
- dev = ["ruff", "build", "setuptools_scm"]
+ dev = ["ruff", "build", "setuptools_scm", "pytest>=7.0.0"]
  
  [tool.setuptools]
  packages = ["weco"]

weco-0.3.7/tests/__init__.py
@@ -0,0 +1 @@
+ """Tests for weco CLI."""

weco-0.3.7/tests/test_byok.py
@@ -0,0 +1,192 @@
+ """Tests to verify API keys are correctly passed through the system and sent to the API."""
+ 
+ import pytest
+ from unittest.mock import patch, MagicMock
+ from rich.console import Console
+ 
+ from weco.api import start_optimization_run, evaluate_feedback_then_suggest_next_solution
+ 
+ 
+ class TestApiKeysInStartOptimizationRun:
+     """Test that api_keys are correctly included in start_optimization_run requests."""
+ 
+     @pytest.fixture
+     def mock_console(self):
+         """Create a mock console for testing."""
+         return MagicMock(spec=Console)
+ 
+     @pytest.fixture
+     def base_params(self, mock_console):
+         """Base parameters for start_optimization_run."""
+         return {
+             "console": mock_console,
+             "source_code": "print('hello')",
+             "source_path": "test.py",
+             "evaluation_command": "python test.py",
+             "metric_name": "accuracy",
+             "maximize": True,
+             "steps": 10,
+             "code_generator_config": {"model": "o4-mini"},
+             "evaluator_config": {"model": "o4-mini"},
+             "search_policy_config": {"num_drafts": 2},
+         }
+ 
+     @patch("weco.api.requests.post")
+     def test_api_keys_included_in_request(self, mock_post, base_params):
+         """Test that api_keys are included in the request JSON when provided."""
+         mock_response = MagicMock()
+         mock_response.json.return_value = {
+             "run_id": "test-run-id",
+             "solution_id": "test-solution-id",
+             "code": "print('hello')",
+             "plan": "test plan",
+         }
+         mock_response.raise_for_status = MagicMock()
+         mock_post.return_value = mock_response
+ 
+         api_keys = {"openai": "sk-test-key", "anthropic": "sk-ant-test"}
+         start_optimization_run(**base_params, api_keys=api_keys)
+ 
+         # Verify the request was made with api_keys in the JSON payload
+         mock_post.assert_called_once()
+         call_kwargs = mock_post.call_args
+         request_json = call_kwargs.kwargs["json"]
+         assert "api_keys" in request_json
+         assert request_json["api_keys"] == {"openai": "sk-test-key", "anthropic": "sk-ant-test"}
+ 
+     @patch("weco.api.requests.post")
+     def test_api_keys_not_included_when_none(self, mock_post, base_params):
+         """Test that api_keys field is not included when api_keys is None."""
+         mock_response = MagicMock()
+         mock_response.json.return_value = {
+             "run_id": "test-run-id",
+             "solution_id": "test-solution-id",
+             "code": "print('hello')",
+             "plan": "test plan",
+         }
+         mock_response.raise_for_status = MagicMock()
+         mock_post.return_value = mock_response
+ 
+         start_optimization_run(**base_params, api_keys=None)
+ 
+         # Verify the request was made without api_keys
+         mock_post.assert_called_once()
+         call_kwargs = mock_post.call_args
+         request_json = call_kwargs.kwargs["json"]
+         assert "api_keys" not in request_json
+ 
+     @patch("weco.api.requests.post")
+     def test_api_keys_not_included_when_empty_dict(self, mock_post, base_params):
+         """Test that api_keys field is not included when api_keys is an empty dict."""
+         mock_response = MagicMock()
+         mock_response.json.return_value = {
+             "run_id": "test-run-id",
+             "solution_id": "test-solution-id",
+             "code": "print('hello')",
+             "plan": "test plan",
+         }
+         mock_response.raise_for_status = MagicMock()
+         mock_post.return_value = mock_response
+ 
+         # Empty dict is falsy, so api_keys should not be included
+         start_optimization_run(**base_params, api_keys={})
+ 
+         mock_post.assert_called_once()
+         call_kwargs = mock_post.call_args
+         request_json = call_kwargs.kwargs["json"]
+         assert "api_keys" not in request_json
+ 
+ 
+ class TestApiKeysInEvaluateFeedbackThenSuggest:
+     """Test that api_keys are correctly included in evaluate_feedback_then_suggest_next_solution requests."""
+ 
+     @pytest.fixture
+     def mock_console(self):
+         """Create a mock console for testing."""
+         return MagicMock(spec=Console)
+ 
+     @patch("weco.api.requests.post")
+     def test_api_keys_included_in_suggest_request(self, mock_post, mock_console):
+         """Test that api_keys are included in the suggest request JSON when provided."""
+         mock_response = MagicMock()
+         mock_response.json.return_value = {
+             "run_id": "test-run-id",
+             "solution_id": "new-solution-id",
+             "code": "print('improved')",
+             "plan": "improvement plan",
+             "is_done": False,
+         }
+         mock_response.raise_for_status = MagicMock()
+         mock_post.return_value = mock_response
+ 
+         api_keys = {"openai": "sk-test-key"}
+         evaluate_feedback_then_suggest_next_solution(
+             console=mock_console,
+             run_id="test-run-id",
+             step=1,
+             execution_output="accuracy: 0.95",
+             auth_headers={"Authorization": "Bearer test-token"},
+             api_keys=api_keys,
+         )
+ 
+         mock_post.assert_called_once()
+         call_kwargs = mock_post.call_args
+         request_json = call_kwargs.kwargs["json"]
+         assert "api_keys" in request_json
+         assert request_json["api_keys"] == {"openai": "sk-test-key"}
+ 
+     @patch("weco.api.requests.post")
+     def test_api_keys_not_included_in_suggest_when_none(self, mock_post, mock_console):
+         """Test that api_keys field is not included in suggest request when api_keys is None."""
+         mock_response = MagicMock()
+         mock_response.json.return_value = {
+             "run_id": "test-run-id",
+             "solution_id": "new-solution-id",
+             "code": "print('improved')",
+             "plan": "improvement plan",
+             "is_done": False,
+         }
+         mock_response.raise_for_status = MagicMock()
+         mock_post.return_value = mock_response
+ 
+         evaluate_feedback_then_suggest_next_solution(
+             console=mock_console,
+             run_id="test-run-id",
+             step=1,
+             execution_output="accuracy: 0.95",
+             auth_headers={"Authorization": "Bearer test-token"},
+             api_keys=None,
+         )
+ 
+         mock_post.assert_called_once()
+         call_kwargs = mock_post.call_args
+         request_json = call_kwargs.kwargs["json"]
+         assert "api_keys" not in request_json
+ 
+     @patch("weco.api.requests.post")
+     def test_api_keys_not_included_in_suggest_when_empty_dict(self, mock_post, mock_console):
+         """Test that api_keys field is not included in suggest request when api_keys is None."""
+         mock_response = MagicMock()
+         mock_response.json.return_value = {
+             "run_id": "test-run-id",
+             "solution_id": "new-solution-id",
+             "code": "print('improved')",
+             "plan": "improvement plan",
+             "is_done": False,
+         }
+         mock_response.raise_for_status = MagicMock()
+         mock_post.return_value = mock_response
+ 
+         evaluate_feedback_then_suggest_next_solution(
+             console=mock_console,
+             run_id="test-run-id",
+             step=1,
+             execution_output="accuracy: 0.95",
+             auth_headers={"Authorization": "Bearer test-token"},
+             api_keys={},
+         )
+ 
+         mock_post.assert_called_once()
+         call_kwargs = mock_post.call_args
+         request_json = call_kwargs.kwargs["json"]
+         assert "api_keys" not in request_json

weco-0.3.7/tests/test_cli.py
@@ -0,0 +1,70 @@
+ """Tests for CLI functions, particularly parse_api_keys."""
+ 
+ import pytest
+ from weco.cli import parse_api_keys
+ 
+ 
+ class TestParseApiKeys:
+     """Test cases for parse_api_keys function."""
+ 
+     def test_parse_api_keys_none(self):
+         """Test that None input returns empty dict."""
+         result = parse_api_keys(None)
+         assert result == {}
+         assert isinstance(result, dict)
+ 
+     def test_parse_api_keys_empty_list(self):
+         """Test that empty list returns empty dict."""
+         result = parse_api_keys([])
+         assert result == {}
+         assert isinstance(result, dict)
+ 
+     def test_parse_api_keys_single_key(self):
+         """Test parsing a single API key."""
+         result = parse_api_keys(["openai=sk-xxx"])
+         assert result == {"openai": "sk-xxx"}
+ 
+     def test_parse_api_keys_multiple_keys(self):
+         """Test parsing multiple API keys."""
+         result = parse_api_keys(["openai=sk-xxx", "anthropic=sk-ant-yyy"])
+         assert result == {"openai": "sk-xxx", "anthropic": "sk-ant-yyy"}
+ 
+     def test_parse_api_keys_whitespace_handling(self):
+         """Test that whitespace is stripped from provider and key."""
+         result = parse_api_keys([" openai = sk-xxx ", " anthropic = sk-ant-yyy "])
+         assert result == {"openai": "sk-xxx", "anthropic": "sk-ant-yyy"}
+ 
+     def test_parse_api_keys_key_contains_equals(self):
+         """Test that keys containing '=' are handled correctly (split on first '=' only)."""
+         result = parse_api_keys(["openai=sk-xxx=extra=more"])
+         assert result == {"openai": "sk-xxx=extra=more"}
+ 
+     def test_parse_api_keys_no_equals(self):
+         """Test that missing '=' raises ValueError."""
+         with pytest.raises(ValueError, match="Invalid API key format.*Expected format: 'provider=key'"):
+             parse_api_keys(["openai"])
+ 
+     def test_parse_api_keys_empty_provider(self):
+         """Test that empty provider raises ValueError."""
+         with pytest.raises(ValueError, match="Provider and key must be non-empty"):
+             parse_api_keys(["=sk-xxx"])
+ 
+     def test_parse_api_keys_empty_key(self):
+         """Test that empty key raises ValueError."""
+         with pytest.raises(ValueError, match="Provider and key must be non-empty"):
+             parse_api_keys(["openai="])
+ 
+     def test_parse_api_keys_both_empty(self):
+         """Test that both empty provider and key raises ValueError."""
+         with pytest.raises(ValueError, match="Provider and key must be non-empty"):
+             parse_api_keys(["="])
+ 
+     def test_parse_api_keys_duplicate_provider(self):
+         """Test that duplicate providers overwrite previous value."""
+         result = parse_api_keys(["openai=sk-xxx", "openai=sk-yyy"])
+         assert result == {"openai": "sk-yyy"}
+ 
+     def test_parse_api_keys_mixed_case_provider(self):
+         """Test that mixed case providers are normalized correctly."""
+         result = parse_api_keys(["OpenAI=sk-xxx", "ANTHROPIC=sk-ant-yyy"])
+         assert result == {"openai": "sk-xxx", "anthropic": "sk-ant-yyy"}