weco 0.2.12.tar.gz → 0.2.14.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- weco-0.2.14/.github/workflows/release.yml +151 -0
- {weco-0.2.12 → weco-0.2.14}/.gitignore +4 -1
- weco-0.2.14/.repomixignore +4 -0
- {weco-0.2.12 → weco-0.2.14}/PKG-INFO +61 -23
- {weco-0.2.12 → weco-0.2.14}/README.md +60 -22
- {weco-0.2.12 → weco-0.2.14}/pyproject.toml +1 -1
- weco-0.2.14/weco/__init__.py +14 -0
- {weco-0.2.12 → weco-0.2.14}/weco/api.py +19 -6
- weco-0.2.14/weco/auth.py +64 -0
- weco-0.2.14/weco/cli.py +583 -0
- {weco-0.2.12 → weco-0.2.14}/weco/panels.py +20 -2
- {weco-0.2.12 → weco-0.2.14}/weco.egg-info/PKG-INFO +61 -23
- {weco-0.2.12 → weco-0.2.14}/weco.egg-info/SOURCES.txt +2 -0
- weco-0.2.12/.github/workflows/release.yml +0 -106
- weco-0.2.12/weco/__init__.py +0 -4
- weco-0.2.12/weco/cli.py +0 -378
- {weco-0.2.12 → weco-0.2.14}/.github/workflows/lint.yml +0 -0
- {weco-0.2.12 → weco-0.2.14}/LICENSE +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/cuda/README.md +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/cuda/evaluate.py +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/cuda/guide.md +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/cuda/optimize.py +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/hello-kernel-world/evaluate.py +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/hello-kernel-world/optimize.py +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/metal/README.md +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/metal/evaluate.py +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/metal/examples.rst +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/metal/optimize.py +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/prompt/README.md +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/prompt/eval.py +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/prompt/optimize.py +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/prompt/prompt_guide.md +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/spaceship-titanic/README.md +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/spaceship-titanic/baseline.py +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/spaceship-titanic/evaluate.py +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/spaceship-titanic/optimize.py +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/spaceship-titanic/requirements-test.txt +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/spaceship-titanic/utils.py +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/triton/README.md +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/triton/evaluate.py +0 -0
- {weco-0.2.12 → weco-0.2.14}/examples/triton/optimize.py +0 -0
- {weco-0.2.12 → weco-0.2.14}/setup.cfg +0 -0
- {weco-0.2.12 → weco-0.2.14}/weco/utils.py +0 -0
- {weco-0.2.12 → weco-0.2.14}/weco.egg-info/dependency_links.txt +0 -0
- {weco-0.2.12 → weco-0.2.14}/weco.egg-info/entry_points.txt +0 -0
- {weco-0.2.12 → weco-0.2.14}/weco.egg-info/requires.txt +0 -0
- {weco-0.2.12 → weco-0.2.14}/weco.egg-info/top_level.txt +0 -0
```diff
--- /dev/null
+++ weco-0.2.14/.github/workflows/release.yml
@@ -0,0 +1,151 @@
+name: Release
+
+on:
+  # Fires when the *Lint and Format Code* workflow that was
+  # started by a push to main has finished (any conclusion)
+  workflow_run:
+    workflows: ["Lint and Format Code"]
+    branches: [main]
+    types: [completed]
+
+jobs:
+  # ────────────────────────────────────────────────────────────────────
+  # 1) Pre-check — decide whether we really need to release
+  # ────────────────────────────────────────────────────────────────────
+  pre-check:
+    name: Detect new version
+    if: ${{ github.event.workflow_run.conclusion == 'success' }} # gate #1
+    runs-on: ubuntu-latest
+    outputs:
+      release_needed: ${{ steps.version_diff.outputs.release_needed }}
+      version: ${{ steps.version_diff.outputs.version }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.workflow_run.head_sha }}
+          fetch-depth: 0 # we need history to read the previous commit
+
+      - name: Compare versions
+        id: version_diff
+        shell: bash
+        # gate #2 happens here → sets release_needed=true/false
+        run: |
+          # version in current pyproject.toml
+          NEW_VERSION=$(grep -Po '(?<=^version = ")[^"]+' pyproject.toml)
+          # version in the previous commit's pyproject.toml (if the file existed)
+          BASE_COMMIT=$(git rev-parse "$GITHUB_SHA"^)
+          if git cat-file -e "$BASE_COMMIT":pyproject.toml 2>/dev/null; then
+            OLD_VERSION=$(git show "$BASE_COMMIT":pyproject.toml \
+              | grep -Po '(?<=^version = ")[^"]+' || true)
+          else
+            OLD_VERSION=""
+          fi
+
+          echo "Previous version: $OLD_VERSION"
+          echo "Current version: $NEW_VERSION"
+
+          if [[ "$NEW_VERSION" != "$OLD_VERSION" ]]; then
+            echo "release_needed=true" >>"$GITHUB_OUTPUT"
+          else
+            echo "release_needed=false" >>"$GITHUB_OUTPUT"
+          fi
+          echo "version=$NEW_VERSION" >>"$GITHUB_OUTPUT"
+
+  # ────────────────────────────────────────────────────────────────────
+  # 2) Build
+  # ────────────────────────────────────────────────────────────────────
+  build:
+    name: Build distribution 📦
+    needs: pre-check
+    if: ${{ needs.pre-check.outputs.release_needed == 'true' }} # gate #3
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.workflow_run.head_sha }}
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install build dependencies
+        run: python3 -m pip install --user build
+
+      - name: Build sdist & wheel
+        run: python3 -m build
+
+      - name: Store the distribution packages
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-package-distributions
+          path: dist/
+
+  # ────────────────────────────────────────────────────────────────────
+  # 3) Publish to PyPI
+  # ────────────────────────────────────────────────────────────────────
+  publish-to-pypi:
+    name: Publish Python 🐍 distribution to PyPI
+    needs: [pre-check, build]
+    if: ${{ needs.pre-check.outputs.release_needed == 'true' }}
+    runs-on: ubuntu-latest
+    environment:
+      name: release
+      url: https://pypi.org/p/weco
+    permissions:
+      id-token: write
+
+    steps:
+      - name: Download the dists
+        uses: actions/download-artifact@v4
+        with:
+          name: python-package-distributions
+          path: dist/
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+
+  # ────────────────────────────────────────────────────────────────────
+  # 4) GitHub Release
+  # ────────────────────────────────────────────────────────────────────
+  github-release:
+    name: Create GitHub Release
+    needs: [pre-check, publish-to-pypi]
+    if: ${{ needs.pre-check.outputs.release_needed == 'true' }}
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      id-token: write
+
+    steps:
+      - name: Download the dists
+        uses: actions/download-artifact@v4
+        with:
+          name: python-package-distributions
+          path: dist/
+
+      - name: Sign dists with Sigstore
+        uses: sigstore/gh-action-sigstore-python@v3.0.0
+        with:
+          inputs: |
+            ./dist/*.tar.gz
+            ./dist/*.whl
+
+      - name: Create GitHub Release
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        run: |
+          gh release create "v${{ needs.pre-check.outputs.version }}" \
+            --repo "${{ github.repository }}" \
+            --notes ""
+
+      - name: Upload artefacts to Release
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        run: |
+          gh release upload "v${{ needs.pre-check.outputs.version }}" dist/** \
+            --repo "${{ github.repository }}"
```
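The workflow's release decision comes down to the `version_diff` step: a release fires only when the `version` field in `pyproject.toml` differs between the pushed commit and its parent. Below is a minimal Python sketch of that same check for verifying the gate locally — an illustration, not shipped code; it assumes `git` is on `PATH` and a top-level `version = "..."` line in `pyproject.toml`:

```python
# Local replica of the pre-check job's version gate (sketch, not shipped code).
import re
import subprocess

VERSION_RE = re.compile(r'^version = "([^"]+)"', re.MULTILINE)

def version_at(rev: str) -> str:
    """Return the pyproject.toml version at a git revision, or "" if absent."""
    try:
        text = subprocess.check_output(
            ["git", "show", f"{rev}:pyproject.toml"],
            text=True,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError:
        return ""  # pyproject.toml did not exist at that revision
    match = VERSION_RE.search(text)
    return match.group(1) if match else ""

old, new = version_at("HEAD^"), version_at("HEAD")
print(f"Previous version: {old}")
print(f"Current version: {new}")
print(f"release_needed={'true' if new != old else 'false'}")
```

Run from the repository root, this prints the same `release_needed` flag the job writes to `$GITHUB_OUTPUT`.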
````diff
--- weco-0.2.12/PKG-INFO
+++ weco-0.2.14/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: weco
-Version: 0.2.12
+Version: 0.2.14
 Summary: Documentation for `weco`, a CLI for using Weco AI's code optimizer.
 Author-email: Weco AI Team <contact@weco.ai>
 License: MIT
@@ -32,7 +32,7 @@ Example applications include:
 
 - **GPU Kernel Optimization**: Reimplement PyTorch functions using CUDA, Triton or Metal, optimizing for `latency`, `throughput`, or `memory_bandwidth`.
 - **Model Development**: Tune feature transformations or architectures, optimizing for `validation_accuracy`, `AUC`, or `Sharpe Ratio`.
-- **Prompt Engineering**: Refine prompts for LLMs, optimizing for
+- **Prompt Engineering**: Refine prompts for LLMs, optimizing for `win_rate`, `relevance`, or `format_adherence`
 
 https://github.com/user-attachments/assets/cb724ef1-bff6-4757-b457-d3b2201ede81
 
@@ -42,7 +42,7 @@ https://github.com/user-attachments/assets/cb724ef1-bff6-4757-b457-d3b2201ede81
 
 The `weco` CLI leverages a tree search approach guided by Large Language Models (LLMs) to iteratively explore and refine your code. It automatically applies changes, runs your evaluation script, parses the results, and proposes further improvements based on the specified goal.
 
-
+[image](https://github.com/user-attachments/assets/a6ed63fa-9c40-498e-aa98-a873e5786509)
 
 ---
 
@@ -54,17 +54,38 @@ The `weco` CLI leverages a tree search approach guided by Large Language Models
 pip install weco
 ```
 
-2. **
+2. **Set Up LLM API Keys (Required):**
 
-
+   `weco` requires API keys for the Large Language Models (LLMs) it uses internally. You **must** provide these keys via environment variables:
 
--
--
--
+   - **OpenAI:** `export OPENAI_API_KEY="your_key_here"`
+   - **Anthropic:** `export ANTHROPIC_API_KEY="your_key_here"`
+   - **Google DeepMind:** `export GEMINI_API_KEY="your_key_here"` (Google AI Studio has a free API usage quota. Create a key [here](https://aistudio.google.com/apikey) to use `weco` for free.)
+
+   The optimization process will fail if the necessary keys for the chosen model are not found in your environment.
+
+3. **Log In to Weco (Optional):**
+
+   To associate your optimization runs with your Weco account and view them on the Weco dashboard, you can log in. `weco` uses a device authentication flow:
+
+   - When you first run `weco run`, you'll be prompted if you want to log in or proceed anonymously.
+   - If you choose to log in (by pressing `l`), you'll be shown a URL and `weco` will attempt to open it in your default web browser.
+   - You then authenticate in the browser. Once authenticated, the CLI will detect this and complete the login.
+   - This saves a Weco-specific API key locally (typically at `~/.config/weco/credentials.json`).
+
+   If you choose to skip login (by pressing Enter or `s`), `weco` will still function using the environment variable LLM keys, but the run history will not be linked to a Weco account.
+
+   To log out and remove your saved Weco API key, use the `weco logout` command.
 
 ---
 
 ## Usage
+
+The CLI has two main commands:
+
+- `weco run`: Initiates the code optimization process.
+- `weco logout`: Logs you out of your Weco account.
+
 <div style="background-color: #fff3cd; border: 1px solid #ffeeba; padding: 15px; border-radius: 4px; margin-bottom: 15px;">
 <strong>⚠️ Warning: Code Modification</strong><br>
 <code>weco</code> directly modifies the file specified by <code>--source</code> during the optimization process. It is <strong>strongly recommended</strong> to use version control (like Git) to track changes and revert if needed. Alternatively, ensure you have a backup of your original file before running the command. Upon completion, the file will contain the best-performing version of the code found during the run.
@@ -72,7 +93,11 @@ The `weco` CLI leverages a tree search approach guided by Large Language Models
 
 ---
 
-###
+### `weco run` Command
+
+This command starts the optimization process.
+
+**Example: Optimizing Simple PyTorch Operations**
 
 This basic example shows how to optimize a simple PyTorch function for speedup.
 
@@ -86,7 +111,7 @@ cd examples/hello-kernel-world
 pip install torch
 
 # Run Weco
-weco --source optimize.py \
+weco run --source optimize.py \
   --eval-command "python evaluate.py --solution-path optimize.py --device cpu" \
   --metric speedup \
   --maximize true \
@@ -99,19 +124,29 @@ weco --source optimize.py \
 
 ---
 
-
+**Arguments for `weco run`:**
 
-| Argument | Description
-| :-------------------------- |
-| `--source` | Path to the source code file that will be optimized (e.g., `optimize.py`).
-| `--eval-command` | Command to run for evaluating the code in `--source`. This command should print the target `--metric` and its value to the terminal (stdout/stderr). See note below.
-| `--metric` | The name of the metric you want to optimize (e.g., 'accuracy', 'speedup', 'loss'). This metric name should match what's printed by your `--eval-command`.
-| `--maximize` | Whether to maximize (`true`) or minimize (`false`) the metric.
-| `--steps` | Number of optimization steps (LLM iterations) to run.
-| `--model` | Model identifier for the LLM to use (e.g., `gpt-4o`, `claude-3.
-| `--additional-instructions` | (Optional) Natural language description of specific instructions OR path to a file containing detailed instructions to guide the LLM.
-| `--log-dir` | (Optional) Path to the directory to log intermediate steps and final optimization result. Defaults to `.runs/`.
-| `--preserve-source` | (Optional) If set, do not overwrite the original `--source` file. Modifications and the best solution will still be saved in the `--log-dir`.
+| Argument | Description | Required |
+| :-------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------- |
+| `--source` | Path to the source code file that will be optimized (e.g., `optimize.py`). | Yes |
+| `--eval-command` | Command to run for evaluating the code in `--source`. This command should print the target `--metric` and its value to the terminal (stdout/stderr). See note below. | Yes |
+| `--metric` | The name of the metric you want to optimize (e.g., 'accuracy', 'speedup', 'loss'). This metric name should match what's printed by your `--eval-command`. | Yes |
+| `--maximize` | Whether to maximize (`true`) or minimize (`false`) the metric. | Yes |
+| `--steps` | Number of optimization steps (LLM iterations) to run. | Yes |
+| `--model` | Model identifier for the LLM to use (e.g., `gpt-4o`, `claude-3.5-sonnet`). Recommended models to try include `o3-mini`, `claude-3-haiku`, and `gemini-2.5-pro-exp-03-25`. | Yes |
+| `--additional-instructions` | (Optional) Natural language description of specific instructions OR path to a file containing detailed instructions to guide the LLM. | No |
+| `--log-dir` | (Optional) Path to the directory to log intermediate steps and final optimization result. Defaults to `.runs/`. | No |
+| `--preserve-source` | (Optional) If set, do not overwrite the original `--source` file. Modifications and the best solution will still be saved in the `--log-dir`. | No |
+
+---
+
+### `weco logout` Command
+
+This command logs you out by removing the locally stored Weco API key.
+
+```bash
+weco logout
+```
 
 ---
 
@@ -120,6 +155,7 @@ weco --source optimize.py \
 Weco, powered by the AIDE algorithm, optimizes code iteratively based on your evaluation results. Achieving significant improvements, especially on complex research-level tasks, often requires substantial exploration time.
 
 The following plot from the independent [Research Engineering Benchmark (RE-Bench)](https://metr.org/AI_R_D_Evaluation_Report.pdf) report shows the performance of AIDE (the algorithm behind Weco) on challenging ML research engineering tasks over different time budgets.
+
 <p align="center">
 <img src="https://github.com/user-attachments/assets/ff0e471d-2f50-4e2d-b718-874862f533df" alt="RE-Bench Performance Across Time" width="60%"/>
 </p>
@@ -146,23 +182,25 @@ Final speedup value = 1.5
 
 Weco will parse this output to extract the numerical value (1.5 in this case) associated with the metric name ('speedup').
 
-
 ## Contributing
 
 We welcome contributions! To get started:
 
 1. **Fork and Clone the Repository:**
+
    ```bash
    git clone https://github.com/WecoAI/weco-cli.git
    cd weco-cli
    ```
 
 2. **Install Development Dependencies:**
+
    ```bash
    pip install -e ".[dev]"
    ```
 
 3. **Create a Feature Branch:**
+
   ```bash
    git checkout -b feature/your-feature-name
    ```
````
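The `Final speedup value = 1.5` example above hinges on the evaluation command printing the metric name next to a number. A rough sketch of that kind of extraction follows — an illustration only, not the CLI's actual parser, which lives in `weco/cli.py` and is not displayed in this diff view:

```python
# Sketch of the parsing behaviour the README describes: find the number that
# accompanies the metric name in the evaluation output. Illustrative only;
# the real parser in weco/cli.py is not shown in this diff.
import re
from typing import Optional

def extract_metric(output: str, metric: str) -> Optional[float]:
    pattern = re.compile(rf"{re.escape(metric)}\D*?(-?\d+(?:\.\d+)?)", re.IGNORECASE)
    matches = pattern.findall(output)
    return float(matches[-1]) if matches else None  # last occurrence wins

print(extract_metric("Final speedup value = 1.5", "speedup"))  # -> 1.5
```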
`{weco-0.2.12 → weco-0.2.14}/README.md`: the changes are identical in content to the PKG-INFO diff shown above (PKG-INFO embeds the README after its metadata block); only the hunk offsets differ: `@@ -10,7 +10,7 @@`, `@@ -20,7 +20,7 @@`, `@@ -32,17 +32,38 @@`, `@@ -50,7 +71,11 @@`, `@@ -64,7 +89,7 @@`, `@@ -77,19 +102,29 @@`, `@@ -98,6 +133,7 @@`, and `@@ -124,23 +160,25 @@`.
```diff
--- weco-0.2.12/pyproject.toml
+++ weco-0.2.14/pyproject.toml
@@ -10,7 +10,7 @@ authors = [
 ]
 description = "Documentation for `weco`, a CLI for using Weco AI's code optimizer."
 readme = "README.md"
-version = "0.2.12"
+version = "0.2.14"
 license = {text = "MIT"}
 requires-python = ">=3.8"
 dependencies = ["requests", "rich"]
```
```diff
--- /dev/null
+++ weco-0.2.14/weco/__init__.py
@@ -0,0 +1,14 @@
+import os
+
+# DO NOT EDIT
+__pkg_version__ = "0.2.14"
+__api_version__ = "v1"
+
+__base_url__ = f"https://api.weco.ai/{__api_version__}"
+# If user specifies a custom base URL, use that instead
+if os.environ.get("WECO_BASE_URL"):
+    __base_url__ = os.environ.get("WECO_BASE_URL")
+
+__dashboard_url__ = "https://dashboard.weco.ai"
+if os.environ.get("WECO_DASHBOARD_URL"):
+    __dashboard_url__ = os.environ.get("WECO_DASHBOARD_URL")
```
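Because `__base_url__` and `__dashboard_url__` are resolved once at import time, the `WECO_BASE_URL` override must be in the environment before the package is first imported. A small sketch; the localhost URL is a made-up example:

```python
# Demonstrates the import-time URL override. Set the variable before the
# first `import weco`, since the module reads it when it is imported.
import os

os.environ["WECO_BASE_URL"] = "http://localhost:8000/v1"  # hypothetical local API

import weco

print(weco.__base_url__)       # -> http://localhost:8000/v1
print(weco.__dashboard_url__)  # -> https://dashboard.weco.ai (default, unchanged)
```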
```diff
--- weco-0.2.12/weco/api.py
+++ weco-0.2.14/weco/api.py
@@ -7,8 +7,7 @@ import sys
 
 def handle_api_error(e: requests.exceptions.HTTPError, console: rich.console.Console) -> None:
     """Extract and display error messages from API responses in a structured format."""
-
-    console.print(f"[bold red]Error:[/] {error_message}")
+    console.print(f"[bold red]{e.response.json()['detail']}[/]")
     sys.exit(1)
 
 
@@ -24,13 +23,15 @@ def start_optimization_session(
     search_policy_config: Dict[str, Any],
     additional_instructions: str = None,
     api_keys: Dict[str, Any] = {},
+    auth_headers: dict = {},  # Add auth_headers
     timeout: int = 800,
 ) -> Dict[str, Any]:
     """Start the optimization session."""
     with console.status("[bold green]Starting Optimization..."):
         try:
+            # __base_url__ already contains /v1
             response = requests.post(
-                f"{__base_url__}/sessions",
+                f"{__base_url__}/sessions",  # Path is relative to base_url
                 json={
                     "source_code": source_code,
                     "additional_instructions": additional_instructions,
@@ -43,6 +44,7 @@
                     },
                     "metadata": {"client_name": "cli", "client_version": __pkg_version__, **api_keys},
                 },
+                headers=auth_headers,  # Add headers
                 timeout=timeout,
             )
             response.raise_for_status()
@@ -57,17 +59,20 @@ def evaluate_feedback_then_suggest_next_solution(
     execution_output: str,
     additional_instructions: str = None,
     api_keys: Dict[str, Any] = {},
+    auth_headers: dict = {},  # Add auth_headers
     timeout: int = 800,
 ) -> Dict[str, Any]:
     """Evaluate the feedback and suggest the next solution."""
     try:
+        # __base_url__ already contains /v1
         response = requests.post(
-            f"{__base_url__}/sessions/{session_id}/suggest",
+            f"{__base_url__}/sessions/{session_id}/suggest",  # Path is relative to base_url
             json={
                 "execution_output": execution_output,
                 "additional_instructions": additional_instructions,
                 "metadata": {**api_keys},
             },
+            headers=auth_headers,  # Add headers
             timeout=timeout,
         )
        response.raise_for_status()
@@ -77,12 +82,20 @@
 
 
 def get_optimization_session_status(
-    console: rich.console.Console,
+    console: rich.console.Console,
+    session_id: str,
+    include_history: bool = False,
+    auth_headers: dict = {},
+    timeout: int = 800,  # Add auth_headers
 ) -> Dict[str, Any]:
     """Get the current status of the optimization session."""
     try:
+        # __base_url__ already contains /v1
         response = requests.get(
-            f"{__base_url__}/sessions/{session_id}",
+            f"{__base_url__}/sessions/{session_id}",  # Path is relative to base_url
+            params={"include_history": include_history},
+            headers=auth_headers,  # Add headers
+            timeout=timeout,
         )
         response.raise_for_status()
         return response.json()
```