weco 0.2.22.tar.gz → 0.2.24.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {weco-0.2.22 → weco-0.2.24}/.gitignore +2 -0
- {weco-0.2.22 → weco-0.2.24}/PKG-INFO +36 -25
- {weco-0.2.22 → weco-0.2.24}/README.md +33 -24
- weco-0.2.24/contributing.md +23 -0
- {weco-0.2.22 → weco-0.2.24}/examples/cuda/README.md +7 -1
- {weco-0.2.22 → weco-0.2.24}/examples/cuda/evaluate.py +3 -0
- {weco-0.2.22 → weco-0.2.24}/examples/hello-kernel-world/evaluate.py +3 -0
- {weco-0.2.22 → weco-0.2.24}/examples/prompt/README.md +8 -1
- {weco-0.2.22 → weco-0.2.24}/examples/spaceship-titanic/README.md +5 -1
- {weco-0.2.22 → weco-0.2.24}/examples/triton/README.md +5 -1
- {weco-0.2.22 → weco-0.2.24}/examples/triton/evaluate.py +3 -0
- {weco-0.2.22 → weco-0.2.24}/pyproject.toml +3 -1
- {weco-0.2.22 → weco-0.2.24}/weco/api.py +84 -87
- {weco-0.2.22 → weco-0.2.24}/weco/auth.py +7 -5
- {weco-0.2.22 → weco-0.2.24}/weco/chatbot.py +34 -23
- {weco-0.2.22 → weco-0.2.24}/weco/cli.py +10 -1
- weco-0.2.24/weco/constants.py +7 -0
- {weco-0.2.22 → weco-0.2.24}/weco/optimizer.py +26 -21
- {weco-0.2.22 → weco-0.2.24}/weco/panels.py +105 -59
- {weco-0.2.22 → weco-0.2.24}/weco/utils.py +47 -12
- {weco-0.2.22 → weco-0.2.24}/weco.egg-info/PKG-INFO +36 -25
- {weco-0.2.22 → weco-0.2.24}/weco.egg-info/SOURCES.txt +2 -1
- {weco-0.2.22 → weco-0.2.24}/weco.egg-info/requires.txt +2 -0
- weco-0.2.22/.repomixignore +0 -4
- {weco-0.2.22 → weco-0.2.24}/.github/workflows/lint.yml +0 -0
- {weco-0.2.22 → weco-0.2.24}/.github/workflows/release.yml +0 -0
- {weco-0.2.22 → weco-0.2.24}/LICENSE +0 -0
- {weco-0.2.22 → weco-0.2.24}/assets/example-optimization.gif +0 -0
- {weco-0.2.22 → weco-0.2.24}/assets/weco.svg +0 -0
- {weco-0.2.22 → weco-0.2.24}/examples/cuda/guide.md +0 -0
- {weco-0.2.22 → weco-0.2.24}/examples/cuda/optimize.py +0 -0
- {weco-0.2.22 → weco-0.2.24}/examples/hello-kernel-world/colab_notebook_walkthrough.ipynb +0 -0
- {weco-0.2.22 → weco-0.2.24}/examples/hello-kernel-world/optimize.py +0 -0
- {weco-0.2.22 → weco-0.2.24}/examples/prompt/eval.py +0 -0
- {weco-0.2.22 → weco-0.2.24}/examples/prompt/optimize.py +0 -0
- {weco-0.2.22 → weco-0.2.24}/examples/prompt/prompt_guide.md +0 -0
- {weco-0.2.22 → weco-0.2.24}/examples/spaceship-titanic/competition_description.md +0 -0
- {weco-0.2.22 → weco-0.2.24}/examples/spaceship-titanic/data/sample_submission.csv +0 -0
- {weco-0.2.22 → weco-0.2.24}/examples/spaceship-titanic/data/test.csv +0 -0
- {weco-0.2.22 → weco-0.2.24}/examples/spaceship-titanic/data/train.csv +0 -0
- {weco-0.2.22 → weco-0.2.24}/examples/spaceship-titanic/evaluate.py +0 -0
- {weco-0.2.22 → weco-0.2.24}/examples/spaceship-titanic/train.py +0 -0
- {weco-0.2.22 → weco-0.2.24}/examples/triton/optimize.py +0 -0
- {weco-0.2.22 → weco-0.2.24}/setup.cfg +0 -0
- {weco-0.2.22 → weco-0.2.24}/weco/__init__.py +0 -0
- {weco-0.2.22 → weco-0.2.24}/weco.egg-info/dependency_links.txt +0 -0
- {weco-0.2.22 → weco-0.2.24}/weco.egg-info/entry_points.txt +0 -0
- {weco-0.2.22 → weco-0.2.24}/weco.egg-info/top_level.txt +0 -0
{weco-0.2.22 → weco-0.2.24}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: weco
-Version: 0.2.22
+Version: 0.2.24
 Summary: Documentation for `weco`, a CLI for using Weco AI's code optimizer.
 Author-email: Weco AI Team <contact@weco.ai>
 License: MIT
@@ -16,6 +16,8 @@ Requires-Dist: requests
 Requires-Dist: rich
 Requires-Dist: packaging
 Requires-Dist: gitingest
+Requires-Dist: fastapi
+Requires-Dist: slowapi
 Provides-Extra: dev
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: build; extra == "dev"
@@ -30,10 +32,11 @@ Dynamic: license-file
 </div>
 
 [](https://www.python.org)
+[](https://badge.fury.io/py/weco)
 [](https://docs.weco.ai/)
-[](https://colab.research.google.com/github/WecoAI/weco-cli/blob/main/examples/hello-kernel-world/colab_notebook_walkthrough.ipynb)
+[](https://pepy.tech/projects/weco)
+[](https://arxiv.org/abs/2502.13138)
+[](https://colab.research.google.com/github/WecoAI/weco-cli/blob/main/examples/hello-kernel-world/colab_notebook_walkthrough.ipynb)
 
 `pip install weco`
 
@@ -73,9 +76,9 @@ The `weco` CLI leverages a tree search approach guided by LLMs to iteratively ex
 
 `weco` requires API keys for the LLMs it uses internally. You **must** provide these keys via environment variables:
 
-- **OpenAI:** `export OPENAI_API_KEY="your_key_here"` (Create your API key [here](https://platform.openai.com/api-keys))
-- **Anthropic:** `export ANTHROPIC_API_KEY="your_key_here"` (Create your API key [here](https://console.anthropic.com/settings/keys))
-- **Google:** `export GEMINI_API_KEY="your_key_here"` (Google AI Studio has a free API usage quota. Create your API key [here](https://aistudio.google.com/apikey) to use `weco` for free.)
+- **OpenAI:** `export OPENAI_API_KEY="your_key_here"` (Create your OpenAI API key [here](https://platform.openai.com/api-keys))
+- **Anthropic:** `export ANTHROPIC_API_KEY="your_key_here"` (Create your Anthropic API key [here](https://console.anthropic.com/settings/keys))
+- **Google:** `export GEMINI_API_KEY="your_key_here"` (Google AI Studio has a free API usage quota. Create your Gemini API key [here](https://aistudio.google.com/apikey) to use `weco` for free.)
 
 ---
 
@@ -157,6 +160,7 @@ For more advanced examples, including [Triton](/examples/triton/README.md), [CUD
 | `-M, --model` | Model identifier for the LLM to use (e.g., `o4-mini`, `claude-sonnet-4-0`). | `o4-mini` when `OPENAI_API_KEY` is set; `claude-sonnet-4-0` when `ANTHROPIC_API_KEY` is set; `gemini-2.5-pro` when `GEMINI_API_KEY` is set. | `-M o4-mini` |
 | `-i, --additional-instructions`| Natural language description of specific instructions **or** path to a file containing detailed instructions to guide the LLM. | `None` | `-i instructions.md` or `-i "Optimize the model for faster inference"`|
 | `-l, --log-dir` | Path to the directory to log intermediate steps and final optimization result. | `.runs/` | `-l ./logs/` |
+| `--eval-timeout` | Timeout in seconds for each step in evaluation. | No timeout (unlimited) | `--eval-timeout 3600` |
 
 ---
 
@@ -247,31 +251,38 @@ Final speedup value = 1.5
 
 Weco will parse this output to extract the numerical value (1.5 in this case) associated with the metric name ('speedup').
 
-##
+## Supported Models
 
-
+Weco supports the following LLM models:
 
-
+### OpenAI Models
+- `o3`
+- `o3-mini`
+- `o4-mini`
+- `o1-pro`
+- `o1`
+- `gpt-4.1`
+- `gpt-4.1-mini`
+- `gpt-4.1-nano`
+- `gpt-4o`
+- `gpt-4o-mini`
 
-
-
-
-
+### Anthropic Models
+- `claude-opus-4-0`
+- `claude-sonnet-4-0`
+- `claude-3-7-sonnet-latest`
 
-
-
-
-
-```
+### Gemini Models
+- `gemini-2.5-pro`
+- `gemini-2.5-flash`
+- `gemini-2.5-flash-lite`
 
-
+You can specify any of these models using the `-M` or `--model` flag. Ensure you have the corresponding API key set as an environment variable for the model provider you wish to use.
 
-
-git checkout -b feature/your-feature-name
-```
+---
 
-
+## Contributing
 
-
+We welcome contributions! Please see [contributing.md](contributing.md) for detailed guidelines on how to contribute to this project.
 
 ---
{weco-0.2.22 → weco-0.2.24}/README.md

@@ -6,10 +6,11 @@
 </div>
 
 [](https://www.python.org)
+[](https://badge.fury.io/py/weco)
 [](https://docs.weco.ai/)
-[](https://colab.research.google.com/github/WecoAI/weco-cli/blob/main/examples/hello-kernel-world/colab_notebook_walkthrough.ipynb)
+[](https://pepy.tech/projects/weco)
+[](https://arxiv.org/abs/2502.13138)
+[](https://colab.research.google.com/github/WecoAI/weco-cli/blob/main/examples/hello-kernel-world/colab_notebook_walkthrough.ipynb)
 
 `pip install weco`
 
@@ -49,9 +50,9 @@ The `weco` CLI leverages a tree search approach guided by LLMs to iteratively ex
 
 `weco` requires API keys for the LLMs it uses internally. You **must** provide these keys via environment variables:
 
-- **OpenAI:** `export OPENAI_API_KEY="your_key_here"` (Create your API key [here](https://platform.openai.com/api-keys))
-- **Anthropic:** `export ANTHROPIC_API_KEY="your_key_here"` (Create your API key [here](https://console.anthropic.com/settings/keys))
-- **Google:** `export GEMINI_API_KEY="your_key_here"` (Google AI Studio has a free API usage quota. Create your API key [here](https://aistudio.google.com/apikey) to use `weco` for free.)
+- **OpenAI:** `export OPENAI_API_KEY="your_key_here"` (Create your OpenAI API key [here](https://platform.openai.com/api-keys))
+- **Anthropic:** `export ANTHROPIC_API_KEY="your_key_here"` (Create your Anthropic API key [here](https://console.anthropic.com/settings/keys))
+- **Google:** `export GEMINI_API_KEY="your_key_here"` (Google AI Studio has a free API usage quota. Create your Gemini API key [here](https://aistudio.google.com/apikey) to use `weco` for free.)
 
 ---
 
@@ -133,6 +134,7 @@ For more advanced examples, including [Triton](/examples/triton/README.md), [CUD
 | `-M, --model` | Model identifier for the LLM to use (e.g., `o4-mini`, `claude-sonnet-4-0`). | `o4-mini` when `OPENAI_API_KEY` is set; `claude-sonnet-4-0` when `ANTHROPIC_API_KEY` is set; `gemini-2.5-pro` when `GEMINI_API_KEY` is set. | `-M o4-mini` |
 | `-i, --additional-instructions`| Natural language description of specific instructions **or** path to a file containing detailed instructions to guide the LLM. | `None` | `-i instructions.md` or `-i "Optimize the model for faster inference"`|
 | `-l, --log-dir` | Path to the directory to log intermediate steps and final optimization result. | `.runs/` | `-l ./logs/` |
+| `--eval-timeout` | Timeout in seconds for each step in evaluation. | No timeout (unlimited) | `--eval-timeout 3600` |
 
 ---
 
@@ -223,31 +225,38 @@ Final speedup value = 1.5
 
 Weco will parse this output to extract the numerical value (1.5 in this case) associated with the metric name ('speedup').
 
-##
+## Supported Models
 
-
+Weco supports the following LLM models:
 
-
+### OpenAI Models
+- `o3`
+- `o3-mini`
+- `o4-mini`
+- `o1-pro`
+- `o1`
+- `gpt-4.1`
+- `gpt-4.1-mini`
+- `gpt-4.1-nano`
+- `gpt-4o`
+- `gpt-4o-mini`
 
-
-
-
-
+### Anthropic Models
+- `claude-opus-4-0`
+- `claude-sonnet-4-0`
+- `claude-3-7-sonnet-latest`
 
-
-
-
-
-```
+### Gemini Models
+- `gemini-2.5-pro`
+- `gemini-2.5-flash`
+- `gemini-2.5-flash-lite`
 
-
+You can specify any of these models using the `-M` or `--model` flag. Ensure you have the corresponding API key set as an environment variable for the model provider you wish to use.
 
-
-git checkout -b feature/your-feature-name
-```
+---
 
-
+## Contributing
 
-
+We welcome contributions! Please see [contributing.md](contributing.md) for detailed guidelines on how to contribute to this project.
 
 ---
weco-0.2.24/contributing.md

@@ -0,0 +1,23 @@
+# Contributing
+
+We welcome your contributions! To get started:
+
+1. **Fork & Clone the Repository:**
+```bash
+git clone https://github.com/WecoAI/weco-cli.git
+cd weco-cli
+```
+
+2. **Install Dependencies:**
+```bash
+pip install -e ".[dev]"
+```
+
+3. **Create a Feature Branch:**
+```bash
+git checkout -b feature/your-feature-name
+```
+
+4. **Make Changes:** Ensure your code adheres to our style guidelines and includes relevant tests.
+
+5. **Commit, Push & Open a PR**: Commit your changes, and open a pull request with a clear description of your enhancements.
{weco-0.2.22 → weco-0.2.24}/examples/cuda/README.md

@@ -11,7 +11,7 @@ Install the CLI using `pip`:
 pip install weco
 ```
 
-
+Create your OpenAI API key [here](https://platform.openai.com/api-keys), then run:
 ```bash
 export OPENAI_API_KEY="your_key_here"
 ```
@@ -46,3 +46,9 @@ weco run --source optimize.py \
 * `--additional-instructions guide.md`: Provides guidance to the LLM on the optimization approach.
 
 Weco will iteratively modify `optimize.py`, generating and integrating CUDA C++ code, guided by the evaluation results and the instructions in `guide.md`.
+
+## Next Steps
+
+Now that you've optimized your code with CUDA kernels, try [Triton Optimization](/examples/triton/README.md) for a higher-level GPU programming approach. If you're more interested in [Model Development](/examples/spaceship-titanic/README.md) or [Prompt Engineering](/examples/prompt/README.md), we've got you covered!
+
+You can check out our [CLI Reference](https://docs.weco.ai/cli/cli-reference) to learn more about what you can do with the tool.
{weco-0.2.22 → weco-0.2.24}/examples/cuda/evaluate.py

@@ -110,6 +110,7 @@ if __name__ == "__main__":
 
     # benchmarking parameters
     n_correctness_trials = 10
+    correctness_tolerance = 1e-5
     n_warmup = 1000
     n_rep = 5000
 
@@ -152,6 +153,8 @@ if __name__ == "__main__":
         max_diff_avg += torch.max(torch.abs(optimized_output - baseline_output))
     max_diff_avg /= n_correctness_trials
     print(f"max float diff between values of baseline and optimized model: {max_diff_avg}")
+    if max_diff_avg > correctness_tolerance:
+        print("invalid solution: max float diff is too high")
 
     # measure performance
     inputs = get_inputs(batch_size=batch_size, seq_len=seq_len, n_embd=n_embd, device="cuda")
{weco-0.2.22 → weco-0.2.24}/examples/hello-kernel-world/evaluate.py

@@ -101,6 +101,7 @@ if __name__ == "__main__":
 
     # benchmark parameters
     n_correctness_trials = 10
+    correctness_tolerance = 1e-5
     n_warmup = 1000
     n_rep = 5000
 
@@ -130,6 +131,8 @@ if __name__ == "__main__":
         max_diff_avg += torch.max(torch.abs(optimized_output - baseline_output))
     max_diff_avg /= n_correctness_trials
     print(f"max float diff between values of baseline and optimized model: {max_diff_avg}")
+    if max_diff_avg > correctness_tolerance:
+        print("invalid solution: max float diff is too high")
 
     # measure performance
     inputs = get_inputs(batch_size, input_size, args.device)
{weco-0.2.22 → weco-0.2.24}/examples/prompt/README.md

@@ -16,7 +16,7 @@ The experiment runs locally, requires only two short Python files and a prompt g
 pip install weco openai datasets
 ```
 
-3. This example uses `o4-mini` via the OpenAI API by default.
+3. This example uses `o4-mini` via the OpenAI API by default. Create your OpenAI API key [here](https://platform.openai.com/api-keys), then run:
 ```bash
 export OPENAI_API_KEY="your_key_here"
 ```
@@ -62,3 +62,10 @@ Weco then mutates the prompt instructions in `optimize.py`, tries again, and gra
 * The script sends model calls in parallel via `ThreadPoolExecutor`, so network latency is hidden.
 * Every five completed items, the script logs progress and elapsed time.
 * The final line `accuracy: value` is the only part Weco needs for guidance.
+
+## Next Steps
+
+Now that you've automated prompt engineering for yourself, check out our guide on [Model Development](/examples/spaceship-titanic/README.md) or [CUDA Kernel Engineering](/examples/cuda/README.md).
+
+You can check out our [CLI Reference](https://docs.weco.ai/cli/cli-reference) to learn more about what you can do with the tool.
+
{weco-0.2.22 → weco-0.2.24}/examples/spaceship-titanic/README.md

@@ -10,7 +10,7 @@ The goal is to improve the model's `accuracy` metric by optimizing the `train.py
 ```bash
 pip install weco pandas numpy scikit-learn torch xgboost lightgbm catboost
 ```
-3.
+3. Create your OpenAI API key [here](https://platform.openai.com/api-keys), then run:
 ```bash
 export OPENAI_API_KEY="your_key_here"
 ```
@@ -44,3 +44,7 @@ weco run --source train.py \
 * `--log-dir .runs/spaceship-titanic`: Specifies the directory where Weco should save logs and results for this run.
 
 Weco will iteratively update the feature engineering or modeling code within `train.py` guided by the evaluation method defined in `evaluate.py`
+
+## Next Steps
+
+With model development covered, you might be curious to see how you can make your AI code run faster, saving you time and more importantly GPU credits. Check out our example on automating kernel engineering in [CUDA](/examples/cuda/README.md) and [Triton](/examples/triton/README.md), or dive into the [CLI Reference](https://docs.weco.ai/cli/cli-reference).
{weco-0.2.22 → weco-0.2.24}/examples/triton/README.md

@@ -12,7 +12,7 @@ Install the CLI using `pip`:
 pip install weco
 ```
 
-
+Create your OpenAI API key [here](https://platform.openai.com/api-keys), then run:
 ```bash
 export OPENAI_API_KEY="your_key_here"
 ```
@@ -47,3 +47,7 @@ weco run --source optimize.py \
 * `--additional-instructions "..."`: Provides specific guidance to the LLM. In this case, it directs the model to use Triton for optimization, ensure the numerical difference ("max float diff") between the original and optimized code remains small, and keep the overall code structure consistent.
 
 Weco will iteratively modify `optimize.py`, incorporating Triton kernels, guided by the performance feedback (`speedup`) from the evaluation script and the instructions provided.
+
+## Next Steps
+
+After mastering Triton kernels, explore [CUDA Optimization](/examples/cuda/README.md) for even lower-level GPU programming, or check the [CLI Reference](https://docs.weco.ai/cli/cli-reference) to improve the results you get with Weco.
{weco-0.2.22 → weco-0.2.24}/examples/triton/evaluate.py

@@ -105,6 +105,7 @@ if __name__ == "__main__":
 
     # benchmarking parameters
    n_correctness_trials = 10
+    correctness_tolerance = 1e-5
     n_warmup = 1000
     n_rep = 5000
 
@@ -147,6 +148,8 @@ if __name__ == "__main__":
         max_diff_avg += torch.max(torch.abs(optimized_output - baseline_output))
     max_diff_avg /= n_correctness_trials
     print(f"max float diff between values of baseline and optimized model: {max_diff_avg}")
+    if max_diff_avg > correctness_tolerance:
+        print("invalid solution: max float diff is too high")
 
     # measure performance
     inputs = get_inputs(batch_size=batch_size, seq_len=seq_len, n_embd=n_embd, device="cuda")
{weco-0.2.22 → weco-0.2.24}/pyproject.toml

@@ -8,7 +8,7 @@ name = "weco"
 authors = [{ name = "Weco AI Team", email = "contact@weco.ai" }]
 description = "Documentation for `weco`, a CLI for using Weco AI's code optimizer."
 readme = "README.md"
-version = "0.2.22"
+version = "0.2.24"
 license = { text = "MIT" }
 requires-python = ">=3.8"
 dependencies = [
@@ -16,6 +16,8 @@ dependencies = [
     "rich",
     "packaging",
     "gitingest",
+    "fastapi",
+    "slowapi",
 ]
 keywords = ["AI", "Code Optimization", "Code Generation"]
 classifiers = [