synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/agora_ex/README_MoE.md +224 -0
- examples/agora_ex/__init__.py +7 -0
- examples/agora_ex/agora_ex.py +65 -0
- examples/agora_ex/agora_ex_task_app.py +590 -0
- examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +121 -0
- examples/agora_ex/reward_fn_grpo-human.py +129 -0
- examples/agora_ex/system_prompt_CURRENT.md +63 -0
- examples/agora_ex/task_app/agora_ex_task_app.py +590 -0
- examples/agora_ex/task_app/reward_fn_grpo-human.py +129 -0
- examples/agora_ex/task_app/system_prompt_CURRENT.md +63 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +175 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
- examples/multi_step/crafter_rl_lora.md +51 -10
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +7 -1
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
- examples/warming_up_to_rl/run_eval.py +127 -18
- examples/warming_up_to_rl/task_app/grpo_crafter.py +3 -33
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +42 -46
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +232 -193
- synth_ai/__init__.py +41 -1
- synth_ai/api/train/builders.py +49 -19
- synth_ai/api/train/configs/__init__.py +44 -0
- synth_ai/api/train/configs/rl.py +133 -0
- synth_ai/api/train/configs/sft.py +94 -0
- synth_ai/api/train/configs/shared.py +24 -0
- synth_ai/cli/demo.py +38 -39
- synth_ai/cli/rl_demo.py +81 -102
- synth_ai/cli/task_apps.py +3 -0
- synth_ai/demos/core/cli.py +121 -159
- synth_ai/environments/examples/crafter_classic/environment.py +16 -0
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/client.py +85 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/judge_schemas.py +127 -0
- synth_ai/rubrics/__init__.py +22 -0
- synth_ai/rubrics/validators.py +126 -0
- synth_ai/tracing_v3/serialization.py +130 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/METADATA +1 -1
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/RECORD +48 -22
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/top_level.txt +0 -0
examples/agora_ex/README_MoE.md
@@ -0,0 +1,224 @@

# Agora EX - Qwen3 MoE Training

## Model: Qwen3-30B-A3B (Small MoE)

**Architecture:**
- Total Parameters: 30B
- Activated per Token: 3B (~10% activation)
- Type: Mixture of Experts (MoE)
- Context: 4K tokens

**Why MoE for Agora EX?**
- ✅ Efficient: Only 3B params active → faster inference
- ✅ Powerful: 30B total capacity → better code generation
- ✅ Cost-effective: Far less compute per token than a dense 30B (the weights still occupy similar memory)
- ✅ H200 friendly: Fits comfortably on a 2×141GB setup

## Hardware: 2xH200

**Configuration:**
- GPU 0 (H200): vLLM inference server
- GPU 1 (H200): LoRA training
- Memory: 141GB per GPU (282GB total)
- Topology: Single-node split

**Resource Usage:**
- vLLM (MoE): ~75GB VRAM (all 30B params resident; 3B active per token)
- Training: ~110GB VRAM (frozen base weights + gradients + optimizer states)
- Headroom: ~65GB on GPU 0, ~30GB on GPU 1

## Training Configuration

### File: `configs/rl_lora_qwen3_moe_2xh200.toml`

**Key Parameters:**
```toml
[model]
base = "Qwen/Qwen3-30B-A3B"  # MoE with 3B activation
trainer_mode = "lora"

[lora]
r = 16  # LoRA rank
target_modules = ["all-linear"]  # Wide coverage for MoE

[rollout]
episodes_per_batch = 16      # 16 episodes per batch
max_concurrent_rollouts = 4  # Limited by human judge
batches_per_step = 2         # 32 episodes per training step

[training]
num_epochs = 3
iterations_per_epoch = 4  # 12 total iterations
batch_size = 2
group_size = 4            # GSPO advantage estimation
learning_rate = 3e-5      # Conservative for MoE
```
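
As a quick sanity check before launching a run, the key fields above can be read back with Python's built-in `tomllib` (Python 3.11+). A minimal sketch, assuming the section and key names match the excerpt (the full config may contain more fields):

```python
import tomllib
from pathlib import Path

cfg = tomllib.loads(
    Path("examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml").read_text()
)

# Spot-check the values this README relies on.
assert cfg["model"]["base"] == "Qwen/Qwen3-30B-A3B"
assert cfg["lora"]["r"] == 16

# 16 episodes/batch × 2 batches/step = 32 episodes per training step
episodes_per_step = (
    cfg["rollout"]["episodes_per_batch"] * cfg["rollout"]["batches_per_step"]
)
print(f"Episodes per training step: {episodes_per_step}")
```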

## Usage

### 1. Start Task App (with Human Judge)

```bash
cd /path/to/synth-ai

# Set environment variables
export GROQ_API_KEY=gsk_...                        # For inference
export ENVIRONMENT_API_KEY=sk_env_...              # For auth
export EAMES_JUDGE_URL=https://eames-judge-api...  # Human judge

# Serve task app
uvx synth-ai serve agora-ex --port 8101
```
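
Before kicking off training it can help to confirm the task app is up and reachable. A hypothetical smoke test with `httpx`; the `/health` route and the `X-API-Key` header name are assumptions, so adjust them to whatever your task app actually exposes:

```python
import os

import httpx

# Route and auth header below are assumptions, not a documented API.
resp = httpx.get(
    "http://localhost:8101/health",
    headers={"X-API-Key": os.environ["ENVIRONMENT_API_KEY"]},
    timeout=10.0,
)
print(resp.status_code, resp.text[:200])
```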

### 2. Run RL Training

```bash
# Train with MoE on 2xH200
uvx synth-ai train \
  --type rl \
  --config examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml \
  --task-url http://localhost:8101 \
  --env-file backend/.env.dev
```

### 3. Monitor Progress

```bash
# Check logs
tail -f ~/.synth-ai/logs/train_*.log

# View checkpoints
ls -lh ~/.synth-ai/checkpoints/agora-ex-qwen3-moe-rl/
```

## Expected Training Time

**With Human Judge (5-30 min per eval):**
- 12 iterations × 32 episodes = 384 rollouts
- At 10 min average: ~64 hours serial (2.7 days)
- At 4 concurrent: ~16 hours wall time

**Speedup Options:**
1. **Use AI Judge:** ~10 sec/eval → roughly 2 hours total
2. **Increase concurrency:** More parallel rollouts
3. **Reduce episodes:** Fewer samples per iteration
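
The estimates above are simple arithmetic; a short sketch reproduces them (all inputs are the README's own numbers, so swap in your observed eval latency):

```python
iterations = 12
episodes_per_iteration = 32
minutes_per_eval = 10  # human judge, within the 5-30 min range above
max_concurrent = 4

rollouts = iterations * episodes_per_iteration   # 384
serial_hours = rollouts * minutes_per_eval / 60  # 64.0
wall_hours = serial_hours / max_concurrent       # 16.0
print(f"{rollouts} rollouts → ~{serial_hours:.0f}h serial, ~{wall_hours:.0f}h at {max_concurrent}x concurrency")
```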

## Training Timeline

```
Iteration 1:  32 rollouts → ~5 hours
Iteration 2:  32 rollouts → ~5 hours
...
Iteration 12: 32 rollouts → ~5 hours
────────────────────────────────────
Total: 384 rollouts → ~60 hours

With 4 concurrent: ~15 hours wall time
```

## Memory Usage

### vLLM Server (GPU 0)

```
Model weights (MoE): ~60GB (BF16; all 30B params resident, 3B active per token)
KV cache:            ~10GB (batch_size=4, context=4K)
Overhead:             ~5GB (vLLM runtime)
────────────────────────────────────
Total:               ~75GB / 141GB
```

### Training (GPU 1)

```
Base model (frozen): ~60GB (BF16 weights, needed for forward/backward)
LoRA adapters:        ~2GB (r=16, all-linear)
Gradients:           ~10GB (accumulation)
Optimizer states:    ~20GB (AdamW)
Activations:         ~15GB (forward pass)
Overhead:             ~3GB (PyTorch)
────────────────────────────────────
Total:              ~110GB / 141GB
```

These are rough estimates and assume the full BF16 base model is resident on each GPU; quantized or sharded setups will need less.
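
The two budgets can be sanity-checked against the H200's 141GB capacity with a few lines of Python (the component sizes are the rough estimates above, not measurements):

```python
H200_GB = 141  # HBM3e capacity per H200

# Rough per-GPU budgets from the tables above (GB).
vllm_gpu0 = {"weights": 60, "kv_cache": 10, "runtime": 5}
train_gpu1 = {"base_model": 60, "lora": 2, "gradients": 10,
              "optimizer": 20, "activations": 15, "overhead": 3}

for name, parts in (("vLLM (GPU 0)", vllm_gpu0), ("training (GPU 1)", train_gpu1)):
    used = sum(parts.values())
    print(f"{name}: ~{used}GB used, ~{H200_GB - used}GB headroom")
```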

## Hyperparameter Tuning

### If overfitting (train reward >> eval reward):
```toml
[lora]
dropout = 0.1  # Increase from 0.05

[training]
learning_rate = 1e-5  # Decrease from 3e-5
```

### If underfitting (slow improvement):
```toml
[training]
learning_rate = 5e-5  # Increase from 3e-5
gradient_accumulation_steps = 16  # More accumulation
```

### If out of memory:
```toml
[training]
batch_size = 1  # Reduce from 2
gradient_accumulation_steps = 16  # Compensate with more accumulation
```
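
The out-of-memory tweak works because the optimizer-visible batch is the micro-batch size times the number of accumulation steps, so halving one while doubling the other leaves the effective batch unchanged (assuming the trainer follows this standard convention):

```python
def effective_batch(batch_size: int, grad_accum_steps: int) -> int:
    # Gradients from `grad_accum_steps` micro-batches are summed before each
    # optimizer step, so the update sees their combined batch.
    return batch_size * grad_accum_steps

# batch_size 2 with 8 accumulation steps == batch_size 1 with 16 steps
assert effective_batch(2, 8) == effective_batch(1, 16) == 16
```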

## Comparison: MoE vs Dense

| Metric | Qwen3-30B-A3B (MoE) | Qwen2.5-Coder-7B (Dense) |
|--------|---------------------|--------------------------|
| **Total Params** | 30B | 7B |
| **Active Params** | 3B | 7B |
| **Inference Speed** | ~30 tok/s | ~50 tok/s |
| **VRAM (vLLM)** | ~75GB | ~20GB |
| **Quality** | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐ |
| **Training Time** | Same | Same |
| **Best For** | Code quality | Speed/cost |

## Next Steps

1. **Baseline:** Train MoE with human judge
2. **Fast iteration:** Switch to AI judge (10s/eval)
3. **Scale up:** Move to larger MoE (235B-A22B)
4. **Deploy:** Export trained adapter for production

## Troubleshooting

### Out of Memory (OOM)
```toml
# Reduce batch size
[training]
batch_size = 1

# Or reduce context length
[vllm]
max_model_len = 3072
```

### Slow Rollouts
```bash
# Switch to AI judge for development
uvx synth-ai serve agora-ex-ai-judge --port 8102

# Then point --task-url at the new port in the training command
--task-url http://localhost:8102
```

### Model Not Found
```bash
# Ensure the model is in the permitted list
python3 -c "
from backend.app.routes.clustered_training.core.algorithms.gspo.permitted_models import list_permitted_models
print('\n'.join(list_permitted_models()))
"
```

---

**Status:** ✅ Ready for training
**Model:** Qwen3-30B-A3B (MoE, 3B active)
**Hardware:** 2xH200 (282GB total)
**Judge:** Human (Eames) or AI (gpt-oss-120b)

examples/agora_ex/agora_ex.py
@@ -0,0 +1,65 @@

```python
"""Task App configuration for Agora EX landing page generation."""

from __future__ import annotations

import sys
from pathlib import Path

from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app

# Add this directory to the path so the task app module can be imported
_HERE = Path(__file__).resolve().parent
if str(_HERE) not in sys.path:
    sys.path.insert(0, str(_HERE))

from agora_ex_task_app import APP_DESCRIPTION, APP_ID, build_config

# Resolve repo root for Modal mounts
def _resolve_repo_root() -> Path:
    """Find the repo root (the directory with pyproject.toml) from this file's location."""
    candidates = [_HERE.parent.parent]  # examples/agora_ex -> synth-ai (two levels up)
    for candidate in candidates:
        if (candidate / "pyproject.toml").exists():
            return candidate
    return _HERE.parent.parent

REPO_ROOT = _resolve_repo_root()

# Register at module level so CLI discovery picks the app up on import
register_task_app(
    entry=TaskAppEntry(
        app_id=APP_ID,
        description=APP_DESCRIPTION,
        config_factory=build_config,
        aliases=(APP_ID, "agora-ex", "agora_ex"),
        env_files=(),
        modal=ModalDeploymentConfig(
            app_name="agora-ex-task-app",
            python_version="3.11",
            pip_packages=(
                "fastapi>=0.100.0",
                "uvicorn>=0.23.0",
                "pydantic>=2.0.0",
                "httpx>=0.24.0",
                "python-dotenv>=1.0.1",
                # Tracing/DB runtime deps
                "sqlalchemy>=2.0.42",
                "aiosqlite>=0.21.0",
                "greenlet>=3.2.3",
            ),
            extra_local_dirs=(
                # Mount repo root so local modules resolve when deployed on Modal
                (str(REPO_ROOT), "/opt/synth_ai_repo"),
                (str(REPO_ROOT / "synth_ai"), "/opt/synth_ai_repo/synth_ai"),
                (str(_HERE), "/opt/synth_ai_repo/examples/agora_ex"),
            ),
            secret_names=("groq-api-key", "openai-api-key"),
            memory=8192,  # 8GB memory for inference + judge calls
            cpu=2.0,  # 2 CPUs
            max_containers=10,
        ),
    )
)

__all__ = ["build_config"]
```
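
A minimal sketch of how this module's import-time registration can be exercised locally (the path handling is illustrative; normally the `synth-ai` CLI imports the module and resolves the `agora-ex` alias for you):

```python
import importlib
import sys

# Make examples/agora_ex importable, then trigger register_task_app() by importing.
sys.path.insert(0, "examples/agora_ex")
module = importlib.import_module("agora_ex")

print(module.APP_ID)        # the registered app id
print(module.build_config)  # the config factory handed to the registry
```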