synth-ai 0.2.10__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (73)
  1. examples/agora_ex/README_MoE.md +224 -0
  2. examples/agora_ex/__init__.py +7 -0
  3. examples/agora_ex/agora_ex.py +65 -0
  4. examples/agora_ex/agora_ex_task_app.py +590 -0
  5. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +121 -0
  6. examples/agora_ex/reward_fn_grpo-human.py +129 -0
  7. examples/agora_ex/system_prompt_CURRENT.md +63 -0
  8. examples/agora_ex/task_app/agora_ex_task_app.py +590 -0
  9. examples/agora_ex/task_app/reward_fn_grpo-human.py +129 -0
  10. examples/agora_ex/task_app/system_prompt_CURRENT.md +63 -0
  11. examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
  12. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +175 -0
  13. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
  14. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
  15. examples/multi_step/crafter_rl_lora.md +51 -10
  16. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  17. examples/multi_step/task_app_config_notes.md +494 -0
  18. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
  19. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
  20. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
  21. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
  22. examples/warming_up_to_rl/run_eval.py +267 -41
  23. examples/warming_up_to_rl/task_app/grpo_crafter.py +3 -33
  24. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
  25. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +42 -46
  26. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +376 -193
  27. synth_ai/__init__.py +41 -1
  28. synth_ai/api/train/builders.py +74 -33
  29. synth_ai/api/train/cli.py +29 -6
  30. synth_ai/api/train/configs/__init__.py +44 -0
  31. synth_ai/api/train/configs/rl.py +133 -0
  32. synth_ai/api/train/configs/sft.py +94 -0
  33. synth_ai/api/train/configs/shared.py +24 -0
  34. synth_ai/api/train/env_resolver.py +18 -19
  35. synth_ai/api/train/supported_algos.py +8 -5
  36. synth_ai/api/train/utils.py +6 -1
  37. synth_ai/cli/__init__.py +4 -2
  38. synth_ai/cli/_storage.py +19 -0
  39. synth_ai/cli/balance.py +14 -2
  40. synth_ai/cli/calc.py +37 -22
  41. synth_ai/cli/demo.py +38 -39
  42. synth_ai/cli/legacy_root_backup.py +12 -14
  43. synth_ai/cli/recent.py +12 -7
  44. synth_ai/cli/rl_demo.py +81 -102
  45. synth_ai/cli/status.py +4 -3
  46. synth_ai/cli/task_apps.py +146 -137
  47. synth_ai/cli/traces.py +4 -3
  48. synth_ai/cli/watch.py +3 -2
  49. synth_ai/demos/core/cli.py +121 -159
  50. synth_ai/environments/examples/crafter_classic/environment.py +16 -0
  51. synth_ai/evals/__init__.py +15 -0
  52. synth_ai/evals/client.py +85 -0
  53. synth_ai/evals/types.py +42 -0
  54. synth_ai/jobs/client.py +15 -3
  55. synth_ai/judge_schemas.py +127 -0
  56. synth_ai/rubrics/__init__.py +22 -0
  57. synth_ai/rubrics/validators.py +126 -0
  58. synth_ai/task/server.py +14 -7
  59. synth_ai/tracing_v3/decorators.py +51 -26
  60. synth_ai/tracing_v3/examples/basic_usage.py +12 -7
  61. synth_ai/tracing_v3/llm_call_record_helpers.py +107 -53
  62. synth_ai/tracing_v3/replica_sync.py +8 -4
  63. synth_ai/tracing_v3/serialization.py +130 -0
  64. synth_ai/tracing_v3/storage/utils.py +11 -9
  65. synth_ai/tracing_v3/turso/__init__.py +12 -0
  66. synth_ai/tracing_v3/turso/daemon.py +2 -1
  67. synth_ai/tracing_v3/turso/native_manager.py +28 -15
  68. {synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/METADATA +4 -2
  69. {synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/RECORD +73 -40
  70. {synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/entry_points.txt +0 -1
  71. {synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/WHEEL +0 -0
  72. {synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/licenses/LICENSE +0 -0
  73. {synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,224 @@
1
+ # Agora EX - Qwen3 MoE Training
2
+
3
+ ## Model: Qwen3-30B-A3B (Small MoE)
4
+
5
+ **Architecture:**
6
+ - Total Parameters: 30B
7
+ - Activated per Token: 3B (~10% activation)
8
+ - Type: Mixture of Experts (MoE)
9
+ - Context: 4K tokens
10
+
11
+ **Why MoE for Agora EX?**
12
+ - ✅ Efficient: Only 3B params active → faster inference
13
+ - ✅ Powerful: 30B total capacity → better code generation
14
+ - ✅ Cost-effective: Lower memory footprint than dense 30B
15
+ - ✅ H200 friendly: Fits comfortably on 2x80GB setup
16
+
17
+ ## Hardware: 2xH200
18
+
19
+ **Configuration:**
20
+ - GPU 0 (H200): vLLM inference server
21
+ - GPU 1 (H200): LoRA training
22
+ - Memory: 80GB per GPU (160GB total)
23
+ - Topology: Single-node split
24
+
25
+ **Resource Usage:**
26
+ - vLLM (MoE): ~40GB VRAM (3B active + routing)
27
+ - Training: ~50GB VRAM (gradients + optimizer states)
28
+ - Headroom: ~70GB available
29
+
30
+ ## Training Configuration
31
+
32
+ ### File: `configs/rl_lora_qwen3_moe_2xh200.toml`
33
+
34
+ **Key Parameters:**
35
+ ```toml
36
+ [model]
37
+ base = "Qwen/Qwen3-30B-A3B" # MoE with 3B activation
38
+ trainer_mode = "lora"
39
+
40
+ [lora]
41
+ r = 16 # LoRA rank
42
+ target_modules = ["all-linear"] # Wide coverage for MoE
43
+
44
+ [rollout]
45
+ episodes_per_batch = 16 # 16 episodes per batch
46
+ max_concurrent_rollouts = 4 # Limited by human judge
47
+ batches_per_step = 2 # 32 episodes per training step
48
+
49
+ [training]
50
+ num_epochs = 3
51
+ iterations_per_epoch = 4 # 12 total iterations
52
+ batch_size = 2
53
+ group_size = 4 # GSPO advantage estimation
54
+ learning_rate = 3e-5 # Conservative for MoE
55
+ ```
56
+
57
+ ## Usage
58
+
59
+ ### 1. Start Task App (with Human Judge)
60
+
61
+ ```bash
62
+ cd /path/to/synth-ai  # your local checkout of the synth-ai repository
63
+
64
+ # Set environment variables
65
+ export GROQ_API_KEY=gsk_... # For inference
66
+ export ENVIRONMENT_API_KEY=sk_env_... # For auth
67
+ export EAMES_JUDGE_URL=https://eames-judge-api... # Human judge
68
+
69
+ # Serve task app
70
+ uvx synth-ai serve agora-ex --port 8101
71
+ ```
72
+
73
+ ### 2. Run RL Training
74
+
75
+ ```bash
76
+ # Train with MoE on 2xH200
77
+ uvx synth-ai train \
78
+ --type rl \
79
+ --config examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml \
80
+ --task-url http://localhost:8101 \
81
+ --env-file backend/.env.dev
82
+ ```
83
+
84
+ ### 3. Monitor Progress
85
+
86
+ ```bash
87
+ # Check logs
88
+ tail -f ~/.synth-ai/logs/train_*.log
89
+
90
+ # View checkpoints
91
+ ls -lh ~/.synth-ai/checkpoints/agora-ex-qwen3-moe-rl/
92
+ ```
93
+
94
+ ## Expected Training Time
95
+
96
+ **With Human Judge (5-30 min per eval):**
97
+ - 12 iterations × 32 episodes = 384 rollouts
98
+ - At 10 min average: ~64 hours (2.7 days)
99
+ - At 4 concurrent: ~16 hours wall time
100
+
101
+ **Speedup Options:**
102
+ 1. **Use AI Judge:** 10 sec/eval → 2 hours total
103
+ 2. **Increase concurrency:** More parallel rollouts
104
+ 3. **Reduce episodes:** Fewer samples per iteration
105
+
106
+ ## Training Timeline
107
+
108
+ ```
109
+ Iteration 1: 32 rollouts → ~5.3 hours
110
+ Iteration 2: 32 rollouts → ~5.3 hours
111
+ ...
112
+ Iteration 12: 32 rollouts → ~5.3 hours
113
+ ────────────────────────────────────
114
+ Total: 384 rollouts → ~64 hours
115
+
116
+ With 4 concurrent: ~16 hours wall time
117
+ ```
118
+
119
+ ## Memory Usage
120
+
121
+ ### vLLM Server (GPU 0)
122
+
123
+ ```
124
+ Model weights (MoE): ~25GB (BF16, 3B active + routing)
125
+ KV cache: ~10GB (batch_size=4, context=4K)
126
+ Overhead: ~5GB (vLLM runtime)
127
+ ────────────────────────────────────
128
+ Total: ~40GB / 80GB
129
+ ```
130
+
131
+ ### Training (GPU 1)
132
+
133
+ ```
134
+ LoRA adapters: ~2GB (r=16, all-linear)
135
+ Gradients: ~10GB (accumulation)
136
+ Optimizer states: ~20GB (AdamW)
137
+ Activations: ~15GB (forward pass)
138
+ Overhead: ~3GB (PyTorch)
139
+ ────────────────────────────────────
140
+ Total: ~50GB / 80GB
141
+ ```
142
+
143
+ ## Hyperparameter Tuning
144
+
145
+ ### If overfitting (train reward >> eval reward):
146
+ ```toml
147
+ [lora]
148
+ dropout = 0.1 # Increase from 0.05
149
+
150
+ [training]
151
+ learning_rate = 1e-5 # Decrease from 3e-5
152
+ ```
153
+
154
+ ### If underfitting (slow improvement):
155
+ ```toml
156
+ [training]
157
+ learning_rate = 5e-5 # Increase from 3e-5
158
+ gradient_accumulation_steps = 16 # More accumulation
159
+ ```
160
+
161
+ ### If out of memory:
162
+ ```toml
163
+ [training]
164
+ batch_size = 1 # Reduce from 2
165
+ gradient_accumulation_steps = 16 # Compensate with more accumulation
166
+ ```
167
+
168
+ ## Comparison: MoE vs Dense
169
+
170
+ | Metric | Qwen3-30B-A3B (MoE) | Qwen2.5-Coder-7B (Dense) |
171
+ |--------|---------------------|--------------------------|
172
+ | **Total Params** | 30B | 7B |
173
+ | **Active Params** | 3B | 7B |
174
+ | **Inference Speed** | ~30 tok/s | ~50 tok/s |
175
+ | **VRAM (vLLM)** | ~40GB | ~20GB |
176
+ | **Quality** | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐ |
177
+ | **Training Time** | Same | Same |
178
+ | **Best For** | Code quality | Speed/cost |
179
+
180
+ ## Next Steps
181
+
182
+ 1. **Baseline:** Train MoE with human judge
183
+ 2. **Fast iteration:** Switch to AI judge (10s/eval)
184
+ 3. **Scale up:** Move to a larger MoE (Qwen3-235B-A22B)
185
+ 4. **Deploy:** Export trained adapter for production
186
+
187
+ ## Troubleshooting
188
+
189
+ ### Out of Memory (OOM)
190
+ ```toml
191
+ # Reduce batch size
192
+ [training]
193
+ batch_size = 1
194
+
195
+ # Or reduce context length
196
+ [vllm]
197
+ max_model_len = 3072
198
+ ```
199
+
200
+ ### Slow Rollouts
201
+ ```bash
202
+ # Switch to AI judge for development
203
+ uvx synth-ai serve agora-ex-ai-judge --port 8102
204
+
205
+ # Update task_url in training command
206
+ --task-url http://localhost:8102
207
+ ```
208
+
209
+ ### Model Not Found
210
+ ```bash
211
+ # Ensure model is in permitted list
212
+ python3 -c "
213
+ from backend.app.routes.clustered_training.core.algorithms.gspo.permitted_models import list_permitted_models
214
+ print('\n'.join(list_permitted_models()))
215
+ "
216
+ ```
217
+
218
+ ---
219
+
220
+ **Status:** ✅ Ready for training
221
+ **Model:** Qwen3-30B-A3B (MoE, 3B active)
222
+ **Hardware:** 2xH200 (160GB total)
223
+ **Judge:** Human (Eames) or AI (gpt-oss-120b)
224
+
@@ -0,0 +1,7 @@
1
+ """Agora EX landing page generation task app."""
2
+
3
+ # Import to trigger registration
4
+ from .task_app import agora_ex_task_app # noqa: F401
5
+
6
+ __all__ = ["agora_ex_task_app"]
7
+
@@ -0,0 +1,65 @@
1
+ """Task App configuration for Agora EX landing page generation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
9
+
10
+ # Add this directory to path to import the task app module
11
+ _HERE = Path(__file__).resolve().parent
12
+ if str(_HERE) not in sys.path:
13
+ sys.path.insert(0, str(_HERE))
14
+
15
+ from agora_ex_task_app import APP_DESCRIPTION, APP_ID, build_config
16
+
17
+ # Resolve repo root for Modal mounts
18
+ def _resolve_repo_root() -> Path:
19
+ """Find repo root from this file's location."""
20
+ candidates = [_HERE.parent.parent.parent] # examples/agora_ex -> synth-ai
21
+ for candidate in candidates:
22
+ if (candidate / "pyproject.toml").exists():
23
+ return candidate
24
+ return _HERE.parent.parent.parent
25
+
26
+ REPO_ROOT = _resolve_repo_root()
27
+
28
+ # Register at module level
29
+ register_task_app(
30
+ entry=TaskAppEntry(
31
+ app_id=APP_ID,
32
+ description=APP_DESCRIPTION,
33
+ config_factory=build_config,
34
+ aliases=(APP_ID, "agora-ex", "agora_ex"),
35
+ env_files=(),
36
+ modal=ModalDeploymentConfig(
37
+ app_name="agora-ex-task-app",
38
+ python_version="3.11",
39
+ pip_packages=(
40
+ "fastapi>=0.100.0",
41
+ "uvicorn>=0.23.0",
42
+ "pydantic>=2.0.0",
43
+ "httpx>=0.24.0",
44
+ "python-dotenv>=1.0.1",
45
+ # Tracing/DB runtime deps
46
+ "sqlalchemy>=2.0.42",
47
+ "aiosqlite>=0.21.0",
48
+ "greenlet>=3.2.3",
49
+ ),
50
+ extra_local_dirs=(
51
+ # Mount repo root so local modules resolve when deployed on Modal
52
+ (str(REPO_ROOT), "/opt/synth_ai_repo"),
53
+ (str(REPO_ROOT / "synth_ai"), "/opt/synth_ai_repo/synth_ai"),
54
+ (str(_HERE), "/opt/synth_ai_repo/examples/agora_ex"),
55
+ ),
56
+ secret_names=("groq-api-key", "openai-api-key"),
57
+ memory=8192, # 8GB memory for inference + judge calls
58
+ cpu=2.0, # 2 CPUs
59
+ max_containers=10,
60
+ ),
61
+ )
62
+ )
63
+
64
+ __all__ = ["build_config"]
65
+