synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic.

Files changed (293)
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/README_verilog_rl.md +77 -0
  4. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  5. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  6. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  7. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  8. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
  9. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  10. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  11. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  12. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  13. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  14. examples/multi_step/convert_traces_to_sft.py +84 -0
  15. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  16. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  17. examples/multi_step/readme.md +48 -0
  18. examples/multi_step/run_sft_qwen30b.sh +45 -0
  19. examples/multi_step/verilog_rl_lora.md +218 -0
  20. examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
  21. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  22. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  23. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  24. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  25. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  26. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  27. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  28. examples/qwen_vl/QUICKSTART.md +327 -0
  29. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  30. examples/qwen_vl/README.md +154 -0
  31. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  32. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  33. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  34. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  35. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  36. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  37. examples/qwen_vl/__init__.py +2 -0
  38. examples/qwen_vl/collect_data_via_cli.md +423 -0
  39. examples/qwen_vl/collect_vision_traces.py +368 -0
  40. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  41. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  42. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  43. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  44. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  45. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  46. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  47. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  48. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  49. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  50. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  51. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  52. examples/qwen_vl/run_vision_comparison.sh +62 -0
  53. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  54. examples/qwen_vl/test_image_validation.py +201 -0
  55. examples/qwen_vl/test_sft_vision_data.py +110 -0
  56. examples/rl/README.md +1 -1
  57. examples/rl/configs/eval_base_qwen.toml +17 -0
  58. examples/rl/configs/eval_rl_qwen.toml +13 -0
  59. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  60. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  61. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  62. examples/rl/run_eval.py +436 -0
  63. examples/rl/run_rl_and_save.py +111 -0
  64. examples/rl/task_app/README.md +22 -0
  65. examples/rl/task_app/math_single_step.py +990 -0
  66. examples/rl/task_app/math_task_app.py +111 -0
  67. examples/sft/README.md +5 -5
  68. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  69. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  70. examples/sft/evaluate.py +4 -4
  71. examples/sft/export_dataset.py +7 -4
  72. examples/sft/generate_traces.py +2 -0
  73. examples/swe/task_app/README.md +1 -1
  74. examples/swe/task_app/grpo_swe_mini.py +1 -1
  75. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  76. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  77. examples/swe/task_app/hosted/policy_routes.py +0 -2
  78. examples/swe/task_app/hosted/rollout.py +2 -8
  79. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  80. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  81. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  82. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  83. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  84. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  85. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  86. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  87. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  88. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  89. examples/task_apps/crafter/task_app/__init__.py +3 -0
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
  91. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
  97. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
  98. examples/task_apps/enron/__init__.py +1 -0
  99. examples/task_apps/enron/filter_sft.toml +5 -0
  100. examples/task_apps/enron/tests/__init__.py +2 -0
  101. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  102. examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
  103. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  104. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  105. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  106. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  107. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  108. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
  109. examples/task_apps/pokemon_red/task_app.py +199 -6
  110. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
  111. examples/task_apps/sokoban/filter_sft.toml +5 -0
  112. examples/task_apps/sokoban/tests/__init__.py +2 -0
  113. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  115. examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
  116. examples/task_apps/verilog/filter_sft.toml +5 -0
  117. examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
  118. examples/task_apps/verilog/tests/__init__.py +2 -0
  119. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  120. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
  121. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  122. examples/vlm/README.md +3 -3
  123. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  124. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  125. examples/vlm/filter_image_rows.py +1 -1
  126. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  127. examples/warming_up_to_rl/_utils.py +92 -0
  128. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  129. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  130. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  131. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  132. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  133. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  134. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  135. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  136. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  137. examples/warming_up_to_rl/groq_test.py +2 -0
  138. examples/warming_up_to_rl/readme.md +63 -132
  139. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  140. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  141. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  142. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  143. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  144. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  145. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  146. examples/warming_up_to_rl/task_app/README.md +42 -0
  147. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  148. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  152. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  153. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  154. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  155. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  156. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  157. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  158. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  159. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  160. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  161. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  162. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  163. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  164. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  165. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  166. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  167. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  168. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  169. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  170. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  171. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  172. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  173. synth_ai/__init__.py +44 -30
  174. synth_ai/_utils/__init__.py +47 -0
  175. synth_ai/_utils/base_url.py +10 -0
  176. synth_ai/_utils/http.py +10 -0
  177. synth_ai/_utils/prompts.py +10 -0
  178. synth_ai/_utils/task_app_state.py +12 -0
  179. synth_ai/_utils/user_config.py +10 -0
  180. synth_ai/api/models/supported.py +145 -7
  181. synth_ai/api/train/__init__.py +13 -1
  182. synth_ai/api/train/cli.py +30 -7
  183. synth_ai/api/train/config_finder.py +18 -11
  184. synth_ai/api/train/env_resolver.py +13 -10
  185. synth_ai/cli/__init__.py +66 -49
  186. synth_ai/cli/_modal_wrapper.py +9 -6
  187. synth_ai/cli/_typer_patch.py +0 -2
  188. synth_ai/cli/_validate_task_app.py +22 -4
  189. synth_ai/cli/legacy_root_backup.py +3 -1
  190. synth_ai/cli/lib/__init__.py +10 -0
  191. synth_ai/cli/lib/task_app_discovery.py +7 -0
  192. synth_ai/cli/lib/task_app_env.py +518 -0
  193. synth_ai/cli/recent.py +1 -0
  194. synth_ai/cli/setup.py +266 -0
  195. synth_ai/cli/task_app_deploy.py +16 -0
  196. synth_ai/cli/task_app_list.py +25 -0
  197. synth_ai/cli/task_app_modal_serve.py +16 -0
  198. synth_ai/cli/task_app_serve.py +18 -0
  199. synth_ai/cli/task_apps.py +392 -141
  200. synth_ai/cli/train.py +18 -0
  201. synth_ai/cli/tui.py +62 -0
  202. synth_ai/demos/__init__.py +10 -0
  203. synth_ai/demos/core/__init__.py +28 -1
  204. synth_ai/demos/crafter/__init__.py +1 -0
  205. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  206. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  207. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  208. synth_ai/demos/demo_registry.py +176 -0
  209. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  210. synth_ai/demos/math/__init__.py +1 -0
  211. synth_ai/demos/math/_common.py +16 -0
  212. synth_ai/demos/math/app.py +38 -0
  213. synth_ai/demos/math/config.toml +76 -0
  214. synth_ai/demos/math/deploy_modal.py +54 -0
  215. synth_ai/demos/math/modal_task_app.py +702 -0
  216. synth_ai/demos/math/task_app_entry.py +51 -0
  217. synth_ai/environments/environment/core.py +7 -1
  218. synth_ai/environments/examples/bandit/engine.py +0 -1
  219. synth_ai/environments/examples/bandit/environment.py +0 -1
  220. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  221. synth_ai/environments/examples/verilog/engine.py +76 -10
  222. synth_ai/environments/examples/wordle/environment.py +0 -1
  223. synth_ai/evals/base.py +16 -5
  224. synth_ai/evals/client.py +1 -1
  225. synth_ai/inference/client.py +1 -1
  226. synth_ai/learning/client.py +1 -1
  227. synth_ai/learning/health.py +1 -1
  228. synth_ai/learning/jobs.py +1 -1
  229. synth_ai/learning/rl/client.py +1 -1
  230. synth_ai/learning/rl/env_keys.py +1 -1
  231. synth_ai/learning/rl/secrets.py +1 -1
  232. synth_ai/learning/sft/client.py +1 -1
  233. synth_ai/learning/sft/data.py +407 -4
  234. synth_ai/learning/validators.py +4 -1
  235. synth_ai/task/__init__.py +11 -1
  236. synth_ai/task/apps/__init__.py +5 -2
  237. synth_ai/task/config.py +259 -0
  238. synth_ai/task/contracts.py +15 -2
  239. synth_ai/task/rubrics/__init__.py +4 -2
  240. synth_ai/task/rubrics/loaders.py +27 -4
  241. synth_ai/task/rubrics/scoring.py +3 -0
  242. synth_ai/task/rubrics.py +219 -0
  243. synth_ai/task/trace_correlation_helpers.py +328 -0
  244. synth_ai/task/tracing_utils.py +14 -3
  245. synth_ai/task/validators.py +145 -2
  246. synth_ai/tracing_v3/config.py +15 -13
  247. synth_ai/tracing_v3/constants.py +21 -0
  248. synth_ai/tracing_v3/db_config.py +3 -1
  249. synth_ai/tracing_v3/decorators.py +10 -7
  250. synth_ai/tracing_v3/session_tracer.py +10 -0
  251. synth_ai/tracing_v3/turso/daemon.py +2 -2
  252. synth_ai/tracing_v3/turso/native_manager.py +108 -77
  253. synth_ai/tracing_v3/utils.py +1 -1
  254. synth_ai/tui/__init__.py +5 -0
  255. synth_ai/tui/__main__.py +13 -0
  256. synth_ai/tui/cli/__init__.py +1 -0
  257. synth_ai/tui/cli/query_experiments.py +164 -0
  258. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  259. synth_ai/tui/dashboard.py +911 -0
  260. synth_ai/utils/__init__.py +101 -0
  261. synth_ai/utils/base_url.py +94 -0
  262. synth_ai/utils/cli.py +131 -0
  263. synth_ai/utils/env.py +287 -0
  264. synth_ai/utils/http.py +169 -0
  265. synth_ai/utils/modal.py +308 -0
  266. synth_ai/utils/process.py +212 -0
  267. synth_ai/utils/prompts.py +39 -0
  268. synth_ai/utils/sqld.py +122 -0
  269. synth_ai/utils/task_app_discovery.py +882 -0
  270. synth_ai/utils/task_app_env.py +186 -0
  271. synth_ai/utils/task_app_state.py +318 -0
  272. synth_ai/utils/user_config.py +137 -0
  273. synth_ai/v0/config/__init__.py +1 -5
  274. synth_ai/v0/config/base_url.py +1 -7
  275. synth_ai/v0/tracing/config.py +1 -1
  276. synth_ai/v0/tracing/decorators.py +1 -1
  277. synth_ai/v0/tracing/upload.py +1 -1
  278. synth_ai/v0/tracing_v1/config.py +1 -1
  279. synth_ai/v0/tracing_v1/decorators.py +1 -1
  280. synth_ai/v0/tracing_v1/upload.py +1 -1
  281. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  282. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
  283. synth_ai/cli/man.py +0 -106
  284. synth_ai/compound/cais.py +0 -0
  285. synth_ai/core/experiment.py +0 -13
  286. synth_ai/core/system.py +0 -15
  287. synth_ai/demo_registry.py +0 -295
  288. synth_ai/handshake.py +0 -109
  289. synth_ai/http.py +0 -26
  290. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  291. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  292. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  293. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
examples/multi_step/judges/verilog_backend_judge.py
@@ -0,0 +1,234 @@
+ """Verilog backend judge that calls the Synth judge API with inline rubric."""
+
+ from __future__ import annotations
+
+ import json
+ import os
+ from pathlib import Path
+ from typing import Any, Dict, List, TypedDict
+
+ import httpx
+
+
+ class TraceMetadata(TypedDict, total=False):
+     """Metadata for the trace."""
+     env_id: str
+     policy_id: str
+     length: int
+
+
+ class JudgeTracePayload(TypedDict):
+     """Trace payload sent to backend judge."""
+     event_history: List[Dict[str, Any]]
+     markov_blanket_message_history: List[Dict[str, Any]]
+     metadata: TraceMetadata
+
+
+ class JudgeOptions(TypedDict, total=False):
+     """Options for judge scoring."""
+     model: str
+     timeout_s: int
+     event: bool
+     outcome: bool
+
+
+ class TaskApp(TypedDict):
+     """Task application metadata."""
+     id: str
+
+
+ class JudgeScoreRequest(TypedDict):
+     """Request to backend judge API."""
+     policy_name: str
+     task_app: TaskApp
+     trace: JudgeTracePayload
+     rubric: Dict[str, Any]
+     options: JudgeOptions
+
+
+ # Load rubric from file (cached at module level)
+ _RUBRIC_PATH = Path(__file__).parent.parent / "rubrics" / "verilog_backend_judge.json"
+ _RUBRIC: Dict[str, Any] | None = None
+
+
+ def _load_rubric() -> Dict[str, Any]:
+     """Load rubric from file with fallback to inline default."""
+     global _RUBRIC
+     if _RUBRIC is None:
+         try:
+             with open(_RUBRIC_PATH, 'r') as f:
+                 _RUBRIC = json.load(f)
+             assert isinstance(_RUBRIC, dict), "Rubric must be a dict"
+             assert "outcome" in _RUBRIC, "Rubric must have 'outcome' key"
+             assert isinstance(_RUBRIC["outcome"], list), "Rubric 'outcome' must be a list"
+         except Exception as e:
+             print(f"[verilog_backend_judge] Warning: Failed to load rubric from {_RUBRIC_PATH}: {e}")
+             # Fallback inline rubric (matching RubricCriteriaBlock format)
+             _RUBRIC = {
+                 "event": [],
+                 "outcome": [
+                     {"id": "correctness.tests_pass", "description": "Tests pass", "weight": 0.5, "scale": "bounded"},
+                     {"id": "efficiency.code_quality", "description": "Code quality", "weight": 0.3, "scale": "bounded"},
+                     {"id": "efficiency.solution_steps", "description": "Solution efficiency", "weight": 0.2, "scale": "bounded"}
+                 ]
+             }
+     return _RUBRIC
+
+
+ def judge(payload: Dict[str, Any], **kwargs: Any) -> float:
+     """
+     Call the Synth backend judge API to score a Verilog rollout.
+
+     Args:
+         payload: Dict with keys: seed, prompt, completion, metrics, response, trace
+         **kwargs: Additional config (backend_url, model, timeout_s, etc.)
+
+     Returns:
+         float: Aggregate score from 0.0 to 1.0
+     """
+     try:
+         # Extract configuration
+         backend_url = kwargs.get("backend_url", "http://localhost:8000/api")
+         model = kwargs.get("model", "openai/gpt-oss-120b")
+         timeout = kwargs.get("timeout_s", 45)
+
+         assert isinstance(backend_url, str), "backend_url must be a string"
+         assert isinstance(model, str), "model must be a string"
+         assert isinstance(timeout, (int, float)), "timeout_s must be numeric"
+
+         # Extract trajectory from response
+         response_data = payload.get("response", {})
+         assert isinstance(response_data, dict), "response must be a dict"
+
+         trajectories = response_data.get("trajectories", [])
+         assert isinstance(trajectories, list), "trajectories must be a list"
+
+         if not trajectories:
+             print("[verilog_backend_judge] No trajectories in response")
+             return 0.0
+
+         trajectory = trajectories[0]  # First trajectory
+         assert isinstance(trajectory, dict), "trajectory must be a dict"
+
+         # Load rubric
+         rubric = _load_rubric()
+
+         # Transform trajectory into JudgeTracePayload format
+         # The backend expects: event_history, markov_blanket_message_history, metadata
+         steps = trajectory.get("steps", [])
+         assert isinstance(steps, list), "trajectory steps must be a list"
+
+         event_history: List[Dict[str, Any]] = []
+         for idx, step in enumerate(steps):
+             assert isinstance(step, dict), f"step {idx} must be a dict"
+             # Each step becomes an event with obs, tool_calls, reward, done, info
+             event_history.append({
+                 "observation": step.get("obs", {}),
+                 "tool_calls": step.get("tool_calls", []),
+                 "reward": step.get("reward", 0.0),
+                 "done": step.get("done", False),
+                 "truncated": step.get("truncated", False),
+                 "info": step.get("info", {}),
+             })
+
+         # Add final observation - backend will extract this as outcome context
+         final_data = trajectory.get("final", {})
+         if final_data:
+             assert isinstance(final_data, dict), "final data must be a dict"
+             final_obs = final_data.get("observation", {})
+             assert isinstance(final_obs, dict), "final observation must be a dict"
+
+             event_history.append({
+                 "observation": final_obs,
+                 "reward": final_data.get("reward", 0.0),
+                 "done": final_data.get("done", True),
+                 "truncated": final_data.get("truncated", False),
+                 "info": final_data.get("info", {}),
+             })
+
+         # Build trace metadata
+         metadata: TraceMetadata = {
+             "env_id": trajectory.get("env_id", "verilog"),
+             "policy_id": trajectory.get("policy_id", "verilog-designer"),
+             "length": trajectory.get("length", len(steps)),
+         }
+
+         # Build judge request with rubric included
+         judge_request: JudgeScoreRequest = {
+             "policy_name": "verilog-designer",
+             "task_app": {"id": "grpo-verilog"},
+             "trace": {
+                 "event_history": event_history,
+                 "markov_blanket_message_history": [],
+                 "metadata": metadata,
+             },
+             "rubric": rubric,
+             "options": {
+                 "model": model,
+                 "timeout_s": timeout,
+                 "event": False,  # Not scoring per-event
+                 "outcome": True,  # Score the final outcome
+             }
+         }
+
+         # Call backend judge API
+         with httpx.Client(timeout=timeout) as client:
+             # Get API key from env
+             api_key = os.environ.get("SYNTH_API_KEY") or os.environ.get("OPENAI_API_KEY")
+             headers = {}
+             if api_key:
+                 headers["Authorization"] = f"Bearer {api_key}"
+
+             url = f"{backend_url.rstrip('/')}/judge/v1/score"
+
+             # Debug: print request details
+             print(f"\n[verilog_backend_judge] REQUEST DEBUG:")
+             print(f"  URL: {url}")
+             print(f"  Request body keys: {list(judge_request.keys())}")
+             rubric_data = judge_request.get('rubric', {})
+             print(f"  Rubric event criteria: {len(rubric_data.get('event', []))}")
+             print(f"  Rubric outcome criteria: {len(rubric_data.get('outcome', []))}")
+             trace_data = judge_request.get('trace', {})
+             event_hist = trace_data.get('event_history', [])
+             print(f"  Trace event_history count: {len(event_hist)}")
+             if event_hist:
+                 last_event = event_hist[-1]
+                 last_obs = last_event.get('observation', {})
+                 print(f"  Last event done: {last_event.get('done', False)}")
+                 print(f"  Last obs keys: {list(last_obs.keys())}")
+                 print(f"  Task completed: {last_obs.get('task_completed', 'N/A')}")
+
+             response = client.post(url, json=judge_request, headers=headers)
+
+             # Debug: print response details
+             print(f"\n[verilog_backend_judge] RESPONSE DEBUG:")
+             print(f"  Status: {response.status_code}")
+             print(f"  Response body: {response.text[:500]}")  # First 500 chars
+
+             response.raise_for_status()
+             result = response.json()
+             assert isinstance(result, dict), "Response must be a dict"
+
+             # Extract aggregate score
+             aggregate_score = result.get("aggregate_score", 0.0)
+
+             # Try outcome_review.total if aggregate_score not found
+             if aggregate_score == 0.0 and "outcome_review" in result:
+                 outcome_review = result["outcome_review"]
+                 if isinstance(outcome_review, dict):
+                     aggregate_score = outcome_review.get("total", 0.0)
+
+             print(f"  Aggregate score: {aggregate_score}\n")
+             return float(aggregate_score)
+
+     except httpx.HTTPStatusError as e:
+         print(f"\n[verilog_backend_judge] HTTP ERROR:")
+         print(f"  Status: {e.response.status_code}")
+         print(f"  Response body: {e.response.text}\n")
+         return 0.0
+     except AssertionError as e:
+         print(f"[verilog_backend_judge] Assertion error: {e}")
+         return 0.0
+     except Exception as e:
+         print(f"[verilog_backend_judge] Unexpected error: {e}")
+         return 0.0
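
For orientation, below is a minimal sketch of how this judge hook could be exercised locally. The payload shape (a `response` dict carrying `trajectories` with `steps` and `final` entries) is inferred from the transformation code above; the import path, the fabricated single step, and the localhost `backend_url` are illustrative assumptions, not part of the package.

```python
# Hypothetical local smoke test for the judge hook (not shipped with the package).
# Assumes it is run from examples/multi_step/judges/ so the module imports directly,
# and that a judge backend is reachable at backend_url.
from verilog_backend_judge import judge

payload = {
    "response": {
        "trajectories": [
            {
                "env_id": "verilog",
                "policy_id": "verilog-designer",
                "length": 1,
                "steps": [
                    {
                        "obs": {"files": {"TopModule.v": "module TopModule(); endmodule"}},
                        "tool_calls": [{"tool": "submit", "args": {}}],
                        "reward": 1.0,
                        "done": True,
                        "info": {},
                    }
                ],
                "final": {
                    "observation": {"task_completed": True},
                    "reward": 10.0,
                    "done": True,
                },
            }
        ]
    }
}

score = judge(
    payload,
    backend_url="http://localhost:8000/api",  # default used by the hook
    model="openai/gpt-oss-120b",
    timeout_s=45,
)
print(f"aggregate score: {score:.2f}")
```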
examples/multi_step/readme.md
@@ -0,0 +1,48 @@
+
+
+ Crafter
+
+ cd /Users/joshpurtell/Documents/GitHub/synth-ai && uvx synth-ai modal-serve grpo-crafter-task-app --name grpo-crafter-task-app --env-file /Users/joshpurtell/Documents/GitHub/monorepo/environments/crafter/.env
+
+ cd /Users/joshpurtell/Documents/GitHub/monorepo && uv run modal deploy backend/app/routes/clustered_training/core/algorithms/gspo/app.py --env dev
+
+ uvx synth-ai eval --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml
+
+
+ uvx synth-ai train \
+   --type rl \
+   --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml \
+   --task-url https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run \
+   --backend https://synth-backend-dev-docker.onrender.com/api \
+   --env-file /Users/joshpurtell/Documents/GitHub/monorepo/environments/crafter/.env
+
+
+
+ ---
+
+ Verilog
+
+ # 1. Deploy Verilog task app
+ cd /Users/joshpurtell/Documents/GitHub/synth-ai && uvx synth-ai modal-serve grpo-verilog --name grpo-verilog-task-app --env-file /Users/joshpurtell/Documents/GitHub/monorepo/environments/verilog/.env
+
+ # 2. Baseline eval using Synth backend (pre-training)
+ uvx synth-ai eval --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/verilog_eval_synth_qwen4b.toml
+
+ # 3. (Optional) External reference eval using Groq Qwen 32B
+ uvx synth-ai eval --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/verilog_eval_groq_qwen32b.toml
+
+ # 4. Deploy training backend
+ cd /Users/joshpurtell/Documents/GitHub/monorepo && uv run modal deploy backend/app/routes/clustered_training/core/algorithms/gspo/app.py --env dev
+
+ # 5. Run RL training
+ uvx synth-ai train \
+   --type rl \
+   --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/verilog_rl_lora.toml \
+   --task-url https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run \
+   --backend https://synth-backend-dev-docker.onrender.com/api \
+   --env-file /Users/joshpurtell/Documents/GitHub/monorepo/environments/verilog/.env
+
+ # 6. Post-training eval (update job_id in config first!)
+ # After training, note the job_id from logs (e.g., job_19a1823e56303de604f)
+ # Update verilog_eval_synth_trained_qwen8b.toml with your job_id
+ uvx synth-ai eval --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/verilog_eval_synth_trained_qwen8b.toml
examples/multi_step/run_sft_qwen30b.sh
@@ -0,0 +1,45 @@
+ #!/bin/bash
+ # Run SFT for Qwen3-Coder-30B with LoRA on Crafter data
+
+ # Usage:
+ #   ./run_sft_qwen30b.sh <dataset_path> [env_file]
+ #
+ # Example:
+ #   ./run_sft_qwen30b.sh examples/multi_step/ft_data/crafter_traces.jsonl
+ #   ./run_sft_qwen30b.sh examples/multi_step/ft_data/crafter_traces.jsonl backend/.env.dev
+
+ set -e
+
+ DATASET_PATH="${1:-examples/sft/ft_data/crafter_traces.jsonl}"
+ ENV_FILE="${2:-backend/.env.dev}"
+
+ if [ ! -f "$DATASET_PATH" ]; then
+     echo "Error: Dataset not found at $DATASET_PATH"
+     echo "Usage: $0 <dataset_path> [env_file]"
+     exit 1
+ fi
+
+ if [ ! -f "$ENV_FILE" ]; then
+     echo "Error: Env file not found at $ENV_FILE"
+     echo "Usage: $0 <dataset_path> [env_file]"
+     exit 1
+ fi
+
+ echo "🚀 Starting SFT training for Qwen3-Coder-30B with LoRA"
+ echo "   Model: Qwen/Qwen3-Coder-30B-A3B-Instruct"
+ echo "   Dataset: $DATASET_PATH"
+ echo "   Config: examples/multi_step/configs/crafter_sft_qwen30b_lora.toml"
+ echo "   GPUs: 4x H200"
+ echo "   LoRA: r=16, alpha=32, all-linear"
+ echo ""
+
+ uvx synth-ai train \
+     --type sft \
+     --config examples/multi_step/configs/crafter_sft_qwen30b_lora.toml \
+     --dataset "$DATASET_PATH" \
+     --env-file "$ENV_FILE"
+
+ echo ""
+ echo "✅ SFT training job submitted!"
+ echo "   Monitor progress in your Synth dashboard"
+
examples/multi_step/verilog_rl_lora.md
@@ -0,0 +1,218 @@
+ # Verilog RL with LoRA Analysis
+
+ ## Executive Summary
+
+ **✅ YES, Verilog can absolutely do RL with LoRA just like Crafter!** The architecture is nearly identical, but there are important considerations around model size and task complexity.
+
+ ## Architecture Compatibility ✅
+
+ ### **Same Foundation** (No changes needed)
+ - ✅ **Contracts**: Uses identical `RolloutRequest`/`RolloutResponse` as Crafter
+ - ✅ **Task App Framework**: Same `synth_ai.task.apps` framework
+ - ✅ **Environment Pattern**: Same `StatefulEnvironment` + tool-based architecture
+ - ✅ **Rubrics System**: Same evaluation and reward system
+ - ✅ **Trace Correlation**: Already implemented in `rollout_executor` (line 817 in `grpo_verilog.py`)
+ - ✅ **Modal Deployment**: Same deployment pattern as Crafter
+
+ ### **Key Differences** (Considerations for LoRA)
+
+ #### 1. **Model Size: 8x Larger** ⚠️
+ ```toml
+ # Verilog (current)
+ model = "qwen/qwen3-32b" # 32B parameters
+
+ # Crafter (working)
+ model = "Qwen/Qwen3-4B" # 4B parameters
+ ```
+ **Impact**: Memory requirements 8x higher for LoRA training
+ **Solution**: Use gradient checkpointing, smaller batch sizes, or distributed training
+
+ #### 2. **Tool Set: Simpler but More Structured**
+ ```python
+ # Verilog Tools (4 tools)
+ TOOLS = ["write_file", "compile", "simulate", "submit"]
+
+ # Crafter Tools (20+ tools)
+ # craft, move, attack, gather, etc.
+ ```
+
+ **Verilog Advantages**:
+ - ✅ **Deterministic**: Write → Compile → Simulate → Submit workflow
+ - ✅ **Clear Success Criteria**: Tests pass = high reward
+ - ✅ **Sparse but Meaningful Rewards**: +10 for submit success, +1 for simulation pass
+
+ **Verilog Challenges**:
+ - ❌ **Sparser Rewards**: Fewer intermediate signals for learning
+ - ❌ **Longer Sequences**: Multi-step compilation chains
+ - ❌ **Error Recovery**: Must debug compilation failures
+
+ #### 3. **State Representation**
+ ```python
+ # Verilog State (file-based)
+ {
+     "files": {"TopModule.v": "module TopModule(..."},
+     "compile_status": "Last compile: Success",
+     "simulate_status": "Last simulation: Passed",
+     "task_completed": false
+ }
+
+ # Crafter State (world-based)
+ {
+     "inventory": {"wood": 5, "stone": 3},
+     "position": [x, y],
+     "nearby_entities": [...],
+     "achievement_unlocked": true
+ }
+ ```
+
+ ## Configuration for LoRA RL
+
+ ### **Option 1: Qwen3-0.6B (Recommended for testing)** ⭐
+ ```toml
+ [algorithm]
+ type = "online"
+ method = "policy_gradient"
+ variety = "gspo"
+
+ [model]
+ base = "Qwen/Qwen3-0.6B" # ✅ Same as existing SFT configs
+ trainer_mode = "lora"
+
+ [lora]
+ r = 16
+ alpha = 32
+ dropout = 0.05
+ target_modules = ["all-linear"]
+
+ [rollout]
+ env_name = "verilog"
+ max_turns = 15
+ policy_name = "verilog-designer"
+
+ [training]
+ batch_size = 4 # ✅ Same as Crafter
+ gradient_accumulation_steps = 1
+ ```
+
+ ### **Option 2: Qwen3-32B (Production)** ⚠️
+ ```toml
+ [algorithm]
+ type = "online"
+ method = "policy_gradient"
+ variety = "gspo"
+
+ [model]
+ base = "qwen/qwen3-32b" # ⚠️ 8x memory vs Crafter's 4B
+ trainer_mode = "lora"
+
+ [lora]
+ r = 16
+ alpha = 32
+ dropout = 0.05
+ target_modules = ["all-linear"]
+
+ [rollout]
+ env_name = "verilog"
+ max_turns = 15
+ policy_name = "verilog-designer"
+ ```
+
+ ### **Memory Optimization** (for 32B model)
+ ```toml
+ [vllm]
+ max_model_len = 4096 # Shorter than Crafter's 8192
+ tensor_parallel_size = 2 # Distribute across GPUs
+
+ [training]
+ batch_size = 2 # Smaller than Crafter's 4
+ gradient_accumulation_steps = 4
+ ```
+
+ ## Task App Changes Needed
+
+ ### **1. Mode Parameter Support** ✅ (Already implemented)
+ The Verilog task app already handles `mode="rl"` correctly:
+ ```python
+ # In grpo_verilog.py rollout_executor
+ policy_config = dict(policy_config_raw)
+ # ... mode parameter flows through naturally
+ ```
+
+ ### **2. Trace Correlation** ✅ (Already implemented)
+ ```python
+ # Line 817 in grpo_verilog.py
+ trajectory = RolloutTrajectory(
+     # ...
+     inference_url=agent.inference_url, # ✅ Required for trace correlation
+     decision_samples=None,
+ )
+ ```
+
+ ### **3. Rubric Integration** ✅ (Already configured)
+ ```python
+ # In grpo_verilog.py
+ rubrics=RubricBundle(
+     outcome=OUTCOME_RUBRIC, # Tests pass reward
+     events=EVENTS_RUBRIC, # Process efficiency reward
+ )
+ ```
+
+ ## RL Training Feasibility
+
+ ### **✅ Works Great**
+ 1. **Clear Success Signal**: Submit passing tests = +10 reward
+ 2. **Guided Process**: Natural write→compile→simulate→submit progression
+ 3. **Error Learning**: Agent must learn to debug compilation failures
+ 4. **Hardware Design**: Real-world applicable skills
+
+ ### **⚠️ Challenges**
+ 1. **Model Size**: 32B vs 4B = 8x memory, slower training
+ 2. **Sparse Rewards**: Fewer learning signals than Crafter's dense rewards
+ 3. **Longer Episodes**: 15+ steps vs Crafter's 10 steps
+ 4. **Compilation Errors**: Must learn to interpret and fix syntax errors
+
+ ## Recommended Approach
+
+ ### **Phase 1: Start with Qwen3-0.6B** ⭐
+ ```toml
+ # Perfect for testing - same model used in existing SFT configs
+ model = "Qwen/Qwen3-0.6B"
+ batch_size = 4 # Same as Crafter
+ ```
+ - ✅ **Zero setup**: Already configured in `synth-ai/examples/sft/configs/crafter_lora_qwen0p6b.toml`
+ - ✅ **Fast iteration**: 0.6B parameters = quick training cycles
+ - ✅ **Memory efficient**: Fits on single GPU easily
+ - ✅ **Proven baseline**: Same model used in RL demos and SFT examples
+
+ ### **Phase 2: Scale to Qwen3-8B** (if 0.6B works well)
+ ```toml
+ model = "qwen/qwen3-8b"
+ batch_size = 2
+ gradient_accumulation_steps = 2
+ ```
+
+ ### **Phase 3: Production with Qwen3-32B**
+ ```toml
+ model = "qwen/qwen3-32b"
+ tensor_parallel_size = 2
+ batch_size = 1
+ gradient_accumulation_steps = 4
+ ```
+
+ ### **Phase 4: Optimize for Verilog Domain**
+ Consider fine-tuning the base model on:
+ - Verilog syntax and semantics
+ - Hardware design patterns
+ - Compilation error messages
+ - Testbench writing
+
+ ## Conclusion
+
+ **✅ Verilog RL with LoRA is absolutely feasible** and should work with the same pipeline as Crafter. The main differences are:
+
+ 1. **Larger model** (32B vs 4B) requires memory optimization
+ 2. **Sparser rewards** may need different reward shaping
+ 3. **More structured tasks** could actually make learning easier
+ 4. **Real hardware skills** make it more valuable than game tasks
+
+ **Recommended next step**: Create a `verilog_rl_lora.toml` config starting with Qwen3-8B and adapt the reward rubrics for the compilation workflow.
examples/qwen_coder/configs/coder_lora_30b.toml
@@ -1,9 +1,11 @@
  # Qwen3 Coder 30B LoRA SFT – all-linear adapters
 
+ type = "sft"
+
  [algorithm]
  type = "offline"
  method = "sft"
- variety = "fft"
+ variety = "lora"
 
  [job]
  model = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
@@ -58,4 +60,3 @@ alpha = 32
  dropout = 0.05
  target_modules = ["all-linear"]
 
-
examples/qwen_coder/configs/coder_lora_4b.toml
@@ -1,5 +1,7 @@
  # Qwen3 Coder 4B LoRA SFT – all-linear adapters
 
+ type = "sft"
+
  [job]
  model = "Qwen/Qwen3-4B"
 
@@ -54,4 +56,3 @@ dropout = 0.05
  target_modules = ["all-linear"]
 
 
-
examples/qwen_coder/configs/coder_lora_small.toml
@@ -1,5 +1,7 @@
  # Qwen3 Coder LoRA SFT – all-linear adapters
 
+ type = "sft"
+
  [algorithm]
  type = "offline"
  method = "sft"
@@ -55,4 +57,3 @@ alpha = 32
  dropout = 0.05
  target_modules = ["all-linear"]
 
-
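
All three config hunks above add a top-level `type = "sft"` key. The sketch below illustrates, under stated assumptions, how a trainer CLI might dispatch on such a key; it is a hypothetical example written for this diff summary, not the actual synth-ai loader, and `dispatch_train_config` is a made-up name.

```python
# Hypothetical illustration of dispatching on a top-level `type` key in a train config.
# This is NOT the synth-ai implementation; the function below is a placeholder.
import tomllib
from pathlib import Path


def dispatch_train_config(path: str) -> str:
    # Parse the TOML config and read the top-level `type`, falling back to
    # [algorithm].method when the key is absent (as in older configs).
    cfg = tomllib.loads(Path(path).read_text())
    job_type = cfg.get("type") or cfg.get("algorithm", {}).get("method")
    if job_type == "sft":
        return f"would run SFT for model {cfg.get('job', {}).get('model', '<unset>')}"
    if job_type in {"rl", "policy_gradient"}:
        return "would run RL training"
    raise ValueError(f"unknown training type: {job_type!r}")


# Example (path is illustrative):
# print(dispatch_train_config("examples/qwen_coder/configs/coder_lora_4b.toml"))
```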