synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (354)
  1. examples/README.md +1 -0
  2. examples/analyze_semantic_words.sh +2 -2
  3. examples/blog_posts/pokemon_vl/README.md +98 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  5. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  6. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  7. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  8. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  9. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  12. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  13. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  15. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  16. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  17. examples/multi_step/SFT_README.md +147 -0
  18. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
  20. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  21. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  22. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  23. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  24. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  25. examples/multi_step/convert_traces_to_sft.py +84 -0
  26. examples/multi_step/run_sft_qwen30b.sh +45 -0
  27. examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
  28. examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
  29. examples/qwen_coder/configs/coder_lora_small.toml +1 -2
  30. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  31. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  32. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  33. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  34. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  35. examples/qwen_vl/QUICKSTART.md +327 -0
  36. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  37. examples/qwen_vl/README.md +152 -0
  38. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  39. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  40. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  41. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  42. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  43. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  44. examples/qwen_vl/__init__.py +2 -0
  45. examples/qwen_vl/collect_data_via_cli.md +415 -0
  46. examples/qwen_vl/collect_vision_traces.py +368 -0
  47. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  48. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  49. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  50. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  51. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  52. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  53. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  54. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  55. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  56. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  57. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  58. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  59. examples/qwen_vl/run_vision_comparison.sh +61 -0
  60. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  61. examples/qwen_vl/test_image_validation.py +201 -0
  62. examples/qwen_vl/test_sft_vision_data.py +110 -0
  63. examples/rl/README.md +6 -6
  64. examples/rl/configs/eval_base_qwen.toml +17 -0
  65. examples/rl/configs/eval_rl_qwen.toml +13 -0
  66. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  67. examples/rl/configs/rl_from_base_qwen17.toml +79 -0
  68. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  69. examples/rl/run_eval.py +436 -0
  70. examples/rl/run_rl_and_save.py +111 -0
  71. examples/rl/task_app/README.md +21 -0
  72. examples/rl/task_app/math_single_step.py +990 -0
  73. examples/rl/task_app/math_task_app.py +111 -0
  74. examples/run_crafter_demo.sh +2 -2
  75. examples/sft/README.md +6 -6
  76. examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
  77. examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
  78. examples/sft/evaluate.py +2 -4
  79. examples/sft/export_dataset.py +7 -4
  80. examples/swe/task_app/README.md +33 -3
  81. examples/swe/task_app/grpo_swe_mini.py +4 -1
  82. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  83. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  84. examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
  85. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  86. examples/swe/task_app/hosted/policy_routes.py +0 -2
  87. examples/swe/task_app/hosted/rollout.py +0 -8
  88. examples/swe/task_app/morph_backend.py +178 -0
  89. examples/task_apps/crafter/task_app/README.md +1 -1
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
  91. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
  97. examples/task_apps/enron/__init__.py +1 -0
  98. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  99. examples/task_apps/math/README.md +1 -2
  100. examples/task_apps/pokemon_red/README.md +3 -4
  101. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  102. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  103. examples/task_apps/pokemon_red/task_app.py +36 -5
  104. examples/task_apps/sokoban/README.md +2 -3
  105. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  106. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  107. examples/vlm/README.md +3 -3
  108. examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
  109. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  110. examples/vlm/filter_image_rows.py +1 -1
  111. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  112. examples/warming_up_to_rl/_utils.py +92 -0
  113. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  114. examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
  115. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  116. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  117. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  118. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  119. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  120. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  121. examples/warming_up_to_rl/readme.md +63 -132
  122. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  123. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  124. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  125. examples/warming_up_to_rl/task_app/README.md +42 -0
  126. examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
  127. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  128. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  129. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  130. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  131. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  132. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  133. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  134. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  135. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  136. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  137. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  138. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  139. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  147. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  148. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  152. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  153. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  154. synth_ai/__init__.py +44 -30
  155. synth_ai/_utils/__init__.py +47 -0
  156. synth_ai/_utils/base_url.py +10 -0
  157. synth_ai/_utils/http.py +10 -0
  158. synth_ai/_utils/prompts.py +10 -0
  159. synth_ai/_utils/task_app_state.py +12 -0
  160. synth_ai/_utils/user_config.py +10 -0
  161. synth_ai/api/models/supported.py +144 -7
  162. synth_ai/api/train/__init__.py +13 -1
  163. synth_ai/api/train/builders.py +9 -3
  164. synth_ai/api/train/cli.py +155 -17
  165. synth_ai/api/train/config_finder.py +18 -11
  166. synth_ai/api/train/configs/__init__.py +8 -1
  167. synth_ai/api/train/configs/rl.py +32 -7
  168. synth_ai/api/train/configs/sft.py +6 -2
  169. synth_ai/api/train/configs/shared.py +59 -2
  170. synth_ai/api/train/env_resolver.py +13 -10
  171. synth_ai/auth/credentials.py +119 -0
  172. synth_ai/cli/__init__.py +61 -69
  173. synth_ai/cli/_modal_wrapper.py +7 -5
  174. synth_ai/cli/_typer_patch.py +0 -2
  175. synth_ai/cli/_validate_task_app.py +22 -4
  176. synth_ai/cli/commands/__init__.py +17 -0
  177. synth_ai/cli/commands/demo/__init__.py +6 -0
  178. synth_ai/cli/commands/demo/core.py +163 -0
  179. synth_ai/cli/commands/deploy/__init__.py +23 -0
  180. synth_ai/cli/commands/deploy/core.py +614 -0
  181. synth_ai/cli/commands/deploy/errors.py +72 -0
  182. synth_ai/cli/commands/deploy/validation.py +11 -0
  183. synth_ai/cli/commands/eval/__init__.py +19 -0
  184. synth_ai/cli/commands/eval/core.py +1109 -0
  185. synth_ai/cli/commands/eval/errors.py +81 -0
  186. synth_ai/cli/commands/eval/validation.py +133 -0
  187. synth_ai/cli/commands/filter/__init__.py +12 -0
  188. synth_ai/cli/commands/filter/core.py +388 -0
  189. synth_ai/cli/commands/filter/errors.py +55 -0
  190. synth_ai/cli/commands/filter/validation.py +77 -0
  191. synth_ai/cli/commands/help/__init__.py +177 -0
  192. synth_ai/cli/commands/help/core.py +73 -0
  193. synth_ai/cli/commands/status/__init__.py +64 -0
  194. synth_ai/cli/commands/status/client.py +192 -0
  195. synth_ai/cli/commands/status/config.py +92 -0
  196. synth_ai/cli/commands/status/errors.py +20 -0
  197. synth_ai/cli/commands/status/formatters.py +164 -0
  198. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  199. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  200. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  201. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  202. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  203. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  204. synth_ai/cli/commands/status/utils.py +114 -0
  205. synth_ai/cli/commands/train/__init__.py +53 -0
  206. synth_ai/cli/commands/train/core.py +21 -0
  207. synth_ai/cli/commands/train/errors.py +117 -0
  208. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  209. synth_ai/cli/commands/train/judge_validation.py +304 -0
  210. synth_ai/cli/commands/train/validation.py +443 -0
  211. synth_ai/cli/demo.py +2 -162
  212. synth_ai/cli/deploy/__init__.py +28 -0
  213. synth_ai/cli/deploy/core.py +5 -0
  214. synth_ai/cli/deploy/errors.py +23 -0
  215. synth_ai/cli/deploy/validation.py +5 -0
  216. synth_ai/cli/eval/__init__.py +36 -0
  217. synth_ai/cli/eval/core.py +5 -0
  218. synth_ai/cli/eval/errors.py +31 -0
  219. synth_ai/cli/eval/validation.py +5 -0
  220. synth_ai/cli/filter/__init__.py +28 -0
  221. synth_ai/cli/filter/core.py +5 -0
  222. synth_ai/cli/filter/errors.py +23 -0
  223. synth_ai/cli/filter/validation.py +5 -0
  224. synth_ai/cli/legacy_root_backup.py +3 -1
  225. synth_ai/cli/lib/__init__.py +10 -0
  226. synth_ai/cli/lib/task_app_discovery.py +7 -0
  227. synth_ai/cli/lib/task_app_env.py +518 -0
  228. synth_ai/cli/modal_serve/__init__.py +12 -0
  229. synth_ai/cli/modal_serve/core.py +14 -0
  230. synth_ai/cli/modal_serve/errors.py +8 -0
  231. synth_ai/cli/modal_serve/validation.py +11 -0
  232. synth_ai/cli/recent.py +2 -1
  233. synth_ai/cli/serve/__init__.py +12 -0
  234. synth_ai/cli/serve/core.py +14 -0
  235. synth_ai/cli/serve/errors.py +8 -0
  236. synth_ai/cli/serve/validation.py +11 -0
  237. synth_ai/cli/setup.py +21 -0
  238. synth_ai/cli/status.py +7 -126
  239. synth_ai/cli/task_app_deploy.py +7 -0
  240. synth_ai/cli/task_app_list.py +25 -0
  241. synth_ai/cli/task_app_modal_serve.py +11 -0
  242. synth_ai/cli/task_app_serve.py +11 -0
  243. synth_ai/cli/task_apps.py +110 -1499
  244. synth_ai/cli/traces.py +1 -1
  245. synth_ai/cli/train/__init__.py +12 -0
  246. synth_ai/cli/train/core.py +21 -0
  247. synth_ai/cli/train/errors.py +8 -0
  248. synth_ai/cli/train/validation.py +24 -0
  249. synth_ai/cli/train.py +5 -0
  250. synth_ai/cli/turso.py +1 -1
  251. synth_ai/cli/watch.py +1 -1
  252. synth_ai/demos/__init__.py +10 -0
  253. synth_ai/demos/core/__init__.py +28 -1
  254. synth_ai/demos/crafter/__init__.py +1 -0
  255. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  256. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  257. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  258. synth_ai/demos/demo_registry.py +176 -0
  259. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  260. synth_ai/demos/math/__init__.py +1 -0
  261. synth_ai/demos/math/_common.py +16 -0
  262. synth_ai/demos/math/app.py +38 -0
  263. synth_ai/demos/math/config.toml +76 -0
  264. synth_ai/demos/math/deploy_modal.py +54 -0
  265. synth_ai/demos/math/modal_task_app.py +702 -0
  266. synth_ai/demos/math/task_app_entry.py +51 -0
  267. synth_ai/environments/environment/core.py +7 -1
  268. synth_ai/environments/examples/bandit/engine.py +0 -1
  269. synth_ai/environments/examples/bandit/environment.py +0 -1
  270. synth_ai/environments/examples/red/engine.py +33 -12
  271. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  272. synth_ai/environments/examples/red/environment.py +26 -0
  273. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  274. synth_ai/environments/examples/wordle/environment.py +0 -1
  275. synth_ai/evals/base.py +16 -5
  276. synth_ai/evals/client.py +1 -1
  277. synth_ai/http.py +8 -22
  278. synth_ai/inference/client.py +1 -1
  279. synth_ai/judge_schemas.py +4 -5
  280. synth_ai/learning/client.py +1 -1
  281. synth_ai/learning/health.py +1 -1
  282. synth_ai/learning/jobs.py +1 -1
  283. synth_ai/learning/rl/client.py +4 -2
  284. synth_ai/learning/rl/env_keys.py +1 -1
  285. synth_ai/learning/rl/secrets.py +1 -1
  286. synth_ai/learning/sft/client.py +1 -1
  287. synth_ai/learning/sft/data.py +407 -4
  288. synth_ai/learning/validators.py +4 -1
  289. synth_ai/streaming/__init__.py +29 -0
  290. synth_ai/streaming/config.py +94 -0
  291. synth_ai/streaming/handlers.py +469 -0
  292. synth_ai/streaming/streamer.py +301 -0
  293. synth_ai/streaming/types.py +95 -0
  294. synth_ai/task/apps/__init__.py +4 -2
  295. synth_ai/task/config.py +6 -4
  296. synth_ai/task/rubrics/__init__.py +1 -2
  297. synth_ai/task/rubrics/loaders.py +14 -10
  298. synth_ai/task/rubrics.py +219 -0
  299. synth_ai/task/trace_correlation_helpers.py +24 -11
  300. synth_ai/task/tracing_utils.py +14 -3
  301. synth_ai/task/validators.py +0 -1
  302. synth_ai/tracing_v3/abstractions.py +3 -3
  303. synth_ai/tracing_v3/config.py +15 -13
  304. synth_ai/tracing_v3/constants.py +21 -0
  305. synth_ai/tracing_v3/db_config.py +3 -1
  306. synth_ai/tracing_v3/decorators.py +10 -7
  307. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  308. synth_ai/tracing_v3/migration_helper.py +1 -2
  309. synth_ai/tracing_v3/session_tracer.py +7 -7
  310. synth_ai/tracing_v3/storage/base.py +29 -29
  311. synth_ai/tracing_v3/storage/config.py +3 -3
  312. synth_ai/tracing_v3/turso/daemon.py +8 -9
  313. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  314. synth_ai/tracing_v3/utils.py +2 -2
  315. synth_ai/utils/__init__.py +101 -0
  316. synth_ai/utils/base_url.py +94 -0
  317. synth_ai/utils/cli.py +131 -0
  318. synth_ai/utils/env.py +294 -0
  319. synth_ai/utils/http.py +172 -0
  320. synth_ai/utils/modal.py +308 -0
  321. synth_ai/utils/process.py +212 -0
  322. synth_ai/utils/prompts.py +39 -0
  323. synth_ai/utils/sqld.py +122 -0
  324. synth_ai/utils/task_app_discovery.py +882 -0
  325. synth_ai/utils/task_app_env.py +186 -0
  326. synth_ai/utils/task_app_state.py +318 -0
  327. synth_ai/utils/user_config.py +137 -0
  328. synth_ai/v0/config/__init__.py +1 -5
  329. synth_ai/v0/config/base_url.py +1 -7
  330. synth_ai/v0/tracing/config.py +1 -1
  331. synth_ai/v0/tracing/decorators.py +1 -1
  332. synth_ai/v0/tracing/upload.py +1 -1
  333. synth_ai/v0/tracing_v1/config.py +1 -1
  334. synth_ai/v0/tracing_v1/decorators.py +1 -1
  335. synth_ai/v0/tracing_v1/upload.py +1 -1
  336. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
  337. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
  338. synth_ai/cli/man.py +0 -106
  339. synth_ai/cli/tui.py +0 -57
  340. synth_ai/compound/cais.py +0 -0
  341. synth_ai/core/experiment.py +0 -13
  342. synth_ai/core/system.py +0 -15
  343. synth_ai/demo_registry.py +0 -295
  344. synth_ai/handshake.py +0 -109
  345. synth_ai/tui/__init__.py +0 -5
  346. synth_ai/tui/__main__.py +0 -13
  347. synth_ai/tui/cli/__init__.py +0 -1
  348. synth_ai/tui/cli/query_experiments.py +0 -164
  349. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  350. synth_ai/tui/dashboard.py +0 -906
  351. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  354. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -417,8 +417,6 @@ async def step_policy(
417
417
  inf_req = meta["inference_request"]
418
418
  msgs = inf_req["messages"]
419
419
  model_name = inf_req.get("model") or getattr(policy, "model", None) or ""
420
- system_messages: list[str] = []
421
- user_messages: list[str] = []
422
420
  if msgs and len(msgs) > 0 and msgs[0]["role"] == "system":
423
421
  sys_text = msgs[0]["content"]
424
422
  policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
@@ -464,6 +462,8 @@ async def step_policy(
464
462
  )
465
463
 
466
464
  # Emit full system/user prompts for observability (no secrets included)
465
+ system_prompt_records: list[dict[str, Any]] = []
466
+ user_prompt_records: list[dict[str, Any]] = []
467
467
  try:
468
468
 
469
469
  def _as_text(content: object) -> str:
@@ -483,8 +483,6 @@ async def step_policy(
483
483
  return "".join(parts)
484
484
  return str(content)
485
485
 
486
- system_prompt_records: list[dict[str, Any]] = []
487
- user_prompt_records: list[dict[str, Any]] = []
488
486
  for message in msgs:
489
487
  role = message.get("role")
490
488
  raw_content = message.get("content")
@@ -527,6 +525,11 @@ async def step_policy(
527
525
 
528
526
  if tracing_context is not None:
529
527
  try:
528
+ logger.info(
529
+ "[TRACE_DEBUG] record_policy_prompts sys=%s user=%s",
530
+ len(system_prompt_records),
531
+ len(user_prompt_records),
532
+ )
530
533
  await tracing_context.record_policy_prompts(
531
534
  system_prompt_records, user_prompt_records
532
535
  )
@@ -782,9 +785,10 @@ async def step_policy(
782
785
  "sokoban-react",
783
786
  "crafter-react",
784
787
  ) and getattr(policy, "use_tools", True):
785
- req_tools = meta["inference_request"]["tools"]
786
- req_tool_choice = meta["inference_request"]["tool_choice"]
787
- req_stop_after = meta["inference_request"]["stop_after_tool_calls"]
788
+ inf_req = meta.get("inference_request", {})
789
+ req_tools = inf_req.get("tools")
790
+ req_tool_choice = inf_req.get("tool_choice")
791
+ req_stop_after = inf_req.get("stop_after_tool_calls")
788
792
  logger.info(
789
793
  f"TOOLCALL_CONFIG: policy={policy_name} tools_present={bool(req_tools)} tool_choice={req_tool_choice} stop_after={req_stop_after}"
790
794
  )
@@ -793,6 +797,8 @@ async def step_policy(
793
797
  status_code=500,
794
798
  detail=f"TOOLCALL_ASSERTION_FAIL: Missing tools or tool_choice!=required for policy {policy_name}",
795
799
  )
800
+ if req_stop_after is None:
801
+ inf_req["stop_after_tool_calls"] = 1
796
802
 
797
803
  # Call inference service with retries for Flash cold-start (503)
798
804
  import time as _t
@@ -901,38 +907,71 @@ async def step_policy(
901
907
  req_body["temperature"] = 0.1
902
908
  meta["inference_request"] = req_body
903
909
 
904
- # Strip image parts: Crafter policy currently only uses text prompts.
905
- # Some providers reject image_url payloads entirely, so always flatten to plain text.
906
- req_body2 = meta.get("inference_request", {})
907
- if isinstance(req_body2, dict):
908
- msgs = req_body2.get("messages")
909
- if isinstance(msgs, list):
910
- new_msgs = []
911
- changed = False
912
- for m in msgs:
913
- try:
914
- if isinstance(m, dict):
915
- content = m.get("content")
916
- if isinstance(content, list):
917
- parts: list[str] = []
918
- for seg in content:
919
- if isinstance(seg, dict):
920
- txt = seg.get("text") or seg.get("content")
921
- if isinstance(txt, str) and txt:
922
- parts.append(txt)
923
- m2 = dict(m)
924
- m2["content"] = "\n".join(parts)
925
- new_msgs.append(m2)
926
- changed = True
910
+ # Message flattening: Convert multimodal content to text-only for non-vision models.
911
+ # SKIP message flattening for vision models to preserve image_url parts!
912
+ # The old code here was flattening multimodal content (list) to text-only (str),
913
+ # which strips out image_url parts. This breaks vision models.
914
+ # Only flatten for non-vision models that can't handle multimodal format.
915
+ is_vision_model = False
916
+ try:
917
+ # Check if the policy is a vision-capable policy
918
+ if isinstance(policy, CrafterPolicy):
919
+ is_vision_model = getattr(policy, "use_vision", False)
920
+ except Exception:
921
+ pass
922
+
923
+ logger.debug(f"🔊 [POLICY_ROUTES] is_vision_model={is_vision_model}, will_flatten={not is_vision_model}")
924
+
925
+ if not is_vision_model:
926
+ # Only flatten for non-vision models (backward compatibility)
927
+ req_body2 = meta.get("inference_request", {})
928
+ if isinstance(req_body2, dict):
929
+ msgs = req_body2.get("messages")
930
+ if isinstance(msgs, list):
931
+ new_msgs = []
932
+ changed = False
933
+ for m in msgs:
934
+ try:
935
+ if isinstance(m, dict):
936
+ content = m.get("content")
937
+ if isinstance(content, list):
938
+ parts: list[str] = []
939
+ for seg in content:
940
+ if isinstance(seg, dict):
941
+ txt = seg.get("text") or seg.get("content")
942
+ if isinstance(txt, str) and txt:
943
+ parts.append(txt)
944
+ m2 = dict(m)
945
+ m2["content"] = "\n".join(parts)
946
+ new_msgs.append(m2)
947
+ changed = True
948
+ else:
949
+ new_msgs.append(m)
927
950
  else:
928
951
  new_msgs.append(m)
929
- else:
952
+ except Exception:
930
953
  new_msgs.append(m)
931
- except Exception:
932
- new_msgs.append(m)
933
- if changed:
934
- req_body2["messages"] = new_msgs
935
- meta["inference_request"] = req_body2
954
+ if changed:
955
+ req_body2["messages"] = new_msgs
956
+ meta["inference_request"] = req_body2
957
+ logger.debug(f"🔊 [POLICY_ROUTES] Flattened messages for non-vision model")
958
+ else:
959
+ logger.debug(f"🔊 [POLICY_ROUTES] Preserving multimodal content for vision model")
960
+
961
+ # DEBUG: Log final message structure before calling inference
962
+ final_req = meta.get("inference_request", {})
963
+ if isinstance(final_req, dict):
964
+ final_msgs = final_req.get("messages", [])
965
+ logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Sending {len(final_msgs)} messages to inference")
966
+ for idx, msg in enumerate(final_msgs):
967
+ if isinstance(msg, dict):
968
+ content = msg.get("content")
969
+ logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Message[{idx}]: type={type(content).__name__}, is_list={isinstance(content, list)}")
970
+ if isinstance(content, list):
971
+ logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Content list has {len(content)} items")
972
+ for part_idx, part in enumerate(content[:3]): # Show first 3 items
973
+ if isinstance(part, dict):
974
+ logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Part[{part_idx}]: type={part.get('type')}")
936
975
 
937
976
  _t_start = _t.time()
938
977
  call_started_at = datetime.utcnow()
@@ -491,6 +491,11 @@ class RolloutTracingContext:
491
491
  getattr(request.record, "trace_format", "compact") or "compact"
492
492
  ).lower()
493
493
  self.return_trace = bool(getattr(request.record, "return_trace", False))
494
+ logger.warning(
495
+ "[TRACE_DEBUG] RolloutTracingContext init: trace_format=%s return_trace=%s",
496
+ self.trace_format,
497
+ self.return_trace,
498
+ )
494
499
  self.sft_output_dir = getattr(fastapi_request.app.state, "sft_output_dir", None)
495
500
  self.session_trace = None
496
501
  self.metadata_updates: dict[str, Any] = {}
@@ -590,7 +595,7 @@ class RolloutTracingContext:
590
595
  # Debug: Check message count
591
596
  if self.tracer and self.tracer._current_trace:
592
597
  msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
593
- logger.info(f"[TRACE_DEBUG] After record_policy_prompts: {msg_count} messages in trace")
598
+ logger.warning("[TRACE_DEBUG] After record_policy_prompts: %s messages", msg_count)
594
599
 
595
600
  def _content_to_text(self, content: Any) -> str:
596
601
  if isinstance(content, str):
@@ -669,6 +674,11 @@ class RolloutTracingContext:
669
674
  message_type="assistant", # Map to standard assistant message type
670
675
  metadata={**self._message_metadata(), "is_tool_call": True},
671
676
  )
677
+ if self.tracer._current_trace:
678
+ logger.warning(
679
+ "[TRACE_DEBUG] After tool invocation: messages=%s",
680
+ len(self.tracer._current_trace.markov_blanket_message_history),
681
+ )
672
682
  except Exception as exc:
673
683
  logger.debug("TRACING_TOOL_MSG_FAIL: %s", exc)
674
684
 
@@ -985,10 +995,19 @@ class RolloutTracingContext:
985
995
  def build_trace_payload(self, session_trace: Any) -> dict[str, Any] | None:
986
996
  if not self.return_trace or session_trace is None:
987
997
  return None
988
- if self.trace_format == "full":
998
+
999
+ # For both "full" and "structured" formats, return the complete session trace
1000
+ # The CLI (synth-ai eval) expects this for proper trace storage
1001
+ if self.trace_format in ("full", "structured"):
989
1002
  payload = session_trace.to_dict()
990
1003
  payload.setdefault("metadata", {}).update(self.metadata_updates)
1004
+ logger.warning(
1005
+ "[TRACE_DEBUG] build_trace_payload returning structured trace with messages=%s",
1006
+ len(payload.get("markov_blanket_message_history") or []),
1007
+ )
991
1008
  return payload
1009
+
1010
+ # For "compact" format, return only summary stats
992
1011
  metadata = dict(session_trace.metadata)
993
1012
  metadata.update(self.metadata_updates)
994
1013
  return {
@@ -1173,14 +1192,6 @@ async def execute_rollout(
1173
1192
  logger.debug(f"TRACER_FACTORY_FAIL: {exc}")
1174
1193
  tracing_context = RolloutTracingContext(tracer_instance, request, req)
1175
1194
  await tracing_context.start_session()
1176
- # Print whether tracing is active for this rollout
1177
- try:
1178
- print(
1179
- f"[rollout] tracing enabled={bool(tracing_context.enabled)} run_id={request.run_id}",
1180
- flush=True,
1181
- )
1182
- except Exception:
1183
- pass
1184
1195
 
1185
1196
  # Register run
1186
1197
  registry.register_run(request.run_id)
@@ -1625,16 +1636,21 @@ async def execute_rollout(
1625
1636
 
1626
1637
  elif op == "env":
1627
1638
  if not pending_tool_calls:
1639
+ # Instead of failing, inject a no-op action to keep the rollout going
1628
1640
  with contextlib.suppress(Exception):
1629
1641
  logger.warning(
1630
- "POLICY_STEP_FAIL: missing tool_calls; failing rollout run_id=%s op_idx=%s",
1642
+ "POLICY_STEP_NOOP: missing tool_calls; injecting noop action run_id=%s op_idx=%s",
1631
1643
  request.run_id,
1632
1644
  str(op_idx),
1633
1645
  )
1634
- raise HTTPException(
1635
- status_code=500,
1636
- detail="policy_step_failed: missing tool_calls (no_tool_calls)",
1637
- )
1646
+ # Create a noop tool call in the format expected by the environment
1647
+ pending_tool_calls = [
1648
+ {
1649
+ "id": f"noop_{op_idx}",
1650
+ "tool": "interact",
1651
+ "arguments": {"action": "noop"},
1652
+ }
1653
+ ]
1638
1654
 
1639
1655
  # Environment step
1640
1656
  from .environment_routes import EnvStepRequest, step_environment
@@ -1 +1,2 @@
1
1
 
2
+
@@ -2,7 +2,7 @@
2
2
 
3
3
  This mirrors the structure of the Crafter task app wrapper while delegating
4
4
  all configuration to the colocated `grpo_enron.py` module. Normal usage should
5
- prefer invoking `uvx synth-ai serve grpo-enron`, but this module remains for
5
+ prefer invoking `uvx synth-ai deploy --runtime uvicorn grpo-enron`, but this module remains for
6
6
  direct execution or importing the FastAPI app object.
7
7
  """
8
8
 
@@ -3,7 +3,7 @@
3
3
  This directory hosts the legacy entrypoint for the math single-step task app. Prefer starting the app via:
4
4
 
5
5
  ```bash
6
- uvx synth-ai serve math-single-step --env-file examples/rl/.env --port 8101
6
+ uvx synth-ai deploy --runtime uvicorn math-single-step --env-file examples/rl/.env --port 8101
7
7
  ```
8
8
 
9
9
  If you need to run it directly (e.g., for Modal `modal deploy` compatibility), use:
@@ -19,4 +19,3 @@ Environment variables:
19
19
  - `MATH_DATASET_DEFAULT_SPLIT`, `MATH_DATASET_VALIDATION_SPLIT`, `MATH_DATASET_TEST_SPLIT`
20
20
 
21
21
  The task app enforces a single `math_submit` tool call per episode, enabling RL to reward correct final answers and penalise missing or malformed submissions.
22
-
@@ -17,7 +17,7 @@ A reinforcement learning environment for Pokémon Red using PyBoy emulation with
17
17
 
18
18
  ```bash
19
19
  # From synth-ai root
20
- uv run -m synth_ai task-app serve pokemon_red --port 8913
20
+ uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8913
21
21
  ```
22
22
 
23
23
  ### 2. Run a Random Rollout
@@ -232,7 +232,7 @@ uv add pyboy
232
232
  lsof -ti :8913 | xargs -r kill -9
233
233
 
234
234
  # Or use a different port
235
- uv run -m synth_ai task-app serve pokemon_red --port 8914
235
+ uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8914
236
236
  ```
237
237
 
238
238
  ## Examples
@@ -249,7 +249,7 @@ cd /Users/joshpurtell/Documents/GitHub/synth-ai
249
249
  echo "OPENAI_API_KEY=sk-..." >> .env
250
250
 
251
251
  # 2. Start the task app server (in background)
252
- nohup sh -c 'printf "n\n" | uv run -m synth_ai task-app serve pokemon_red --port 8913 --no-reload' > nohup_pokemon.log 2>&1 &
252
+ nohup sh -c 'printf "n\n" | uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8913 --no-reload' > nohup_pokemon.log 2>&1 &
253
253
 
254
254
  # Wait for startup
255
255
  sleep 8
@@ -354,4 +354,3 @@ TOTAL REWARD: 705 points
354
354
  - **PyBoy**: Game Boy emulator - https://github.com/Baekalfen/PyBoy
355
355
  - **Pokémon Red Disassembly**: RAM map reference - https://github.com/pret/pokered
356
356
  - **Datacrystal.org**: Memory address documentation
357
-
@@ -1,11 +1,12 @@
1
- # Evaluation config for Pokemon Red with image-only input
1
+ # Evaluation config for Pokemon Red with image-only input and NEW REWARD SYSTEM
2
2
  # This config uses GPT-4o mini with only image data (no text observations)
3
+ # Uses the comprehensive reward system with deterministic progress milestones
3
4
 
4
5
  [eval]
5
6
  app_id = "pokemon_red"
6
7
  model = "gpt-4o-mini-2024-07-18"
7
- seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
8
- max_turns = 10
8
+ seeds = [0, 1, 2, 3, 4] # Test with fewer seeds for quick results
9
+ max_turns = 20 # Allow more turns to see progress
9
10
  concurrency = 1 # Keep low initially to avoid issues
10
11
  env_name = "pokemon_red"
11
12
  policy_name = "pokemon_red_policy"
@@ -13,7 +14,7 @@ trace_format = "full"
13
14
  return_trace = true
14
15
 
15
16
  [eval.env_config]
16
- max_steps_per_episode = 10
17
+ max_steps_per_episode = 20
17
18
 
18
19
  [eval.policy_config]
19
20
  provider = "openai"
@@ -24,6 +25,6 @@ top_p = 0.95
24
25
  max_tokens = 512
25
26
  use_vision = true
26
27
  image_only_mode = true
27
- max_llm_calls = 10
28
+ max_llm_calls = 20
28
29
 
29
30
 
@@ -129,7 +129,7 @@ async def main():
129
129
  print("✓ Server is healthy")
130
130
  except Exception as e:
131
131
  print(f"❌ Server not responding: {e}")
132
- print(f" Start it with: uv run -m synth_ai task-app serve pokemon_red --port 8913")
132
+ print(f" Start it with: uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8913")
133
133
  return
134
134
 
135
135
  # Check API key
@@ -222,4 +222,3 @@ async def main():
222
222
 
223
223
  if __name__ == "__main__":
224
224
  asyncio.run(main())
225
-
@@ -12,7 +12,7 @@ from synth_ai.environments.examples.red.taskset import INSTANCE as RED_DEFAULT_I
12
12
  from synth_ai.environments.examples.red.engine_helpers.reward_library.pallet_town_progression import (
13
13
  PalletTownProgressionCompositeReward,
14
14
  )
15
- from synth_ai.task.apps import TaskAppEntry, register_task_app
15
+ from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
16
16
  from synth_ai.task.contracts import (
17
17
  RolloutMetrics,
18
18
  RolloutRequest,
@@ -260,8 +260,10 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
260
260
  {
261
261
  "role": "system",
262
262
  "content": (
263
- "You are controlling Pokémon Red. Respond with a single tool call named 'press_button' "
264
- "with JSON arguments {button: 'A|B|UP|DOWN|LEFT|RIGHT|START|SELECT', frames: 1-120}."
263
+ "You are controlling Pokémon Red, a classic Game Boy game. You can see the game screen in the images provided. "
264
+ "Your goal is to make progress in the game. Use the execute_sequence tool to press buttons. "
265
+ "Choose appropriate button presses based on what you see in the game screen. "
266
+ "Always respond with exactly one tool call in the format: <tool_call>{\"name\": \"execute_sequence\", \"arguments\": {...}}</tool_call>"
265
267
  ),
266
268
  },
267
269
  {
@@ -788,11 +790,40 @@ def build_config() -> TaskAppConfig:
788
790
  register_task_app(
789
791
  entry=TaskAppEntry(
790
792
  app_id="pokemon_red",
791
- description="Pokémon Red demo task app",
793
+ description="Pokémon Red demo task app with vision support",
792
794
  config_factory=build_config,
793
795
  aliases=("pokemon_red_demo",),
794
796
  env_files=(),
795
- modal=None,
797
+ modal=ModalDeploymentConfig(
798
+ app_name="pokemon-red-vision-task-app",
799
+ python_version="3.11",
800
+ pip_packages=(
801
+ "fastapi>=0.100.0",
802
+ "uvicorn>=0.23.0",
803
+ "pydantic>=2.0.0",
804
+ "numpy>=1.24.0",
805
+ "aiohttp>=3.8.0",
806
+ "httpx>=0.24.0",
807
+ "python-dotenv>=1.0.1",
808
+ # Tracing/DB runtime deps
809
+ "sqlalchemy>=2.0.42",
810
+ "aiosqlite>=0.21.0",
811
+ "greenlet>=3.2.3",
812
+ # Pokemon Red environment
813
+ "pyboy>=2.0.0",
814
+ "pillow>=9.0.0",
815
+ ),
816
+ extra_local_dirs=(
817
+ # Mount repo root so local modules resolve when deployed on Modal
818
+ ("/Users/joshpurtell/Documents/GitHub/synth-ai", "/opt/synth_ai_repo"),
819
+ ("/Users/joshpurtell/Documents/GitHub/synth-ai/synth_ai", "/opt/synth_ai_repo/synth_ai"),
820
+ ("/Users/joshpurtell/Documents/GitHub/synth-ai/examples/task_apps/pokemon_red", "/opt/synth_ai_repo/examples/task_apps/pokemon_red"),
821
+ ),
822
+ secret_names=("openai-api-key", "groq-api-key"),
823
+ memory=16384,
824
+ cpu=4.0,
825
+ max_containers=10,
826
+ ),
796
827
  )
797
828
  )
798
829
 
@@ -20,7 +20,7 @@ Sokoban is a classic puzzle game where the player must push boxes onto target lo
20
20
  cd /path/to/synth-ai
21
21
 
22
22
  # Start the Sokoban task app on port 8911
23
- uvx synth-ai task-app serve sokoban --port 8911
23
+ uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
24
24
  ```
25
25
 
26
26
  The server will be available at `http://localhost:8911`.
@@ -283,7 +283,7 @@ lsof -i :8911
283
283
  kill -9 $(lsof -ti :8911)
284
284
 
285
285
  # Restart
286
- uvx synth-ai task-app serve sokoban --port 8911
286
+ uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
287
287
  ```
288
288
 
289
289
  ## Examples
@@ -304,4 +304,3 @@ To add new features:
304
304
  ## License
305
305
 
306
306
  MIT
307
-
@@ -1,24 +1,22 @@
1
1
  # Verilog Eval Config for Groq Qwen3-32B
2
- # Quick eval to test Verilog task app before RL training
3
-
4
- [task_app]
5
- # Update this with your Modal URL after deployment
6
- url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
2
+ # Quick eval to test the Verilog task app before RL training
7
3
 
8
4
  [eval]
9
- num_episodes = 3 # Quick test with 3 seeds
5
+ app_id = "grpo-verilog"
6
+ task_app_url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
7
+ model = "groq:qwen3-32b"
10
8
  seeds = [0, 1, 2]
11
- max_steps = 15 # More steps for Verilog compilation chains
9
+ max_turns = 15
10
+ concurrency = 1
11
+ return_trace = true
12
+ trace_format = "structured"
13
+
14
+ [eval.env_config]
15
+ difficulty = "medium"
12
16
 
13
- [policy]
17
+ [eval.policy_config]
14
18
  provider = "groq"
15
19
  model = "qwen/qwen3-32b"
16
20
  temperature = 0.2
17
21
  max_tokens = 768
18
22
  inference_url = "https://api.groq.com/openai/v1/chat/completions"
19
-
20
- [env]
21
- difficulty = "medium" # Can be "easy", "medium", or "hard"
22
-
23
-
24
-
@@ -1,7 +1,7 @@
1
1
  """Compatibility wrapper for the GRPO Verilog task app.
2
2
 
3
3
  This mirrors the Crafter task app wrapper while delegating configuration to
4
- `grpo_verilog.py`. Normal usage should prefer `uvx synth-ai serve grpo-verilog`,
4
+ `grpo_verilog.py`. Normal usage should prefer `uvx synth-ai deploy --runtime uvicorn grpo-verilog`,
5
5
  but the module remains for direct execution or importing the FastAPI app.
6
6
  """
7
7
 
examples/vlm/README.md CHANGED
@@ -21,8 +21,8 @@ plumbing with lightweight utilities for dataset curation and training.
21
21
  3. **Export multimodal SFT rows**
22
22
  ```
23
23
  uv run python examples/warming_up_to_rl/export_trace_sft.py \
24
- --db traces/v3/synth_ai.db \
25
- --output examples/vlm/output/crafter_traces_full.jsonl
24
+ --db traces/v3/task_app_traces_<timestamp>.db \
25
+ --output examples/vlm/output/crafter_sft_full.jsonl
26
26
  ```
27
27
  The exporter now emits `metadata.has_image`, `metadata.user_has_image`, and
28
28
  `metadata.assistant_has_image` flags per turn.
@@ -30,7 +30,7 @@ plumbing with lightweight utilities for dataset curation and training.
30
30
  4. **Filter to image-rich turns**
31
31
  ```
32
32
  uv run python examples/vlm/filter_image_rows.py \
33
- --input examples/vlm/output/crafter_traces_full.jsonl \
33
+ --input examples/vlm/output/crafter_sft_full.jsonl \
34
34
  --output examples/vlm/output/crafter_vlm_dataset.jsonl
35
35
  ```
36
36
 
@@ -1,3 +1,8 @@
1
+ [algorithm]
2
+ type = "offline"
3
+ method = "sft"
4
+ variety = "fft"
5
+
1
6
  [job]
2
7
  model = "openai/gpt-4o-mini-2024-07-18"
3
8
  modalities = ["text", "image"]
@@ -24,6 +24,7 @@ import asyncio
24
24
  import base64
25
25
  import json
26
26
  import os
27
+ from contextlib import suppress
27
28
  from pathlib import Path
28
29
  from typing import Any
29
30
  from uuid import uuid4
@@ -62,7 +63,7 @@ class EpisodeResult:
62
63
  if unlocked:
63
64
  self.achievements.add(str(name))
64
65
  reward = obs.get("reward_last_step")
65
- if isinstance(reward, (int, float)):
66
+ if isinstance(reward, int | float):
66
67
  self.total_reward += float(reward)
67
68
 
68
69
 
@@ -107,11 +108,8 @@ def _decode_and_save_image(observation: dict[str, Any], path: Path) -> None:
107
108
  if not isinstance(base64_data, str) or not base64_data:
108
109
  return
109
110
  path.parent.mkdir(parents=True, exist_ok=True)
110
- try:
111
+ with suppress(Exception):
111
112
  path.write_bytes(base64.b64decode(base64_data))
112
- except Exception:
113
- # Best-effort; corrupted frames should not halt rollout
114
- pass
115
113
 
116
114
 
117
115
  def _normalise_openai_request(payload: dict[str, Any], model: str, temperature: float) -> dict[str, Any]:
@@ -8,7 +8,7 @@ output now that each record's metadata includes `has_image`, `user_has_image`, a
8
8
 
9
9
  Usage:
10
10
  uv run python examples/vlm/filter_image_rows.py \
11
- --input examples/sft/ft_data/crafter_traces.jsonl \
11
+ --input examples/sft/ft_data/crafter_sft.jsonl \
12
12
  --output examples/vlm/output/crafter_vlm_dataset.jsonl
13
13
  """
14
14
 
@@ -224,7 +224,7 @@ async def _run_episode(
224
224
  if unlocked:
225
225
  achievements.add(str(name))
226
226
  reward = obs.get("reward_last_step")
227
- if isinstance(reward, (int, float)):
227
+ if isinstance(reward, int | float):
228
228
  total_reward += float(reward)
229
229
 
230
230
  _save_observation_frame(env_response, frames_dir / f"step_{step_idx + 1:03d}.png")
@@ -263,7 +263,7 @@ def _summarise(results: list[EpisodeResult]) -> dict[str, Any]:
263
263
  "mean_steps": round(mean_steps, 2),
264
264
  "mean_achievements": round(mean_achievements, 2),
265
265
  "total_tool_calls": sum(r.tool_calls for r in mode_results),
266
- "achievements": {name: count for name, count in sorted(achievement_counts.items())},
266
+ "achievements": dict(sorted(achievement_counts.items())),
267
267
  }
268
268
  return summary
269
269