synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (354) hide show
  1. examples/README.md +1 -0
  2. examples/analyze_semantic_words.sh +2 -2
  3. examples/blog_posts/pokemon_vl/README.md +98 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  5. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  6. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  7. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  8. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  9. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  12. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  13. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  15. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  16. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  17. examples/multi_step/SFT_README.md +147 -0
  18. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
  20. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  21. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  22. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  23. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  24. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  25. examples/multi_step/convert_traces_to_sft.py +84 -0
  26. examples/multi_step/run_sft_qwen30b.sh +45 -0
  27. examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
  28. examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
  29. examples/qwen_coder/configs/coder_lora_small.toml +1 -2
  30. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  31. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  32. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  33. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  34. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  35. examples/qwen_vl/QUICKSTART.md +327 -0
  36. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  37. examples/qwen_vl/README.md +152 -0
  38. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  39. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  40. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  41. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  42. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  43. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  44. examples/qwen_vl/__init__.py +2 -0
  45. examples/qwen_vl/collect_data_via_cli.md +415 -0
  46. examples/qwen_vl/collect_vision_traces.py +368 -0
  47. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  48. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  49. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  50. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  51. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  52. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  53. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  54. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  55. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  56. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  57. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  58. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  59. examples/qwen_vl/run_vision_comparison.sh +61 -0
  60. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  61. examples/qwen_vl/test_image_validation.py +201 -0
  62. examples/qwen_vl/test_sft_vision_data.py +110 -0
  63. examples/rl/README.md +6 -6
  64. examples/rl/configs/eval_base_qwen.toml +17 -0
  65. examples/rl/configs/eval_rl_qwen.toml +13 -0
  66. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  67. examples/rl/configs/rl_from_base_qwen17.toml +79 -0
  68. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  69. examples/rl/run_eval.py +436 -0
  70. examples/rl/run_rl_and_save.py +111 -0
  71. examples/rl/task_app/README.md +21 -0
  72. examples/rl/task_app/math_single_step.py +990 -0
  73. examples/rl/task_app/math_task_app.py +111 -0
  74. examples/run_crafter_demo.sh +2 -2
  75. examples/sft/README.md +6 -6
  76. examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
  77. examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
  78. examples/sft/evaluate.py +2 -4
  79. examples/sft/export_dataset.py +7 -4
  80. examples/swe/task_app/README.md +33 -3
  81. examples/swe/task_app/grpo_swe_mini.py +4 -1
  82. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  83. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  84. examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
  85. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  86. examples/swe/task_app/hosted/policy_routes.py +0 -2
  87. examples/swe/task_app/hosted/rollout.py +0 -8
  88. examples/swe/task_app/morph_backend.py +178 -0
  89. examples/task_apps/crafter/task_app/README.md +1 -1
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
  91. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
  97. examples/task_apps/enron/__init__.py +1 -0
  98. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  99. examples/task_apps/math/README.md +1 -2
  100. examples/task_apps/pokemon_red/README.md +3 -4
  101. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  102. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  103. examples/task_apps/pokemon_red/task_app.py +36 -5
  104. examples/task_apps/sokoban/README.md +2 -3
  105. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  106. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  107. examples/vlm/README.md +3 -3
  108. examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
  109. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  110. examples/vlm/filter_image_rows.py +1 -1
  111. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  112. examples/warming_up_to_rl/_utils.py +92 -0
  113. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  114. examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
  115. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  116. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  117. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  118. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  119. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  120. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  121. examples/warming_up_to_rl/readme.md +63 -132
  122. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  123. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  124. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  125. examples/warming_up_to_rl/task_app/README.md +42 -0
  126. examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
  127. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  128. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  129. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  130. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  131. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  132. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  133. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  134. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  135. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  136. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  137. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  138. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  139. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  147. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  148. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  152. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  153. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  154. synth_ai/__init__.py +44 -30
  155. synth_ai/_utils/__init__.py +47 -0
  156. synth_ai/_utils/base_url.py +10 -0
  157. synth_ai/_utils/http.py +10 -0
  158. synth_ai/_utils/prompts.py +10 -0
  159. synth_ai/_utils/task_app_state.py +12 -0
  160. synth_ai/_utils/user_config.py +10 -0
  161. synth_ai/api/models/supported.py +144 -7
  162. synth_ai/api/train/__init__.py +13 -1
  163. synth_ai/api/train/builders.py +9 -3
  164. synth_ai/api/train/cli.py +155 -17
  165. synth_ai/api/train/config_finder.py +18 -11
  166. synth_ai/api/train/configs/__init__.py +8 -1
  167. synth_ai/api/train/configs/rl.py +32 -7
  168. synth_ai/api/train/configs/sft.py +6 -2
  169. synth_ai/api/train/configs/shared.py +59 -2
  170. synth_ai/api/train/env_resolver.py +13 -10
  171. synth_ai/auth/credentials.py +119 -0
  172. synth_ai/cli/__init__.py +61 -69
  173. synth_ai/cli/_modal_wrapper.py +7 -5
  174. synth_ai/cli/_typer_patch.py +0 -2
  175. synth_ai/cli/_validate_task_app.py +22 -4
  176. synth_ai/cli/commands/__init__.py +17 -0
  177. synth_ai/cli/commands/demo/__init__.py +6 -0
  178. synth_ai/cli/commands/demo/core.py +163 -0
  179. synth_ai/cli/commands/deploy/__init__.py +23 -0
  180. synth_ai/cli/commands/deploy/core.py +614 -0
  181. synth_ai/cli/commands/deploy/errors.py +72 -0
  182. synth_ai/cli/commands/deploy/validation.py +11 -0
  183. synth_ai/cli/commands/eval/__init__.py +19 -0
  184. synth_ai/cli/commands/eval/core.py +1109 -0
  185. synth_ai/cli/commands/eval/errors.py +81 -0
  186. synth_ai/cli/commands/eval/validation.py +133 -0
  187. synth_ai/cli/commands/filter/__init__.py +12 -0
  188. synth_ai/cli/commands/filter/core.py +388 -0
  189. synth_ai/cli/commands/filter/errors.py +55 -0
  190. synth_ai/cli/commands/filter/validation.py +77 -0
  191. synth_ai/cli/commands/help/__init__.py +177 -0
  192. synth_ai/cli/commands/help/core.py +73 -0
  193. synth_ai/cli/commands/status/__init__.py +64 -0
  194. synth_ai/cli/commands/status/client.py +192 -0
  195. synth_ai/cli/commands/status/config.py +92 -0
  196. synth_ai/cli/commands/status/errors.py +20 -0
  197. synth_ai/cli/commands/status/formatters.py +164 -0
  198. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  199. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  200. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  201. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  202. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  203. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  204. synth_ai/cli/commands/status/utils.py +114 -0
  205. synth_ai/cli/commands/train/__init__.py +53 -0
  206. synth_ai/cli/commands/train/core.py +21 -0
  207. synth_ai/cli/commands/train/errors.py +117 -0
  208. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  209. synth_ai/cli/commands/train/judge_validation.py +304 -0
  210. synth_ai/cli/commands/train/validation.py +443 -0
  211. synth_ai/cli/demo.py +2 -162
  212. synth_ai/cli/deploy/__init__.py +28 -0
  213. synth_ai/cli/deploy/core.py +5 -0
  214. synth_ai/cli/deploy/errors.py +23 -0
  215. synth_ai/cli/deploy/validation.py +5 -0
  216. synth_ai/cli/eval/__init__.py +36 -0
  217. synth_ai/cli/eval/core.py +5 -0
  218. synth_ai/cli/eval/errors.py +31 -0
  219. synth_ai/cli/eval/validation.py +5 -0
  220. synth_ai/cli/filter/__init__.py +28 -0
  221. synth_ai/cli/filter/core.py +5 -0
  222. synth_ai/cli/filter/errors.py +23 -0
  223. synth_ai/cli/filter/validation.py +5 -0
  224. synth_ai/cli/legacy_root_backup.py +3 -1
  225. synth_ai/cli/lib/__init__.py +10 -0
  226. synth_ai/cli/lib/task_app_discovery.py +7 -0
  227. synth_ai/cli/lib/task_app_env.py +518 -0
  228. synth_ai/cli/modal_serve/__init__.py +12 -0
  229. synth_ai/cli/modal_serve/core.py +14 -0
  230. synth_ai/cli/modal_serve/errors.py +8 -0
  231. synth_ai/cli/modal_serve/validation.py +11 -0
  232. synth_ai/cli/recent.py +2 -1
  233. synth_ai/cli/serve/__init__.py +12 -0
  234. synth_ai/cli/serve/core.py +14 -0
  235. synth_ai/cli/serve/errors.py +8 -0
  236. synth_ai/cli/serve/validation.py +11 -0
  237. synth_ai/cli/setup.py +21 -0
  238. synth_ai/cli/status.py +7 -126
  239. synth_ai/cli/task_app_deploy.py +7 -0
  240. synth_ai/cli/task_app_list.py +25 -0
  241. synth_ai/cli/task_app_modal_serve.py +11 -0
  242. synth_ai/cli/task_app_serve.py +11 -0
  243. synth_ai/cli/task_apps.py +110 -1499
  244. synth_ai/cli/traces.py +1 -1
  245. synth_ai/cli/train/__init__.py +12 -0
  246. synth_ai/cli/train/core.py +21 -0
  247. synth_ai/cli/train/errors.py +8 -0
  248. synth_ai/cli/train/validation.py +24 -0
  249. synth_ai/cli/train.py +5 -0
  250. synth_ai/cli/turso.py +1 -1
  251. synth_ai/cli/watch.py +1 -1
  252. synth_ai/demos/__init__.py +10 -0
  253. synth_ai/demos/core/__init__.py +28 -1
  254. synth_ai/demos/crafter/__init__.py +1 -0
  255. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  256. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  257. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  258. synth_ai/demos/demo_registry.py +176 -0
  259. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  260. synth_ai/demos/math/__init__.py +1 -0
  261. synth_ai/demos/math/_common.py +16 -0
  262. synth_ai/demos/math/app.py +38 -0
  263. synth_ai/demos/math/config.toml +76 -0
  264. synth_ai/demos/math/deploy_modal.py +54 -0
  265. synth_ai/demos/math/modal_task_app.py +702 -0
  266. synth_ai/demos/math/task_app_entry.py +51 -0
  267. synth_ai/environments/environment/core.py +7 -1
  268. synth_ai/environments/examples/bandit/engine.py +0 -1
  269. synth_ai/environments/examples/bandit/environment.py +0 -1
  270. synth_ai/environments/examples/red/engine.py +33 -12
  271. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  272. synth_ai/environments/examples/red/environment.py +26 -0
  273. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  274. synth_ai/environments/examples/wordle/environment.py +0 -1
  275. synth_ai/evals/base.py +16 -5
  276. synth_ai/evals/client.py +1 -1
  277. synth_ai/http.py +8 -22
  278. synth_ai/inference/client.py +1 -1
  279. synth_ai/judge_schemas.py +4 -5
  280. synth_ai/learning/client.py +1 -1
  281. synth_ai/learning/health.py +1 -1
  282. synth_ai/learning/jobs.py +1 -1
  283. synth_ai/learning/rl/client.py +4 -2
  284. synth_ai/learning/rl/env_keys.py +1 -1
  285. synth_ai/learning/rl/secrets.py +1 -1
  286. synth_ai/learning/sft/client.py +1 -1
  287. synth_ai/learning/sft/data.py +407 -4
  288. synth_ai/learning/validators.py +4 -1
  289. synth_ai/streaming/__init__.py +29 -0
  290. synth_ai/streaming/config.py +94 -0
  291. synth_ai/streaming/handlers.py +469 -0
  292. synth_ai/streaming/streamer.py +301 -0
  293. synth_ai/streaming/types.py +95 -0
  294. synth_ai/task/apps/__init__.py +4 -2
  295. synth_ai/task/config.py +6 -4
  296. synth_ai/task/rubrics/__init__.py +1 -2
  297. synth_ai/task/rubrics/loaders.py +14 -10
  298. synth_ai/task/rubrics.py +219 -0
  299. synth_ai/task/trace_correlation_helpers.py +24 -11
  300. synth_ai/task/tracing_utils.py +14 -3
  301. synth_ai/task/validators.py +0 -1
  302. synth_ai/tracing_v3/abstractions.py +3 -3
  303. synth_ai/tracing_v3/config.py +15 -13
  304. synth_ai/tracing_v3/constants.py +21 -0
  305. synth_ai/tracing_v3/db_config.py +3 -1
  306. synth_ai/tracing_v3/decorators.py +10 -7
  307. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  308. synth_ai/tracing_v3/migration_helper.py +1 -2
  309. synth_ai/tracing_v3/session_tracer.py +7 -7
  310. synth_ai/tracing_v3/storage/base.py +29 -29
  311. synth_ai/tracing_v3/storage/config.py +3 -3
  312. synth_ai/tracing_v3/turso/daemon.py +8 -9
  313. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  314. synth_ai/tracing_v3/utils.py +2 -2
  315. synth_ai/utils/__init__.py +101 -0
  316. synth_ai/utils/base_url.py +94 -0
  317. synth_ai/utils/cli.py +131 -0
  318. synth_ai/utils/env.py +294 -0
  319. synth_ai/utils/http.py +172 -0
  320. synth_ai/utils/modal.py +308 -0
  321. synth_ai/utils/process.py +212 -0
  322. synth_ai/utils/prompts.py +39 -0
  323. synth_ai/utils/sqld.py +122 -0
  324. synth_ai/utils/task_app_discovery.py +882 -0
  325. synth_ai/utils/task_app_env.py +186 -0
  326. synth_ai/utils/task_app_state.py +318 -0
  327. synth_ai/utils/user_config.py +137 -0
  328. synth_ai/v0/config/__init__.py +1 -5
  329. synth_ai/v0/config/base_url.py +1 -7
  330. synth_ai/v0/tracing/config.py +1 -1
  331. synth_ai/v0/tracing/decorators.py +1 -1
  332. synth_ai/v0/tracing/upload.py +1 -1
  333. synth_ai/v0/tracing_v1/config.py +1 -1
  334. synth_ai/v0/tracing_v1/decorators.py +1 -1
  335. synth_ai/v0/tracing_v1/upload.py +1 -1
  336. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
  337. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
  338. synth_ai/cli/man.py +0 -106
  339. synth_ai/cli/tui.py +0 -57
  340. synth_ai/compound/cais.py +0 -0
  341. synth_ai/core/experiment.py +0 -13
  342. synth_ai/core/system.py +0 -15
  343. synth_ai/demo_registry.py +0 -295
  344. synth_ai/handshake.py +0 -109
  345. synth_ai/tui/__init__.py +0 -5
  346. synth_ai/tui/__main__.py +0 -13
  347. synth_ai/tui/cli/__init__.py +0 -1
  348. synth_ai/tui/cli/query_experiments.py +0 -164
  349. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  350. synth_ai/tui/dashboard.py +0 -906
  351. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  354. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,827 @@
1
+ """Task App configuration for the GRPO Crafter example."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ import sys
8
+ from collections.abc import Iterable, Sequence
9
+ from contextlib import suppress
10
+ from dataclasses import dataclass
11
+ from datetime import UTC, datetime
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from fastapi import HTTPException
16
+ from pydantic import BaseModel
17
+
18
+ from pydantic import BaseModel
19
+
20
+ from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
21
+ from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
22
+ from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
23
+ from synth_ai.task.json import to_jsonable # noqa: F401 (imported for side-effect compatibility)
24
+ from synth_ai.task.rubrics import load_rubric
25
+ from synth_ai.task.server import ProxyConfig, RubricBundle, TaskAppConfig
26
+ from synth_ai.task.tracing_utils import (
27
+ build_tracer_factory,
28
+ resolve_sft_output_dir,
29
+ resolve_tracing_db_url,
30
+ tracing_env_enabled,
31
+ )
32
+ from synth_ai.tracing_v3.session_tracer import SessionTracer
33
+
34
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Default op sequence: ten consecutive ("agent", "env") pairs.
DEFAULT_ALIAS_OPS: list[str] = [op for _ in range(10) for op in ("agent", "env")]

# Default step-reward settings.
DEFAULT_ALIAS_STEP_REWARDS: dict[str, Any] = dict(
    enabled=True,
    mode="decision_stepwise",
    indicator_lambda=1.0,
    step_beta=0.0,
)

# Fully resolved path of this module file; anchor for the root lookups below.
_HERE = Path(__file__).resolve()
45
+
46
+
47
def _resolve_repo_root() -> Path:
    """Return the first plausible Synth AI repo root, or a path near this file.

    Candidates are tried in order: ``$SYNTH_AI_REPO_ROOT`` (when set and
    non-empty), ``/opt/synth_ai_repo``, then this file's directory and each of
    its ancestors. A candidate wins when it exists and contains a
    ``pyproject.toml``, a ``uv.lock``, or a ``synth_ai`` directory. When no
    candidate qualifies, ``_HERE.parents[3]`` is returned, or ``_HERE.parent``
    if the path has fewer than four ancestors.
    """

    def _looks_like_root(directory: Path) -> bool:
        # Project metadata files are checked first, then the package directory.
        markers = (directory / "pyproject.toml", directory / "uv.lock")
        return any(marker.exists() for marker in markers) or (directory / "synth_ai").is_dir()

    search_order: list[Path] = []
    configured = os.getenv("SYNTH_AI_REPO_ROOT")
    if configured:
        search_order.append(Path(configured).expanduser())
    search_order += [Path("/opt/synth_ai_repo"), _HERE.parent, *_HERE.parents]

    for location in search_order:
        try:
            absolute = location.resolve()
        except Exception:
            # A candidate that cannot be resolved is simply skipped.
            continue
        if absolute.exists() and _looks_like_root(absolute):
            return absolute

    ancestors = _HERE.parents
    return ancestors[3] if len(ancestors) > 3 else _HERE.parent
73
+
74
+
75
def _resolve_task_app_root(repo_root: Path) -> Path:
    """Find the ``task_app`` directory for this example.

    Lookup order:

    1. ``<repo_root>/examples/warming_up_to_rl/task_app`` if it is a directory.
    2. This file's directory, if it contains ``synth_envs_hosted``.
    3. The first ancestor of this file that contains ``synth_envs_hosted``.
    4. ``/opt/synth_ai_repo/examples/warming_up_to_rl/task_app`` if it exists.

    If none match, this file's (resolved) directory is returned.
    """

    def _has_hosted_envs(directory: Path) -> bool:
        return (directory / "synth_envs_hosted").is_dir()

    in_repo = repo_root.joinpath("examples", "warming_up_to_rl", "task_app").resolve()
    if in_repo.is_dir():
        return in_repo

    module_dir = _HERE.parent.resolve()
    if _has_hosted_envs(module_dir):
        return module_dir

    # Ancestors are resolved lazily, one at a time, until a match is found.
    ancestor_hit = next(
        (d for d in (p.resolve() for p in _HERE.parents) if _has_hosted_envs(d)),
        None,
    )
    if ancestor_hit is not None:
        return ancestor_hit

    modal_mount = Path("/opt/synth_ai_repo/examples/warming_up_to_rl/task_app")
    return modal_mount.resolve() if modal_mount.is_dir() else module_dir
96
+
97
+
98
# Directory anchors derived from the detected repo and task-app locations.
REPO_ROOT = _resolve_repo_root()
TASK_APP_ROOT = _resolve_task_app_root(REPO_ROOT)
SYNTH_ENVS_HOSTED_ROOT = TASK_APP_ROOT.joinpath("synth_envs_hosted").resolve()
EXAMPLES_ROOT = REPO_ROOT.joinpath("examples").resolve()

# Prepend every existing root to sys.path, skipping entries already present.
for path in (REPO_ROOT, TASK_APP_ROOT, SYNTH_ENVS_HOSTED_ROOT, EXAMPLES_ROOT):
    resolved = path
    with suppress(Exception):
        # Fall back to the unresolved path if resolution fails.
        resolved = path.resolve()
    if not resolved.exists():
        continue
    path_str = str(resolved)
    if path_str not in sys.path:
        sys.path.insert(0, path_str)

# Fallback: explicitly add Modal mount path for 'examples' if REPO_ROOT detection fails
with suppress(Exception):
    _hard_examples = Path("/opt/synth_ai_repo/examples")
    if _hard_examples.exists():
        _hard_examples_str = str(_hard_examples.resolve())
        if _hard_examples_str not in sys.path:
            sys.path.insert(0, _hard_examples_str)
123
+
124
# Load the URL helpers from the bundled synth_envs_hosted package.
try:
    from .synth_envs_hosted.utils import (
        ensure_chat_completions_url,
        extract_trace_correlation_id,
    )
except Exception:  # pragma: no cover - relative import unavailable
    try:
        # The relative form fails when this module is loaded without a parent
        # package (e.g. by file path). The absolute form matches how the other
        # synth_envs_hosted modules are imported further down in this file.
        from synth_envs_hosted.utils import (  # type: ignore
            ensure_chat_completions_url,
            extract_trace_correlation_id,
        )
    except Exception:  # pragma: no cover - fallback when optional deps missing

        def ensure_chat_completions_url(raw_url, mode=None):
            """Return ``raw_url`` unchanged (stand-in when the real helper is missing)."""
            return raw_url

        def extract_trace_correlation_id(_raw_url):
            """Return ``None`` (stand-in when the real helper is missing)."""
            return None
135
+
136
# Import Crafter and the hosted-environment stack. HAS_HOSTED is switched off
# only when the top-level ``synth_envs_hosted`` module is reported missing; any
# other import failure is re-raised as a RuntimeError carrying an install hint.
HAS_HOSTED = True
try:
    import crafter  # type: ignore
    import crafter.constants as crafter_constants  # type: ignore
    from synth_ai.environments.examples.crafter_classic.taskset import TRAIT_BOUNDS
    from synth_envs_hosted.branching import router as branching_router  # type: ignore
    from synth_envs_hosted.environment_routes import router as environment_router  # type: ignore
    from synth_envs_hosted.hosted_app import TaskApp as HostedTaskApp  # type: ignore
    from synth_envs_hosted.policy_routes import router as policy_router  # type: ignore
    from synth_envs_hosted.rollout import (  # type: ignore
        RolloutEnvSpec as LegacyRolloutEnvSpec,
        RolloutPolicySpec as LegacyRolloutPolicySpec,
        RolloutRecordConfig as LegacyRolloutRecordConfig,
        RolloutRequest as LegacyRolloutRequest,
        RolloutResponse as LegacyRolloutResponse,
        RolloutSafetyConfig as LegacyRolloutSafetyConfig,
        execute_rollout as legacy_execute_rollout,
    )
except Exception as exc:  # pragma: no cover - import-time validation
    # Provide a more actionable error with the missing module and fix hints
    missing_mod = None
    if isinstance(exc, ModuleNotFoundError):
        # Prefer the structured ``name`` attribute; only parse the message when
        # it is absent. The fallback is parenthesized because a conditional
        # expression binds looser than ``or``: without the parentheses a
        # populated ``name`` is thrown away whenever the message has no quote.
        missing_mod = getattr(exc, "name", None) or (
            str(exc).split("'")[1] if "'" in str(exc) else None
        )
    fix_hint = None
    if missing_mod:
        # Import name -> pip distribution name (they differ for python-dotenv).
        mapping = {
            "dotenv": "python-dotenv",
            "crafter": "crafter",
            "httpx": "httpx",
            "aiohttp": "aiohttp",
            "fastapi": "fastapi",
            "uvicorn": "uvicorn",
            "sqlalchemy": "sqlalchemy",
            "aiosqlite": "aiosqlite",
            "greenlet": "greenlet",
        }
        pkg = mapping.get(missing_mod, missing_mod)
        fix_hint = (
            f"Missing Python module '{missing_mod}'. Install the package '{pkg}'.\n"
            f"For Modal: add '{pkg}' to ModalDeploymentConfig.pip_packages in synth_ai/task/apps/grpo_crafter.py.\n"
            f"Locally: pip install {pkg}"
        )
    # Allow running without synth_envs_hosted; gate hosted features off
    if missing_mod == "synth_envs_hosted":
        HAS_HOSTED = False
    else:
        detailed = (
            "grpo_crafter task app requires example dependencies and runtime libs.\n"
            + (fix_hint + "\n" if fix_hint else "")
            + f"Original error: {exc}"
        )
        raise RuntimeError(detailed) from exc
202
+
203
+
204
# Crafting prerequisites text: a header line followed by one rule per line,
# joined with newlines and with no trailing newline.
CRAFTING_RULES_SYSTEM_HINT = "\n".join(
    (
        "Crafter crafting rules (from the paper):",
        "- Make Wood Pickaxe: Nearby a table; have wood in inventory.",
        "- Make Stone Pickaxe: Nearby a table; have wood and stone in inventory.",
        "- Make Iron Pickaxe: Nearby a table; furnace exists; have wood, coal, and iron in inventory.",
        "- Make Wood Sword: Nearby a table; have wood in inventory.",
        "- Make Stone Sword: Nearby a table; have wood and stone in inventory.",
        "- Make Iron Sword: Nearby a table; furnace exists; have wood, coal, and iron in inventory.",
    )
)
213
+
214
+
215
# Dataset descriptor for the procedural Crafter Classic seeds; the only split
# is "train", which is also the default.
DATASET_SPEC = TaskDatasetSpec(
    **{
        "id": "crafter_classic_procedural",
        "name": "Crafter Classic Procedural Seeds",
        "version": "1.0.0",
        "splits": ["train"],
        "default_split": "train",
        "description": "Procedural Crafter Classic seeds with reproducible world traits.",
    }
)
223
+
224
+
225
@dataclass
class CrafterDataset:
    """Seed-indexed description of procedurally generated Crafter worlds.

    World parameters come from ``CRAFTER_*`` environment variables (see
    ``__post_init__``). Per-seed summaries are computed on demand and kept in
    an in-memory cache.
    """

    spec: TaskDatasetSpec

    def __post_init__(self) -> None:
        """Load seed bounds, world area and episode length from the environment."""
        self.default_seed = int(env_value("CRAFTER_DEFAULT_SEED", 42))
        self.seed_min = 0
        self.seed_max = int(env_value("CRAFTER_MAX_SEED", 2**31 - 1))
        # CRAFTER_AREA is a comma-separated list of integers, e.g. "64,64".
        raw_area = str(env_value("CRAFTER_AREA", "64,64"))
        self.area = tuple(map(int, raw_area.split(",")))
        self.length = int(env_value("CRAFTER_EPISODE_LENGTH", 10000))
        self._cache: dict[int, dict[str, Any]] = {}

    def config_for_seed(self, seed: int) -> dict[str, Any]:
        """Return the environment config (seed, area, length) for ``seed``."""
        return dict(seed=int(seed), area=list(self.area), length=self.length)

    def describe_seed(self, seed: int) -> dict[str, Any]:
        """Return a cached summary of the world generated by ``seed``.

        On a cache miss a ``crafter.Env`` is created and reset, its traits,
        player inventory and player position are read, and the environment is
        closed again (when it exposes a callable ``close``).
        """
        seed = int(seed)
        try:
            return self._cache[seed]
        except KeyError:
            pass

        world = crafter.Env(area=self.area, length=self.length, seed=seed)
        try:
            world.reset()
            traits = _compute_world_traits(world)
            player = getattr(world, "_player", None)
            inventory = dict(getattr(player, "inventory", {})) if player else {}
            position = getattr(player, "pos", None)
        finally:
            closer = getattr(world, "close", None)
            if callable(closer):
                closer()

        self._cache[seed] = summary = {
            "seed": seed,
            "difficulty": self._difficulty(traits),
            "traits": traits,
            "inventory": inventory,
            "player_position": None if position is None else list(position),
            "config": self.config_for_seed(seed),
        }
        return summary

    def _difficulty(self, traits: dict[str, int]) -> str:
        """Return the first ``TRAIT_BOUNDS`` label whose bounds fit, else ``"custom"``."""
        trees = traits.get("trees", 0)
        hostiles = traits.get("hostiles", 0)
        return next(
            (
                label
                for label, limits in TRAIT_BOUNDS.items()
                if trees >= limits.get("min_trees", 0)
                and hostiles <= limits.get("max_hostiles", 0)
            ),
            "custom",
        )

    @property
    def seed_range(self) -> list[int]:
        """Two-element list ``[seed_min, seed_max]``."""
        return [self.seed_min, self.seed_max]
282
+
283
+
284
def _compute_world_traits(env: crafter.Env, radius: int = 10) -> dict[str, int]:
    """Count trees, cows and hostile mobs near the player.

    Objects are read from ``env._world._objects``; an object is counted when
    the sum of absolute coordinate differences between its ``pos`` and the
    player's ``pos`` is at most ``radius``. Objects whose distance cannot be
    computed are skipped. Returns all-zero counts when the environment has no
    ``_player``.
    """
    # Local copy to avoid import-time issues; mirrors synth_ai.environments.examples.crafter_classic.taskset.world_traits
    import numpy as _np  # type: ignore
    from crafter import objects as _objects  # type: ignore

    tally = {"trees": 0, "cows": 0, "hostiles": 0}
    player = getattr(env, "_player", None)
    if player is None:
        return tally

    origin = _np.array(getattr(player, "pos", [0, 0]))
    world = getattr(env, "_world", None)
    nearby_candidates = [] if world is None else getattr(world, "_objects", [])

    def _bucket(thing) -> str | None:
        # Map an object to the counter it belongs to, if any.
        if isinstance(thing, _objects.Plant) and getattr(thing, "kind", "") == "tree":
            return "trees"
        if isinstance(thing, _objects.Cow):
            return "cows"
        if isinstance(thing, (_objects.Zombie, _objects.Skeleton)):
            return "hostiles"
        return None

    for thing in nearby_candidates:
        if thing is None or thing is player:
            continue
        try:
            out_of_range = _np.abs(thing.pos - origin).sum() > radius
        except Exception:
            continue
        if out_of_range:
            continue
        key = _bucket(thing)
        if key is not None:
            tally[key] += 1
    return tally
311
+
312
+
313
def env_value(key: str, default: Any) -> Any:
    """Return environment variable ``key``, or ``default`` when it is unset.

    Values read from the environment are strings; ``default`` is returned
    as-is, whatever its type.
    """
    return os.environ.get(key, default)
315
+
316
+
317
def build_dataset() -> tuple[TaskDatasetRegistry, CrafterDataset]:
    """Create a registry with the Crafter dataset registered under ``DATASET_SPEC``.

    Returns ``(registry, dataset)``. The registered factory ignores its spec
    argument and always hands back that same ``dataset`` instance.
    """
    dataset_registry = TaskDatasetRegistry()
    crafter_dataset = CrafterDataset(DATASET_SPEC)

    def _factory(_spec):
        return crafter_dataset

    dataset_registry.register(DATASET_SPEC, _factory, cache=True)
    return dataset_registry, crafter_dataset
322
+
323
+
324
def _base_task_info(dataset: CrafterDataset) -> TaskInfo:
    """Build the static ``TaskInfo`` description of the Crafter task.

    Only the dataset section depends on ``dataset`` (its ``seed_range`` and
    ``default_seed``); the action list comes from ``crafter_constants.actions``
    and every other field is a fixed literal.
    """
    actions = crafter_constants.actions
    action_space = {
        "type": "discrete",
        "size": len(actions),
        "actions": list(actions),
    }
    observation = {
        "summary": "RGB frame plus inventory, achievements, and semantic map patches.",
        "keys": ["image", "inventory", "achievements", "semantic_map_patch7"],
        "image_shape": [64, 64, 3],
    }
    # Dataset spec fields, extended with the seed bounds of this dataset instance.
    dataset_section = dict(
        DATASET_SPEC.model_dump(),
        seed_range=dataset.seed_range,
        default_seed=dataset.default_seed,
    )
    rubric = {
        "version": "1",
        "criteria_count": 2,
        "source": "inline",
        "aggregation": "weighted_sum",
    }
    proxy_endpoints = {
        "openai": "/proxy/v1/chat/completions",
        "groq": "/proxy/groq/v1/chat/completions",
    }
    inference = {
        "supports_proxy": True,
        "endpoints": proxy_endpoints,
        "tool": {"name": "interact", "parallel_tool_calls": False},
    }
    capabilities = {
        "supports_rollout": True,
        "supports_env_lifecycle": True,
        "requires_api_key_header": True,
    }
    return TaskInfo(
        task={"id": "crafter_classic", "name": "Crafter Classic", "version": "1.0.0"},
        environment="crafter",
        action_space=action_space,
        observation=observation,
        dataset=dataset_section,
        rubric=rubric,
        inference=inference,
        capabilities=capabilities,
        limits={"max_ops": 100000, "max_time_s": 3600},
    )
364
+
365
+
366
# Outcome rubric: two equally weighted criteria (achievements and survival),
# combined with a weighted sum.
OUTCOME_RUBRIC = load_rubric(
    dict(
        version="1",
        goal_text="Reward unlocking Crafter achievements and survival.",
        aggregation="weighted_sum",
        criteria=[
            dict(
                id="achievements",
                description="Unlock achievements or crafting milestones.",
                weight=1.0,
            ),
            dict(
                id="survival",
                description="Maintain health, food, and drink levels.",
                weight=1.0,
            ),
        ],
    )
)
385
+
386
# Events rubric: a single step-level criterion, combined with a weighted sum.
EVENTS_RUBRIC = load_rubric(
    dict(
        version="1",
        goal_text="Encourage purposeful step-wise exploration and crafting.",
        aggregation="weighted_sum",
        criteria=[
            dict(
                id="progress_steps",
                description="Actions progress quests, crafting, or exploration.",
                weight=1.0,
            ),
        ],
    )
)
400
+
401
+
402
def describe_taskset(dataset: CrafterDataset) -> dict[str, Any]:
    """Describe the taskset as the dumped ``DATASET_SPEC`` plus dataset details.

    Args:
        dataset: Provides ``seed_range``, ``default_seed``, ``area`` and ``length``.

    Returns:
        The spec fields extended with ``seed_range``, ``default_seed`` and a
        ``config`` dict holding ``area`` (as a list) and ``length``.
    """
    description: dict[str, Any] = dict(DATASET_SPEC.model_dump())
    description["seed_range"] = dataset.seed_range
    description["default_seed"] = dataset.default_seed
    description["config"] = {
        "area": list(dataset.area),
        "length": dataset.length,
    }
    return description
412
+
413
+
414
def provide_task_instances(
    dataset: CrafterDataset, base_info: TaskInfo, seeds: Sequence[int]
) -> Iterable[TaskInfo]:
    """Build one seed-specific ``TaskInfo`` per entry in ``seeds``.

    Each instance reuses the task, environment, action space, rubric,
    inference, capabilities and limits of ``base_info``. Its observation and
    dataset sections are copies of the base sections extended with values from
    ``dataset.describe_seed(seed)``.

    Args:
        dataset: Supplies the per-seed summary via ``describe_seed``.
        base_info: Template whose sections are shared or extended.
        seeds: Seeds to materialise, in order.

    Returns:
        A list of ``TaskInfo`` objects, one per seed, in input order.
    """

    def _instance_for(seed_value: int) -> TaskInfo:
        summary = dataset.describe_seed(seed_value)

        observation = dict(base_info.observation)
        observation["seed"] = seed_value
        observation["traits"] = summary["traits"]
        observation["inventory"] = summary["inventory"]
        observation["player_position"] = summary["player_position"]

        dataset_section = dict(base_info.dataset)
        dataset_section["seed"] = seed_value
        dataset_section["difficulty"] = summary["difficulty"]
        dataset_section["config"] = summary["config"]

        return TaskInfo(
            task=base_info.task,
            environment=base_info.environment,
            action_space=base_info.action_space,
            observation=observation,
            dataset=dataset_section,
            rubric=base_info.rubric,
            inference=base_info.inference,
            capabilities=base_info.capabilities,
            limits=base_info.limits,
        )

    return [_instance_for(seed_value) for seed_value in seeds]
445
+
446
+
447
+ def _normalise_op(op_value: Any, index: int) -> str:
448
+ if isinstance(op_value, str):
449
+ candidate = op_value
450
+ elif isinstance(op_value, dict):
451
+ candidate = op_value.get("type") or op_value.get("op")
452
+ else:
453
+ candidate = None
454
+ if not candidate:
455
+ raise ValueError(f"Missing op type at index {index}")
456
+ lowered = str(candidate).strip().lower()
457
+ if lowered in {"policy", "agent", "model"}:
458
+ return "agent"
459
+ if lowered in {"env", "environment", "step"}:
460
+ return "env"
461
+ raise ValueError(f"Unsupported op type '{candidate}' at index {index}")
462
+
463
+
464
def _coerce_math_to_crafter(request: RolloutRequest) -> RolloutRequest:
    """Map legacy math env/policy names to crafter and enrich rollout defaults.

    A name counts as a legacy alias when, stripped and lower-cased, it starts
    with ``"math"``. If neither the env nor the policy carries such a name or
    id, ``request`` is returned unchanged. Otherwise a copy is returned in which:

    * aliased names become ``"crafter"`` / ``"crafter-react"`` and aliased ids
      are cleared;
    * env and policy configs receive defaults for keys they do not already set;
    * ``ops`` is replaced by ``DEFAULT_ALIAS_OPS`` when empty or shorter than it.

    The remap is announced on stdout and through ``logger``; failures while
    reporting are suppressed.
    """

    def _is_math_alias(name: str | None) -> bool:
        return bool(name) and str(name).strip().lower().startswith("math")

    env_updates: dict[str, Any] = {}
    policy_updates: dict[str, Any] = {}

    if _is_math_alias(request.env.env_name):
        env_updates["env_name"] = "crafter"
    if _is_math_alias(request.env.env_id):
        env_updates["env_id"] = None
    if _is_math_alias(request.policy.policy_name):
        policy_updates["policy_name"] = "crafter-react"
    if _is_math_alias(request.policy.policy_id):
        policy_updates["policy_id"] = None

    # No alias matched anywhere: hand the request back untouched.
    if not (env_updates or policy_updates):
        return request

    env_spec = request.env.model_copy(update=env_updates) if env_updates else request.env
    policy_spec = (
        request.policy.model_copy(update=policy_updates) if policy_updates else request.policy
    )

    # Fill in defaults without overriding anything the caller already set.
    env_cfg = dict(env_spec.config or {})
    for key, fallback in (
        ("difficulty", "normal"),
        ("step_rewards", dict(DEFAULT_ALIAS_STEP_REWARDS)),
        ("env_params", {"max_steps_per_episode": 200}),
    ):
        env_cfg.setdefault(key, fallback)
    env_spec = env_spec.model_copy(update={"config": env_cfg})

    policy_cfg = dict(policy_spec.config or {})
    for key, fallback in (
        ("max_llm_calls", 10),
        ("max_completion_tokens", 1024),
        ("temperature", 0.2),
        ("step_rewards", dict(DEFAULT_ALIAS_STEP_REWARDS)),
    ):
        policy_cfg.setdefault(key, fallback)
    policy_spec = policy_spec.model_copy(update={"config": policy_cfg})

    requested_ops = request.ops
    too_short = not requested_ops or len(requested_ops) < len(DEFAULT_ALIAS_OPS)
    ops_override = list(DEFAULT_ALIAS_OPS) if too_short else requested_ops

    coerced = request.model_copy(
        update={"env": env_spec, "policy": policy_spec, "ops": ops_override}
    )

    # Reporting is best-effort and must never break the rollout.
    with suppress(Exception):
        print(
            "[rollout] remapped math request -> crafter "
            f"(env={request.env.env_name!r}→{coerced.env.env_name!r}, "
            f"policy={request.policy.policy_name!r}→{coerced.policy.policy_name!r})",
            flush=True,
        )
    with suppress(Exception):
        logger.info(
            "ROLLOUT_ALIAS: remapped math env/policy to crafter (env=%s→%s, policy=%s→%s)",
            request.env.env_name,
            coerced.env.env_name,
            request.policy.policy_name,
            coerced.policy.policy_name,
        )

    return coerced
534
+
535
+
536
async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
    """Execute a rollout via the legacy hosted executor and normalise its response.

    Steps, in order:

    1. Without hosted support (``HAS_HOSTED`` false) return an empty response.
    2. Remap legacy math aliases and force a structured trace to be returned.
    3. Normalise the policy ``inference_url`` and extract the trace correlation
       id from it; fill in policy sampling defaults.
    4. Ensure the env allows at least ``max_llm_calls`` steps per episode.
    5. Normalise ops, cap them at ``2 * max_llm_calls`` and run the legacy executor.
    6. Backfill the trace (legacy response attribute, then the tracer DB) and
       stamp correlation id / inference URL onto the response and trajectories.

    Args:
        request: Incoming rollout request.
        fastapi_request: The FastAPI request; ``app.state`` is consulted for a
            ``session_tracer_factory`` and it is forwarded to the legacy executor.

    Returns:
        The validated ``RolloutResponse``.

    Raises:
        AssertionError: In RL mode when no trace correlation id can be extracted.
        ValueError: When an op cannot be normalised.
        HTTPException: 500 when no trace could be obtained for the rollout.
    """
    # If hosted env service code is not bundled, return a no-op rollout response compatible with contracts
    if not HAS_HOSTED:
        return RolloutResponse(
            run_id=request.run_id,
            trajectories=[],
            branches={},
            metrics=RolloutMetrics(
                episode_returns=[],
                mean_return=0.0,
                num_steps=0,
                num_episodes=0,
                details={},
            ),
            aborted=False,
            ops_executed=0,
            trace=None,
        )

    request = _coerce_math_to_crafter(request)

    # Always request a structured trace, whatever the caller asked for.
    record_cfg = request.record.model_copy(
        update={
            "return_trace": True,
            "trace_format": "structured",
        }
    )
    request = request.model_copy(update={"record": record_cfg})

    policy_cfg = dict(request.policy.config or {})
    logger.info(
        "ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
        sorted(policy_cfg.keys()),
        policy_cfg.get("inference_url"),
        request.run_id,
        request.mode,
    )
    inferred_url = ensure_chat_completions_url(policy_cfg.get("inference_url"), mode=request.mode)
    if isinstance(inferred_url, str) and inferred_url:
        policy_cfg["inference_url"] = inferred_url
    else:
        logger.warning(
            "ROLLOUT_EXEC: inference_url missing or not normalized run_id=%s raw=%s",
            request.run_id,
            policy_cfg.get("inference_url"),
        )

    trace_correlation_id = extract_trace_correlation_id(policy_cfg.get("inference_url"))
    if request.mode == RolloutMode.RL and not trace_correlation_id:
        # Explicit raise instead of `assert` so the check survives `python -O`;
        # the exception type and message are unchanged.
        raise AssertionError(
            f"FATAL: trace_correlation_id extraction failed for run_id={request.run_id}. "
            f"policy_cfg_keys={sorted(policy_cfg.keys())} inference_url={policy_cfg.get('inference_url')}"
        )
    if trace_correlation_id:
        policy_cfg["trace_correlation_id"] = trace_correlation_id

    try:
        max_llm_calls = int(policy_cfg.get("max_llm_calls") or 10)
    except Exception:
        max_llm_calls = 10
    policy_cfg.setdefault("max_llm_calls", max_llm_calls)
    policy_cfg.setdefault("max_tokens", 512)
    policy_cfg.setdefault("max_completion_tokens", 512)
    policy_cfg.setdefault("temperature", 0.2)
    policy_cfg.setdefault("top_p", 0.95)

    # The episode must allow at least as many env steps as LLM calls.
    env_cfg = dict(request.env.config or {})
    env_params = dict(env_cfg.get("env_params") or {})
    try:
        max_steps_episode = int(env_params.get("max_steps_per_episode") or max_llm_calls)
    except Exception:
        max_steps_episode = max_llm_calls
    desired_steps = max(max_llm_calls, max_steps_episode)
    env_params["max_steps_per_episode"] = int(desired_steps)
    env_cfg["env_params"] = env_params

    updated_policy = request.policy.model_copy(update={"config": policy_cfg})
    updated_env = request.env.model_copy(update={"config": env_cfg})
    request = request.model_copy(update={"policy": updated_policy, "env": updated_env})

    converted_ops: list[str] = [_normalise_op(op, idx) for idx, op in enumerate(request.ops)]
    # Cap the op list at two ops per permitted LLM call.
    max_ops_allowed = max_llm_calls * 2 if max_llm_calls > 0 else len(converted_ops)
    if max_ops_allowed and len(converted_ops) > max_ops_allowed:
        converted_ops = converted_ops[:max_ops_allowed]
    legacy_request = LegacyRolloutRequest(
        run_id=request.run_id,
        env=LegacyRolloutEnvSpec(
            env_id=request.env.env_id,
            env_name=request.env.env_name,
            config=env_cfg,
            seed=request.env.seed,
        ),
        policy=LegacyRolloutPolicySpec(
            policy_id=request.policy.policy_id,
            policy_name=request.policy.policy_name,
            config=policy_cfg,
        ),
        ops=converted_ops,
        record=LegacyRolloutRecordConfig(**request.record.model_dump()),
        on_done=request.on_done,
        branch=None,
        safety=LegacyRolloutSafetyConfig(**request.safety.model_dump()),
        training_session_id=request.training_session_id,
        synth_base_url=request.synth_base_url,
        mode=request.mode,
    )

    legacy_response: LegacyRolloutResponse = await legacy_execute_rollout(
        legacy_request, fastapi_request
    )
    data = legacy_response.model_dump()
    logger.debug(
        "ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
        sorted(data.keys()),
        bool(data.get("trace")),
    )
    metrics = data.get("metrics", {}) or {}
    metrics.setdefault("outcome_score", None)
    metrics.setdefault("events_score", None)
    metrics.setdefault("details", {})
    data["metrics"] = metrics

    # Trace backfill: prefer the attribute on the legacy response, then the tracer DB.
    if data.get("trace") is None:
        legacy_trace = getattr(legacy_response, "trace", None)
        if legacy_trace is not None:
            data["trace"] = legacy_trace
        else:
            tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
            if callable(tracer_factory):
                tracer = tracer_factory()
                logger.debug(
                    "ROLLOUT_EXEC: trace backfill factory=%s", type(tracer)
                )
                if isinstance(tracer, SessionTracer):
                    try:
                        await tracer.initialize()
                        if tracer.db is not None:
                            trace_row = await tracer.db.get_session_trace(request.run_id)
                            if trace_row is not None:
                                data["trace"] = trace_row
                    except Exception as exc:
                        logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
                    finally:
                        with suppress(Exception):
                            await tracer.close()

    final_cid = trace_correlation_id or f"trace_{request.run_id}"
    data["trace_correlation_id"] = final_cid

    existing_meta = data.get("pipeline_metadata")
    if not isinstance(existing_meta, dict):
        existing_meta = {}
    existing_meta.setdefault("trace_correlation_id", final_cid)
    if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
        existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
    data["pipeline_metadata"] = existing_meta

    # Propagate inference_url into each legacy trajectory entry for downstream tooling.
    inferred_url = policy_cfg.get("inference_url")

    if "trajectories" in data:
        normalized_trajs: list[dict[str, Any]] = []
        for traj in data.get("trajectories", []):
            if isinstance(traj, BaseModel):
                traj_dict = traj.model_dump()
            elif isinstance(traj, dict):
                traj_dict = dict(traj)
            else:
                continue
            traj_dict.setdefault("trace_correlation_id", final_cid)
            if isinstance(inferred_url, str) and inferred_url and not traj_dict.get("inference_url"):
                traj_dict["inference_url"] = inferred_url
            normalized_trajs.append(traj_dict)
        # Only replace the list when at least one trajectory was normalised.
        if normalized_trajs:
            data["trajectories"] = normalized_trajs

    # A missing trace is fatal. No placeholder trace is built here: the raise
    # below is unconditional, so such a placeholder could never be returned.
    if data.get("trace") is None:
        raise HTTPException(
            status_code=500, detail="trace_payload_missing: task app did not emit a SessionTrace"
        )

    return RolloutResponse.model_validate(data)
731
+
732
+
733
def build_config() -> TaskAppConfig:
    """Assemble the ``TaskAppConfig`` for the GRPO Crafter task app.

    Builds the dataset and registry, resolves tracing and SFT output settings,
    prints their status to stdout, and wires the rollout executor, rubrics,
    proxy settings and (when hosted code is available) the hosted routers.
    """
    registry, dataset = build_dataset()
    base_info = _base_task_info(dataset)

    hosted_task_app = HostedTaskApp() if HAS_HOSTED else None

    tracing_enabled = tracing_env_enabled()
    tracing_db_url = resolve_tracing_db_url()
    tracer_factory = build_tracer_factory(
        SessionTracer, enabled=tracing_enabled, db_url=tracing_db_url
    )
    sft_output_dir = resolve_sft_output_dir()

    app_state: dict[str, Any] = {
        "task_app": hosted_task_app,
        "allowed_environments": ["crafter"],
        "tracing_enabled": tracing_enabled,
    }
    # Optional entries are only present when they are actually configured.
    if tracer_factory is not None:
        app_state["session_tracer_factory"] = tracer_factory
    if sft_output_dir:
        app_state["sft_output_dir"] = sft_output_dir

    status_msg = (
        f"[task:tracing] enabled (db={tracing_db_url or 'default'})"
        if tracing_enabled
        else "[task:tracing] disabled"
    )
    print(status_msg, flush=True)
    if sft_output_dir:
        print(f"[task:sft] writing JSONL to {sft_output_dir}", flush=True)

    def _describe_taskset() -> dict[str, Any]:
        return describe_taskset(dataset)

    def _provide_instances(seeds: Sequence[int]):
        return provide_task_instances(dataset, base_info, seeds)

    # Hosted routers only exist when the hosted service code is bundled.
    routers: tuple
    if HAS_HOSTED:
        routers = (environment_router, policy_router, branching_router)
    else:
        routers = ()

    proxy_settings = ProxyConfig(
        enable_openai=True, enable_groq=True, system_hint=CRAFTING_RULES_SYSTEM_HINT
    )
    rubric_bundle = RubricBundle(outcome=OUTCOME_RUBRIC, events=EVENTS_RUBRIC)

    return TaskAppConfig(
        app_id="grpo-crafter",
        name="GRPO Crafter Task App",
        description="Crafter Classic environment with GRPO task endpoints and LLM proxies.",
        base_task_info=base_info,
        describe_taskset=_describe_taskset,
        provide_task_instances=_provide_instances,
        rollout=rollout_executor,
        dataset_registry=registry,
        rubrics=rubric_bundle,
        proxy=proxy_settings,
        routers=routers,
        app_state=app_state,
        cors_origins=["*"],
    )
790
+
791
+
792
# Register the Crafter task app, together with its Modal deployment settings,
# under the id "grpo-crafter" and the aliases "crafter" / "crafter-task".
register_task_app(
    entry=TaskAppEntry(
        app_id="grpo-crafter",
        description="Crafter Classic task app with rollout + proxy endpoints",
        config_factory=build_config,
        aliases=("crafter", "crafter-task"),
        modal=ModalDeploymentConfig(
            app_name="grpo-crafter-task-app",
            python_version="3.11",
            pip_packages=(
                "fastapi>=0.100.0",
                "uvicorn>=0.23.0",
                "pydantic>=2.0.0",
                "numpy>=1.24.0",
                "aiohttp>=3.8.0",
                "httpx>=0.24.0",
                "python-dotenv>=1.0.1",
                # Tracing/DB runtime deps
                "sqlalchemy>=2.0.42",
                "aiosqlite>=0.21.0",
                "greenlet>=3.2.3",
                "crafter",
            ),
            # Mount repo root so local modules resolve when deployed on Modal
            extra_local_dirs=tuple(
                (str(local_path), remote_path)
                for local_path, remote_path in (
                    (REPO_ROOT, "/opt/synth_ai_repo"),
                    (REPO_ROOT / "synth_ai", "/opt/synth_ai_repo/synth_ai"),
                    (TASK_APP_ROOT, "/opt/synth_ai_repo/examples/warming_up_to_rl/task_app"),
                )
            ),
            secret_names=("groq-api-key", "openai-api-key"),
            memory=16384,
            cpu=4.0,
            max_containers=10,
        ),
    )
)