synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (291) hide show
  1. examples/multi_step/configs/README_verilog_rl.md +77 -0
  2. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  3. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  4. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  5. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  6. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
  7. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  8. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  9. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  10. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  11. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  12. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  13. examples/multi_step/readme.md +48 -0
  14. examples/multi_step/verilog_rl_lora.md +218 -0
  15. examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
  16. examples/sft/evaluate.py +2 -0
  17. examples/sft/generate_traces.py +2 -0
  18. examples/swe/task_app/grpo_swe_mini.py +56 -26
  19. examples/swe/task_app/hosted/rollout.py +42 -0
  20. examples/swe/task_app/hosted/test_service.py +5 -6
  21. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  22. examples/task_apps/TESTING.md +275 -0
  23. examples/task_apps/__init__.py +0 -0
  24. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  25. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  26. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  27. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  28. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  29. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  30. examples/task_apps/crafter/__init__.py +0 -0
  31. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  32. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  33. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  34. examples/task_apps/crafter/task_app/__init__.py +5 -0
  35. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
  36. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  37. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  38. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
  39. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  40. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
  41. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
  42. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
  43. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  44. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
  45. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  78. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  79. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  80. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  81. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  82. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  83. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  84. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  85. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  86. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  87. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  88. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  89. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  90. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  91. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  92. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  93. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  94. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  95. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  96. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  97. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  98. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  99. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  100. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  101. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  102. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  103. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  104. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  105. examples/task_apps/enron/__init__.py +1 -0
  106. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  107. examples/task_apps/enron/filter_sft.toml +5 -0
  108. examples/task_apps/enron/task_app/README.md +14 -0
  109. examples/task_apps/enron/task_app/__init__.py +1 -0
  110. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  111. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  112. examples/task_apps/enron/tests/__init__.py +4 -0
  113. examples/task_apps/enron/tests/conftest.py +115 -0
  114. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  115. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  116. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  117. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  118. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  119. examples/task_apps/math/__init__.py +0 -0
  120. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  121. examples/task_apps/pokemon_battle/__init__.py +2 -0
  122. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  123. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  124. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  125. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  126. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  127. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  128. examples/task_apps/pokemon_red/README.md +357 -0
  129. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  130. examples/task_apps/pokemon_red/__init__.py +3 -0
  131. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  132. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  133. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  134. examples/task_apps/pokemon_red/task_app.py +799 -0
  135. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  136. examples/task_apps/sokoban/README.md +307 -0
  137. examples/task_apps/sokoban/__init__.py +3 -0
  138. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  139. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  140. examples/task_apps/sokoban/filter_sft.toml +5 -0
  141. examples/task_apps/sokoban/task_app.py +1058 -0
  142. examples/task_apps/sokoban/tests/__init__.py +4 -0
  143. examples/task_apps/sokoban/tests/conftest.py +113 -0
  144. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  145. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  146. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  147. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  148. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  149. examples/task_apps/verilog/__init__.py +1 -0
  150. examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
  151. examples/task_apps/verilog/filter_sft.toml +5 -0
  152. examples/task_apps/verilog/task_app/README.md +12 -0
  153. examples/task_apps/verilog/task_app/__init__.py +1 -0
  154. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  155. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  156. examples/task_apps/verilog/tests/__init__.py +4 -0
  157. examples/task_apps/verilog/tests/conftest.py +115 -0
  158. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  159. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  160. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  161. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  162. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  163. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  164. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  165. examples/warming_up_to_rl/groq_test.py +2 -0
  166. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  167. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  168. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  169. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  170. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  171. examples/workflows/__init__.py +0 -0
  172. examples/workflows/math_rl/__init__.py +0 -0
  173. examples/workflows/math_rl/download_dataset.py +80 -0
  174. synth_ai/__init__.py +2 -2
  175. synth_ai/api/models/supported.py +1 -0
  176. synth_ai/api/train/builders.py +25 -11
  177. synth_ai/api/train/cli.py +12 -6
  178. synth_ai/api/train/configs/__init__.py +10 -10
  179. synth_ai/api/train/configs/rl.py +5 -4
  180. synth_ai/api/train/configs/sft.py +4 -3
  181. synth_ai/api/train/env_resolver.py +5 -2
  182. synth_ai/api/train/supported_algos.py +10 -5
  183. synth_ai/api/train/utils.py +7 -4
  184. synth_ai/cli/__init__.py +48 -59
  185. synth_ai/cli/_modal_wrapper.py +3 -2
  186. synth_ai/cli/_storage.py +4 -3
  187. synth_ai/cli/_validate_task_app.py +11 -0
  188. synth_ai/cli/balance.py +4 -3
  189. synth_ai/cli/calc.py +2 -2
  190. synth_ai/cli/demo.py +14 -7
  191. synth_ai/cli/legacy_root_backup.py +1 -1
  192. synth_ai/cli/recent.py +1 -1
  193. synth_ai/cli/rl_demo.py +8 -7
  194. synth_ai/cli/root.py +0 -97
  195. synth_ai/cli/status.py +1 -1
  196. synth_ai/cli/task_apps.py +1922 -190
  197. synth_ai/cli/traces.py +1 -1
  198. synth_ai/cli/tui.py +57 -0
  199. synth_ai/cli/turso.py +1 -1
  200. synth_ai/cli/watch.py +1 -1
  201. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
  202. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  203. synth_ai/environments/examples/enron/engine.py +7 -2
  204. synth_ai/environments/examples/enron/environment.py +68 -0
  205. synth_ai/environments/examples/red/engine.py +27 -0
  206. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  207. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  208. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  209. synth_ai/environments/examples/red/environment.py +60 -0
  210. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  211. synth_ai/environments/examples/verilog/engine.py +104 -12
  212. synth_ai/evals/client.py +58 -61
  213. synth_ai/jobs/client.py +16 -4
  214. synth_ai/judge_schemas.py +9 -9
  215. synth_ai/py.typed +0 -0
  216. synth_ai/task/__init__.py +24 -5
  217. synth_ai/task/apps/__init__.py +1 -0
  218. synth_ai/task/config.py +257 -0
  219. synth_ai/task/contracts.py +138 -39
  220. synth_ai/task/proxy.py +48 -56
  221. synth_ai/task/rubrics/__init__.py +56 -0
  222. synth_ai/task/rubrics/loaders.py +152 -0
  223. synth_ai/task/rubrics/models.py +57 -0
  224. synth_ai/task/rubrics/scoring.py +116 -0
  225. synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
  226. synth_ai/task/server.py +8 -7
  227. synth_ai/task/trace_correlation_helpers.py +315 -0
  228. synth_ai/task/validators.py +413 -6
  229. synth_ai/tracing_v3/abstractions.py +3 -3
  230. synth_ai/tracing_v3/decorators.py +7 -3
  231. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  232. synth_ai/tracing_v3/replica_sync.py +4 -4
  233. synth_ai/tracing_v3/serialization.py +5 -5
  234. synth_ai/tracing_v3/session_tracer.py +16 -6
  235. synth_ai/tracing_v3/storage/base.py +29 -29
  236. synth_ai/tracing_v3/storage/config.py +3 -3
  237. synth_ai/tracing_v3/trace_utils.py +317 -0
  238. synth_ai/tracing_v3/turso/daemon.py +8 -7
  239. synth_ai/tracing_v3/turso/native_manager.py +66 -43
  240. synth_ai/tracing_v3/utils.py +3 -3
  241. synth_ai/tui/__init__.py +5 -0
  242. synth_ai/tui/__main__.py +13 -0
  243. synth_ai/tui/cli/__init__.py +1 -0
  244. synth_ai/tui/cli/query_experiments.py +164 -0
  245. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  246. synth_ai/tui/dashboard.py +906 -0
  247. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
  248. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
  249. examples/agora_ex/README_MoE.md +0 -224
  250. examples/agora_ex/__init__.py +0 -7
  251. examples/agora_ex/agora_ex.py +0 -65
  252. examples/agora_ex/agora_ex_task_app.py +0 -590
  253. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
  254. examples/agora_ex/reward_fn_grpo-human.py +0 -129
  255. examples/agora_ex/system_prompt_CURRENT.md +0 -63
  256. examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
  257. examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
  258. examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
  259. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
  260. synth_ai/rubrics/__init__.py +0 -22
  261. synth_ai/task/rubrics.py +0 -219
  262. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  263. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  264. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  265. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  266. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  267. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  268. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  269. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  270. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  271. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  272. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  273. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  274. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  275. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  276. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  277. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  278. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  279. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  280. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  281. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  282. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  283. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  284. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  285. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  286. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  287. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  288. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
  289. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
  290. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
  291. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,315 @@
1
+ """Helpers for trace correlation ID extraction and inclusion in task apps.
2
+
3
+ This module provides utilities for task apps to:
4
+ 1. Extract trace_correlation_id from rollout requests
5
+ 2. Include trace_correlation_id in rollout responses (3 required locations)
6
+
7
+ See monorepo/trace_creation_and_judgement.txt "Fatal Guards" section for requirements.
8
+ """
9
+
10
+ import logging
11
+ from typing import Any
12
+ from urllib.parse import parse_qs, urlparse
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ def extract_trace_correlation_id(
18
+ policy_config: dict[str, Any],
19
+ inference_url: str | None = None,
20
+ mode: Any = None
21
+ ) -> str | None:
22
+ """
23
+ Extract trace_correlation_id from policy config or inference URL.
24
+
25
+ This is the standardized method for all task apps to extract the correlation ID
26
+ that the RL trainer generates and passes to the task app.
27
+
28
+ Args:
29
+ policy_config: Policy configuration dict from RolloutRequest.policy.config
30
+ inference_url: Inference URL (optional, used as fallback)
31
+ mode: RolloutMode or string ("rl" or "eval"). Controls warning behavior -
32
+ warnings only logged for RL mode, not EVAL mode.
33
+
34
+ Returns:
35
+ trace_correlation_id if found, None otherwise
36
+
37
+ Extraction order:
38
+ 1. policy_config["trace_correlation_id"] (preferred)
39
+ 2. policy_config["trace"] (legacy fallback)
40
+ 3. URL query param ?cid=... (fallback)
41
+ 4. URL query param ?trace_correlation_id=... (fallback)
42
+ """
43
+ # Try policy_config first (preferred method)
44
+ candidates: list[Any] = [
45
+ policy_config.get("trace_correlation_id"),
46
+ policy_config.get("trace"),
47
+ ]
48
+
49
+ logger.debug(
50
+ "extract_trace_correlation_id: policy_cfg keys=%s candidates=%s",
51
+ sorted(policy_config.keys()),
52
+ candidates,
53
+ )
54
+
55
+ for candidate in candidates:
56
+ if isinstance(candidate, str):
57
+ stripped = candidate.strip()
58
+ if stripped:
59
+ logger.info(
60
+ "extract_trace_correlation_id: extracted from policy_config=%s",
61
+ stripped
62
+ )
63
+ return stripped
64
+
65
+ # Determine if we're in EVAL mode (trace_correlation_id not required for eval)
66
+ try:
67
+ from synth_ai.task.contracts import RolloutMode
68
+ is_eval_mode = (mode == "eval" or mode == RolloutMode.EVAL or
69
+ (hasattr(mode, 'value') and mode.value == "eval"))
70
+ except ImportError:
71
+ # If RolloutMode not available, fall back to string comparison
72
+ is_eval_mode = (mode == "eval")
73
+
74
+ # Fallback: try to extract from inference_url query params
75
+ if not inference_url or not isinstance(inference_url, str):
76
+ if is_eval_mode:
77
+ logger.debug(
78
+ "extract_trace_correlation_id: no correlation ID found in policy_config "
79
+ "and no inference_url provided (EVAL mode - expected)"
80
+ )
81
+ else:
82
+ logger.warning(
83
+ "extract_trace_correlation_id: no correlation ID found in policy_config "
84
+ "and no inference_url provided"
85
+ )
86
+ return None
87
+
88
+ try:
89
+ parsed = urlparse(inference_url)
90
+ query_params = parse_qs(parsed.query or "")
91
+ # Try multiple possible query param names
92
+ for param_name in ["cid", "trace_correlation_id", "trace"]:
93
+ values = query_params.get(param_name, [])
94
+ for value in values:
95
+ if isinstance(value, str) and value.strip():
96
+ correlation_id = value.strip()
97
+ logger.info(
98
+ "extract_trace_correlation_id: extracted from URL param %s=%s",
99
+ param_name,
100
+ correlation_id,
101
+ )
102
+ return correlation_id
103
+ except Exception as e:
104
+ logger.warning(
105
+ "extract_trace_correlation_id: failed to parse inference_url=%s error=%s",
106
+ inference_url,
107
+ e,
108
+ )
109
+
110
+ if is_eval_mode:
111
+ logger.debug(
112
+ "extract_trace_correlation_id: no trace_correlation_id found in "
113
+ "policy_config or inference_url=%s (EVAL mode - expected)",
114
+ inference_url,
115
+ )
116
+ else:
117
+ logger.warning(
118
+ "extract_trace_correlation_id: no trace_correlation_id found in "
119
+ "policy_config or inference_url=%s",
120
+ inference_url,
121
+ )
122
+ return None
123
+
124
+
125
+ def validate_trace_correlation_id(
126
+ trace_correlation_id: str | None,
127
+ run_id: str,
128
+ policy_config: dict[str, Any],
129
+ fatal: bool = False
130
+ ) -> str | None:
131
+ """
132
+ Validate that trace_correlation_id was successfully extracted.
133
+
134
+ Args:
135
+ trace_correlation_id: The extracted correlation ID (or None)
136
+ run_id: Rollout run_id for logging
137
+ policy_config: Policy configuration for debugging
138
+ fatal: If True, raise ValueError on missing ID. If False, log error only.
139
+
140
+ Returns:
141
+ trace_correlation_id if present, None if missing (when fatal=False)
142
+
143
+ Raises:
144
+ ValueError: If trace_correlation_id is missing and fatal=True
145
+ """
146
+ if not trace_correlation_id:
147
+ error_msg = (
148
+ f"🚨 CRITICAL: Cannot extract trace_correlation_id!\n"
149
+ "\n"
150
+ f"Run ID: {run_id}\n"
151
+ f"Policy config keys: {sorted(policy_config.keys())}\n"
152
+ f"Inference URL: {policy_config.get('inference_url', 'NOT_SET')}\n"
153
+ "\n"
154
+ "Checked:\n"
155
+ f"1. policy_config['trace_correlation_id']: {policy_config.get('trace_correlation_id')}\n"
156
+ f"2. policy_config['trace']: {policy_config.get('trace')}\n"
157
+ f"3. inference_url query params\n"
158
+ "\n"
159
+ "Task app CANNOT proceed without trace_correlation_id.\n"
160
+ "This indicates the RL trainer is not sending it correctly.\n"
161
+ "\n"
162
+ "See monorepo/trace_creation_and_judgement.txt 'Fatal Guards' section.\n"
163
+ )
164
+
165
+ if fatal:
166
+ raise ValueError(error_msg)
167
+ else:
168
+ logger.error(error_msg)
169
+
170
+ return trace_correlation_id
171
+
172
+
173
+ def include_trace_correlation_id_in_response(
174
+ response_data: dict[str, Any],
175
+ trace_correlation_id: str | None,
176
+ run_id: str
177
+ ) -> dict[str, Any]:
178
+ """
179
+ Include trace_correlation_id in all required locations of rollout response.
180
+
181
+ Required locations (per Fatal Guards section):
182
+ 1. Top-level response["trace_correlation_id"]
183
+ 2. response["pipeline_metadata"]["trace_correlation_id"]
184
+ 3. Each trajectory["trace_correlation_id"]
185
+
186
+ Args:
187
+ response_data: RolloutResponse dict (from .model_dump())
188
+ trace_correlation_id: The correlation ID to include
189
+ run_id: Rollout run_id for logging
190
+
191
+ Returns:
192
+ Modified response_data with trace_correlation_id in all required places
193
+ """
194
+ if not trace_correlation_id:
195
+ logger.error(
196
+ "include_trace_correlation_id_in_response: missing trace_correlation_id "
197
+ "for run_id=%s - cannot include in response",
198
+ run_id
199
+ )
200
+ return response_data
201
+
202
+ # 1. Add to top-level (REQUIRED)
203
+ if "trace_correlation_id" not in response_data:
204
+ response_data["trace_correlation_id"] = trace_correlation_id
205
+ logger.info(
206
+ "include_trace_correlation_id: added to top-level run_id=%s cid=%s",
207
+ run_id,
208
+ trace_correlation_id
209
+ )
210
+
211
+ # 2. Add to pipeline_metadata (REQUIRED)
212
+ pipeline_meta = response_data.get("pipeline_metadata")
213
+ if not isinstance(pipeline_meta, dict):
214
+ pipeline_meta = {}
215
+ response_data["pipeline_metadata"] = pipeline_meta
216
+
217
+ if "trace_correlation_id" not in pipeline_meta:
218
+ pipeline_meta["trace_correlation_id"] = trace_correlation_id
219
+ logger.info(
220
+ "include_trace_correlation_id: added to pipeline_metadata run_id=%s cid=%s",
221
+ run_id,
222
+ trace_correlation_id
223
+ )
224
+
225
+ # 3. Add to each trajectory (REQUIRED)
226
+ trajectories = response_data.get("trajectories", [])
227
+ if isinstance(trajectories, list):
228
+ for idx, traj in enumerate(trajectories):
229
+ if isinstance(traj, dict) and "trace_correlation_id" not in traj:
230
+ traj["trace_correlation_id"] = trace_correlation_id
231
+ logger.debug(
232
+ "include_trace_correlation_id: added to trajectory[%d] run_id=%s cid=%s",
233
+ idx,
234
+ run_id,
235
+ trace_correlation_id
236
+ )
237
+
238
+ logger.info(
239
+ "include_trace_correlation_id: completed run_id=%s cid=%s "
240
+ "added to %d locations (top-level, metadata, %d trajectories)",
241
+ run_id,
242
+ trace_correlation_id,
243
+ 2 + len(trajectories),
244
+ len(trajectories)
245
+ )
246
+
247
+ return response_data
248
+
249
+
250
+ def verify_trace_correlation_id_in_response(
251
+ response_data: dict[str, Any],
252
+ expected_correlation_id: str | None,
253
+ run_id: str
254
+ ) -> bool:
255
+ """
256
+ Verify that trace_correlation_id is present in all required locations.
257
+
258
+ Args:
259
+ response_data: RolloutResponse dict to verify
260
+ expected_correlation_id: The correlation ID that should be present
261
+ run_id: Rollout run_id for logging
262
+
263
+ Returns:
264
+ True if all required locations have the correlation ID, False otherwise
265
+ """
266
+ if not expected_correlation_id:
267
+ logger.error(
268
+ "verify_trace_correlation_id: no expected_correlation_id provided for run_id=%s",
269
+ run_id
270
+ )
271
+ return False
272
+
273
+ errors = []
274
+
275
+ # Check top-level
276
+ if response_data.get("trace_correlation_id") != expected_correlation_id:
277
+ errors.append(
278
+ f"Top-level missing or mismatch: "
279
+ f"expected={expected_correlation_id} actual={response_data.get('trace_correlation_id')}"
280
+ )
281
+
282
+ # Check pipeline_metadata
283
+ pipeline_meta = response_data.get("pipeline_metadata", {})
284
+ if not isinstance(pipeline_meta, dict) or pipeline_meta.get("trace_correlation_id") != expected_correlation_id:
285
+ errors.append(
286
+ f"pipeline_metadata missing or mismatch: "
287
+ f"expected={expected_correlation_id} actual={pipeline_meta.get('trace_correlation_id') if isinstance(pipeline_meta, dict) else 'NOT_A_DICT'}"
288
+ )
289
+
290
+ # Check trajectories
291
+ trajectories = response_data.get("trajectories", [])
292
+ if isinstance(trajectories, list):
293
+ for idx, traj in enumerate(trajectories):
294
+ if isinstance(traj, dict) and traj.get("trace_correlation_id") != expected_correlation_id:
295
+ errors.append(
296
+ f"trajectory[{idx}] missing or mismatch: "
297
+ f"expected={expected_correlation_id} actual={traj.get('trace_correlation_id')}"
298
+ )
299
+
300
+ if errors:
301
+ logger.error(
302
+ "verify_trace_correlation_id: FAILED run_id=%s\n%s",
303
+ run_id,
304
+ "\n".join(errors)
305
+ )
306
+ return False
307
+
308
+ logger.info(
309
+ "verify_trace_correlation_id: PASSED run_id=%s cid=%s",
310
+ run_id,
311
+ expected_correlation_id
312
+ )
313
+ return True
314
+
315
+