synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (291)
  1. examples/multi_step/configs/README_verilog_rl.md +77 -0
  2. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  3. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  4. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  5. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  6. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
  7. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  8. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  9. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  10. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  11. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  12. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  13. examples/multi_step/readme.md +48 -0
  14. examples/multi_step/verilog_rl_lora.md +218 -0
  15. examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
  16. examples/sft/evaluate.py +2 -0
  17. examples/sft/generate_traces.py +2 -0
  18. examples/swe/task_app/grpo_swe_mini.py +56 -26
  19. examples/swe/task_app/hosted/rollout.py +42 -0
  20. examples/swe/task_app/hosted/test_service.py +5 -6
  21. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  22. examples/task_apps/TESTING.md +275 -0
  23. examples/task_apps/__init__.py +0 -0
  24. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  25. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  26. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  27. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  28. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  29. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  30. examples/task_apps/crafter/__init__.py +0 -0
  31. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  32. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  33. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  34. examples/task_apps/crafter/task_app/__init__.py +5 -0
  35. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
  36. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  37. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  38. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
  39. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  40. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
  41. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
  42. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
  43. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  44. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
  45. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  78. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  79. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  80. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  81. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  82. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  83. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  84. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  85. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  86. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  87. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  88. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  89. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  90. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  91. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  92. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  93. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  94. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  95. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  96. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  97. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  98. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  99. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  100. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  101. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  102. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  103. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  104. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  105. examples/task_apps/enron/__init__.py +1 -0
  106. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  107. examples/task_apps/enron/filter_sft.toml +5 -0
  108. examples/task_apps/enron/task_app/README.md +14 -0
  109. examples/task_apps/enron/task_app/__init__.py +1 -0
  110. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  111. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  112. examples/task_apps/enron/tests/__init__.py +4 -0
  113. examples/task_apps/enron/tests/conftest.py +115 -0
  114. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  115. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  116. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  117. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  118. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  119. examples/task_apps/math/__init__.py +0 -0
  120. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  121. examples/task_apps/pokemon_battle/__init__.py +2 -0
  122. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  123. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  124. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  125. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  126. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  127. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  128. examples/task_apps/pokemon_red/README.md +357 -0
  129. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  130. examples/task_apps/pokemon_red/__init__.py +3 -0
  131. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  132. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  133. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  134. examples/task_apps/pokemon_red/task_app.py +799 -0
  135. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  136. examples/task_apps/sokoban/README.md +307 -0
  137. examples/task_apps/sokoban/__init__.py +3 -0
  138. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  139. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  140. examples/task_apps/sokoban/filter_sft.toml +5 -0
  141. examples/task_apps/sokoban/task_app.py +1058 -0
  142. examples/task_apps/sokoban/tests/__init__.py +4 -0
  143. examples/task_apps/sokoban/tests/conftest.py +113 -0
  144. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  145. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  146. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  147. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  148. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  149. examples/task_apps/verilog/__init__.py +1 -0
  150. examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
  151. examples/task_apps/verilog/filter_sft.toml +5 -0
  152. examples/task_apps/verilog/task_app/README.md +12 -0
  153. examples/task_apps/verilog/task_app/__init__.py +1 -0
  154. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  155. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  156. examples/task_apps/verilog/tests/__init__.py +4 -0
  157. examples/task_apps/verilog/tests/conftest.py +115 -0
  158. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  159. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  160. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  161. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  162. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  163. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  164. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  165. examples/warming_up_to_rl/groq_test.py +2 -0
  166. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  167. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  168. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  169. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  170. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  171. examples/workflows/__init__.py +0 -0
  172. examples/workflows/math_rl/__init__.py +0 -0
  173. examples/workflows/math_rl/download_dataset.py +80 -0
  174. synth_ai/__init__.py +2 -2
  175. synth_ai/api/models/supported.py +1 -0
  176. synth_ai/api/train/builders.py +25 -11
  177. synth_ai/api/train/cli.py +12 -6
  178. synth_ai/api/train/configs/__init__.py +10 -10
  179. synth_ai/api/train/configs/rl.py +5 -4
  180. synth_ai/api/train/configs/sft.py +4 -3
  181. synth_ai/api/train/env_resolver.py +5 -2
  182. synth_ai/api/train/supported_algos.py +10 -5
  183. synth_ai/api/train/utils.py +7 -4
  184. synth_ai/cli/__init__.py +48 -59
  185. synth_ai/cli/_modal_wrapper.py +3 -2
  186. synth_ai/cli/_storage.py +4 -3
  187. synth_ai/cli/_validate_task_app.py +11 -0
  188. synth_ai/cli/balance.py +4 -3
  189. synth_ai/cli/calc.py +2 -2
  190. synth_ai/cli/demo.py +14 -7
  191. synth_ai/cli/legacy_root_backup.py +1 -1
  192. synth_ai/cli/recent.py +1 -1
  193. synth_ai/cli/rl_demo.py +8 -7
  194. synth_ai/cli/root.py +0 -97
  195. synth_ai/cli/status.py +1 -1
  196. synth_ai/cli/task_apps.py +1922 -190
  197. synth_ai/cli/traces.py +1 -1
  198. synth_ai/cli/tui.py +57 -0
  199. synth_ai/cli/turso.py +1 -1
  200. synth_ai/cli/watch.py +1 -1
  201. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
  202. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  203. synth_ai/environments/examples/enron/engine.py +7 -2
  204. synth_ai/environments/examples/enron/environment.py +68 -0
  205. synth_ai/environments/examples/red/engine.py +27 -0
  206. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  207. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  208. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  209. synth_ai/environments/examples/red/environment.py +60 -0
  210. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  211. synth_ai/environments/examples/verilog/engine.py +104 -12
  212. synth_ai/evals/client.py +58 -61
  213. synth_ai/jobs/client.py +16 -4
  214. synth_ai/judge_schemas.py +9 -9
  215. synth_ai/py.typed +0 -0
  216. synth_ai/task/__init__.py +24 -5
  217. synth_ai/task/apps/__init__.py +1 -0
  218. synth_ai/task/config.py +257 -0
  219. synth_ai/task/contracts.py +138 -39
  220. synth_ai/task/proxy.py +48 -56
  221. synth_ai/task/rubrics/__init__.py +56 -0
  222. synth_ai/task/rubrics/loaders.py +152 -0
  223. synth_ai/task/rubrics/models.py +57 -0
  224. synth_ai/task/rubrics/scoring.py +116 -0
  225. synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
  226. synth_ai/task/server.py +8 -7
  227. synth_ai/task/trace_correlation_helpers.py +315 -0
  228. synth_ai/task/validators.py +413 -6
  229. synth_ai/tracing_v3/abstractions.py +3 -3
  230. synth_ai/tracing_v3/decorators.py +7 -3
  231. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  232. synth_ai/tracing_v3/replica_sync.py +4 -4
  233. synth_ai/tracing_v3/serialization.py +5 -5
  234. synth_ai/tracing_v3/session_tracer.py +16 -6
  235. synth_ai/tracing_v3/storage/base.py +29 -29
  236. synth_ai/tracing_v3/storage/config.py +3 -3
  237. synth_ai/tracing_v3/trace_utils.py +317 -0
  238. synth_ai/tracing_v3/turso/daemon.py +8 -7
  239. synth_ai/tracing_v3/turso/native_manager.py +66 -43
  240. synth_ai/tracing_v3/utils.py +3 -3
  241. synth_ai/tui/__init__.py +5 -0
  242. synth_ai/tui/__main__.py +13 -0
  243. synth_ai/tui/cli/__init__.py +1 -0
  244. synth_ai/tui/cli/query_experiments.py +164 -0
  245. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  246. synth_ai/tui/dashboard.py +906 -0
  247. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
  248. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
  249. examples/agora_ex/README_MoE.md +0 -224
  250. examples/agora_ex/__init__.py +0 -7
  251. examples/agora_ex/agora_ex.py +0 -65
  252. examples/agora_ex/agora_ex_task_app.py +0 -590
  253. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
  254. examples/agora_ex/reward_fn_grpo-human.py +0 -129
  255. examples/agora_ex/system_prompt_CURRENT.md +0 -63
  256. examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
  257. examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
  258. examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
  259. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
  260. synth_ai/rubrics/__init__.py +0 -22
  261. synth_ai/task/rubrics.py +0 -219
  262. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  263. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  264. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  265. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  266. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  267. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  268. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  269. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  270. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  271. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  272. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  273. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  274. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  275. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  276. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  277. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  278. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  279. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  280. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  281. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  282. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  283. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  284. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  285. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  286. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  287. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  288. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
  289. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
  290. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
  291. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@
2
2
 
3
3
  from abc import ABC, abstractmethod
4
4
  from datetime import datetime
5
- from typing import Any
5
+ from typing import Any, Optional
6
6
 
7
7
  from ..abstractions import SessionTrace
8
8
 
@@ -28,7 +28,7 @@ class TraceStorage(ABC):
28
28
  pass
29
29
 
30
30
  @abstractmethod
31
- async def get_session_trace(self, session_id: str) -> dict[str, Any] | None:
31
+ async def get_session_trace(self, session_id: str) -> Optional[dict[str, Any]]:
32
32
  """Retrieve a session trace by ID.
33
33
 
34
34
  Args:
@@ -40,7 +40,7 @@ class TraceStorage(ABC):
40
40
  pass
41
41
 
42
42
  @abstractmethod
43
- async def query_traces(self, query: str, params: dict[str, Any] | None = None) -> Any:
43
+ async def query_traces(self, query: str, params: Optional[dict[str, Any]] = None) -> Any:
44
44
  """Execute a query and return results.
45
45
 
46
46
  Args:
@@ -55,9 +55,9 @@ class TraceStorage(ABC):
55
55
  @abstractmethod
56
56
  async def get_model_usage(
57
57
  self,
58
- start_date: datetime | None = None,
59
- end_date: datetime | None = None,
60
- model_name: str | None = None,
58
+ start_date: Optional[datetime] = None,
59
+ end_date: Optional[datetime] = None,
60
+ model_name: Optional[str] = None,
61
61
  ) -> Any:
62
62
  """Get model usage statistics.
63
63
 
@@ -95,8 +95,8 @@ class TraceStorage(ABC):
95
95
  self,
96
96
  session_id: str,
97
97
  *,
98
- created_at: datetime | None = None,
99
- metadata: dict[str, Any] | None = None,
98
+ created_at: Optional[datetime] = None,
99
+ metadata: Optional[dict[str, Any]] = None,
100
100
  ) -> None:
101
101
  """Ensure a session row exists for the given session id."""
102
102
  pass
@@ -108,10 +108,10 @@ class TraceStorage(ABC):
108
108
  *,
109
109
  step_id: str,
110
110
  step_index: int,
111
- turn_number: int | None = None,
112
- started_at: datetime | None = None,
113
- completed_at: datetime | None = None,
114
- metadata: dict[str, Any] | None = None,
111
+ turn_number: Optional[int] = None,
112
+ started_at: Optional[datetime] = None,
113
+ completed_at: Optional[datetime] = None,
114
+ metadata: Optional[dict[str, Any]] = None,
115
115
  ) -> int:
116
116
  """Ensure a timestep row exists and return its database id."""
117
117
  pass
@@ -121,9 +121,9 @@ class TraceStorage(ABC):
121
121
  self,
122
122
  session_id: str,
123
123
  *,
124
- timestep_db_id: int | None,
124
+ timestep_db_id: Optional[int],
125
125
  event: Any,
126
- metadata_override: dict[str, Any] | None = None,
126
+ metadata_override: Optional[dict[str, Any]] = None,
127
127
  ) -> int:
128
128
  """Insert an event and return its database id."""
129
129
  pass
@@ -133,12 +133,12 @@ class TraceStorage(ABC):
133
133
  self,
134
134
  session_id: str,
135
135
  *,
136
- timestep_db_id: int | None,
136
+ timestep_db_id: Optional[int],
137
137
  message_type: str,
138
138
  content: Any,
139
- event_time: float | None = None,
140
- message_time: int | None = None,
141
- metadata: dict[str, Any] | None = None,
139
+ event_time: Optional[float] = None,
140
+ message_time: Optional[int] = None,
141
+ metadata: Optional[dict[str, Any]] = None,
142
142
  ) -> int:
143
143
  """Insert a message row linked to a session/timestep."""
144
144
  pass
@@ -151,7 +151,7 @@ class TraceStorage(ABC):
151
151
  total_reward: int,
152
152
  achievements_count: int,
153
153
  total_steps: int,
154
- reward_metadata: dict | None = None,
154
+ reward_metadata: Optional[dict] = None,
155
155
  ) -> int:
156
156
  """Record an outcome reward for a session."""
157
157
  pass
@@ -162,13 +162,13 @@ class TraceStorage(ABC):
162
162
  session_id: str,
163
163
  *,
164
164
  event_id: int,
165
- message_id: int | None = None,
166
- turn_number: int | None = None,
165
+ message_id: Optional[int] = None,
166
+ turn_number: Optional[int] = None,
167
167
  reward_value: float = 0.0,
168
- reward_type: str | None = None,
169
- key: str | None = None,
170
- annotation: dict[str, Any] | None = None,
171
- source: str | None = None,
168
+ reward_type: Optional[str] = None,
169
+ key: Optional[str] = None,
170
+ annotation: Optional[dict[str, Any]] = None,
171
+ source: Optional[str] = None,
172
172
  ) -> int:
173
173
  """Record a reward tied to a specific event."""
174
174
  pass
@@ -178,8 +178,8 @@ class TraceStorage(ABC):
178
178
  self,
179
179
  experiment_id: str,
180
180
  name: str,
181
- description: str | None = None,
182
- configuration: dict[str, Any] | None = None,
181
+ description: Optional[str] = None,
182
+ configuration: Optional[dict[str, Any]] = None,
183
183
  ) -> str:
184
184
  """Create a new experiment."""
185
185
  raise NotImplementedError("Experiment management not supported by this backend")
@@ -189,14 +189,14 @@ class TraceStorage(ABC):
189
189
  raise NotImplementedError("Experiment management not supported by this backend")
190
190
 
191
191
  async def get_sessions_by_experiment(
192
- self, experiment_id: str, limit: int | None = None
192
+ self, experiment_id: str, limit: Optional[int] = None
193
193
  ) -> list[dict[str, Any]]:
194
194
  """Get all sessions for an experiment."""
195
195
  raise NotImplementedError("Experiment management not supported by this backend")
196
196
 
197
197
  # Batch operations
198
198
  async def batch_insert_sessions(
199
- self, traces: list[SessionTrace], batch_size: int | None = 1000
199
+ self, traces: list[SessionTrace], batch_size: Optional[int] = 1000
200
200
  ) -> list[str]:
201
201
  """Batch insert multiple session traces.
202
202
 
@@ -3,7 +3,7 @@
3
3
  import os
4
4
  from dataclasses import dataclass
5
5
  from enum import Enum
6
- from typing import Any
6
+ from typing import Any, Optional
7
7
 
8
8
 
9
9
  class StorageBackend(str, Enum):
@@ -14,7 +14,7 @@ class StorageBackend(str, Enum):
14
14
  POSTGRES = "postgres" # Future support
15
15
 
16
16
 
17
- def _is_enabled(value: str | None) -> bool:
17
+ def _is_enabled(value: Optional[str]) -> bool:
18
18
  if value is None:
19
19
  return False
20
20
  return value.lower() in {"1", "true", "yes", "on"}
@@ -25,7 +25,7 @@ class StorageConfig:
25
25
  """Configuration for storage backend."""
26
26
 
27
27
  backend: StorageBackend = StorageBackend.TURSO_NATIVE
28
- connection_string: str | None = None
28
+ connection_string: Optional[str] = None
29
29
 
30
30
  # Turso-specific settings
31
31
  turso_url: str = os.getenv("TURSO_DATABASE_URL", "sqlite+libsql://http://127.0.0.1:8080")
@@ -0,0 +1,317 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import sqlite3
5
+ from collections.abc import Sequence
6
+ from dataclasses import dataclass
7
+ from typing import Any
8
+
9
+ Row = sqlite3.Row
10
+
11
+
12
def connect(db_path: str | bytes | int) -> sqlite3.Connection:
    """Open a SQLite connection whose rows support access by column name.

    Args:
        db_path: Database location, passed straight to ``sqlite3.connect``.

    Returns:
        A connection with ``row_factory`` set to ``sqlite3.Row``.
    """
    connection = sqlite3.connect(db_path)
    # sqlite3.Row lets callers index results by column name (row["col"]).
    connection.row_factory = sqlite3.Row
    return connection
16
+
17
+
18
+ def _json_load(value: Any) -> Any:
19
+ if value is None:
20
+ return None
21
+ if isinstance(value, dict | list):
22
+ return value
23
+ if isinstance(value, bytes | bytearray):
24
+ value = value.decode("utf-8", errors="ignore")
25
+ try:
26
+ return json.loads(value)
27
+ except Exception:
28
+ return value
29
+
30
+
31
def fetch_crafter_sessions(
    conn: sqlite3.Connection,
    *,
    limit: int,
    metadata_filter: str | None = None,
    session_ids: Sequence[str] | None = None,
    min_event_count: int = 0,
) -> list[str]:
    """Return session ids from ``session_traces``, newest ``created_at`` first.

    When ``session_ids`` is non-empty, only those ids are looked up and the
    other filters (including ``limit``) are not applied. Otherwise sessions
    are optionally restricted to rows whose ``metadata`` contains
    ``metadata_filter`` as a substring and to sessions with at least
    ``min_event_count`` joined ``events`` rows, then capped at ``limit``.

    Args:
        conn: Connection whose rows support access by column name.
        limit: Maximum number of ids returned by the filtered query.
        metadata_filter: Substring matched against ``metadata`` via ``LIKE``.
        session_ids: Explicit ids to look up instead of filtering.
        min_event_count: Minimum number of events; ignored when not positive.

    Returns:
        Matching session ids ordered by ``created_at`` descending.
    """
    if session_ids:
        # Explicit-id lookup: one bound parameter per requested id.
        placeholders = ",".join("?" for _ in session_ids)
        explicit_sql = f"""
            SELECT session_id
            FROM session_traces
            WHERE session_id IN ({placeholders})
            ORDER BY created_at DESC
            """
        found = conn.execute(explicit_sql, tuple(session_ids)).fetchall()
        return [record["session_id"] for record in found]

    # Bound values are collected in the order their placeholders appear:
    # WHERE first, then HAVING, then LIMIT.
    bind_values: list[Any] = []

    where_sql = ""
    if metadata_filter:
        where_sql = "WHERE session_traces.metadata LIKE ?"
        bind_values.append(f"%{metadata_filter}%")

    having_sql = ""
    if min_event_count > 0:
        having_sql = "HAVING COUNT(events.id) >= ?"
        bind_values.append(min_event_count)

    bind_values.append(limit)

    query = f"""
        SELECT session_traces.session_id
        FROM session_traces
        LEFT JOIN events ON session_traces.session_id = events.session_id
        {where_sql}
        GROUP BY session_traces.session_id
        {having_sql}
        ORDER BY session_traces.created_at DESC
        LIMIT ?
        """
    matched = conn.execute(query, tuple(bind_values)).fetchall()
    return [record["session_id"] for record in matched]
78
+
79
+
80
def load_session_trace(conn: sqlite3.Connection, session_id: str) -> dict[str, Any]:
    """Assemble everything stored for one session into a plain dictionary.

    Reads the session's row from ``session_traces`` and then its timesteps,
    events, messages, event rewards and outcome rewards. JSON-encoded
    columns are decoded through ``_json_load``.

    Args:
        conn: Open SQLite connection. Rows are indexed by column name, so the
            connection presumably uses a name-addressable row factory such as
            ``sqlite3.Row`` -- confirm against the code that opens it.
        session_id: Identifier of the session to load.

    Returns:
        A dict with the keys ``session_id``, ``created_at``, ``metadata``,
        ``session_time_steps``, ``event_history``, ``events``,
        ``markov_blanket_message_history``, ``messages``, ``event_rewards``
        and ``outcome_rewards``. ``event_history``/``events`` refer to the
        same list object, as do ``markov_blanket_message_history``/``messages``.

    Raises:
        ValueError: If ``session_traces`` has no row for ``session_id``.
    """
    params = (session_id,)

    header = conn.execute(
        """
        SELECT session_id, created_at, metadata
        FROM session_traces
        WHERE session_id = ?
        """,
        params,
    ).fetchone()
    if not header:
        raise ValueError(f"Session {session_id} not found")

    # Run every query up front, in the same order as the tables are listed
    # in the docstring, before any payload is decoded.
    step_rows = conn.execute(
        """
        SELECT step_id,
               step_index,
               turn_number,
               started_at,
               completed_at,
               step_metadata
        FROM session_timesteps
        WHERE session_id = ?
        ORDER BY step_index ASC
        """,
        params,
    ).fetchall()

    raw_events = conn.execute(
        """
        SELECT *
        FROM events
        WHERE session_id = ?
        ORDER BY event_time ASC, id ASC
        """,
        params,
    ).fetchall()

    raw_messages = conn.execute(
        """
        SELECT *
        FROM messages
        WHERE session_id = ?
        ORDER BY event_time ASC, id ASC
        """,
        params,
    ).fetchall()

    raw_event_rewards = conn.execute(
        """
        SELECT *
        FROM event_rewards
        WHERE session_id = ?
        ORDER BY turn_number ASC, id ASC
        """,
        params,
    ).fetchall()

    raw_outcome_rewards = conn.execute(
        """
        SELECT *
        FROM outcome_rewards
        WHERE session_id = ?
        ORDER BY created_at ASC
        """,
        params,
    ).fetchall()

    session_meta = _json_load(header["metadata"]) or {}
    if isinstance(session_meta, dict):
        # Normalise a non-string episode id (e.g. an integer) to text.
        episode_id = session_meta.get("episode_id")
        if not (episode_id is None or isinstance(episode_id, str)):
            session_meta["episode_id"] = str(episode_id)

    # Event columns copied verbatim, grouped so the resulting key order
    # matches the order in which the payload has always been emitted.
    usage_columns = (
        "model_name",
        "provider",
        "input_tokens",
        "output_tokens",
        "total_tokens",
        "cost_usd",
        "latency_ms",
        "span_id",
        "trace_id",
    )
    outcome_columns = ("reward", "terminated", "truncated")

    events: list[dict[str, Any]] = []
    for row in raw_events:
        event: dict[str, Any] = {
            "id": row["id"],
            "event_type": row["event_type"],
            "system_instance_id": row["system_instance_id"],
            "time_record": {
                "event_time": row["event_time"],
                "message_time": row["message_time"],
                "created_at": row["created_at"],
            },
        }
        for column in usage_columns:
            event[column] = row[column]
        event["call_records"] = _json_load(row["call_records"]) or []
        for column in outcome_columns:
            event[column] = row[column]
        event["system_state_before"] = _json_load(row["system_state_before"])
        event["system_state_after"] = _json_load(row["system_state_after"])
        event["metadata"] = _json_load(row["metadata"]) or {}
        event["event_metadata"] = _json_load(row["event_metadata"])
        events.append(event)

    messages: list[dict[str, Any]] = []
    for row in raw_messages:
        messages.append(
            {
                "id": row["id"],
                "message_type": row["message_type"],
                "content": row["content"],
                "time_record": {
                    "event_time": row["event_time"],
                    "message_time": row["message_time"],
                    "timestamp": row["timestamp"],
                },
                "metadata": _json_load(row["metadata"]) or {},
            }
        )

    steps: list[dict[str, Any]] = []
    for row in step_rows:
        steps.append(
            {
                "step_id": row["step_id"],
                "step_index": row["step_index"],
                "turn_number": row["turn_number"],
                "started_at": row["started_at"],
                "completed_at": row["completed_at"],
                "metadata": _json_load(row["step_metadata"]) or {},
            }
        )

    step_rewards: list[dict[str, Any]] = []
    for row in raw_event_rewards:
        step_rewards.append(
            {
                "id": row["id"],
                "event_id": row["event_id"],
                "turn_number": row["turn_number"],
                "reward_value": row["reward_value"],
                "reward_type": row["reward_type"],
                "key": row["key"],
                "annotation": _json_load(row["annotation"]) or {},
                "source": row["source"],
                "created_at": row["created_at"],
            }
        )

    final_rewards: list[dict[str, Any]] = []
    for row in raw_outcome_rewards:
        final_rewards.append(
            {
                "id": row["id"],
                "total_reward": row["total_reward"],
                "reward_metadata": _json_load(row["reward_metadata"]) or {},
                "created_at": row["created_at"],
            }
        )

    trace: dict[str, Any] = {
        "session_id": header["session_id"],
        "created_at": header["created_at"],
        "metadata": session_meta,
        "session_time_steps": steps,
        # Events and messages are each exposed under two key names.
        "event_history": events,
        "events": events,
        "markov_blanket_message_history": messages,
        "messages": messages,
        "event_rewards": step_rewards,
        "outcome_rewards": final_rewards,
    }
    return trace
243
+
244
+
245
@dataclass
class DeterministicMetrics:
    """Reward totals and achievement counts aggregated for a single session."""

    # Session the figures below were aggregated for.
    session_id: str
    # Summed reward values of "unique_achievement_delta" event rewards.
    unique_achievement_reward: float
    # Summed reward values of "achievement_delta" event rewards.
    achievement_reward: float
    # Summed `total_reward` over the session's outcome reward rows.
    outcome_total_reward: float
    # Number of distinct names seen in event-reward `new_unique` annotations.
    unique_achievement_count: int
    # Number of distinct names seen in outcome-reward `achievements` metadata.
    final_achievement_count: int
253
+
254
+
255
def _named_entries(payload: Any, key: str) -> set[str]:
    """Return the string items stored under ``key`` of a decoded JSON object.

    Anything that did not decode to a dict (``None``, a list, a scalar)
    yields an empty set rather than failing on ``.get``.
    """
    if not isinstance(payload, dict):
        return set()
    return {item for item in payload.get(key) or [] if isinstance(item, str)}


def compute_deterministic_metrics(conn: sqlite3.Connection, session_id: str) -> DeterministicMetrics:
    """Aggregate the stored rewards of one session into summary metrics.

    Event rewards are summed per ``reward_type``: ``"unique_achievement_delta"``
    and ``"achievement_delta"`` are tracked separately, and other types are
    ignored. Names listed under ``new_unique`` in the annotation of
    unique-achievement rewards, and under ``achievements`` in outcome reward
    metadata, are counted after de-duplication.

    Args:
        conn: Open SQLite connection. Rows are indexed by column name, so the
            connection presumably uses a name-addressable row factory such as
            ``sqlite3.Row`` -- confirm against the code that opens it.
        session_id: Identifier of the session to summarise.

    Returns:
        A ``DeterministicMetrics`` for ``session_id``. All totals and counts
        are zero when the session has no reward rows.
    """
    params = (session_id,)

    reward_rows = conn.execute(
        """
        SELECT reward_type, reward_value, annotation
        FROM event_rewards
        WHERE session_id = ?
        """,
        params,
    ).fetchall()

    unique_total = 0.0
    all_total = 0.0
    unique_achievements: set[str] = set()

    for row in reward_rows:
        reward_type = row["reward_type"]
        # A NULL reward value counts as zero.
        value = float(row["reward_value"] or 0.0)
        if reward_type == "unique_achievement_delta":
            unique_total += value
            # The annotation column may hold JSON that is not an object;
            # such payloads contribute no achievement names.
            unique_achievements |= _named_entries(
                _json_load(row["annotation"]), "new_unique"
            )
        elif reward_type == "achievement_delta":
            all_total += value

    outcome_rows = conn.execute(
        """
        SELECT total_reward, reward_metadata
        FROM outcome_rewards
        WHERE session_id = ?
        """,
        params,
    ).fetchall()

    outcome_total = 0.0
    final_achievements: set[str] = set()
    for row in outcome_rows:
        outcome_total += float(row["total_reward"] or 0.0)
        final_achievements |= _named_entries(
            _json_load(row["reward_metadata"]), "achievements"
        )

    return DeterministicMetrics(
        session_id=session_id,
        unique_achievement_reward=unique_total,
        achievement_reward=all_total,
        outcome_total_reward=outcome_total,
        unique_achievement_count=len(unique_achievements),
        final_achievement_count=len(final_achievements),
    )
307
+
308
+
309
# Public API of this module; controls what `from <module> import *` exposes.
__all__ = [
    "DeterministicMetrics",
    "compute_deterministic_metrics",
    "connect",
    "fetch_crafter_sessions",
    "load_session_trace",
]
316
+
317
+
@@ -7,6 +7,7 @@ import time
7
7
 
8
8
  import requests
9
9
  from requests import RequestException
10
+ from typing import Any, Optional
10
11
 
11
12
  from ..config import CONFIG
12
13
 
@@ -16,9 +17,9 @@ class SqldDaemon:
16
17
 
17
18
  def __init__(
18
19
  self,
19
- db_path: str | None = None,
20
- http_port: int | None = None,
21
- binary_path: str | None = None,
20
+ db_path: Optional[str] = None,
21
+ http_port: Optional[int] = None,
22
+ binary_path: Optional[str] = None,
22
23
  ):
23
24
  """Initialize sqld daemon manager.
24
25
 
@@ -30,7 +31,7 @@ class SqldDaemon:
30
31
  self.db_path = db_path or CONFIG.sqld_db_path
31
32
  self.http_port = http_port or CONFIG.sqld_http_port
32
33
  self.binary_path = binary_path or self._find_binary()
33
- self.process: subprocess.Popen | None = None
34
+ self.process: Optional[Any] = None
34
35
 
35
36
  def _find_binary(self) -> str:
36
37
  """Find sqld binary in PATH."""
@@ -123,10 +124,10 @@ class SqldDaemon:
123
124
 
124
125
 
125
126
  # Convenience functions
126
- _daemon: SqldDaemon | None = None
127
+ _daemon: Optional[SqldDaemon] = None
127
128
 
128
129
 
129
- def start_sqld(db_path: str | None = None, port: int | None = None) -> SqldDaemon:
130
+ def start_sqld(db_path: Optional[str] = None, port: Optional[int] = None) -> SqldDaemon:
130
131
  """Start a global sqld daemon instance."""
131
132
  global _daemon
132
133
  if _daemon and _daemon.is_running():
@@ -145,6 +146,6 @@ def stop_sqld():
145
146
  _daemon = None
146
147
 
147
148
 
148
- def get_daemon() -> SqldDaemon | None:
149
def get_daemon() -> Optional[SqldDaemon]:
    """Return the module-level sqld daemon handle.

    Returns:
        The ``SqldDaemon`` currently held in the module global ``_daemon``,
        or ``None`` when that global is unset.
    """
    return _daemon