synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (291) hide show
  1. examples/multi_step/configs/README_verilog_rl.md +77 -0
  2. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  3. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  4. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  5. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  6. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
  7. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  8. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  9. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  10. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  11. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  12. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  13. examples/multi_step/readme.md +48 -0
  14. examples/multi_step/verilog_rl_lora.md +218 -0
  15. examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
  16. examples/sft/evaluate.py +2 -0
  17. examples/sft/generate_traces.py +2 -0
  18. examples/swe/task_app/grpo_swe_mini.py +56 -26
  19. examples/swe/task_app/hosted/rollout.py +42 -0
  20. examples/swe/task_app/hosted/test_service.py +5 -6
  21. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  22. examples/task_apps/TESTING.md +275 -0
  23. examples/task_apps/__init__.py +0 -0
  24. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  25. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  26. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  27. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  28. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  29. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  30. examples/task_apps/crafter/__init__.py +0 -0
  31. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  32. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  33. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  34. examples/task_apps/crafter/task_app/__init__.py +5 -0
  35. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
  36. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  37. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  38. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
  39. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  40. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
  41. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
  42. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
  43. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  44. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
  45. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  78. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  79. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  80. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  81. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  82. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  83. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  84. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  85. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  86. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  87. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  88. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  89. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  90. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  91. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  92. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  93. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  94. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  95. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  96. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  97. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  98. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  99. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  100. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  101. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  102. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  103. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  104. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  105. examples/task_apps/enron/__init__.py +1 -0
  106. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  107. examples/task_apps/enron/filter_sft.toml +5 -0
  108. examples/task_apps/enron/task_app/README.md +14 -0
  109. examples/task_apps/enron/task_app/__init__.py +1 -0
  110. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  111. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  112. examples/task_apps/enron/tests/__init__.py +4 -0
  113. examples/task_apps/enron/tests/conftest.py +115 -0
  114. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  115. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  116. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  117. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  118. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  119. examples/task_apps/math/__init__.py +0 -0
  120. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  121. examples/task_apps/pokemon_battle/__init__.py +2 -0
  122. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  123. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  124. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  125. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  126. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  127. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  128. examples/task_apps/pokemon_red/README.md +357 -0
  129. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  130. examples/task_apps/pokemon_red/__init__.py +3 -0
  131. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  132. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  133. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  134. examples/task_apps/pokemon_red/task_app.py +799 -0
  135. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  136. examples/task_apps/sokoban/README.md +307 -0
  137. examples/task_apps/sokoban/__init__.py +3 -0
  138. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  139. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  140. examples/task_apps/sokoban/filter_sft.toml +5 -0
  141. examples/task_apps/sokoban/task_app.py +1058 -0
  142. examples/task_apps/sokoban/tests/__init__.py +4 -0
  143. examples/task_apps/sokoban/tests/conftest.py +113 -0
  144. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  145. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  146. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  147. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  148. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  149. examples/task_apps/verilog/__init__.py +1 -0
  150. examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
  151. examples/task_apps/verilog/filter_sft.toml +5 -0
  152. examples/task_apps/verilog/task_app/README.md +12 -0
  153. examples/task_apps/verilog/task_app/__init__.py +1 -0
  154. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  155. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  156. examples/task_apps/verilog/tests/__init__.py +4 -0
  157. examples/task_apps/verilog/tests/conftest.py +115 -0
  158. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  159. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  160. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  161. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  162. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  163. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  164. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  165. examples/warming_up_to_rl/groq_test.py +2 -0
  166. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  167. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  168. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  169. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  170. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  171. examples/workflows/__init__.py +0 -0
  172. examples/workflows/math_rl/__init__.py +0 -0
  173. examples/workflows/math_rl/download_dataset.py +80 -0
  174. synth_ai/__init__.py +2 -2
  175. synth_ai/api/models/supported.py +1 -0
  176. synth_ai/api/train/builders.py +25 -11
  177. synth_ai/api/train/cli.py +12 -6
  178. synth_ai/api/train/configs/__init__.py +10 -10
  179. synth_ai/api/train/configs/rl.py +5 -4
  180. synth_ai/api/train/configs/sft.py +4 -3
  181. synth_ai/api/train/env_resolver.py +5 -2
  182. synth_ai/api/train/supported_algos.py +10 -5
  183. synth_ai/api/train/utils.py +7 -4
  184. synth_ai/cli/__init__.py +48 -59
  185. synth_ai/cli/_modal_wrapper.py +3 -2
  186. synth_ai/cli/_storage.py +4 -3
  187. synth_ai/cli/_validate_task_app.py +11 -0
  188. synth_ai/cli/balance.py +4 -3
  189. synth_ai/cli/calc.py +2 -2
  190. synth_ai/cli/demo.py +14 -7
  191. synth_ai/cli/legacy_root_backup.py +1 -1
  192. synth_ai/cli/recent.py +1 -1
  193. synth_ai/cli/rl_demo.py +8 -7
  194. synth_ai/cli/root.py +0 -97
  195. synth_ai/cli/status.py +1 -1
  196. synth_ai/cli/task_apps.py +1922 -190
  197. synth_ai/cli/traces.py +1 -1
  198. synth_ai/cli/tui.py +57 -0
  199. synth_ai/cli/turso.py +1 -1
  200. synth_ai/cli/watch.py +1 -1
  201. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
  202. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  203. synth_ai/environments/examples/enron/engine.py +7 -2
  204. synth_ai/environments/examples/enron/environment.py +68 -0
  205. synth_ai/environments/examples/red/engine.py +27 -0
  206. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  207. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  208. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  209. synth_ai/environments/examples/red/environment.py +60 -0
  210. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  211. synth_ai/environments/examples/verilog/engine.py +104 -12
  212. synth_ai/evals/client.py +58 -61
  213. synth_ai/jobs/client.py +16 -4
  214. synth_ai/judge_schemas.py +9 -9
  215. synth_ai/py.typed +0 -0
  216. synth_ai/task/__init__.py +24 -5
  217. synth_ai/task/apps/__init__.py +1 -0
  218. synth_ai/task/config.py +257 -0
  219. synth_ai/task/contracts.py +138 -39
  220. synth_ai/task/proxy.py +48 -56
  221. synth_ai/task/rubrics/__init__.py +56 -0
  222. synth_ai/task/rubrics/loaders.py +152 -0
  223. synth_ai/task/rubrics/models.py +57 -0
  224. synth_ai/task/rubrics/scoring.py +116 -0
  225. synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
  226. synth_ai/task/server.py +8 -7
  227. synth_ai/task/trace_correlation_helpers.py +315 -0
  228. synth_ai/task/validators.py +413 -6
  229. synth_ai/tracing_v3/abstractions.py +3 -3
  230. synth_ai/tracing_v3/decorators.py +7 -3
  231. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  232. synth_ai/tracing_v3/replica_sync.py +4 -4
  233. synth_ai/tracing_v3/serialization.py +5 -5
  234. synth_ai/tracing_v3/session_tracer.py +16 -6
  235. synth_ai/tracing_v3/storage/base.py +29 -29
  236. synth_ai/tracing_v3/storage/config.py +3 -3
  237. synth_ai/tracing_v3/trace_utils.py +317 -0
  238. synth_ai/tracing_v3/turso/daemon.py +8 -7
  239. synth_ai/tracing_v3/turso/native_manager.py +66 -43
  240. synth_ai/tracing_v3/utils.py +3 -3
  241. synth_ai/tui/__init__.py +5 -0
  242. synth_ai/tui/__main__.py +13 -0
  243. synth_ai/tui/cli/__init__.py +1 -0
  244. synth_ai/tui/cli/query_experiments.py +164 -0
  245. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  246. synth_ai/tui/dashboard.py +906 -0
  247. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
  248. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
  249. examples/agora_ex/README_MoE.md +0 -224
  250. examples/agora_ex/__init__.py +0 -7
  251. examples/agora_ex/agora_ex.py +0 -65
  252. examples/agora_ex/agora_ex_task_app.py +0 -590
  253. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
  254. examples/agora_ex/reward_fn_grpo-human.py +0 -129
  255. examples/agora_ex/system_prompt_CURRENT.md +0 -63
  256. examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
  257. examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
  258. examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
  259. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
  260. synth_ai/rubrics/__init__.py +0 -22
  261. synth_ai/task/rubrics.py +0 -219
  262. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  263. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  264. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  265. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  266. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  267. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  268. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  269. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  270. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  271. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  272. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  273. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  274. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  275. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  276. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  277. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  278. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  279. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  280. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  281. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  282. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  283. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  284. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  285. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  286. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  287. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  288. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
  289. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
  290. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
  291. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
synth_ai/cli/traces.py CHANGED
@@ -11,7 +11,7 @@ from rich import box
11
11
  from rich.console import Console
12
12
  from rich.table import Table
13
13
 
14
- from ._storage import load_storage
14
+ from synth_ai.cli._storage import load_storage
15
15
 
16
16
 
17
17
  def register(cli):
synth_ai/cli/tui.py ADDED
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CLI: Interactive TUI dashboard for Synth AI.
4
+ """
5
+
6
+ import os
7
+
8
+ import click
9
+ from rich.console import Console
10
+
11
+
12
+ def register(cli):
13
+ @cli.command()
14
+ @click.option(
15
+ "--url",
16
+ "db_url",
17
+ default="sqlite+libsql://http://127.0.0.1:8080",
18
+ help="Database URL (default: sqlite+libsql://http://127.0.0.1:8080)",
19
+ )
20
+ @click.option("--debug", is_flag=True, help="Enable debug logging")
21
+ def tui(db_url: str, debug: bool):
22
+ """Launch interactive TUI dashboard showing experiments, balance, and active runs."""
23
+ console = Console()
24
+
25
+ # Import here to avoid circular imports and handle optional dependencies
26
+ try:
27
+ from synth_ai.tui.dashboard import main as tui_main
28
+ except (ImportError, ModuleNotFoundError) as e:
29
+ console.print("[red]Error:[/red] TUI dashboard not available.")
30
+ console.print(f"Missing dependencies: {e}")
31
+ console.print("Install with: pip install textual")
32
+ return
33
+ except Exception as e:
34
+ # Handle other import errors (like missing libsql, type annotation issues, etc.)
35
+ console.print("[red]Error:[/red] TUI dashboard not available.")
36
+ console.print("This may be due to missing dependencies or Python version compatibility.")
37
+ console.print("Try: pip install textual libsql")
38
+ console.print("If using Python < 3.10, you may need to update Python or install eval_type_backport.")
39
+ return
40
+
41
+ # Set environment variables for the TUI to use
42
+ os.environ.setdefault("TUI_DB_URL", db_url)
43
+ if debug:
44
+ os.environ["TUI_DEBUG"] = "1"
45
+
46
+ # Run the TUI by calling the module directly with sanitized argv
47
+ try:
48
+ tui_args = ["--url", db_url]
49
+ if debug:
50
+ tui_args.append("--debug")
51
+ tui_main(tui_args)
52
+ except KeyboardInterrupt:
53
+ console.print("\n[blue]TUI closed.[/blue]")
54
+ except Exception as e:
55
+ console.print(f"\n[red]Error running TUI:[/red] {e}")
56
+ if debug:
57
+ raise
synth_ai/cli/turso.py CHANGED
@@ -6,7 +6,7 @@ import subprocess
6
6
 
7
7
  import click
8
8
 
9
- from .root import SQLD_VERSION, find_sqld_binary, install_sqld
9
+ from synth_ai.cli.root import SQLD_VERSION, find_sqld_binary, install_sqld
10
10
 
11
11
 
12
12
  def register(cli: click.Group) -> None:
synth_ai/cli/watch.py CHANGED
@@ -16,7 +16,7 @@ from rich.console import Console, Group
16
16
  from rich.panel import Panel
17
17
  from rich.table import Table
18
18
 
19
- from ._storage import load_storage
19
+ from synth_ai.cli._storage import load_storage
20
20
 
21
21
 
22
22
  def _open_db(db_url: str):
@@ -1,7 +1,7 @@
1
1
  """Compatibility wrapper for the GRPO Crafter task app.
2
2
 
3
3
  This module now delegates to the TaskAppConfig defined in the local example at
4
- `examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
4
+ `examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
5
5
  (running the file directly or targeting `fastapi_app` from external tooling).
6
6
  Prefer using `uvx synth-ai serve grpo-crafter` for local development and testing.
7
7
  """
@@ -21,31 +21,43 @@ from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
21
21
 
22
22
 
23
23
  def _load_build_config():
24
- # Find synth_ai package location to locate examples/
25
- import synth_ai
24
+ """Load the example's build_config, preferring package import with file fallback."""
25
+ # First try to import by package name (installed 'examples' package)
26
+ try:
27
+ module = importlib.import_module("examples.task_apps.crafter.task_app.grpo_crafter")
28
+ return module.build_config # type: ignore[attr-defined]
29
+ except Exception:
30
+ # Fallback: locate the file within the installed synth_ai distribution and exec it
31
+ import sys as _sys
32
+
33
+ import synth_ai
34
+
35
+ synth_ai_path = Path(synth_ai.__file__).resolve().parent.parent
36
+ module_path = (
37
+ synth_ai_path / "examples" / "task_apps" / "crafter" / "task_app" / "grpo_crafter.py"
38
+ )
26
39
 
27
- synth_ai_path = Path(synth_ai.__file__).resolve().parent.parent
28
- module_path = synth_ai_path / "examples" / "warming_up_to_rl" / "task_app" / "grpo_crafter.py"
40
+ if not module_path.exists():
41
+ raise ImportError(
42
+ f"Could not find task app module at {module_path}. Make sure you're running from the synth-ai repository."
43
+ ) from None
29
44
 
30
- if not module_path.exists():
31
- raise ImportError(
32
- f"Could not find task app module at {module_path}. Make sure you're running from the synth-ai repository."
45
+ spec = importlib.util.spec_from_file_location(
46
+ "examples.task_apps.crafter.task_app.grpo_crafter", module_path
33
47
  )
48
+ if spec is None or spec.loader is None:
49
+ raise ImportError(f"Could not load task app module at {module_path}") from None
34
50
 
35
- spec = importlib.util.spec_from_file_location(
36
- "warming_up_to_rl.task_app.grpo_crafter", module_path
37
- )
38
- if spec is None or spec.loader is None:
39
- raise ImportError(f"Could not load task app module at {module_path}")
40
- module = importlib.util.module_from_spec(spec)
41
- spec.loader.exec_module(module)
42
- return module.build_config
51
+ module = importlib.util.module_from_spec(spec)
52
+ _sys.modules[spec.name] = module
53
+ spec.loader.exec_module(module)
54
+ return module.build_config # type: ignore[attr-defined]
43
55
 
44
56
 
45
57
  build_config = _load_build_config()
46
58
 
47
59
 
48
- APP_ID = "grpo-crafter"
60
+ APP_ID = "grpo-crafter-task-app"
49
61
 
50
62
 
51
63
  def _build_base_config() -> TaskAppConfig:
@@ -397,7 +397,7 @@ class CrafterClassicEnvironment(StatefulEnvironment, ReproducibleEnvironment[Cra
397
397
  priv_state, pub_state, self.custom_step_observation_callable
398
398
  )
399
399
  total_step_time = time.time() - step_start_time
400
- logger.info(
400
+ logger.debug(
401
401
  f"CrafterClassic step completed in {total_step_time:.3f}s (interact: {interact_time:.3f}s)"
402
402
  )
403
403
  return obs
@@ -26,7 +26,10 @@ from synth_ai.environments.examples.enron.taskset import EnronTaskInstance
26
26
 
27
27
  # SQLite-backed helpers
28
28
  from synth_ai.environments.stateful.engine import StatefulEngine, StatefulEngineSnapshot
29
- from synth_ai.zyk import LM # Import LM class
29
+ try: # pragma: no cover - optional dependency
30
+ from synth_ai.zyk import LM # type: ignore
31
+ except ImportError: # pragma: no cover - fallback when LM unavailable
32
+ LM = None
30
33
 
31
34
  # --------------------------------------------------------------------------- actions
32
35
  ACTION_SEARCH = "search"
@@ -244,7 +247,9 @@ class EnronEngine(StatefulEngine):
244
247
  async def determine_if_answer_is_correct(
245
248
  question: str, gold_answer: str, agent_answer: str
246
249
  ) -> bool:
247
- # Instantiate LM for the judge
250
+ if LM is None:
251
+ return gold_answer.strip().lower() == agent_answer.strip().lower()
252
+
248
253
  llm = LM(model_name="gpt-4.1-nano", formatting_model_name="gpt-4.1-nano", temperature=0.0)
249
254
 
250
255
  system_prompt = (
@@ -9,6 +9,7 @@ from synth_ai.environments.environment.shared_engine import (
9
9
  InternalObservation,
10
10
  )
11
11
  from synth_ai.environments.environment.tools import (
12
+ AbstractTool,
12
13
  TOOL_REGISTRY,
13
14
  EnvToolCall,
14
15
  ToolResult,
@@ -65,6 +66,73 @@ class Terminate(EnvToolCall):
65
66
  self.action = (ACTION_ANSWER, "")
66
67
 
67
68
 
69
+ class TerminateArgs(BaseModel):
70
+ pass
71
+
72
+
73
+ class SearchEmailsTool(AbstractTool):
74
+ name = "search_emails"
75
+ call_schema = SearchEmailsArgs
76
+
77
+ def __init__(self, engine: EnronEngine):
78
+ self.engine = engine
79
+
80
+ async def __call__(self, call: EnvToolCall) -> ToolResult:
81
+ try:
82
+ args = self.call_schema.model_validate(call.args or {})
83
+ results = await self.engine.search_emails_action(args.model_dump())
84
+ return ToolResult(ok=True, payload={"search_results": results})
85
+ except Exception as exc: # pragma: no cover - runtime safety
86
+ return ToolResult(ok=False, error=str(exc))
87
+
88
+
89
+ class ReadEmailTool(AbstractTool):
90
+ name = "read_email"
91
+ call_schema = ReadEmailArgs
92
+
93
+ def __init__(self, engine: EnronEngine):
94
+ self.engine = engine
95
+
96
+ async def __call__(self, call: EnvToolCall) -> ToolResult:
97
+ try:
98
+ args = self.call_schema.model_validate(call.args or {})
99
+ email = await self.engine.read_email_action(args.message_id)
100
+ return ToolResult(ok=True, payload={"email": email})
101
+ except Exception as exc: # pragma: no cover
102
+ return ToolResult(ok=False, error=str(exc))
103
+
104
+
105
+ class AnswerQuestionTool(AbstractTool):
106
+ name = "answer_question"
107
+ call_schema = AnswerQuestionArgs
108
+
109
+ def __init__(self, engine: EnronEngine):
110
+ self.engine = engine
111
+
112
+ async def __call__(self, call: EnvToolCall) -> ToolResult:
113
+ try:
114
+ args = self.call_schema.model_validate(call.args or {})
115
+ await self.engine.answer_question_action(args.answer)
116
+ return ToolResult(ok=True, payload={"status": "answer_recorded"})
117
+ except Exception as exc: # pragma: no cover
118
+ return ToolResult(ok=False, error=str(exc))
119
+
120
+
121
+ class TerminateTool(AbstractTool):
122
+ name = "terminate"
123
+ call_schema = TerminateArgs
124
+
125
+ def __init__(self, engine: EnronEngine):
126
+ self.engine = engine
127
+
128
+ async def __call__(self, call: EnvToolCall) -> ToolResult:
129
+ try:
130
+ await self.engine.answer_question_action("")
131
+ return ToolResult(ok=True, payload={"status": "terminated"})
132
+ except Exception as exc: # pragma: no cover
133
+ return ToolResult(ok=False, error=str(exc))
134
+
135
+
68
136
  # -------- observation callable (optional for formatted observations)
69
137
  class SynthEnronObservationCallable(GetObservationCallable):
70
138
  async def get_observation(
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
  from dataclasses import dataclass
5
+ import os
5
6
  from pathlib import Path
6
7
  from typing import Any, Dict, List, Optional
7
8
 
@@ -122,6 +123,13 @@ class GameSystemState:
122
123
  menu_state: int
123
124
  text_box_active: bool
124
125
  warp_flag: int
126
+ # Battle-specific data
127
+ enemy_hp_current: int = 0
128
+ enemy_hp_max: int = 0
129
+ enemy_hp_percentage: float = 0.0
130
+ enemy_level: int = 0
131
+ enemy_species_id: int = 0
132
+ battle_turn: int = 0
125
133
  # TODO: Add when available
126
134
  # current_menu_type: str = ""
127
135
  # dialogue_speaker: str = ""
@@ -278,12 +286,21 @@ class PokemonRedEngine(StatefulEngine, IReproducibleEngine):
278
286
 
279
287
  def _get_rom_path(self) -> Path:
280
288
  """Get path to Pokemon Red ROM file"""
289
+ # Highest priority: explicit environment variable
290
+ env_rom = os.getenv("POKEMON_RED_ROM")
291
+ if env_rom:
292
+ p = Path(env_rom).expanduser()
293
+ if p.exists():
294
+ return p
295
+
281
296
  # Check several possible locations
282
297
  possible_paths = [
283
298
  Path(__file__).parent / "roms" / "pokemon_red.gb",
284
299
  Path(__file__).parent / "roms" / "PokemonRed.gb",
285
300
  Path(__file__).parent / "vendor" / "pokemon_red.gb",
286
301
  Path.home() / "Games" / "pokemon_red.gb",
302
+ # Common example location where users may drop the ROM
303
+ Path(__file__).resolve().parents[5] / "examples" / "task_apps" / "pokemon_red" / "Pokemon - Red Version (USA, Europe) (SGB Enhanced).gb",
287
304
  ]
288
305
 
289
306
  for path in possible_paths:
@@ -533,6 +550,12 @@ class PokemonRedEngine(StatefulEngine, IReproducibleEngine):
533
550
  menu_state=int(current_state.get("menu_state", 0)),
534
551
  text_box_active=bool(current_state.get("text_box_active", False)),
535
552
  warp_flag=int(current_state.get("warp_flag", 0)),
553
+ enemy_hp_current=int(current_state.get("enemy_hp_current", 0)),
554
+ enemy_hp_max=int(current_state.get("enemy_hp_max", 0)),
555
+ enemy_hp_percentage=float(current_state.get("enemy_hp_percentage", 0.0)),
556
+ enemy_level=int(current_state.get("enemy_level", 0)),
557
+ enemy_species_id=int(current_state.get("enemy_species_id", 0)),
558
+ battle_turn=int(current_state.get("battle_turn", 0)),
536
559
  ),
537
560
  )
538
561
 
@@ -613,6 +636,10 @@ class PokemonRedEngine(StatefulEngine, IReproducibleEngine):
613
636
  "prev_in_battle": bool(prev_state.get("in_battle", False)),
614
637
  "prev_party_level": int(prev_state.get("party_level", 0)),
615
638
  "prev_party_xp": int(prev_state.get("party_xp", 0)),
639
+ "prev_party_count": int(prev_state.get("party_count", 0)),
640
+ "prev_text_box_active": bool(prev_state.get("text_box_active", False)),
641
+ "prev_enemy_hp_current": int(prev_state.get("enemy_hp_current", 0)),
642
+ "prev_enemy_hp_percentage": float(prev_state.get("enemy_hp_percentage", 0.0)),
616
643
  },
617
644
  )
618
645
  except Exception as e:
@@ -6,6 +6,13 @@ PLAYER_Y = 0xD361 # player Y coordinate
6
6
  IN_BATTLE_FLAG = 0xD057 # battle state flag
7
7
  BATTLE_OUTCOME = 0xD089 # 0=ongoing, 1=win, 2=lose
8
8
 
9
+ # Battle-specific data
10
+ ENEMY_HP_CURRENT = 0xCFE6 # enemy Pokemon current HP (2 bytes)
11
+ ENEMY_HP_MAX = 0xCFE8 # enemy Pokemon max HP (2 bytes)
12
+ ENEMY_LEVEL = 0xD127 # enemy Pokemon level
13
+ ENEMY_SPECIES = 0xCFE5 # enemy Pokemon species ID
14
+ BATTLE_TURN = 0xCC2F # current battle turn counter
15
+
9
16
  # Party Pokemon data (up to 6 Pokemon)
10
17
  PARTY_COUNT = 0xD163 # number of Pokemon in party (0-6)
11
18
  PARTY_SPECIES = 0xD164 # species of each Pokemon (6 bytes)