synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (354) hide show
  1. examples/README.md +1 -0
  2. examples/analyze_semantic_words.sh +2 -2
  3. examples/blog_posts/pokemon_vl/README.md +98 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  5. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  6. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  7. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  8. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  9. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  12. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  13. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  15. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  16. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  17. examples/multi_step/SFT_README.md +147 -0
  18. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
  20. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  21. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  22. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  23. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  24. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  25. examples/multi_step/convert_traces_to_sft.py +84 -0
  26. examples/multi_step/run_sft_qwen30b.sh +45 -0
  27. examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
  28. examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
  29. examples/qwen_coder/configs/coder_lora_small.toml +1 -2
  30. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  31. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  32. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  33. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  34. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  35. examples/qwen_vl/QUICKSTART.md +327 -0
  36. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  37. examples/qwen_vl/README.md +152 -0
  38. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  39. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  40. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  41. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  42. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  43. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  44. examples/qwen_vl/__init__.py +2 -0
  45. examples/qwen_vl/collect_data_via_cli.md +415 -0
  46. examples/qwen_vl/collect_vision_traces.py +368 -0
  47. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  48. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  49. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  50. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  51. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  52. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  53. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  54. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  55. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  56. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  57. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  58. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  59. examples/qwen_vl/run_vision_comparison.sh +61 -0
  60. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  61. examples/qwen_vl/test_image_validation.py +201 -0
  62. examples/qwen_vl/test_sft_vision_data.py +110 -0
  63. examples/rl/README.md +6 -6
  64. examples/rl/configs/eval_base_qwen.toml +17 -0
  65. examples/rl/configs/eval_rl_qwen.toml +13 -0
  66. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  67. examples/rl/configs/rl_from_base_qwen17.toml +79 -0
  68. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  69. examples/rl/run_eval.py +436 -0
  70. examples/rl/run_rl_and_save.py +111 -0
  71. examples/rl/task_app/README.md +21 -0
  72. examples/rl/task_app/math_single_step.py +990 -0
  73. examples/rl/task_app/math_task_app.py +111 -0
  74. examples/run_crafter_demo.sh +2 -2
  75. examples/sft/README.md +6 -6
  76. examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
  77. examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
  78. examples/sft/evaluate.py +2 -4
  79. examples/sft/export_dataset.py +7 -4
  80. examples/swe/task_app/README.md +33 -3
  81. examples/swe/task_app/grpo_swe_mini.py +4 -1
  82. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  83. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  84. examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
  85. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  86. examples/swe/task_app/hosted/policy_routes.py +0 -2
  87. examples/swe/task_app/hosted/rollout.py +0 -8
  88. examples/swe/task_app/morph_backend.py +178 -0
  89. examples/task_apps/crafter/task_app/README.md +1 -1
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
  91. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
  97. examples/task_apps/enron/__init__.py +1 -0
  98. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  99. examples/task_apps/math/README.md +1 -2
  100. examples/task_apps/pokemon_red/README.md +3 -4
  101. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  102. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  103. examples/task_apps/pokemon_red/task_app.py +36 -5
  104. examples/task_apps/sokoban/README.md +2 -3
  105. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  106. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  107. examples/vlm/README.md +3 -3
  108. examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
  109. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  110. examples/vlm/filter_image_rows.py +1 -1
  111. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  112. examples/warming_up_to_rl/_utils.py +92 -0
  113. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  114. examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
  115. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  116. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  117. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  118. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  119. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  120. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  121. examples/warming_up_to_rl/readme.md +63 -132
  122. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  123. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  124. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  125. examples/warming_up_to_rl/task_app/README.md +42 -0
  126. examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
  127. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  128. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  129. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  130. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  131. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  132. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  133. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  134. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  135. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  136. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  137. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  138. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  139. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  147. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  148. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  152. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  153. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  154. synth_ai/__init__.py +44 -30
  155. synth_ai/_utils/__init__.py +47 -0
  156. synth_ai/_utils/base_url.py +10 -0
  157. synth_ai/_utils/http.py +10 -0
  158. synth_ai/_utils/prompts.py +10 -0
  159. synth_ai/_utils/task_app_state.py +12 -0
  160. synth_ai/_utils/user_config.py +10 -0
  161. synth_ai/api/models/supported.py +144 -7
  162. synth_ai/api/train/__init__.py +13 -1
  163. synth_ai/api/train/builders.py +9 -3
  164. synth_ai/api/train/cli.py +155 -17
  165. synth_ai/api/train/config_finder.py +18 -11
  166. synth_ai/api/train/configs/__init__.py +8 -1
  167. synth_ai/api/train/configs/rl.py +32 -7
  168. synth_ai/api/train/configs/sft.py +6 -2
  169. synth_ai/api/train/configs/shared.py +59 -2
  170. synth_ai/api/train/env_resolver.py +13 -10
  171. synth_ai/auth/credentials.py +119 -0
  172. synth_ai/cli/__init__.py +61 -69
  173. synth_ai/cli/_modal_wrapper.py +7 -5
  174. synth_ai/cli/_typer_patch.py +0 -2
  175. synth_ai/cli/_validate_task_app.py +22 -4
  176. synth_ai/cli/commands/__init__.py +17 -0
  177. synth_ai/cli/commands/demo/__init__.py +6 -0
  178. synth_ai/cli/commands/demo/core.py +163 -0
  179. synth_ai/cli/commands/deploy/__init__.py +23 -0
  180. synth_ai/cli/commands/deploy/core.py +614 -0
  181. synth_ai/cli/commands/deploy/errors.py +72 -0
  182. synth_ai/cli/commands/deploy/validation.py +11 -0
  183. synth_ai/cli/commands/eval/__init__.py +19 -0
  184. synth_ai/cli/commands/eval/core.py +1109 -0
  185. synth_ai/cli/commands/eval/errors.py +81 -0
  186. synth_ai/cli/commands/eval/validation.py +133 -0
  187. synth_ai/cli/commands/filter/__init__.py +12 -0
  188. synth_ai/cli/commands/filter/core.py +388 -0
  189. synth_ai/cli/commands/filter/errors.py +55 -0
  190. synth_ai/cli/commands/filter/validation.py +77 -0
  191. synth_ai/cli/commands/help/__init__.py +177 -0
  192. synth_ai/cli/commands/help/core.py +73 -0
  193. synth_ai/cli/commands/status/__init__.py +64 -0
  194. synth_ai/cli/commands/status/client.py +192 -0
  195. synth_ai/cli/commands/status/config.py +92 -0
  196. synth_ai/cli/commands/status/errors.py +20 -0
  197. synth_ai/cli/commands/status/formatters.py +164 -0
  198. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  199. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  200. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  201. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  202. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  203. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  204. synth_ai/cli/commands/status/utils.py +114 -0
  205. synth_ai/cli/commands/train/__init__.py +53 -0
  206. synth_ai/cli/commands/train/core.py +21 -0
  207. synth_ai/cli/commands/train/errors.py +117 -0
  208. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  209. synth_ai/cli/commands/train/judge_validation.py +304 -0
  210. synth_ai/cli/commands/train/validation.py +443 -0
  211. synth_ai/cli/demo.py +2 -162
  212. synth_ai/cli/deploy/__init__.py +28 -0
  213. synth_ai/cli/deploy/core.py +5 -0
  214. synth_ai/cli/deploy/errors.py +23 -0
  215. synth_ai/cli/deploy/validation.py +5 -0
  216. synth_ai/cli/eval/__init__.py +36 -0
  217. synth_ai/cli/eval/core.py +5 -0
  218. synth_ai/cli/eval/errors.py +31 -0
  219. synth_ai/cli/eval/validation.py +5 -0
  220. synth_ai/cli/filter/__init__.py +28 -0
  221. synth_ai/cli/filter/core.py +5 -0
  222. synth_ai/cli/filter/errors.py +23 -0
  223. synth_ai/cli/filter/validation.py +5 -0
  224. synth_ai/cli/legacy_root_backup.py +3 -1
  225. synth_ai/cli/lib/__init__.py +10 -0
  226. synth_ai/cli/lib/task_app_discovery.py +7 -0
  227. synth_ai/cli/lib/task_app_env.py +518 -0
  228. synth_ai/cli/modal_serve/__init__.py +12 -0
  229. synth_ai/cli/modal_serve/core.py +14 -0
  230. synth_ai/cli/modal_serve/errors.py +8 -0
  231. synth_ai/cli/modal_serve/validation.py +11 -0
  232. synth_ai/cli/recent.py +2 -1
  233. synth_ai/cli/serve/__init__.py +12 -0
  234. synth_ai/cli/serve/core.py +14 -0
  235. synth_ai/cli/serve/errors.py +8 -0
  236. synth_ai/cli/serve/validation.py +11 -0
  237. synth_ai/cli/setup.py +21 -0
  238. synth_ai/cli/status.py +7 -126
  239. synth_ai/cli/task_app_deploy.py +7 -0
  240. synth_ai/cli/task_app_list.py +25 -0
  241. synth_ai/cli/task_app_modal_serve.py +11 -0
  242. synth_ai/cli/task_app_serve.py +11 -0
  243. synth_ai/cli/task_apps.py +110 -1499
  244. synth_ai/cli/traces.py +1 -1
  245. synth_ai/cli/train/__init__.py +12 -0
  246. synth_ai/cli/train/core.py +21 -0
  247. synth_ai/cli/train/errors.py +8 -0
  248. synth_ai/cli/train/validation.py +24 -0
  249. synth_ai/cli/train.py +5 -0
  250. synth_ai/cli/turso.py +1 -1
  251. synth_ai/cli/watch.py +1 -1
  252. synth_ai/demos/__init__.py +10 -0
  253. synth_ai/demos/core/__init__.py +28 -1
  254. synth_ai/demos/crafter/__init__.py +1 -0
  255. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  256. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  257. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  258. synth_ai/demos/demo_registry.py +176 -0
  259. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  260. synth_ai/demos/math/__init__.py +1 -0
  261. synth_ai/demos/math/_common.py +16 -0
  262. synth_ai/demos/math/app.py +38 -0
  263. synth_ai/demos/math/config.toml +76 -0
  264. synth_ai/demos/math/deploy_modal.py +54 -0
  265. synth_ai/demos/math/modal_task_app.py +702 -0
  266. synth_ai/demos/math/task_app_entry.py +51 -0
  267. synth_ai/environments/environment/core.py +7 -1
  268. synth_ai/environments/examples/bandit/engine.py +0 -1
  269. synth_ai/environments/examples/bandit/environment.py +0 -1
  270. synth_ai/environments/examples/red/engine.py +33 -12
  271. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  272. synth_ai/environments/examples/red/environment.py +26 -0
  273. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  274. synth_ai/environments/examples/wordle/environment.py +0 -1
  275. synth_ai/evals/base.py +16 -5
  276. synth_ai/evals/client.py +1 -1
  277. synth_ai/http.py +8 -22
  278. synth_ai/inference/client.py +1 -1
  279. synth_ai/judge_schemas.py +4 -5
  280. synth_ai/learning/client.py +1 -1
  281. synth_ai/learning/health.py +1 -1
  282. synth_ai/learning/jobs.py +1 -1
  283. synth_ai/learning/rl/client.py +4 -2
  284. synth_ai/learning/rl/env_keys.py +1 -1
  285. synth_ai/learning/rl/secrets.py +1 -1
  286. synth_ai/learning/sft/client.py +1 -1
  287. synth_ai/learning/sft/data.py +407 -4
  288. synth_ai/learning/validators.py +4 -1
  289. synth_ai/streaming/__init__.py +29 -0
  290. synth_ai/streaming/config.py +94 -0
  291. synth_ai/streaming/handlers.py +469 -0
  292. synth_ai/streaming/streamer.py +301 -0
  293. synth_ai/streaming/types.py +95 -0
  294. synth_ai/task/apps/__init__.py +4 -2
  295. synth_ai/task/config.py +6 -4
  296. synth_ai/task/rubrics/__init__.py +1 -2
  297. synth_ai/task/rubrics/loaders.py +14 -10
  298. synth_ai/task/rubrics.py +219 -0
  299. synth_ai/task/trace_correlation_helpers.py +24 -11
  300. synth_ai/task/tracing_utils.py +14 -3
  301. synth_ai/task/validators.py +0 -1
  302. synth_ai/tracing_v3/abstractions.py +3 -3
  303. synth_ai/tracing_v3/config.py +15 -13
  304. synth_ai/tracing_v3/constants.py +21 -0
  305. synth_ai/tracing_v3/db_config.py +3 -1
  306. synth_ai/tracing_v3/decorators.py +10 -7
  307. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  308. synth_ai/tracing_v3/migration_helper.py +1 -2
  309. synth_ai/tracing_v3/session_tracer.py +7 -7
  310. synth_ai/tracing_v3/storage/base.py +29 -29
  311. synth_ai/tracing_v3/storage/config.py +3 -3
  312. synth_ai/tracing_v3/turso/daemon.py +8 -9
  313. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  314. synth_ai/tracing_v3/utils.py +2 -2
  315. synth_ai/utils/__init__.py +101 -0
  316. synth_ai/utils/base_url.py +94 -0
  317. synth_ai/utils/cli.py +131 -0
  318. synth_ai/utils/env.py +294 -0
  319. synth_ai/utils/http.py +172 -0
  320. synth_ai/utils/modal.py +308 -0
  321. synth_ai/utils/process.py +212 -0
  322. synth_ai/utils/prompts.py +39 -0
  323. synth_ai/utils/sqld.py +122 -0
  324. synth_ai/utils/task_app_discovery.py +882 -0
  325. synth_ai/utils/task_app_env.py +186 -0
  326. synth_ai/utils/task_app_state.py +318 -0
  327. synth_ai/utils/user_config.py +137 -0
  328. synth_ai/v0/config/__init__.py +1 -5
  329. synth_ai/v0/config/base_url.py +1 -7
  330. synth_ai/v0/tracing/config.py +1 -1
  331. synth_ai/v0/tracing/decorators.py +1 -1
  332. synth_ai/v0/tracing/upload.py +1 -1
  333. synth_ai/v0/tracing_v1/config.py +1 -1
  334. synth_ai/v0/tracing_v1/decorators.py +1 -1
  335. synth_ai/v0/tracing_v1/upload.py +1 -1
  336. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
  337. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
  338. synth_ai/cli/man.py +0 -106
  339. synth_ai/cli/tui.py +0 -57
  340. synth_ai/compound/cais.py +0 -0
  341. synth_ai/core/experiment.py +0 -13
  342. synth_ai/core/system.py +0 -15
  343. synth_ai/demo_registry.py +0 -295
  344. synth_ai/handshake.py +0 -109
  345. synth_ai/tui/__init__.py +0 -5
  346. synth_ai/tui/__main__.py +0 -13
  347. synth_ai/tui/cli/__init__.py +0 -1
  348. synth_ai/tui/cli/query_experiments.py +0 -164
  349. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  350. synth_ai/tui/dashboard.py +0 -906
  351. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  354. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,168 @@
1
+ """
2
+ Trace hooks for Pokemon Red environment - v3 version.
3
+ Captures reward information and saves to Turso database.
4
+ """
5
+
6
+ from datetime import datetime
7
+ from typing import Any, Dict, Optional
8
+
9
+ from synth_ai.tracing_v3.abstractions import BaseEvent, EnvironmentEvent
10
+ from synth_ai.tracing_v3.hooks import HookManager
11
+
# Reward-value -> achievement-label tables, one per achievement category.
# track_pokemon_rewards() looks the step reward up in these tables to decide
# which label and category to record. Every key is unique across all five
# tables, so a given reward value maps to at most one category.

# Rewards granted for reaching new places on the map.
EXPLORATION_ACHIEVEMENTS: Dict[float, str] = {
    0.02: "explore_new_area",
    0.04: "explore_multiple_areas",
    1.0: "leave_starting_area",
    1.5: "enter_new_city",
    2.0: "explore_new_route",
    5.0: "enter_gym_building",
}

# Rewards granted for levelling up the party.
TRAINING_ACHIEVEMENTS: Dict[float, str] = {
    0.2: "pokemon_level_up",
    0.3: "reach_power_level",
    3.0: "pokemon_ready_for_battle",
}

# Rewards granted for battle-related events.
BATTLE_ACHIEVEMENTS: Dict[float, str] = {
    0.1: "encounter_wild_pokemon",
}

# Rewards granted for health and item management.
RESOURCE_ACHIEVEMENTS: Dict[float, str] = {
    0.05: "keep_pokemon_healthy",
    0.5: "find_valuable_item",
    0.8: "visit_pokemon_center",
}

# Large one-off rewards.
MAJOR_ACHIEVEMENTS: Dict[float, str] = {
    50.0: "defeat_brock_win_badge",
}
41
+
42
+
# Absolute tolerance used when an exact table lookup misses. The closest pair
# of keys across the achievement tables is 0.04 / 0.05, so 1e-9 can never make
# a reward match two different entries.
_REWARD_MATCH_TOLERANCE = 1e-9

# (category, table) pairs in the order the categories are checked.
_REWARD_TABLES = (
    ("exploration", EXPLORATION_ACHIEVEMENTS),
    ("training", TRAINING_ACHIEVEMENTS),
    ("battle", BATTLE_ACHIEVEMENTS),
    ("resource", RESOURCE_ACHIEVEMENTS),
    ("major", MAJOR_ACHIEVEMENTS),
)


def _classify_reward(reward: Any) -> tuple:
    """Return ``(achievement_type, achievement_category)`` for a reward value.

    An exact key match is tried first, so every value that matched before
    still resolves to the same entry. If that misses and the reward is a
    number, the tables are searched again with a small absolute tolerance so
    that floating-point rounding (e.g. ``0.1 + 0.2`` instead of ``0.3``) does
    not turn a known achievement into ``("unknown", "other")``.
    """
    for category, table in _REWARD_TABLES:
        if reward in table:
            return table[reward], category

    if isinstance(reward, (int, float)):
        for category, table in _REWARD_TABLES:
            for value, label in table.items():
                if abs(reward - value) <= _REWARD_MATCH_TOLERANCE:
                    return label, category

    return "unknown", "other"


async def track_pokemon_rewards(event_obj: BaseEvent, **kwargs) -> Optional[Dict[str, Any]]:
    """Hook that captures detailed Pokemon Red reward information.

    Args:
        event_obj: The recorded event. Anything that is not an
            ``EnvironmentEvent`` is ignored.
        **kwargs: Accepted for hook-signature compatibility; unused here.

    Returns:
        ``None`` when the event is not an ``EnvironmentEvent`` or its reward is
        ``None`` or ``0.0``. Otherwise a dict with the reward value, the
        achievement type and category resolved from the achievement tables
        (``"unknown"`` / ``"other"`` when no table entry matches), a local-time
        ISO timestamp, and the event's before/after system state.
    """
    # Only process EnvironmentEvents
    if not isinstance(event_obj, EnvironmentEvent):
        return None

    reward = event_obj.reward
    if reward is None or reward == 0.0:
        return None

    achievement_type, achievement_category = _classify_reward(reward)

    return {
        "reward_value": reward,
        "achievement_type": achievement_type,
        "achievement_category": achievement_category,
        "timestamp": datetime.now().isoformat(),
        "system_state_before": event_obj.system_state_before,
        "system_state_after": event_obj.system_state_after,
    }
82
+
83
+
async def track_pokemon_milestones(event_obj: BaseEvent, **kwargs) -> Optional[Dict[str, Any]]:
    """Hook that flags rewards large enough to count as a milestone.

    Returns ``None`` unless ``event_obj`` is an ``EnvironmentEvent`` whose
    reward is at least 0.5. Rewards of 1.0 or more are labelled
    ``"major_progress"``; rewards from 0.5 up to (but not including) 1.0 are
    labelled ``"moderate_progress"``. ``**kwargs`` is accepted but unused.
    """
    if not isinstance(event_obj, EnvironmentEvent):
        return None

    amount = event_obj.reward
    if amount is None:
        return None

    # Pick the milestone label; anything below the lower threshold is ignored.
    if amount >= 1.0:
        label = "major_progress"
    elif amount >= 0.5:
        label = "moderate_progress"
    else:
        return None

    return {
        "milestone": label,
        "reward": amount,
        "timestamp": datetime.now().isoformat(),
    }
109
+
110
+
async def track_pokemon_outcomes(event_obj: BaseEvent, **kwargs) -> Optional[Dict[str, Any]]:
    """Hook that summarises an episode when it ends.

    Returns ``None`` unless ``event_obj`` is an ``EnvironmentEvent`` with
    ``terminated`` or ``truncated`` set. Otherwise returns an
    ``"episode_end"`` record. ``total_reward`` and ``step_count`` are read
    from the event if it has them and default to ``0.0`` and ``0`` otherwise.
    ``**kwargs`` is accepted but unused.
    """
    if not isinstance(event_obj, EnvironmentEvent):
        return None

    # Nothing to report while the episode is still running.
    if not (event_obj.terminated or event_obj.truncated):
        return None

    episode_reward = getattr(event_obj, 'total_reward', 0.0)
    episode_steps = getattr(event_obj, 'step_count', 0)

    # Rough estimate only: one achievement per 0.1 of total reward, with a
    # floor of 1 whenever an after-state is present. No real achievement list
    # is consulted here.
    estimated_achievements = (
        max(1, int(episode_reward / 0.1)) if event_obj.system_state_after else 0
    )

    return {
        "outcome_type": "episode_end",
        "total_reward": episode_reward,
        "steps_taken": episode_steps,
        "achievements_count": estimated_achievements,
        "terminated": event_obj.terminated,
        "truncated": event_obj.truncated,
        "timestamp": datetime.now().isoformat(),
    }
140
+
141
+
# Module-level hook manager for the Pokemon Red environment.
POKEMON_RED_HOOKS = HookManager()

# Register every hook on the "event_recorded" hook point, restricted to
# environment events. Registration order is rewards, milestones, outcomes.
for _callback, _hook_name, _priority in (
    (track_pokemon_rewards, "pokemon_rewards", 10),
    (track_pokemon_milestones, "pokemon_milestones", 5),
    (track_pokemon_outcomes, "pokemon_outcomes", 5),
):
    POKEMON_RED_HOOKS.register(
        "event_recorded",
        _callback,
        name=_hook_name,
        priority=_priority,
        event_types=["environment"],
    )

# Keep the loop variables out of the module namespace.
del _callback, _hook_name, _priority
@@ -3,7 +3,6 @@ from __future__ import annotations
3
3
  from typing import Any
4
4
 
5
5
  from pydantic import BaseModel, Field
6
-
7
6
  from synth_ai.environments.environment.shared_engine import (
8
7
  GetObservationCallable,
9
8
  InternalObservation,
synth_ai/evals/base.py CHANGED
@@ -1,13 +1,24 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any
5
+
6
+
1
7
  class Judgement:
2
8
  def __init__(
3
- self, criteria: str, score: float, reasoning: str = "", evidence: list[str] = None
4
- ):
9
+ self,
10
+ criteria: str,
11
+ score: float,
12
+ reasoning: str = "",
13
+ evidence: list[str] | None = None,
14
+ ) -> None:
5
15
  self.criteria = criteria
6
16
  self.score = score
7
17
  self.reasoning = reasoning
8
18
  self.evidence = evidence or []
9
19
 
10
20
 
11
- class BaseEval:
12
- async def run(self, data: any) -> list[Judgement]:
13
- pass
21
+ class BaseEval(ABC):
22
+ @abstractmethod
23
+ async def run(self, data: Any) -> list[Judgement]:
24
+ """Execute the evaluation and return a list of judgements."""
synth_ai/evals/client.py CHANGED
@@ -10,7 +10,7 @@ import os
10
10
  import warnings
11
11
  from typing import Any, Literal, TypedDict
12
12
 
13
- from synth_ai.http import AsyncHttpClient, HTTPError
13
+ from synth_ai.http_client import AsyncHttpClient, HTTPError
14
14
  from synth_ai.tracing_v3.serialization import normalize_for_json
15
15
 
16
16
  Provider = Literal["groq", "gemini"]
synth_ai/http.py CHANGED
@@ -1,26 +1,12 @@
1
1
  """
2
- Compatibility shim to avoid shadowing Python's stdlib `http` module.
3
- This re-exports the actual client implementation from http_client.py and
4
- supports both package and script execution contexts.
2
+ Backward-compatible HTTP client exports.
3
+
4
+ Historically, some modules imported ``synth_ai.http``. The canonical location
5
+ is ``synth_ai.http_client``; this module simply re-exports the same symbols so
6
+ legacy imports keep working.
5
7
  """
6
8
 
7
- try:
8
- from synth_ai.http_client import * # type: ignore F401,F403
9
- except Exception:
10
- try:
11
- from .http_client import * # type: ignore F401,F403
12
- except Exception:
13
- import importlib.util as _ilu
14
- import sys as _sys
15
- from pathlib import Path as _Path
16
9
 
17
- _here = _Path(__file__).resolve()
18
- _client_path = _here.parent / "http_client.py"
19
- _spec = _ilu.spec_from_file_location("http_client", str(_client_path))
20
- if not _spec or not _spec.loader:
21
- raise ImportError("Could not load http_client module") from None
22
- _mod = _ilu.module_from_spec(_spec)
23
- _spec.loader.exec_module(_mod)
24
- _sys.modules["synth_ai.http_client"] = _mod
25
- for _name in ("HTTPError", "AsyncHttpClient", "sleep"):
26
- globals()[_name] = getattr(_mod, _name)
10
+ from synth_ai.http_client import AsyncHttpClient, HTTPError, sleep
11
+
12
+ __all__ = ["AsyncHttpClient", "HTTPError", "sleep"]
@@ -7,7 +7,7 @@ from synth_ai.api.models.supported import (
7
7
  normalize_model_identifier,
8
8
  )
9
9
 
10
- from ..http import AsyncHttpClient
10
+ from .._utils.http import AsyncHttpClient
11
11
 
12
12
 
13
13
  class InferenceClient:
synth_ai/judge_schemas.py CHANGED
@@ -9,7 +9,7 @@ This is the canonical contract that the backend MUST conform to.
9
9
 
10
10
  from __future__ import annotations
11
11
 
12
- from typing import Any, Dict, Literal, Optional
12
+ from typing import Any, Literal, Optional
13
13
 
14
14
  from pydantic import BaseModel, Field
15
15
 
@@ -63,7 +63,7 @@ class JudgeScoreResponse(BaseModel):
63
63
  description="Request metadata (provider, options, etc.)"
64
64
  )
65
65
 
66
- def aggregate_event_reward(self) -> float | None:
66
+ def aggregate_event_reward(self) -> Optional[float]:
67
67
  """
68
68
  Aggregate all event totals into a single reward.
69
69
 
@@ -74,7 +74,7 @@ class JudgeScoreResponse(BaseModel):
74
74
  return None
75
75
  return sum(self.event_totals)
76
76
 
77
- def aggregate_outcome_reward(self) -> float | None:
77
+ def aggregate_outcome_reward(self) -> Optional[float]:
78
78
  """
79
79
  Extract outcome reward from outcome_review.
80
80
 
@@ -123,5 +123,4 @@ class JudgeScoreRequest(BaseModel):
123
123
  task_app: JudgeTaskApp = Field(..., description="Task application metadata")
124
124
  trace: JudgeTracePayload = Field(..., description="Trajectory trace to evaluate")
125
125
  options: JudgeOptions = Field(default_factory=lambda: JudgeOptions(), description="Judge options")
126
- rubric: Optional[Dict[str, Any]] = Field(None, description="Optional explicit rubric criteria")
127
-
126
+ rubric: Optional[dict[str, Any]] = Field(None, description="Optional explicit rubric criteria")
@@ -11,7 +11,7 @@ from synth_ai.api.models.supported import (
11
11
  )
12
12
  from synth_ai.learning.sft.config import prepare_sft_job_payload
13
13
 
14
- from ..http import AsyncHttpClient, HTTPError, sleep
14
+ from .._utils.http import AsyncHttpClient, HTTPError, sleep
15
15
 
16
16
 
17
17
  class LearningClient:
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  from typing import Any
4
4
 
5
- from ..http import AsyncHttpClient
5
+ from .._utils.http import AsyncHttpClient
6
6
 
7
7
 
8
8
  def _api_base(b: str) -> str:
synth_ai/learning/jobs.py CHANGED
@@ -5,7 +5,7 @@ from collections.abc import Callable
5
5
  from contextlib import suppress
6
6
  from typing import Any
7
7
 
8
- from ..http import AsyncHttpClient, sleep
8
+ from .._utils.http import AsyncHttpClient, sleep
9
9
  from .constants import TERMINAL_EVENT_FAILURE, TERMINAL_EVENT_SUCCESS, TERMINAL_STATUSES
10
10
 
11
11
 
@@ -10,7 +10,7 @@ from synth_ai.api.models.supported import (
10
10
  normalize_model_identifier,
11
11
  )
12
12
 
13
- from ...http import AsyncHttpClient, HTTPError, sleep
13
+ from ..._utils.http import AsyncHttpClient, HTTPError, sleep
14
14
 
15
15
 
16
16
  def _api_base(b: str) -> str:
@@ -107,7 +107,9 @@ class RlClient:
107
107
  async with AsyncHttpClient(self._base_url, self._api_key, timeout=30.0) as http:
108
108
  try:
109
109
  js = await http.get(
110
- f"{_api_base(self._base_url)}/learning/jobs/{job_id}/events", params=params
110
+ f"{_api_base(self._base_url)}/learning/jobs/{job_id}/events",
111
+ params=params,
112
+ headers={"accept": "application/json"},
111
113
  )
112
114
  except HTTPError as he:
113
115
  with suppress(Exception):
@@ -1,4 +1,4 @@
1
- """Helpers for uploading RL environment credentials to the backend."""
1
+ """Helpers for uploading Environment credentials to the backend."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -1,4 +1,4 @@
1
- """Helpers for generating RL environment credentials."""
1
+ """Helpers for generating Environment credentials."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  from pathlib import Path
4
4
  from typing import Any
5
5
 
6
- from ...http import AsyncHttpClient, HTTPError
6
+ from ..._utils.http import AsyncHttpClient, HTTPError
7
7
  from .config import prepare_sft_job_payload
8
8
  from .data import validate_jsonl_or_raise
9
9