synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (354):
  1. examples/README.md +1 -0
  2. examples/analyze_semantic_words.sh +2 -2
  3. examples/blog_posts/pokemon_vl/README.md +98 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  5. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  6. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  7. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  8. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  9. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  12. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  13. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  15. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  16. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  17. examples/multi_step/SFT_README.md +147 -0
  18. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
  20. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  21. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  22. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  23. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  24. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  25. examples/multi_step/convert_traces_to_sft.py +84 -0
  26. examples/multi_step/run_sft_qwen30b.sh +45 -0
  27. examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
  28. examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
  29. examples/qwen_coder/configs/coder_lora_small.toml +1 -2
  30. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  31. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  32. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  33. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  34. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  35. examples/qwen_vl/QUICKSTART.md +327 -0
  36. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  37. examples/qwen_vl/README.md +152 -0
  38. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  39. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  40. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  41. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  42. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  43. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  44. examples/qwen_vl/__init__.py +2 -0
  45. examples/qwen_vl/collect_data_via_cli.md +415 -0
  46. examples/qwen_vl/collect_vision_traces.py +368 -0
  47. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  48. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  49. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  50. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  51. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  52. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  53. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  54. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  55. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  56. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  57. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  58. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  59. examples/qwen_vl/run_vision_comparison.sh +61 -0
  60. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  61. examples/qwen_vl/test_image_validation.py +201 -0
  62. examples/qwen_vl/test_sft_vision_data.py +110 -0
  63. examples/rl/README.md +6 -6
  64. examples/rl/configs/eval_base_qwen.toml +17 -0
  65. examples/rl/configs/eval_rl_qwen.toml +13 -0
  66. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  67. examples/rl/configs/rl_from_base_qwen17.toml +79 -0
  68. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  69. examples/rl/run_eval.py +436 -0
  70. examples/rl/run_rl_and_save.py +111 -0
  71. examples/rl/task_app/README.md +21 -0
  72. examples/rl/task_app/math_single_step.py +990 -0
  73. examples/rl/task_app/math_task_app.py +111 -0
  74. examples/run_crafter_demo.sh +2 -2
  75. examples/sft/README.md +6 -6
  76. examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
  77. examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
  78. examples/sft/evaluate.py +2 -4
  79. examples/sft/export_dataset.py +7 -4
  80. examples/swe/task_app/README.md +33 -3
  81. examples/swe/task_app/grpo_swe_mini.py +4 -1
  82. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  83. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  84. examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
  85. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  86. examples/swe/task_app/hosted/policy_routes.py +0 -2
  87. examples/swe/task_app/hosted/rollout.py +0 -8
  88. examples/swe/task_app/morph_backend.py +178 -0
  89. examples/task_apps/crafter/task_app/README.md +1 -1
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
  91. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
  97. examples/task_apps/enron/__init__.py +1 -0
  98. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  99. examples/task_apps/math/README.md +1 -2
  100. examples/task_apps/pokemon_red/README.md +3 -4
  101. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  102. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  103. examples/task_apps/pokemon_red/task_app.py +36 -5
  104. examples/task_apps/sokoban/README.md +2 -3
  105. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  106. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  107. examples/vlm/README.md +3 -3
  108. examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
  109. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  110. examples/vlm/filter_image_rows.py +1 -1
  111. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  112. examples/warming_up_to_rl/_utils.py +92 -0
  113. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  114. examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
  115. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  116. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  117. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  118. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  119. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  120. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  121. examples/warming_up_to_rl/readme.md +63 -132
  122. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  123. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  124. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  125. examples/warming_up_to_rl/task_app/README.md +42 -0
  126. examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
  127. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  128. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  129. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  130. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  131. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  132. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  133. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  134. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  135. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  136. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  137. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  138. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  139. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  147. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  148. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  152. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  153. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  154. synth_ai/__init__.py +44 -30
  155. synth_ai/_utils/__init__.py +47 -0
  156. synth_ai/_utils/base_url.py +10 -0
  157. synth_ai/_utils/http.py +10 -0
  158. synth_ai/_utils/prompts.py +10 -0
  159. synth_ai/_utils/task_app_state.py +12 -0
  160. synth_ai/_utils/user_config.py +10 -0
  161. synth_ai/api/models/supported.py +144 -7
  162. synth_ai/api/train/__init__.py +13 -1
  163. synth_ai/api/train/builders.py +9 -3
  164. synth_ai/api/train/cli.py +155 -17
  165. synth_ai/api/train/config_finder.py +18 -11
  166. synth_ai/api/train/configs/__init__.py +8 -1
  167. synth_ai/api/train/configs/rl.py +32 -7
  168. synth_ai/api/train/configs/sft.py +6 -2
  169. synth_ai/api/train/configs/shared.py +59 -2
  170. synth_ai/api/train/env_resolver.py +13 -10
  171. synth_ai/auth/credentials.py +119 -0
  172. synth_ai/cli/__init__.py +61 -69
  173. synth_ai/cli/_modal_wrapper.py +7 -5
  174. synth_ai/cli/_typer_patch.py +0 -2
  175. synth_ai/cli/_validate_task_app.py +22 -4
  176. synth_ai/cli/commands/__init__.py +17 -0
  177. synth_ai/cli/commands/demo/__init__.py +6 -0
  178. synth_ai/cli/commands/demo/core.py +163 -0
  179. synth_ai/cli/commands/deploy/__init__.py +23 -0
  180. synth_ai/cli/commands/deploy/core.py +614 -0
  181. synth_ai/cli/commands/deploy/errors.py +72 -0
  182. synth_ai/cli/commands/deploy/validation.py +11 -0
  183. synth_ai/cli/commands/eval/__init__.py +19 -0
  184. synth_ai/cli/commands/eval/core.py +1109 -0
  185. synth_ai/cli/commands/eval/errors.py +81 -0
  186. synth_ai/cli/commands/eval/validation.py +133 -0
  187. synth_ai/cli/commands/filter/__init__.py +12 -0
  188. synth_ai/cli/commands/filter/core.py +388 -0
  189. synth_ai/cli/commands/filter/errors.py +55 -0
  190. synth_ai/cli/commands/filter/validation.py +77 -0
  191. synth_ai/cli/commands/help/__init__.py +177 -0
  192. synth_ai/cli/commands/help/core.py +73 -0
  193. synth_ai/cli/commands/status/__init__.py +64 -0
  194. synth_ai/cli/commands/status/client.py +192 -0
  195. synth_ai/cli/commands/status/config.py +92 -0
  196. synth_ai/cli/commands/status/errors.py +20 -0
  197. synth_ai/cli/commands/status/formatters.py +164 -0
  198. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  199. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  200. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  201. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  202. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  203. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  204. synth_ai/cli/commands/status/utils.py +114 -0
  205. synth_ai/cli/commands/train/__init__.py +53 -0
  206. synth_ai/cli/commands/train/core.py +21 -0
  207. synth_ai/cli/commands/train/errors.py +117 -0
  208. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  209. synth_ai/cli/commands/train/judge_validation.py +304 -0
  210. synth_ai/cli/commands/train/validation.py +443 -0
  211. synth_ai/cli/demo.py +2 -162
  212. synth_ai/cli/deploy/__init__.py +28 -0
  213. synth_ai/cli/deploy/core.py +5 -0
  214. synth_ai/cli/deploy/errors.py +23 -0
  215. synth_ai/cli/deploy/validation.py +5 -0
  216. synth_ai/cli/eval/__init__.py +36 -0
  217. synth_ai/cli/eval/core.py +5 -0
  218. synth_ai/cli/eval/errors.py +31 -0
  219. synth_ai/cli/eval/validation.py +5 -0
  220. synth_ai/cli/filter/__init__.py +28 -0
  221. synth_ai/cli/filter/core.py +5 -0
  222. synth_ai/cli/filter/errors.py +23 -0
  223. synth_ai/cli/filter/validation.py +5 -0
  224. synth_ai/cli/legacy_root_backup.py +3 -1
  225. synth_ai/cli/lib/__init__.py +10 -0
  226. synth_ai/cli/lib/task_app_discovery.py +7 -0
  227. synth_ai/cli/lib/task_app_env.py +518 -0
  228. synth_ai/cli/modal_serve/__init__.py +12 -0
  229. synth_ai/cli/modal_serve/core.py +14 -0
  230. synth_ai/cli/modal_serve/errors.py +8 -0
  231. synth_ai/cli/modal_serve/validation.py +11 -0
  232. synth_ai/cli/recent.py +2 -1
  233. synth_ai/cli/serve/__init__.py +12 -0
  234. synth_ai/cli/serve/core.py +14 -0
  235. synth_ai/cli/serve/errors.py +8 -0
  236. synth_ai/cli/serve/validation.py +11 -0
  237. synth_ai/cli/setup.py +21 -0
  238. synth_ai/cli/status.py +7 -126
  239. synth_ai/cli/task_app_deploy.py +7 -0
  240. synth_ai/cli/task_app_list.py +25 -0
  241. synth_ai/cli/task_app_modal_serve.py +11 -0
  242. synth_ai/cli/task_app_serve.py +11 -0
  243. synth_ai/cli/task_apps.py +110 -1499
  244. synth_ai/cli/traces.py +1 -1
  245. synth_ai/cli/train/__init__.py +12 -0
  246. synth_ai/cli/train/core.py +21 -0
  247. synth_ai/cli/train/errors.py +8 -0
  248. synth_ai/cli/train/validation.py +24 -0
  249. synth_ai/cli/train.py +5 -0
  250. synth_ai/cli/turso.py +1 -1
  251. synth_ai/cli/watch.py +1 -1
  252. synth_ai/demos/__init__.py +10 -0
  253. synth_ai/demos/core/__init__.py +28 -1
  254. synth_ai/demos/crafter/__init__.py +1 -0
  255. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  256. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  257. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  258. synth_ai/demos/demo_registry.py +176 -0
  259. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  260. synth_ai/demos/math/__init__.py +1 -0
  261. synth_ai/demos/math/_common.py +16 -0
  262. synth_ai/demos/math/app.py +38 -0
  263. synth_ai/demos/math/config.toml +76 -0
  264. synth_ai/demos/math/deploy_modal.py +54 -0
  265. synth_ai/demos/math/modal_task_app.py +702 -0
  266. synth_ai/demos/math/task_app_entry.py +51 -0
  267. synth_ai/environments/environment/core.py +7 -1
  268. synth_ai/environments/examples/bandit/engine.py +0 -1
  269. synth_ai/environments/examples/bandit/environment.py +0 -1
  270. synth_ai/environments/examples/red/engine.py +33 -12
  271. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  272. synth_ai/environments/examples/red/environment.py +26 -0
  273. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  274. synth_ai/environments/examples/wordle/environment.py +0 -1
  275. synth_ai/evals/base.py +16 -5
  276. synth_ai/evals/client.py +1 -1
  277. synth_ai/http.py +8 -22
  278. synth_ai/inference/client.py +1 -1
  279. synth_ai/judge_schemas.py +4 -5
  280. synth_ai/learning/client.py +1 -1
  281. synth_ai/learning/health.py +1 -1
  282. synth_ai/learning/jobs.py +1 -1
  283. synth_ai/learning/rl/client.py +4 -2
  284. synth_ai/learning/rl/env_keys.py +1 -1
  285. synth_ai/learning/rl/secrets.py +1 -1
  286. synth_ai/learning/sft/client.py +1 -1
  287. synth_ai/learning/sft/data.py +407 -4
  288. synth_ai/learning/validators.py +4 -1
  289. synth_ai/streaming/__init__.py +29 -0
  290. synth_ai/streaming/config.py +94 -0
  291. synth_ai/streaming/handlers.py +469 -0
  292. synth_ai/streaming/streamer.py +301 -0
  293. synth_ai/streaming/types.py +95 -0
  294. synth_ai/task/apps/__init__.py +4 -2
  295. synth_ai/task/config.py +6 -4
  296. synth_ai/task/rubrics/__init__.py +1 -2
  297. synth_ai/task/rubrics/loaders.py +14 -10
  298. synth_ai/task/rubrics.py +219 -0
  299. synth_ai/task/trace_correlation_helpers.py +24 -11
  300. synth_ai/task/tracing_utils.py +14 -3
  301. synth_ai/task/validators.py +0 -1
  302. synth_ai/tracing_v3/abstractions.py +3 -3
  303. synth_ai/tracing_v3/config.py +15 -13
  304. synth_ai/tracing_v3/constants.py +21 -0
  305. synth_ai/tracing_v3/db_config.py +3 -1
  306. synth_ai/tracing_v3/decorators.py +10 -7
  307. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  308. synth_ai/tracing_v3/migration_helper.py +1 -2
  309. synth_ai/tracing_v3/session_tracer.py +7 -7
  310. synth_ai/tracing_v3/storage/base.py +29 -29
  311. synth_ai/tracing_v3/storage/config.py +3 -3
  312. synth_ai/tracing_v3/turso/daemon.py +8 -9
  313. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  314. synth_ai/tracing_v3/utils.py +2 -2
  315. synth_ai/utils/__init__.py +101 -0
  316. synth_ai/utils/base_url.py +94 -0
  317. synth_ai/utils/cli.py +131 -0
  318. synth_ai/utils/env.py +294 -0
  319. synth_ai/utils/http.py +172 -0
  320. synth_ai/utils/modal.py +308 -0
  321. synth_ai/utils/process.py +212 -0
  322. synth_ai/utils/prompts.py +39 -0
  323. synth_ai/utils/sqld.py +122 -0
  324. synth_ai/utils/task_app_discovery.py +882 -0
  325. synth_ai/utils/task_app_env.py +186 -0
  326. synth_ai/utils/task_app_state.py +318 -0
  327. synth_ai/utils/user_config.py +137 -0
  328. synth_ai/v0/config/__init__.py +1 -5
  329. synth_ai/v0/config/base_url.py +1 -7
  330. synth_ai/v0/tracing/config.py +1 -1
  331. synth_ai/v0/tracing/decorators.py +1 -1
  332. synth_ai/v0/tracing/upload.py +1 -1
  333. synth_ai/v0/tracing_v1/config.py +1 -1
  334. synth_ai/v0/tracing_v1/decorators.py +1 -1
  335. synth_ai/v0/tracing_v1/upload.py +1 -1
  336. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
  337. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
  338. synth_ai/cli/man.py +0 -106
  339. synth_ai/cli/tui.py +0 -57
  340. synth_ai/compound/cais.py +0 -0
  341. synth_ai/core/experiment.py +0 -13
  342. synth_ai/core/system.py +0 -15
  343. synth_ai/demo_registry.py +0 -295
  344. synth_ai/handshake.py +0 -109
  345. synth_ai/tui/__init__.py +0 -5
  346. synth_ai/tui/__main__.py +0 -13
  347. synth_ai/tui/cli/__init__.py +0 -1
  348. synth_ai/tui/cli/query_experiments.py +0 -164
  349. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  350. synth_ai/tui/dashboard.py +0 -906
  351. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  354. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,92 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterable, Sequence
4
+
5
+ from synth_ai.task import (
6
+ RolloutEnvSpec,
7
+ RolloutPolicySpec,
8
+ RolloutRecordConfig,
9
+ RolloutRequest,
10
+ RolloutSafetyConfig,
11
+ )
12
+
13
+ DEFAULT_POLICY_NAME = "crafter-react"
14
+ DEFAULT_ENV_NAME = "crafter"
15
+
16
+
17
+ def parse_ops(spec: str | None) -> list[str] | None:
18
+ """Parse a comma-separated operations string into a list."""
19
+
20
+ if spec is None:
21
+ return None
22
+ ops = [op.strip() for op in spec.split(",") if op.strip()]
23
+ if not ops:
24
+ raise ValueError("Ops must contain at least one entry")
25
+ return ops
26
+
27
+
28
+ def ops_from_pairs(max_llm_calls: int, *, cap: int | None = None) -> list[str]:
29
+ """Return alternating agent/env ops for the requested number of LLM calls."""
30
+
31
+ pairs = max(1, int(max_llm_calls or 0))
32
+ if cap is not None:
33
+ pairs = min(pairs, cap)
34
+ ops: list[str] = []
35
+ for _ in range(pairs):
36
+ ops.extend(["agent", "env"])
37
+ return ops
38
+
39
+
40
+ def build_rollout_request(
41
+ *,
42
+ seed: int,
43
+ run_id: str,
44
+ model: str,
45
+ inference_url: str,
46
+ ops: Sequence[str] | Iterable[str],
47
+ inference_api_key: str | None = None,
48
+ extra_headers: dict[str, str] | None = None,
49
+ trace_format: str = "compact",
50
+ return_trace: bool = False,
51
+ policy_name: str = DEFAULT_POLICY_NAME,
52
+ env_name: str = DEFAULT_ENV_NAME,
53
+ max_policy_tokens: int | None = None,
54
+ record_trajectories: bool = True,
55
+ ) -> RolloutRequest:
56
+ """Construct a RolloutRequest shared across local rollout utilities."""
57
+
58
+ policy_config: dict[str, object] = {
59
+ "model": model,
60
+ "inference_url": inference_url,
61
+ }
62
+ if inference_api_key is not None:
63
+ policy_config["api_key"] = inference_api_key
64
+ if extra_headers:
65
+ policy_config["extra_headers"] = extra_headers
66
+ if max_policy_tokens is not None:
67
+ policy_config["max_completion_tokens"] = max_policy_tokens
68
+ policy_config["max_tokens"] = max_policy_tokens
69
+
70
+ record_cfg = RolloutRecordConfig(
71
+ trajectories=record_trajectories,
72
+ trace_format=trace_format,
73
+ return_trace=return_trace,
74
+ )
75
+ return RolloutRequest(
76
+ run_id=run_id,
77
+ env=RolloutEnvSpec(env_name=env_name, seed=seed, config={}),
78
+ policy=RolloutPolicySpec(policy_name=policy_name, config=policy_config),
79
+ ops=list(ops),
80
+ record=record_cfg,
81
+ on_done="reset",
82
+ safety=RolloutSafetyConfig(),
83
+ )
84
+
85
+
86
+ __all__ = [
87
+ "DEFAULT_POLICY_NAME",
88
+ "DEFAULT_ENV_NAME",
89
+ "build_rollout_request",
90
+ "ops_from_pairs",
91
+ "parse_ops",
92
+ ]
@@ -383,7 +383,7 @@ def main() -> None:
383
383
  parser.add_argument(
384
384
  "--db",
385
385
  type=Path,
386
- default=Path("traces/v3/synth_ai.db"),
386
+ default=Path("traces/task_app_traces.db"),
387
387
  help="Path to the tracing_v3 SQLite database",
388
388
  )
389
389
  args = parser.parse_args()
@@ -1,6 +1,11 @@
1
1
  # Crafter Full Finetune (FFT) example on H100
2
2
  # Adjust paths and hyperparameters to your environment before running.
3
3
 
4
+ [algorithm]
5
+ type = "offline"
6
+ method = "sft"
7
+ variety = "fft"
8
+
4
9
  [job]
5
10
  model = "Qwen/Qwen3-4B" # base model to finetune
6
11
  # Path to your SFT JSONL dataset
@@ -1,5 +1,7 @@
1
1
  # Eval config for finetuned Qwen/Qwen3-4B (FFT) via task app rollout
2
2
 
3
+ type = "sft"
4
+
3
5
  # Required
4
6
  task_app_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
5
7
  # Replace with your finished job id if different
@@ -1,6 +1,8 @@
1
1
  # Eval config for Groq Qwen3-32B
2
2
  # Fields mirror run_eval.py expectations
3
3
 
4
+ type = "rl"
5
+
4
6
  # Required
5
7
  task_app_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
6
8
  model = "qwen/qwen3-32b"
@@ -1,5 +1,7 @@
1
1
  # Eval config for Synth Modal inference Qwen/Qwen3-4B via task app rollout
2
2
 
3
+ type = "rl"
4
+
3
5
  # Required
4
6
  task_app_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
5
7
  model = "Qwen/Qwen3-4B"
@@ -20,4 +22,3 @@ concurrency = 10
20
22
  # fetch the vLLM base from the task app /info to use as inference_url.
21
23
  # - Ensure the task app mounts the openai-api-key secret if your vLLM gateway
22
24
  # requires a bearer token (OPENAI_API_KEY). Otherwise it will call unauthenticated.
23
-
@@ -5,7 +5,6 @@ type = "online"
5
5
  method = "policy_gradient"
6
6
  variety = "gspo"
7
7
 
8
-
9
8
  [services]
10
9
  task_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
11
10
 
@@ -39,6 +38,7 @@ health_interval_ms = 300
39
38
  [model]
40
39
  # Base model start
41
40
  base = "Qwen/Qwen3-4B"
41
+ trainer_mode = "full"
42
42
  label = "crafter-rl-from-base"
43
43
 
44
44
  [rollout]
@@ -61,6 +61,7 @@ seeds = [
61
61
  [training]
62
62
  num_epochs = 1
63
63
  iterations_per_epoch = 10
64
+ max_turns = 10
64
65
  batch_size = 16
65
66
  group_size = 4
66
67
  gradient_accumulation_steps = 1
@@ -1,5 +1,7 @@
1
1
  # RL training starting from a finetuned model id (TOML-only model selection)
2
2
 
3
+ type = "rl"
4
+
3
5
  [services]
4
6
  # Task app base URL used by the RL job for rollouts
5
7
  # task_url = "https://YOUR-TASK-APP.modal.run"
@@ -5,6 +5,7 @@ from __future__ import annotations
5
5
 
6
6
  import argparse
7
7
  import json
8
+ import os
8
9
  import sqlite3
9
10
  import sys
10
11
  from collections import Counter, defaultdict
@@ -12,6 +13,13 @@ from collections.abc import Iterable
12
13
  from pathlib import Path
13
14
  from typing import Any
14
15
 
16
+ from synth_ai._utils.prompts import ensure_required_args
17
+ from synth_ai.tracing_v3.constants import (
18
+ TRACE_DB_BASENAME,
19
+ TRACE_DB_DIR,
20
+ canonical_trace_db_name,
21
+ )
22
+
15
23
  Row = sqlite3.Row
16
24
 
17
25
 
@@ -489,55 +497,81 @@ def _validate_dataset(records: list[dict[str, Any]]) -> None:
489
497
 
490
498
 
491
499
  def _find_trace_database() -> Path | None:
492
- """Automatically discover the trace database in common locations."""
500
+ """Automatically discover the most recent trace database in common locations."""
493
501
 
494
- # Check for demo directory from state
495
- try:
496
- state_path = Path.home() / ".synth-ai" / "demo.json"
497
- if state_path.exists():
498
- import json
499
-
500
- with state_path.open() as f:
501
- data = json.load(f)
502
- demo_dir = data.get("DEMO_DIR")
503
- if demo_dir:
504
- candidate = Path(demo_dir) / "traces" / "v3" / "synth_ai.db"
505
- if candidate.exists():
506
- return candidate
507
- except Exception:
508
- pass
502
+ candidates: list[Path] = []
509
503
 
510
- # Search upward from current directory
504
+ # Walk up parent directories from CWD
511
505
  cwd = Path.cwd()
512
506
  for parent in [cwd] + list(cwd.parents):
513
- candidate = parent / "traces" / "v3" / "synth_ai.db"
514
- if candidate.exists():
515
- return candidate
516
-
517
- # Check standard locations
518
- standard_locations = [
519
- Path("traces/v3/synth_ai.db"),
520
- Path("../traces/v3/synth_ai.db"),
521
- Path.home() / "synth-ai" / "traces" / "v3" / "synth_ai.db",
522
- ]
523
-
524
- for location in standard_locations:
507
+ candidates.append(parent / "traces" / "v3")
508
+
509
+ # Standard fallback locations
510
+ candidates.extend(
511
+ [
512
+ TRACE_DB_DIR,
513
+ Path("../traces"),
514
+ Path.home() / "synth-ai" / "traces" / "v3",
515
+ ]
516
+ )
517
+
518
+ found: list[Path] = []
519
+ for directory in candidates:
525
520
  try:
526
- if location.exists():
527
- return location.resolve()
521
+ if not directory.exists():
522
+ continue
523
+ for pattern in (
524
+ f"{TRACE_DB_BASENAME}_*.db",
525
+ canonical_trace_db_name(),
526
+ ):
527
+ for candidate in directory.glob(pattern):
528
+ found.append(candidate.resolve())
528
529
  except Exception:
529
530
  continue
530
531
 
531
- return None
532
+ if not found:
533
+ return None
534
+
535
+ found.sort(key=lambda p: p.stat().st_mtime, reverse=True)
536
+ return found[0]
537
+
538
+
539
+ def _discover_local_trace_dbs(root: Path) -> list[Path]:
540
+ """Return trace DBs under *root* (recursively), newest first."""
541
+
542
+ candidates: set[Path] = set()
543
+ ignore_dirs = {".git", ".venv", "__pycache__", "node_modules", "dist", "build"}
544
+ target_exact = canonical_trace_db_name()
545
+
546
+ for dirpath, dirnames, filenames in os.walk(root):
547
+ dirnames[:] = [d for d in dirnames if d not in ignore_dirs]
548
+ for filename in filenames:
549
+ if filename == target_exact or (
550
+ filename.startswith(f"{TRACE_DB_BASENAME}_") and filename.endswith(".db")
551
+ ):
552
+ path = Path(dirpath) / filename
553
+ try:
554
+ candidates.add(path.resolve())
555
+ except Exception:
556
+ continue
557
+
558
+ return sorted(candidates, key=lambda p: p.stat().st_mtime, reverse=True)
532
559
 
533
560
 
534
561
  def main() -> None:
535
562
  parser = argparse.ArgumentParser(description=__doc__)
536
- parser.add_argument("--db", type=Path, default=None, help="Path to tracing_v3 SQLite DB")
537
563
  parser.add_argument(
538
- "--output",
564
+ "--in",
565
+ dest="input_path",
566
+ type=Path,
567
+ default=None,
568
+ help="Path to tracing_v3 SQLite DB",
569
+ )
570
+ parser.add_argument(
571
+ "--out",
572
+ dest="output_path",
539
573
  type=Path,
540
- required=False,
574
+ default=None,
541
575
  help="Destination JSONL path for the exported dataset",
542
576
  )
543
577
  parser.add_argument(
@@ -593,25 +627,109 @@ def main() -> None:
593
627
  )
594
628
  args = parser.parse_args()
595
629
 
596
- # Auto-discover database if not specified
597
- db_path = args.db
598
- if db_path is None:
599
- db_path = _find_trace_database()
600
- if db_path:
601
- print(f"Found trace database: {db_path}")
602
- else:
603
- print("\nTrace database configuration:")
604
- db_input = input("Trace database path [traces/v3/synth_ai.db]: ").strip()
605
- db_path = Path(db_input) if db_input else Path("traces/v3/synth_ai.db")
630
+ default_output_path = (Path.cwd() / "ft_data" / "crafter_sft.jsonl").resolve()
631
+
632
+ initial_path: Path | None = None
633
+ if args.input_path is not None:
634
+ initial_path = Path(args.input_path).expanduser().resolve()
635
+ else:
636
+ discovered = _find_trace_database()
637
+ if discovered is not None:
638
+ initial_path = discovered.expanduser().resolve()
639
+ args.input_path = initial_path
640
+
641
+ if args.output_path is None:
642
+ args.output_path = default_output_path
643
+
644
+ local_candidates = _discover_local_trace_dbs(Path.cwd())
645
+ if local_candidates:
646
+ print("\nDiscovered trace databases:")
647
+ for idx, path in enumerate(local_candidates, start=1):
648
+ marker = " <- most recent" if idx == 1 else ""
649
+ print(f" {idx}) {path}{marker}")
650
+ print(" m) Enter path manually")
651
+ print(" 0) Abort")
652
+
653
+ default_index = 1
654
+ if initial_path:
655
+ for idx, candidate in enumerate(local_candidates, start=1):
656
+ if candidate == initial_path:
657
+ default_index = idx
658
+ break
606
659
 
660
+ while True:
661
+ prompt = f"Select database [{default_index}]: "
662
+ choice = input(prompt).strip().lower()
663
+ if not choice:
664
+ args.input_path = local_candidates[default_index - 1]
665
+ break
666
+ if choice == "0":
667
+ raise SystemExit("Aborted by user.")
668
+ if choice in {"m", "manual"}:
669
+ manual = input("Enter trace database path: ").strip()
670
+ if manual:
671
+ args.input_path = Path(manual)
672
+ break
673
+ print("Path required; try again.")
674
+ continue
675
+ try:
676
+ idx = int(choice)
677
+ except ValueError:
678
+ print("Invalid selection; enter a number, 'm', or 0 to abort.")
679
+ continue
680
+ if 1 <= idx <= len(local_candidates):
681
+ args.input_path = local_candidates[idx - 1]
682
+ break
683
+ print(f"Select between 1 and {len(local_candidates)}, 'm', or 0.")
684
+ elif initial_path is not None:
685
+ args.input_path = initial_path
686
+
687
+ # If output wasn't overridden, derive it from the chosen DB name
688
+ if args.output_path == default_output_path and args.input_path:
689
+ db_name = Path(args.input_path).name # e.g., task_app_traces_2025-10-23_13-23-02.db
690
+ timestamp = db_name[:-3] if db_name.endswith(".db") else db_name
691
+ if timestamp.startswith("task_app_traces_"):
692
+ timestamp = timestamp[len("task_app_traces_") :]
693
+ derived_name = f"sft_dataset_{timestamp}.jsonl"
694
+ args.output_path = (Path.cwd() / "ft_data" / derived_name).resolve()
695
+
696
+ input_default = (
697
+ Path(args.input_path).expanduser().resolve()
698
+ if args.input_path is not None
699
+ else (TRACE_DB_DIR / canonical_trace_db_name()).expanduser().resolve()
700
+ )
701
+ output_default = Path(args.output_path).expanduser().resolve() if args.output_path else default_output_path
702
+
703
+ args = ensure_required_args(
704
+ args,
705
+ {
706
+ "input_path": "Trace database path",
707
+ "output_path": "Output JSONL path",
708
+ },
709
+ coerce={
710
+ "input_path": lambda raw: Path(raw).expanduser().resolve(),
711
+ "output_path": lambda raw: Path(raw).expanduser().resolve(),
712
+ },
713
+ defaults={
714
+ "input_path": input_default,
715
+ "output_path": output_default,
716
+ },
717
+ )
718
+
719
+ db_path = Path(args.input_path).expanduser().resolve()
720
+ print(f"Trace database: {db_path}")
607
721
  if not db_path.exists():
608
- print(f"Database not found: {db_path}", file=sys.stderr)
609
- raise SystemExit(1)
722
+ discovered = _find_trace_database()
723
+ if discovered and discovered.exists():
724
+ discovered = discovered.resolve()
725
+ print(f"Discovered trace database: {discovered}")
726
+ db_path = discovered
727
+ else:
728
+ print(f"Database not found: {db_path}", file=sys.stderr)
729
+ raise SystemExit(1)
610
730
 
611
- output_path = args.output
612
- if not output_path:
613
- output_path = Path("ft_data/crafter_traces.jsonl")
614
- print(f"Output will be written to: {output_path.resolve()}")
731
+ output_path = Path(args.output_path).expanduser().resolve()
732
+ print(f"Output dataset: {output_path}")
615
733
 
616
734
  min_unique = args.min_unique
617
735
  if min_unique is None:
@@ -619,15 +737,11 @@ def main() -> None:
619
737
  print(f"Minimum unique achievements filter: {min_unique} (all traces)")
620
738
 
621
739
  # Override args with prompted values
622
- args.db = db_path
623
- args.output = output_path
740
+ args.input_path = db_path
741
+ args.output_path = output_path
624
742
  args.min_unique = min_unique
625
743
 
626
- if not args.db.exists():
627
- print(f"Database not found: {args.db}", file=sys.stderr)
628
- raise SystemExit(1)
629
-
630
- conn = connect(args.db)
744
+ conn = connect(args.input_path)
631
745
  try:
632
746
  (
633
747
  achievements_map,
@@ -708,11 +822,11 @@ def main() -> None:
708
822
  raise SystemExit(1)
709
823
 
710
824
  _validate_dataset(dataset)
711
- write_jsonl(args.output, dataset)
825
+ write_jsonl(args.output_path, dataset)
712
826
  session_ids = {item.get("metadata", {}).get("session_id") for item in dataset}
713
827
  session_ids.discard(None)
714
828
  print(
715
- f"Wrote {len(dataset)} examples from {len(session_ids)} session(s) -> {args.output.resolve()}",
829
+ f"Wrote {len(dataset)} examples from {len(session_ids)} session(s) -> {args.output_path.resolve()}",
716
830
  file=sys.stderr,
717
831
  )
718
832
  finally: