synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (349)
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +23 -17
  3. examples/dev/qwen3_32b_qlora_4xh100.toml +40 -0
  4. examples/multi_step/crafter_rl_lora.md +29 -0
  5. examples/qwen_coder/README.md +102 -0
  6. examples/qwen_coder/_shared.py +113 -0
  7. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  8. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  9. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  10. examples/qwen_coder/generate_dataset.py +98 -0
  11. examples/qwen_coder/infer_ft_smoke.py +65 -0
  12. examples/qwen_coder/infer_prod_proxy.py +73 -0
  13. examples/qwen_coder/infer_via_synth.py +87 -0
  14. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  15. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  16. examples/qwen_coder/sft_full_17b.py +103 -0
  17. examples/qwen_coder/sft_lora_30b.py +110 -0
  18. examples/qwen_coder/subset_jsonl.py +39 -0
  19. examples/qwen_coder/todos.md +38 -0
  20. examples/qwen_coder/validate_jsonl.py +60 -0
  21. examples/rl/configs/eval_base_qwen.toml +1 -1
  22. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  23. examples/rl/download_dataset.py +26 -10
  24. examples/rl/run_eval.py +53 -52
  25. examples/rl/run_rl_and_save.py +29 -12
  26. examples/rl/task_app/math_single_step.py +180 -41
  27. examples/rl/task_app/math_task_app.py +14 -6
  28. examples/sft/README.md +139 -0
  29. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  30. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  31. examples/sft/evaluate.py +117 -0
  32. examples/sft/export_dataset.py +117 -0
  33. examples/sft/generate_traces.py +162 -0
  34. examples/swe/__init__.py +12 -0
  35. examples/swe/task_app/README.md +105 -0
  36. examples/swe/task_app/__init__.py +2 -0
  37. examples/swe/task_app/grpo_swe_mini.py +571 -0
  38. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  39. examples/swe/task_app/hosted/README.md +173 -0
  40. examples/swe/task_app/hosted/__init__.py +5 -0
  41. examples/swe/task_app/hosted/branching.py +143 -0
  42. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  43. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  44. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  45. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  46. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  47. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  48. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  49. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  50. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  51. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  52. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  53. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  54. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  55. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  56. examples/swe/task_app/hosted/hosted_app.py +204 -0
  57. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  58. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  59. examples/swe/task_app/hosted/main.py +100 -0
  60. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  61. examples/swe/task_app/hosted/registry.py +195 -0
  62. examples/swe/task_app/hosted/rollout.py +1869 -0
  63. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  64. examples/swe/task_app/hosted/storage/volume.py +211 -0
  65. examples/swe/task_app/hosted/test_agents.py +161 -0
  66. examples/swe/task_app/hosted/test_service.py +137 -0
  67. examples/swe/task_app/hosted/utils.py +62 -0
  68. examples/vlm/PROPOSAL.md +53 -0
  69. examples/vlm/README.md +68 -0
  70. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  71. examples/vlm/crafter_image_only_agent.py +207 -0
  72. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  73. examples/vlm/filter_image_rows.py +63 -0
  74. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  75. examples/warming_up_to_rl/analyze_trace_db.py +12 -10
  76. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  77. examples/warming_up_to_rl/export_trace_sft.py +218 -36
  78. examples/warming_up_to_rl/groq_test.py +15 -8
  79. examples/warming_up_to_rl/manage_secrets.py +29 -25
  80. examples/warming_up_to_rl/readme.md +9 -2
  81. examples/warming_up_to_rl/run_eval.py +137 -61
  82. examples/warming_up_to_rl/run_fft_and_save.py +131 -60
  83. examples/warming_up_to_rl/run_local_rollout.py +88 -39
  84. examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
  85. examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
  86. examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
  87. examples/warming_up_to_rl/run_rl_and_save.py +35 -12
  88. examples/warming_up_to_rl/run_rollout_remote.py +44 -19
  89. examples/warming_up_to_rl/task_app/README.md +6 -2
  90. examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
  91. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  111. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
  112. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  113. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
  114. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  115. synth_ai/__init__.py +1 -0
  116. synth_ai/api/models/supported.py +376 -0
  117. synth_ai/api/train/builders.py +157 -26
  118. synth_ai/api/train/cli.py +213 -57
  119. synth_ai/api/train/config_finder.py +65 -5
  120. synth_ai/api/train/env_resolver.py +33 -15
  121. synth_ai/api/train/pollers.py +13 -4
  122. synth_ai/api/train/supported_algos.py +139 -0
  123. synth_ai/api/train/task_app.py +5 -3
  124. synth_ai/api/train/utils.py +33 -48
  125. synth_ai/cli/__init__.py +19 -4
  126. synth_ai/cli/_modal_wrapper.py +28 -0
  127. synth_ai/cli/_typer_patch.py +49 -0
  128. synth_ai/cli/balance.py +2 -3
  129. synth_ai/cli/calc.py +1 -1
  130. synth_ai/cli/demo.py +21 -6
  131. synth_ai/cli/recent.py +2 -2
  132. synth_ai/cli/rl_demo.py +77 -17
  133. synth_ai/cli/root.py +116 -39
  134. synth_ai/cli/status.py +2 -2
  135. synth_ai/cli/task_apps.py +1699 -259
  136. synth_ai/cli/traces.py +7 -4
  137. synth_ai/cli/turso.py +73 -0
  138. synth_ai/cli/watch.py +12 -18
  139. synth_ai/core/experiment.py +0 -2
  140. synth_ai/demo_registry.py +68 -31
  141. synth_ai/demos/core/cli.py +516 -194
  142. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  143. synth_ai/demos/demo_task_apps/core.py +64 -28
  144. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  145. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
  146. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  147. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  148. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  149. synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
  150. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  151. synth_ai/environments/examples/bandit/engine.py +12 -4
  152. synth_ai/environments/examples/bandit/taskset.py +4 -4
  153. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  154. synth_ai/environments/reproducibility/tree.py +5 -6
  155. synth_ai/environments/service/app.py +11 -12
  156. synth_ai/environments/service/core_routes.py +10 -9
  157. synth_ai/environments/stateful/engine.py +1 -1
  158. synth_ai/environments/tasks/core.py +1 -0
  159. synth_ai/environments/tasks/filters.py +5 -6
  160. synth_ai/environments/tasks/utils.py +4 -5
  161. synth_ai/evals/base.py +0 -2
  162. synth_ai/handshake.py +11 -9
  163. synth_ai/http.py +1 -1
  164. synth_ai/http_client.py +43 -11
  165. synth_ai/inference/__init__.py +0 -2
  166. synth_ai/inference/client.py +20 -6
  167. synth_ai/jobs/client.py +103 -78
  168. synth_ai/learning/__init__.py +41 -6
  169. synth_ai/learning/algorithms.py +14 -0
  170. synth_ai/learning/client.py +121 -29
  171. synth_ai/learning/config.py +2 -40
  172. synth_ai/learning/constants.py +0 -2
  173. synth_ai/learning/ft_client.py +4 -56
  174. synth_ai/learning/health.py +13 -7
  175. synth_ai/learning/jobs.py +43 -47
  176. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  177. synth_ai/learning/rl/client.py +267 -0
  178. synth_ai/learning/rl/config.py +31 -0
  179. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  180. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  181. synth_ai/learning/rl/secrets.py +13 -0
  182. synth_ai/learning/rl_client.py +2 -253
  183. synth_ai/learning/sft/__init__.py +29 -0
  184. synth_ai/learning/sft/client.py +68 -0
  185. synth_ai/learning/sft/config.py +270 -0
  186. synth_ai/learning/sft/data.py +295 -0
  187. synth_ai/learning/sse.py +25 -26
  188. synth_ai/learning/validators.py +25 -24
  189. synth_ai/lm/__init__.py +21 -47
  190. synth_ai/task/__init__.py +26 -27
  191. synth_ai/task/apps/__init__.py +18 -19
  192. synth_ai/task/auth.py +35 -23
  193. synth_ai/task/client.py +15 -13
  194. synth_ai/task/contracts.py +37 -35
  195. synth_ai/task/datasets.py +9 -6
  196. synth_ai/task/errors.py +11 -10
  197. synth_ai/task/health.py +17 -11
  198. synth_ai/task/json.py +58 -24
  199. synth_ai/task/proxy.py +15 -14
  200. synth_ai/task/rubrics.py +22 -15
  201. synth_ai/task/server.py +43 -17
  202. synth_ai/task/tracing_utils.py +12 -7
  203. synth_ai/task/validators.py +0 -1
  204. synth_ai/task/vendors.py +5 -7
  205. synth_ai/tracing_v3/__init__.py +2 -0
  206. synth_ai/tracing_v3/abstractions.py +21 -4
  207. synth_ai/tracing_v3/db_config.py +26 -1
  208. synth_ai/tracing_v3/decorators.py +18 -15
  209. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  210. synth_ai/tracing_v3/hooks.py +6 -4
  211. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  212. synth_ai/tracing_v3/replica_sync.py +1 -0
  213. synth_ai/tracing_v3/session_tracer.py +63 -16
  214. synth_ai/tracing_v3/storage/base.py +89 -1
  215. synth_ai/tracing_v3/storage/config.py +21 -8
  216. synth_ai/tracing_v3/storage/factory.py +10 -8
  217. synth_ai/tracing_v3/storage/utils.py +4 -2
  218. synth_ai/tracing_v3/turso/daemon.py +7 -2
  219. synth_ai/tracing_v3/turso/models.py +5 -2
  220. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  221. synth_ai/tracing_v3/utils.py +4 -3
  222. synth_ai/v0/api/__init__.py +8 -0
  223. synth_ai/v0/api/models/__init__.py +8 -0
  224. synth_ai/v0/api/models/supported.py +8 -0
  225. synth_ai/v0/config/__init__.py +15 -0
  226. synth_ai/v0/config/base_url.py +12 -0
  227. synth_ai/v0/lm/__init__.py +51 -0
  228. synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
  229. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  230. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  231. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  232. synth_ai/{lm → v0/lm}/config.py +6 -1
  233. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  234. synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
  235. synth_ai/{lm → v0/lm}/core/main.py +19 -7
  236. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  237. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
  238. synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
  239. synth_ai/{lm → v0/lm}/overrides.py +4 -4
  240. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  241. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  242. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  243. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  244. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
  245. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  246. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  247. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
  248. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
  249. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
  250. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
  251. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  252. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  253. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  254. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  255. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  256. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  257. synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
  258. synth_ai/v0/tracing/upload.py +32 -135
  259. synth_ai/v0/tracing_v3/__init__.py +10 -0
  260. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  261. synth_ai/v0/tracing_v3/decorators.py +3 -0
  262. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  263. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  264. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/METADATA +10 -7
  265. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/RECORD +294 -258
  266. examples/common_old/backend.py +0 -21
  267. examples/evals_old/README.md +0 -98
  268. examples/evals_old/__init__.py +0 -6
  269. examples/evals_old/compare_models.py +0 -1037
  270. examples/evals_old/example_log.md +0 -145
  271. examples/evals_old/run_demo.sh +0 -126
  272. examples/evals_old/trace_analysis.py +0 -270
  273. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  274. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  275. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  276. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
  277. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  278. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  279. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  280. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  281. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  282. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
  283. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  284. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  285. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
  286. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  287. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  288. examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
  289. examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
  290. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  291. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  292. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
  293. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
  294. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
  295. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  296. examples/finetuning_old/synth_qwen_v1/util.py +0 -147
  297. examples/rl_old/task_app.py +0 -962
  298. synth_ai/experimental/synth_oss.py +0 -446
  299. synth_ai/install_sqld.sh +0 -40
  300. synth_ai/learning/filtering.py +0 -0
  301. synth_ai/learning/offline/dpo.py +0 -0
  302. synth_ai/learning/offline/providers.py +0 -7
  303. synth_ai/learning/offline/sft.py +0 -0
  304. synth_ai/learning/offline/shared.py +0 -0
  305. synth_ai/learning/online/grpo.py +0 -0
  306. synth_ai/learning/online/irft.py +0 -0
  307. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  308. synth_ai/learning/prompts/gepa.py +0 -0
  309. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  310. synth_ai/learning/prompts/mipro.py +0 -289
  311. synth_ai/learning/prompts/random_search.py +0 -246
  312. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  313. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  314. synth_ai/rl/secrets.py +0 -19
  315. synth_ai/scripts/verify_rewards.py +0 -100
  316. synth_ai/tracing/__init__.py +0 -30
  317. synth_ai/tracing_v1/__init__.py +0 -33
  318. synth_ai/tracing_v3/turso/__init__.py +0 -25
  319. synth_ai/tracing_v3/turso/manager.py +0 -774
  320. synth_ai/zyk/__init__.py +0 -30
  321. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  322. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  323. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  324. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  325. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  326. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  327. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  328. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  329. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  330. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  331. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  332. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  333. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  334. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  335. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  336. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  337. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  338. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  339. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  340. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  341. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  342. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  343. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  344. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  345. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  346. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/WHEEL +0 -0
  347. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/entry_points.txt +0 -0
  348. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/licenses/LICENSE +0 -0
  349. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/top_level.txt +0 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py

@@ -1,8 +1,10 @@
  from __future__ import annotations
 
+ import contextlib
  import logging
+ import os
  from datetime import datetime
- from typing import Any, Dict, List, Optional
+ from typing import Any
 
  from fastapi import APIRouter, HTTPException, Request
  from pydantic import BaseModel
@@ -11,8 +13,6 @@ from .envs.crafter.policy import CrafterPolicy
  from .inference.openai_client import create_inference_client
  from .registry import registry
  from .storage.volume import storage
- import os
- from typing import Tuple
 
  # Token budgeting (shared logic with inference server)
  try:
@@ -34,10 +34,10 @@ router = APIRouter()
 
  class PolicyCreateRequest(BaseModel):
      policy_name: str
-     config: Dict[str, Any] = {}
-     parent_policy_id: Optional[str] = None
+     config: dict[str, Any] = {}
+     parent_policy_id: str | None = None
      rl_run_id: str
-     bound_env_id: Optional[str] = None
+     bound_env_id: str | None = None
 
 
  class PolicyCreateResponse(BaseModel):
@@ -46,15 +46,15 @@ class PolicyCreateResponse(BaseModel):
 
  class PolicyStepRequest(BaseModel):
      policy_id: str
-     observation: Dict[str, Any]
-     state: Optional[Dict[str, Any]] = None
-     metadata: Optional[Dict[str, Any]] = None
+     observation: dict[str, Any]
+     state: dict[str, Any] | None = None
+     metadata: dict[str, Any] | None = None
      dry_run: bool = False
 
 
  class PolicyStepResponse(BaseModel):
-     tool_calls: List[Dict[str, Any]]
-     meta: Dict[str, Any]
+     tool_calls: list[dict[str, Any]]
+     meta: dict[str, Any]
 
 
  class PolicySnapshotRequest(BaseModel):
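
The annotation changes here are the PEP 585/604 modernization applied throughout this release: built-in generics (dict[...], list[...]) and X | None unions replace typing.Dict/List/Optional, which requires Python 3.10+. A minimal sketch of the same style on a standalone Pydantic model (illustrative only, not part of the package):

    from typing import Any

    from pydantic import BaseModel


    class StepRequest(BaseModel):
        policy_id: str
        observation: dict[str, Any]            # was Dict[str, Any]
        state: dict[str, Any] | None = None    # was Optional[Dict[str, Any]]
        dry_run: bool = False
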
@@ -91,14 +91,23 @@ async def create_policy(
  ) -> PolicyCreateResponse:
      """Create a new policy instance."""
      try:
-         task_app = req.app.state.task_app
-
-         # Set defaults from TaskApp if not provided
-         config = request.config.copy()
-         if "inference_url" not in config:
-             config["inference_url"] = task_app.vllm_base_url
-         if "model" not in config and task_app.default_model:
-             config["model"] = task_app.default_model
+         task_app = getattr(req.app.state, "task_app", None)
+
+         # Set defaults from TaskApp / environment if not provided
+         config = dict(request.config or {})
+         if "inference_url" not in config and task_app is not None:
+             base_url = getattr(task_app, "vllm_base_url", None)
+             if base_url:
+                 config["inference_url"] = base_url
+         if "model" not in config and task_app is not None:
+             default_model = getattr(task_app, "default_model", None)
+             if default_model:
+                 config["model"] = default_model
+         if "inference_url" not in config or "model" not in config:
+             raise HTTPException(
+                 status_code=422,
+                 detail="Policy configuration must include 'inference_url' and 'model'.",
+             )
 
          # Create policy instance based on name
          pname = request.policy_name.lower()
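
Read as a small pure function, the new defaulting logic copies the request config, fills inference_url and model from optional task_app attributes, and fails fast when either is still missing, so a misconfigured request now surfaces as a 422 instead of an AttributeError turned into a 500. A sketch under those assumptions (resolve_policy_config is a hypothetical helper; ValueError stands in for the route's HTTPException):

    from typing import Any

    def resolve_policy_config(config: dict[str, Any] | None, task_app: object | None) -> dict[str, Any]:
        resolved = dict(config or {})
        # Fill defaults only when the attribute exists and is truthy.
        if "inference_url" not in resolved and task_app is not None:
            base_url = getattr(task_app, "vllm_base_url", None)
            if base_url:
                resolved["inference_url"] = base_url
        if "model" not in resolved and task_app is not None:
            default_model = getattr(task_app, "default_model", None)
            if default_model:
                resolved["model"] = default_model
        # Fail fast instead of letting a KeyError surface later.
        if "inference_url" not in resolved or "model" not in resolved:
            raise ValueError("Policy configuration must include 'inference_url' and 'model'.")
        return resolved
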
@@ -110,11 +119,13 @@
              await policy.initialize(config)
          elif pname in ["wordle-react", "wordle"]:
              try:
-                 from .envs.wordle.policy import WordlePolicy as _WordlePolicy
+                 from .envs.wordle.policy import WordlePolicy
              except Exception as e:
-                 raise HTTPException(status_code=500, detail=f"Wordle policy unavailable: {e}")
+                 raise HTTPException(
+                     status_code=500, detail=f"Wordle policy unavailable: {e}"
+                 ) from e
 
-             policy = _WordlePolicy(
+             policy = WordlePolicy(
                  inference_url=config["inference_url"],
                  model=config["model"],
                  word_length=int(config["word_length"]),
@@ -123,22 +134,24 @@
              await policy.initialize(config)
          elif pname in ["sokoban-react", "sokoban"]:
              try:
-                 from .envs.sokoban.policy import SokobanPolicy as _SokobanPolicy
+                 from .envs.sokoban.policy import SokobanPolicy
              except Exception as e:
-                 raise HTTPException(status_code=500, detail=f"Sokoban policy unavailable: {e}")
+                 raise HTTPException(
+                     status_code=500, detail=f"Sokoban policy unavailable: {e}"
+                 ) from e
 
-             policy = _SokobanPolicy(
+             policy = SokobanPolicy(
                  inference_url=config["inference_url"],
                  model=config["model"],
              )
              await policy.initialize(config)
          elif pname in ["math-react", "math"]:
              try:
-                 from .envs.math.policy import MathPolicy as _MathPolicy
+                 from .envs.math.policy import MathPolicy
              except Exception as e:
-                 raise HTTPException(status_code=500, detail=f"Math policy unavailable: {e}")
+                 raise HTTPException(status_code=500, detail=f"Math policy unavailable: {e}") from e
 
-             policy = _MathPolicy(
+             policy = MathPolicy(
                  inference_url=config["inference_url"],
                  model=config["model"],
              )
@@ -160,7 +173,7 @@
 
      except Exception as e:
          logger.error(f"Failed to create policy: {e}")
-         raise HTTPException(status_code=500, detail=str(e))
+         raise HTTPException(status_code=500, detail=str(e)) from e
 
 
  @router.post("/step", response_model=PolicyStepResponse)
@@ -171,140 +184,172 @@
      """Execute a policy step to generate actions."""
      handle = registry.get_policy(request.policy_id)
      if not handle:
-         raise HTTPException(
-             status_code=404, detail=f"Policy {request.policy_id} not found"
-         )
+         raise HTTPException(status_code=404, detail=f"Policy {request.policy_id} not found")
 
      try:
          task_app = req.app.state.task_app
          policy = handle.policy
          tracing_context = getattr(req.state, "rollout_tracing", None)
 
-         # Format observation text conditionally for each env
+         obs_text = request.observation
          if isinstance(request.observation, dict):
              if isinstance(policy, CrafterPolicy):
                  from .envs.crafter.shared import format_observation as format_crafter
 
                  obs_text = format_crafter(request.observation)
-             elif True:
+             else:
+                 formatted: str | None = None
+
+                 # Wordle formatting
                  try:
-                     from .envs.wordle.policy import WordlePolicy as _WordlePolicy
+                     from .envs.wordle.policy import WordlePolicy
                  except Exception:
-                     _WordlePolicy = None  # type: ignore
+                     wordle_policy_cls = None  # type: ignore[assignment]
+                 else:
+                     wordle_policy_cls = WordlePolicy
 
-                 if _WordlePolicy is not None and isinstance(policy, _WordlePolicy):
+                 if formatted is None and wordle_policy_cls is not None and isinstance(
+                     policy, wordle_policy_cls
+                 ):
                      from .envs.wordle.shared import format_observation_wordle
 
-                     # ASSERTION: Validate observation structure
-                     assert request.observation is not None, (
-                         "request.observation cannot be None"
-                     )
-                     assert isinstance(request.observation, dict), (
-                         f"request.observation must be dict, got {type(request.observation)}"
-                     )
+                     # ASSERTION: Validate observation structure
+                     assert request.observation is not None, "request.observation cannot be None"
+                     assert isinstance(request.observation, dict), (
+                         f"request.observation must be dict, got {type(request.observation)}"
+                     )
 
-                     # Required keys for Wordle observation
-                     required_keys = {
-                         "text",
-                         "status",
-                         "remaining_guesses",
-                         "guesses",
-                         "feedback",
-                         "reward_last",
-                         "total_reward",
-                         "terminated",
-                     }
-                     missing_keys = required_keys - set(request.observation.keys())
-                     assert not missing_keys, (
-                         f"Wordle observation missing required keys: {missing_keys}"
-                     )
+                     required_keys = {
+                         "text",
+                         "status",
+                         "remaining_guesses",
+                         "guesses",
+                         "feedback",
+                         "reward_last",
+                         "total_reward",
+                         "terminated",
+                     }
+                     missing_keys = required_keys - set(request.observation.keys())
+                     assert (
+                         not missing_keys
+                     ), f"Wordle observation missing required keys: {missing_keys}"
+
+                     print("DEBUG POLICY_ROUTES: About to format Wordle observation")
+                     print(f"DEBUG POLICY_ROUTES: Observation type: {type(request.observation)}")
+                     print(
+                         f"DEBUG POLICY_ROUTES: Observation keys: {list(request.observation.keys())}"
+                     )
+                     feedback_val = request.observation["feedback"]
+                     print(f"DEBUG POLICY_ROUTES: Observation feedback: {feedback_val}")
+                     print(
+                         f"DEBUG POLICY_ROUTES: Observation guesses: {request.observation['guesses']}"
+                     )
+                     print(
+                         "DEBUG POLICY_ROUTES: Observation text length: "
+                         f"{len(request.observation['text'])}"
+                     )
 
-                     print("DEBUG POLICY_ROUTES: About to format Wordle observation")
-                     print(
-                         f"DEBUG POLICY_ROUTES: Observation type: {type(request.observation)}"
-                     )
-                     print(
-                         f"DEBUG POLICY_ROUTES: Observation keys: {list(request.observation.keys())}"
-                     )
-                     feedback_val = request.observation["feedback"]
-                     print(f"DEBUG POLICY_ROUTES: Observation feedback: {feedback_val}")
-                     print(
-                         f"DEBUG POLICY_ROUTES: Observation guesses: {request.observation['guesses']}"
-                     )
-                     print(
-                         f"DEBUG POLICY_ROUTES: Observation text length: {len(request.observation['text'])}"
-                     )
+                     guesses = request.observation["guesses"]
+                     feedback = request.observation["feedback"]
+                     assert isinstance(guesses, list), f"guesses must be list, got {type(guesses)}"
+                     assert isinstance(
+                         feedback, list
+                     ), f"feedback must be list, got {type(feedback)}"
 
-                     # ASSERTION: Validate feedback data
-                     guesses = request.observation["guesses"]
-                     feedback = request.observation["feedback"]
-                     assert isinstance(guesses, list), (
-                         f"guesses must be list, got {type(guesses)}"
-                     )
-                     assert isinstance(feedback, list), (
-                         f"feedback must be list, got {type(feedback)}"
-                     )
-                     # Note: We don't assert equal lengths here since the environment is broken
+                     formatted = format_observation_wordle(request.observation)
 
-                     obs_text = format_observation_wordle(request.observation)
+                     assert isinstance(formatted, str), (
+                         f"obs_text must be string, got {type(formatted)}"
+                     )
+                     assert len(formatted) > 0, "obs_text cannot be empty"
+                     assert "WORDLE" in formatted, "obs_text must contain 'WORDLE' header"
+                     assert "Respond with a single tool call" in formatted, (
+                         "obs_text must contain instruction text"
+                     )
 
-                     # ASSERTION: Validate formatted output
-                     assert isinstance(obs_text, str), (
-                         f"obs_text must be string, got {type(obs_text)}"
-                     )
-                     assert len(obs_text) > 0, "obs_text cannot be empty"
-                     assert "WORDLE" in obs_text, "obs_text must contain 'WORDLE' header"
-                     assert "Respond with a single tool call" in obs_text, (
-                         "obs_text must contain instruction text"
-                     )
+                     print(
+                         f"DEBUG POLICY_ROUTES: Formatted obs_text length: {len(formatted)}"
+                     )
+                     print(
+                         "DEBUG POLICY_ROUTES: Formatted obs_text contains 🟩: "
+                         f"{'🟩' in formatted}"
+                     )
+                     print(
+                         "DEBUG POLICY_ROUTES: Formatted obs_text contains 🟨: "
+                         f"{'🟨' in formatted}"
+                     )
+                     print(
+                         "DEBUG POLICY_ROUTES: Formatted obs_text contains ⬛: "
+                         f"{'⬛' in formatted}"
+                     )
+                     print(
+                         "DEBUG POLICY_ROUTES: Formatted obs_text first 200 chars: "
+                         f"{formatted[:200]}"
+                     )
 
-                     print(
-                         f"DEBUG POLICY_ROUTES: Formatted obs_text length: {len(obs_text)}"
-                     )
-                     print(
-                         f"DEBUG POLICY_ROUTES: Formatted obs_text contains 🟩: {'🟩' in obs_text}"
-                     )
-                     print(
-                         f"DEBUG POLICY_ROUTES: Formatted obs_text contains 🟨: {'🟨' in obs_text}"
-                     )
-                     print(
-                         f"DEBUG POLICY_ROUTES: Formatted obs_text contains ⬛: {'⬛' in obs_text}"
-                     )
-                     print(
-                         f"DEBUG POLICY_ROUTES: Formatted obs_text first 200 chars: {obs_text[:200]}"
-                     )
-                 elif True:
+                 # Sokoban formatting
                  try:
-                     from .envs.sokoban.policy import SokobanPolicy as _SokobanPolicy
+                     from .envs.sokoban.policy import SokobanPolicy
                  except Exception:
-                     _SokobanPolicy = None  # type: ignore
-
-                 if _SokobanPolicy is not None and isinstance(policy, _SokobanPolicy):
+                     sokoban_policy_cls = None  # type: ignore[assignment]
+                 else:
+                     sokoban_policy_cls = SokobanPolicy
+
+                 if formatted is None and sokoban_policy_cls is not None and isinstance(
+                     policy, sokoban_policy_cls
+                 ):
                      from .envs.sokoban.shared import format_observation_sokoban
-
-                     obs_text = format_observation_sokoban(request.observation)
-                 elif True:
+
+                     formatted = format_observation_sokoban(request.observation)
+
+                 # Math formatting
                  try:
-                     from .envs.math.policy import MathPolicy as _MathPolicy
+                     from .envs.math.policy import MathPolicy
                  except Exception:
-                     _MathPolicy = None  # type: ignore
-                 if _MathPolicy is not None and isinstance(policy, _MathPolicy):
-                     # Simple extraction of problem text
+                     math_policy_cls = None  # type: ignore[assignment]
+                 else:
+                     math_policy_cls = MathPolicy
+
+                 if formatted is None and math_policy_cls is not None and isinstance(
+                     policy, math_policy_cls
+                 ):
                      try:
-                         obs_text = str(request.observation.get("problem_text") or request.observation)
+                         formatted = str(
+                             request.observation.get("problem_text") or request.observation
+                         )
                      except Exception:
-                         obs_text = str(request.observation)
-                 else:
-                     obs_text = str(request.observation)
-         else:
-             obs_text = request.observation
+                         formatted = str(request.observation)
+
+                 if formatted is None:
+                     formatted = str(request.observation)
+
+                 obs_text = formatted
+
+         # Merge metadata with raw observation for multimodal policies
+         step_metadata: dict[str, Any] = dict(request.metadata or {})
+         step_metadata["raw_observation"] = request.observation
 
          # Execute policy step to get inference request
          tool_calls, meta = await policy.step(
              observation_text=obs_text,
              state=request.state,
-             metadata=request.metadata,
+             metadata=step_metadata,
          )
+         # Compact tool call summary
+         with contextlib.suppress(Exception):
+             _summary: list[dict[str, Any]] = []
+             _tc = tool_calls or []
+             for _item in (_tc if isinstance(_tc, list) else []):
+                 if isinstance(_item, dict):
+                     _tool = _item.get("tool")
+                     _args = _item.get("args")
+                     _keys = list(_args.keys()) if isinstance(_args, dict) else []
+                     _summary.append({"tool": _tool, "args_keys": _keys})
+             logger.info(
+                 "POLICY_STEP: tool_calls=%d summary=%s",
+                 len(_tc),
+                 _summary,
+             )
 
          # If not dry run, perform inference
          if not request.dry_run and "inference_request" in meta:
@@ -312,13 +357,11 @@
              inf_req = meta["inference_request"]
              msgs = inf_req["messages"]
              model_name = inf_req.get("model") or getattr(policy, "model", None) or ""
-             system_messages: List[str] = []
-             user_messages: List[str] = []
+             system_messages: list[str] = []
+             user_messages: list[str] = []
              if msgs and len(msgs) > 0 and msgs[0]["role"] == "system":
                  sys_text = msgs[0]["content"]
-                 policy_name = (
-                     getattr(policy, "name", "") or type(policy).__name__.lower()
-                 )
+                 policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
 
                  # Assert environment-specific prompts match the policy
                  if policy_name in ("wordle-react", "wordle"):
@@ -342,7 +385,6 @@
                          raise ValueError(
                              f"PROMPT MISMATCH: Crafter policy {policy_name} received Wordle system prompt: {sys_text[:200]}..."
                          )
-
                  elif policy_name in ("sokoban-react", "sokoban"):
                      if "Sokoban" not in sys_text:
                          raise ValueError(
@@ -363,6 +405,7 @@
 
              # Emit full system/user prompts for observability (no secrets included)
              try:
+
                  def _as_text(content: object) -> str:
                      if isinstance(content, str):
                          return content
@@ -380,40 +423,54 @@
                              return "".join(parts)
                      return str(content)
 
-                 system_messages: list[str] = []
-                 user_messages: list[str] = []
+                 system_prompt_records: list[dict[str, Any]] = []
+                 user_prompt_records: list[dict[str, Any]] = []
                  for message in msgs:
                      role = message.get("role")
-                     content = _as_text(message.get("content"))
+                     raw_content = message.get("content")
+                     content = _as_text(raw_content)
+                     record = {"role": role, "text": content, "content": raw_content}
                      if role == "system":
-                         system_messages.append(content)
+                         system_prompt_records.append(record)
                      elif role == "user":
-                         user_messages.append(content)
+                         user_prompt_records.append(record)
 
-                 if system_messages:
+                 logger.info(
+                     "PROMPTS: system_msgs=%d user_msgs=%d last_user_chars=%d",
+                     len(system_prompt_records),
+                     len(user_prompt_records),
+                     len(user_prompt_records[-1].get("text", "")) if user_prompt_records else 0,
+                 )
+
+                 if system_prompt_records:
                      logger.info("PROMPT_DUMP_SYSTEM_BEGIN")
-                     for idx, smsg in enumerate(system_messages):
+                     for idx, rec in enumerate(system_prompt_records):
+                         smsg = rec.get("text", "")
                          logger.info(f"SYSTEM[{idx}]\n{smsg}")
                      logger.info("PROMPT_DUMP_SYSTEM_END")
 
-                 if user_messages:
+                 if user_prompt_records:
                      logger.info("PROMPT_DUMP_USER_BEGIN")
-                     for idx, umsg in enumerate(user_messages):
+                     for idx, rec in enumerate(user_prompt_records):
+                         umsg = rec.get("text", "")
                          logger.info(f"USER[{idx}]\n{umsg}")
                      logger.info("PROMPT_DUMP_USER_END")
                  # Print concise preview for visibility in standard logs
-                 try:
-                     last_user = user_messages[-1] if user_messages else ""
-                     #preview = last_user[:400] if isinstance(last_user, str) else str(last_user)[:400]
+                 with contextlib.suppress(Exception):
+                     last_user = (
+                         user_prompt_records[-1].get("text", "")
+                         if user_prompt_records
+                         else ""
+                     )
                      print(f"[task:crafter] user prompt: {last_user}", flush=True)
-                 except Exception:
-                     pass
              except Exception as e:
                  logger.warning(f"PROMPT_DUMP_FAILED: {e}")
 
              if tracing_context is not None:
                  try:
-                     await tracing_context.record_policy_prompts(system_messages, user_messages)
+                     await tracing_context.record_policy_prompts(
+                         system_prompt_records, user_prompt_records
+                     )
                  except Exception as exc:
                      logger.debug(f"TRACING_PROMPTS_FAIL: {exc}")
 
@@ -426,25 +483,37 @@
              )
 
              # Ensure meta carries the final target URL for downstream logging/clients
-             try:
+             with contextlib.suppress(Exception):
                  meta["inference_url"] = target_url
-             except Exception:
-                 pass
 
              # Select API key based on resolved target URL
              api_key_override = None
              try:
                  import os as _os
+
                  if isinstance(target_url, str):
                      low_url = target_url.lower()
-                     if "openai.com" in low_url:
-                         api_key_override = _os.getenv("OPENAI_API_KEY") or getattr(task_app, "openai_api_key", None)
-                     elif "groq.com" in low_url:
+                     # Proxy endpoints should not receive a bearer; the server-side proxy holds the vendor key
+                     if "/proxy/groq" in low_url or "/proxy/openai" in low_url:
+                         api_key_override = None
+                     elif "openai.com" in low_url:
+                         api_key_override = _os.getenv("OPENAI_API_KEY") or getattr(
+                             task_app, "openai_api_key", None
+                         )
+                     elif "groq.com" in low_url or "/proxy/groq" in low_url:
                          api_key_override = _os.getenv("GROQ_API_KEY")
                      else:
-                         api_key_override = _os.getenv("SYNTH_API_KEY") or _os.getenv("OPENAI_API_KEY") or getattr(task_app, "openai_api_key", None)
+                         api_key_override = (
+                             _os.getenv("SYNTH_API_KEY")
+                             or _os.getenv("OPENAI_API_KEY")
+                             or getattr(task_app, "openai_api_key", None)
+                         )
                  else:
-                     api_key_override = _os.getenv("SYNTH_API_KEY") or _os.getenv("OPENAI_API_KEY") or getattr(task_app, "openai_api_key", None)
+                     api_key_override = (
+                         _os.getenv("SYNTH_API_KEY")
+                         or _os.getenv("OPENAI_API_KEY")
+                         or getattr(task_app, "openai_api_key", None)
+                     )
              except Exception:
                  api_key_override = None
 
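The branch above encodes a precedence rule for the outbound bearer token: interposed /proxy/ endpoints get none (the server-side proxy injects the vendor key), openai.com maps to OPENAI_API_KEY, groq.com to GROQ_API_KEY, and everything else falls back through SYNTH_API_KEY then OPENAI_API_KEY. The same rule as a standalone sketch (the diff additionally falls back to a task_app.openai_api_key attribute, omitted here for brevity):

    import os

    def select_api_key(target_url: str | None) -> str | None:
        low = (target_url or "").lower()
        if "/proxy/groq" in low or "/proxy/openai" in low:
            return None  # proxy holds the vendor key server-side
        if "openai.com" in low:
            return os.getenv("OPENAI_API_KEY")
        if "groq.com" in low:
            return os.getenv("GROQ_API_KEY")
        return os.getenv("SYNTH_API_KEY") or os.getenv("OPENAI_API_KEY")
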
@@ -455,7 +524,9 @@
              masked = "<masked>"
              logger.debug(f"INFERENCE_AUTH: Using bearer key {masked}")
          else:
-             logger.warning("INFERENCE_AUTH: No API key resolved for inference request; downstream may 401")
+             logger.warning(
+                 "INFERENCE_AUTH: No API key resolved for inference request; downstream may 401"
+             )
 
          client = create_inference_client(task_app, api_key=api_key_override)
 
@@ -544,16 +615,16 @@
              except Exception:
                  return max(1, int(len(text) / 4))
 
-         def _count_messages_tokens(messages: List[Dict[str, Any]]) -> int:
+         def _count_messages_tokens(messages: list[dict[str, Any]]) -> int:
              total = 0
              for m in messages:
                  total += _count_tokens(_content_to_text(m.get("content")))
              return total
 
          def _truncate_messages_to_budget(
-             messages: List[Dict[str, Any]],
+             messages: list[dict[str, Any]],
              max_tokens: int,
-         ) -> Tuple[List[Dict[str, Any]], int, int, int]:
+         ) -> tuple[list[dict[str, Any]], int, int, int]:
              before = _count_messages_tokens(messages)
              if before <= max_tokens:
                  return messages, before, before, len(messages)
@@ -563,7 +634,7 @@
              if messages and messages[0].get("role") == "system":
                  system_msg = messages[0]
                  start_idx = 1
-             kept_rev: List[Dict[str, Any]] = []
+             kept_rev: list[dict[str, Any]] = []
              total = _count_messages_tokens([system_msg] if system_msg else [])
              # Walk from the end keeping most recent messages
              for m in reversed(messages[start_idx:]):
@@ -604,7 +675,7 @@
              )
              if new_msgs is not msgs:
                  inf_req["messages"] = new_msgs
-                 try:
+                 with contextlib.suppress(Exception):
                      logger.info(
                          {
                              "chat_truncated": True,
@@ -614,8 +685,6 @@
                              "kept_msgs": int(kept_count),
                          }
                      )
-                 except Exception:
-                     pass
          except Exception as _trunc_e:
              logger.warning(f"CHAT_TRUNCATION_FAILED: {type(_trunc_e).__name__}: {_trunc_e}")
 
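_truncate_messages_to_budget keeps the system message (if present) plus as many of the most recent messages as fit, walking the history from the end. A condensed sketch that returns only the kept messages (the diff's version also returns before/after token counts and the kept-message count), using the same len/4 fallback estimate as _count_tokens:

    from typing import Any

    def _cost(m: dict[str, Any]) -> int:
        # Crude len/4 estimate, mirroring the diff's fallback path.
        return max(1, len(str(m.get("content", ""))) // 4)

    def truncate_to_budget(messages: list[dict[str, Any]], max_tokens: int) -> list[dict[str, Any]]:
        if sum(_cost(m) for m in messages) <= max_tokens:
            return messages
        system = [messages[0]] if messages and messages[0].get("role") == "system" else []
        used = sum(_cost(m) for m in system)
        kept: list[dict[str, Any]] = []
        for m in reversed(messages[len(system):]):  # newest first
            if used + _cost(m) > max_tokens:
                break
            kept.append(m)
            used += _cost(m)
        return system + list(reversed(kept))
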
@@ -643,76 +712,78 @@
          # Prompt diagnostics before sending to inference: build chat template locally,
          # count tokens, and log the first 10k tokens if oversized. Also stash a
          # compact preview in meta so the trainer can surface it.
-         try:
+         with contextlib.suppress(Exception):
              req_for_diag = meta.get("inference_request", {})
              model_for_diag = req_for_diag.get("model") or getattr(policy, "model", None) or ""
              messages_for_diag = req_for_diag.get("messages") or []
              if model_for_diag and messages_for_diag:
-                 try:
-                     from transformers import AutoTokenizer
-                     tok = AutoTokenizer.from_pretrained(model_for_diag)
-                     prompt_preview = tok.apply_chat_template(
-                         messages_for_diag,
-                         add_generation_prompt=True,
-                         tokenize=False,
+                 from transformers import AutoTokenizer
+
+                 tok = AutoTokenizer.from_pretrained(model_for_diag)
+                 prompt_preview = tok.apply_chat_template(
+                     messages_for_diag,
+                     add_generation_prompt=True,
+                     tokenize=False,
+                 )
+                 ids = tok.encode(prompt_preview, add_special_tokens=False)
+                 max_len = getattr(tok, "model_max_length", None)
+                 over_limit = False
+                 with contextlib.suppress(Exception):
+                     over_limit = (
+                         isinstance(max_len, int) and max_len > 0 and len(ids) > int(max_len)
                      )
-                     ids = tok.encode(prompt_preview, add_special_tokens=False)
-                     max_len = getattr(tok, "model_max_length", None)
-                     over_limit = False
-                     try:
-                         over_limit = isinstance(max_len, int) and max_len > 0 and len(ids) > int(max_len)
-                     except Exception:
-                         over_limit = False
-                     if over_limit or len(ids) > 10000:
-                         preview_ids = ids[:10000]
-                         preview_text = tok.decode(preview_ids, skip_special_tokens=False)
-                         try:
-                             logger.warning(
-                                 {
-                                     "prompt_token_overflow_local": True,
-                                     "model": str(model_for_diag),
-                                     "token_count": int(len(ids)),
-                                     "model_max_length": int(max_len) if isinstance(max_len, int) else None,
-                                     "preview_tokens_logged": int(len(preview_ids)),
-                                     "prompt_preview_first_10k_tokens": preview_text,
-                                 }
-                             )
-                         except Exception:
-                             pass
-                         try:
-                             meta["prompt_debug"] = {
+                 if over_limit or len(ids) > 10000:
+                     preview_ids = ids[:10000]
+                     preview_text = tok.decode(
+                         preview_ids,
+                         skip_special_tokens=False,
+                     )
+                     with contextlib.suppress(Exception):
+                         logger.warning(
+                             {
+                                 "prompt_token_overflow_local": True,
+                                 "model": str(model_for_diag),
                                  "token_count": int(len(ids)),
-                                 "model_max_length": int(max_len) if isinstance(max_len, int) else None,
-                                 "preview_first_10k_tokens": preview_text,
+                                 "model_max_length": int(max_len)
+                                 if isinstance(max_len, int)
+                                 else None,
+                                 "preview_tokens_logged": int(len(preview_ids)),
+                                 "prompt_preview_first_10k_tokens": preview_text,
                              }
-                         except Exception:
-                             pass
-                 except Exception:
-                     pass
-         except Exception:
-             pass
+                         )
+                     with contextlib.suppress(Exception):
+                         meta["prompt_debug"] = {
+                             "token_count": int(len(ids)),
+                             "model_max_length": int(max_len)
+                             if isinstance(max_len, int)
+                             else None,
+                             "preview_first_10k_tokens": preview_text,
+                         }
 
          # Emit the exact prompt/messages and tools before calling the LLM (bounded preview)
-         try:
+         with contextlib.suppress(Exception):
              req_dump = meta.get("inference_request", {})
              msgs = req_dump.get("messages")
              tools_dump = req_dump.get("tools")
              if isinstance(msgs, list):
                  # Print compact messages structure and tool schema with bounded length
                  import json as _json
+
                  msgs_compact = _json.dumps(msgs)[:20000]
-                 tools_compact = _json.dumps(tools_dump)[:8000] if tools_dump is not None else None
-                 print({
-                     "llm.call": True,
-                     "policy": str(policy_name),
-                     "messages_preview": msgs_compact,
-                     "tools_preview": tools_compact,
-                 })
-         except Exception:
-             pass
+                 tools_compact = (
+                     _json.dumps(tools_dump)[:8000] if tools_dump is not None else None
+                 )
+                 print(
+                     {
+                         "llm.call": True,
+                         "policy": str(policy_name),
+                         "messages_preview": msgs_compact,
+                         "tools_preview": tools_compact,
+                     }
+                 )
 
          # Normalize request for non-OpenAI endpoints (strict schemas)
-         try:
+         with contextlib.suppress(Exception):
              base = str(target_url or "")
              is_openai_dotcom = "openai.com" in base.lower()
              if not is_openai_dotcom:
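
The diagnostics block renders the chat template locally with the model's own tokenizer so the prompt can be measured (and previewed) before the request leaves the task app. A minimal standalone version, assuming the transformers package is installed and model_name resolves to a tokenizer on the Hugging Face hub:

    from transformers import AutoTokenizer

    def count_prompt_tokens(model_name: str, messages: list[dict]) -> int:
        tok = AutoTokenizer.from_pretrained(model_name)
        # Render the exact prompt string the server would build, then tokenize it.
        rendered = tok.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
        return len(tok.encode(rendered, add_special_tokens=False))
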
@@ -721,20 +792,25 @@
                  # Force structured tool_choice if a bare "required" is present
                  if req_body.get("tool_choice") == "required":
                      func_name = "interact_many"
-                     try:
+                     with contextlib.suppress(Exception):
                          tools_arr = req_body.get("tools") or []
                          if isinstance(tools_arr, list) and tools_arr:
-                             f = tools_arr[0].get("function") if isinstance(tools_arr[0], dict) else None
+                             f = (
+                                 tools_arr[0].get("function")
+                                 if isinstance(tools_arr[0], dict)
+                                 else None
+                             )
                              cand = (f or {}).get("name") if isinstance(f, dict) else None
                              if isinstance(cand, str) and cand:
                                  func_name = cand
-                     except Exception:
-                         pass
-                     req_body["tool_choice"] = {"type": "function", "function": {"name": func_name}}
+                     req_body["tool_choice"] = {
+                         "type": "function",
+                         "function": {"name": func_name},
+                     }
                      req_body["parallel_tool_calls"] = False
                      req_body.setdefault("function_call", {"name": func_name})
                  # Inject extra_body for thinking controls expected by Modal service
-                 try:
+                 with contextlib.suppress(Exception):
                      tb = req_body.get("thinking_budget")
                      tm = str(req_body.get("thinking_mode") or "").lower()
                      enable_thinking = bool(tb) or tm == "think"
@@ -742,25 +818,52 @@
                      chat_kwargs = dict(extra.get("chat_template_kwargs") or {})
                      if enable_thinking:
                          chat_kwargs["enable_thinking"] = True
-                     if isinstance(tb, (int, float, str)) and str(tb).strip():
-                         try:
+                     if isinstance(tb, int | float | str) and str(tb).strip():
+                         with contextlib.suppress(Exception):
                              chat_kwargs["thinking_budget"] = int(tb)
-                         except Exception:
-                             pass
                      if chat_kwargs:
                          extra["chat_template_kwargs"] = chat_kwargs
                      # Ensure stop_after_tool_calls honored via extra_body for stricter servers
                      extra.setdefault("stop_after_tool_calls", 1)
                      if extra:
                          req_body["extra_body"] = extra
-                 except Exception:
-                     pass
                  # Provide a conservative default temperature if missing
                  if "temperature" not in req_body:
                      req_body["temperature"] = 0.1
                  meta["inference_request"] = req_body
-         except Exception:
-             pass
+
+             # Strip image parts: Crafter policy currently only uses text prompts.
+             # Some providers reject image_url payloads entirely, so always flatten to plain text.
+             req_body2 = meta.get("inference_request", {})
+             if isinstance(req_body2, dict):
+                 msgs = req_body2.get("messages")
+                 if isinstance(msgs, list):
+                     new_msgs = []
+                     changed = False
+                     for m in msgs:
+                         try:
+                             if isinstance(m, dict):
+                                 content = m.get("content")
+                                 if isinstance(content, list):
+                                     parts: list[str] = []
+                                     for seg in content:
+                                         if isinstance(seg, dict):
+                                             txt = seg.get("text") or seg.get("content")
+                                             if isinstance(txt, str) and txt:
+                                                 parts.append(txt)
+                                     m2 = dict(m)
+                                     m2["content"] = "\n".join(parts)
+                                     new_msgs.append(m2)
+                                     changed = True
+                                 else:
+                                     new_msgs.append(m)
+                             else:
+                                 new_msgs.append(m)
+                         except Exception:
+                             new_msgs.append(m)
+                     if changed:
+                         req_body2["messages"] = new_msgs
+                         meta["inference_request"] = req_body2
 
          _t_start = _t.time()
          call_started_at = datetime.utcnow()
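
The new strip-image pass flattens OpenAI-style list content (text and image_url parts) into a single newline-joined string, because the Crafter policy is text-only and some providers reject image payloads outright. The same transformation isolated as a helper (flatten_content is illustrative, not a name from the diff):

    from typing import Any

    def flatten_content(message: dict[str, Any]) -> dict[str, Any]:
        content = message.get("content")
        if not isinstance(content, list):
            return message  # already plain text
        parts = [
            seg.get("text") or seg.get("content")
            for seg in content
            if isinstance(seg, dict)
        ]
        out = dict(message)
        out["content"] = "\n".join(p for p in parts if isinstance(p, str) and p)
        return out
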
@@ -799,10 +902,13 @@
              else:
                  try:
                      import json as _json
-                     print({
-                         "tool_calls_parsed": int(len(tool_calls)),
-                         "tool_calls_preview": _json.dumps(tool_calls)[:20000],
-                     })
+
+                     print(
+                         {
+                             "tool_calls_parsed": int(len(tool_calls)),
+                             "tool_calls_preview": _json.dumps(tool_calls)[:20000],
+                         }
+                     )
                  except Exception:
                      logger.info(f"Parsed {len(tool_calls)} tool calls: {tool_calls}")
 
@@ -814,21 +920,17 @@
                      inference_response, getattr(policy, "use_tools", True)
                  )
              else:
-                 parsed = policy.parse_model_response(
-                     inference_response, request.observation
-                 )
+                 parsed = policy.parse_model_response(inference_response, request.observation)
                  # Replace tool_calls with parsed result
                  if isinstance(parsed, list):
                      tool_calls = parsed
-             try:
+             with contextlib.suppress(Exception):
                  logger.info(
                      "TOOLCALL_PARSE: parsed=%d has_tools=%s example=%r",
                      len(tool_calls) if isinstance(tool_calls, list) else -1,
                      bool(getattr(policy, "use_tools", True)),
                      (tool_calls[0] if isinstance(tool_calls, list) and tool_calls else None),
                  )
-             except Exception:
-                 pass
          except Exception as _pe:
              logger.warning(f"Failed to parse tool calls: {str(_pe)}")
          # Attach raw response + usage for observability
@@ -858,7 +960,7 @@
 
      except Exception as e:
          logger.error(f"Failed to step policy {request.policy_id}: {e}")
-         raise HTTPException(status_code=500, detail=str(e))
+         raise HTTPException(status_code=500, detail=str(e)) from e
 
 
  @router.post("/snapshot", response_model=PolicySnapshotResponse)
@@ -866,9 +968,7 @@ async def snapshot_policy(request: PolicySnapshotRequest) -> PolicySnapshotRespo
      """Create a snapshot of the policy state."""
      handle = registry.get_policy(request.policy_id)
      if not handle:
-         raise HTTPException(
-             status_code=404, detail=f"Policy {request.policy_id} not found"
-         )
+         raise HTTPException(status_code=404, detail=f"Policy {request.policy_id} not found")
 
      try:
          # Serialize policy state
@@ -898,7 +998,7 @@ async def snapshot_policy(request: PolicySnapshotRequest) -> PolicySnapshotRespo
 
      except Exception as e:
          logger.error(f"Failed to snapshot policy {request.policy_id}: {e}")
-         raise HTTPException(status_code=500, detail=str(e))
+         raise HTTPException(status_code=500, detail=str(e)) from e
 
 
  @router.post("/restore", response_model=PolicyRestoreResponse)
@@ -906,9 +1006,7 @@ async def restore_policy(request: PolicyRestoreRequest) -> PolicyRestoreResponse
      """Restore a policy from a snapshot."""
      snapshot = registry.get_snapshot(request.snapshot_id)
      if not snapshot:
-         raise HTTPException(
-             status_code=404, detail=f"Snapshot {request.snapshot_id} not found"
-         )
+         raise HTTPException(status_code=404, detail=f"Snapshot {request.snapshot_id} not found")
 
      if snapshot.kind != "policy":
          raise HTTPException(
@@ -931,16 +1029,20 @@ async def restore_policy(request: PolicyRestoreRequest) -> PolicyRestoreResponse
              policy = await CrafterPolicy.deserialize(state_dict)
          elif low in ["wordle-react", "wordle"]:
              try:
-                 from .envs.wordle.policy import WordlePolicy as _WordlePolicy
+                 from .envs.wordle.policy import WordlePolicy
              except Exception as e:
-                 raise HTTPException(status_code=500, detail=f"Wordle policy unavailable: {e}")
-             policy = await _WordlePolicy.deserialize(state_dict)
+                 raise HTTPException(
+                     status_code=500, detail=f"Wordle policy unavailable: {e}"
+                 ) from e
+             policy = await WordlePolicy.deserialize(state_dict)
          elif low in ["sokoban-react", "sokoban"]:
              try:
-                 from .envs.sokoban.policy import SokobanPolicy as _SokobanPolicy
+                 from .envs.sokoban.policy import SokobanPolicy
              except Exception as e:
-                 raise HTTPException(status_code=500, detail=f"Sokoban policy unavailable: {e}")
-             policy = await _SokobanPolicy.deserialize(state_dict)
+                 raise HTTPException(
+                     status_code=500, detail=f"Sokoban policy unavailable: {e}"
+                 ) from e
+             policy = await SokobanPolicy.deserialize(state_dict)
          else:
              raise HTTPException(
                  status_code=422,
@@ -956,10 +1058,8 @@ async def restore_policy(request: PolicyRestoreRequest) -> PolicyRestoreResponse
          return PolicyRestoreResponse(policy_id=policy_id)
 
      except Exception as e:
-         logger.error(
-             f"Failed to restore policy from snapshot {request.snapshot_id}: {e}"
-         )
-         raise HTTPException(status_code=500, detail=str(e))
+         logger.error(f"Failed to restore policy from snapshot {request.snapshot_id}: {e}")
+         raise HTTPException(status_code=500, detail=str(e)) from e
 
 
  @router.post("/terminate", response_model=PolicyTerminateResponse)
@@ -967,9 +1067,7 @@ async def terminate_policy(request: PolicyTerminateRequest) -> PolicyTerminateRe
      """Terminate a policy and clean up resources."""
      handle = registry.get_policy(request.policy_id)
      if not handle:
-         raise HTTPException(
-             status_code=404, detail=f"Policy {request.policy_id} not found"
-         )
+         raise HTTPException(status_code=404, detail=f"Policy {request.policy_id} not found")
 
      try:
          # Call terminate on the policy
@@ -982,4 +1080,4 @@
 
      except Exception as e:
          logger.error(f"Failed to terminate policy {request.policy_id}: {e}")
-         raise HTTPException(status_code=500, detail=str(e))
+         raise HTTPException(status_code=500, detail=str(e)) from e
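
The most repeated change in this file is appending from e when re-raising as HTTPException, which preserves the original exception as __cause__ instead of discarding it (the behavior Ruff's B904 rule asks for). A toy handler showing the pattern (registry and the lookup are stand-ins, not code from the diff):

    from fastapi import HTTPException

    def snapshot(registry: dict, policy_id: str) -> dict:
        try:
            return registry[policy_id]  # may raise KeyError
        except Exception as e:
            # "from e" chains the original error, so logs show the real
            # root cause instead of a bare HTTPException.
            raise HTTPException(status_code=500, detail=str(e)) from e
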