synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (351) hide show
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +23 -17
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/configs/eval_base_qwen.toml +1 -1
  19. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  20. examples/rl/download_dataset.py +26 -10
  21. examples/rl/run_eval.py +53 -52
  22. examples/rl/run_rl_and_save.py +29 -12
  23. examples/rl/task_app/math_single_step.py +180 -41
  24. examples/rl/task_app/math_task_app.py +14 -6
  25. examples/sft/README.md +139 -0
  26. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  27. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  28. examples/sft/evaluate.py +117 -0
  29. examples/sft/export_dataset.py +117 -0
  30. examples/sft/generate_traces.py +162 -0
  31. examples/swe/__init__.py +12 -0
  32. examples/swe/task_app/README.md +105 -0
  33. examples/swe/task_app/__init__.py +2 -0
  34. examples/swe/task_app/grpo_swe_mini.py +571 -0
  35. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  36. examples/swe/task_app/hosted/README.md +173 -0
  37. examples/swe/task_app/hosted/__init__.py +5 -0
  38. examples/swe/task_app/hosted/branching.py +143 -0
  39. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  40. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  41. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  42. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  43. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  44. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  45. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  46. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  47. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  50. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  51. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  52. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  53. examples/swe/task_app/hosted/hosted_app.py +204 -0
  54. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  55. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  56. examples/swe/task_app/hosted/main.py +100 -0
  57. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  58. examples/swe/task_app/hosted/registry.py +195 -0
  59. examples/swe/task_app/hosted/rollout.py +1869 -0
  60. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  61. examples/swe/task_app/hosted/storage/volume.py +211 -0
  62. examples/swe/task_app/hosted/test_agents.py +161 -0
  63. examples/swe/task_app/hosted/test_service.py +137 -0
  64. examples/swe/task_app/hosted/utils.py +62 -0
  65. examples/vlm/README.md +68 -0
  66. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  67. examples/vlm/crafter_image_only_agent.py +207 -0
  68. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  69. examples/vlm/filter_image_rows.py +63 -0
  70. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  71. examples/warming_up_to_rl/analyze_trace_db.py +12 -10
  72. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  73. examples/warming_up_to_rl/export_trace_sft.py +218 -36
  74. examples/warming_up_to_rl/groq_test.py +15 -8
  75. examples/warming_up_to_rl/manage_secrets.py +29 -25
  76. examples/warming_up_to_rl/readme.md +9 -2
  77. examples/warming_up_to_rl/run_eval.py +137 -61
  78. examples/warming_up_to_rl/run_fft_and_save.py +131 -60
  79. examples/warming_up_to_rl/run_local_rollout.py +88 -39
  80. examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
  81. examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
  82. examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
  83. examples/warming_up_to_rl/run_rl_and_save.py +35 -12
  84. examples/warming_up_to_rl/run_rollout_remote.py +44 -19
  85. examples/warming_up_to_rl/task_app/README.md +6 -2
  86. examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
  87. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  111. synth/__init__.py +14 -0
  112. synth_ai/__init__.py +20 -4
  113. synth_ai/api/models/supported.py +376 -0
  114. synth_ai/api/train/builders.py +157 -26
  115. synth_ai/api/train/cli.py +213 -57
  116. synth_ai/api/train/config_finder.py +65 -5
  117. synth_ai/api/train/env_resolver.py +33 -15
  118. synth_ai/api/train/pollers.py +13 -4
  119. synth_ai/api/train/supported_algos.py +139 -0
  120. synth_ai/api/train/task_app.py +5 -3
  121. synth_ai/api/train/utils.py +33 -48
  122. synth_ai/cli/__init__.py +19 -4
  123. synth_ai/cli/_modal_wrapper.py +28 -0
  124. synth_ai/cli/_typer_patch.py +49 -0
  125. synth_ai/cli/balance.py +2 -3
  126. synth_ai/cli/calc.py +1 -1
  127. synth_ai/cli/demo.py +21 -6
  128. synth_ai/cli/recent.py +2 -2
  129. synth_ai/cli/rl_demo.py +77 -17
  130. synth_ai/cli/root.py +116 -39
  131. synth_ai/cli/status.py +2 -2
  132. synth_ai/cli/task_apps.py +1699 -259
  133. synth_ai/cli/traces.py +7 -4
  134. synth_ai/cli/turso.py +73 -0
  135. synth_ai/cli/watch.py +12 -18
  136. synth_ai/core/experiment.py +0 -2
  137. synth_ai/demo_registry.py +68 -31
  138. synth_ai/demos/core/cli.py +516 -194
  139. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  140. synth_ai/demos/demo_task_apps/core.py +64 -28
  141. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  142. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
  143. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  144. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  145. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  146. synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
  147. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  148. synth_ai/environments/examples/bandit/engine.py +12 -4
  149. synth_ai/environments/examples/bandit/taskset.py +4 -4
  150. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  151. synth_ai/environments/reproducibility/tree.py +5 -6
  152. synth_ai/environments/service/app.py +11 -12
  153. synth_ai/environments/service/core_routes.py +10 -9
  154. synth_ai/environments/stateful/engine.py +1 -1
  155. synth_ai/environments/tasks/core.py +1 -0
  156. synth_ai/environments/tasks/filters.py +5 -6
  157. synth_ai/environments/tasks/utils.py +4 -5
  158. synth_ai/evals/base.py +0 -2
  159. synth_ai/handshake.py +11 -9
  160. synth_ai/http.py +1 -1
  161. synth_ai/http_client.py +43 -11
  162. synth_ai/inference/__init__.py +0 -2
  163. synth_ai/inference/client.py +20 -6
  164. synth_ai/jobs/client.py +103 -78
  165. synth_ai/learning/__init__.py +41 -6
  166. synth_ai/learning/algorithms.py +14 -0
  167. synth_ai/learning/client.py +121 -29
  168. synth_ai/learning/config.py +2 -40
  169. synth_ai/learning/constants.py +0 -2
  170. synth_ai/learning/ft_client.py +4 -56
  171. synth_ai/learning/health.py +13 -7
  172. synth_ai/learning/jobs.py +43 -47
  173. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  174. synth_ai/learning/rl/client.py +267 -0
  175. synth_ai/learning/rl/config.py +31 -0
  176. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  177. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  178. synth_ai/learning/rl/secrets.py +13 -0
  179. synth_ai/learning/rl_client.py +2 -253
  180. synth_ai/learning/sft/__init__.py +29 -0
  181. synth_ai/learning/sft/client.py +68 -0
  182. synth_ai/learning/sft/config.py +270 -0
  183. synth_ai/learning/sft/data.py +295 -0
  184. synth_ai/learning/sse.py +25 -26
  185. synth_ai/learning/validators.py +25 -24
  186. synth_ai/lm/__init__.py +21 -47
  187. synth_ai/task/__init__.py +26 -27
  188. synth_ai/task/apps/__init__.py +18 -19
  189. synth_ai/task/auth.py +35 -23
  190. synth_ai/task/client.py +15 -13
  191. synth_ai/task/contracts.py +37 -35
  192. synth_ai/task/datasets.py +9 -6
  193. synth_ai/task/errors.py +11 -10
  194. synth_ai/task/health.py +17 -11
  195. synth_ai/task/json.py +58 -24
  196. synth_ai/task/proxy.py +15 -14
  197. synth_ai/task/rubrics.py +22 -15
  198. synth_ai/task/server.py +43 -17
  199. synth_ai/task/tracing_utils.py +12 -7
  200. synth_ai/task/validators.py +0 -1
  201. synth_ai/task/vendors.py +5 -7
  202. synth_ai/tracing_v3/__init__.py +2 -0
  203. synth_ai/tracing_v3/abstractions.py +21 -4
  204. synth_ai/tracing_v3/db_config.py +26 -1
  205. synth_ai/tracing_v3/decorators.py +18 -15
  206. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  207. synth_ai/tracing_v3/hooks.py +6 -4
  208. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  209. synth_ai/tracing_v3/replica_sync.py +1 -0
  210. synth_ai/tracing_v3/session_tracer.py +63 -16
  211. synth_ai/tracing_v3/storage/base.py +89 -1
  212. synth_ai/tracing_v3/storage/config.py +21 -8
  213. synth_ai/tracing_v3/storage/factory.py +10 -8
  214. synth_ai/tracing_v3/storage/utils.py +4 -2
  215. synth_ai/tracing_v3/turso/daemon.py +7 -2
  216. synth_ai/tracing_v3/turso/models.py +5 -2
  217. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  218. synth_ai/tracing_v3/utils.py +4 -3
  219. synth_ai/v0/api/__init__.py +8 -0
  220. synth_ai/v0/api/models/__init__.py +8 -0
  221. synth_ai/v0/api/models/supported.py +8 -0
  222. synth_ai/v0/config/__init__.py +15 -0
  223. synth_ai/v0/config/base_url.py +12 -0
  224. synth_ai/v0/lm/__init__.py +51 -0
  225. synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
  226. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  227. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  228. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  229. synth_ai/{lm → v0/lm}/config.py +6 -1
  230. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  231. synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
  232. synth_ai/{lm → v0/lm}/core/main.py +19 -7
  233. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  234. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
  235. synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
  236. synth_ai/{lm → v0/lm}/overrides.py +4 -4
  237. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  238. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  239. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  240. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  241. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
  242. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  243. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  244. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
  245. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
  246. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
  247. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
  248. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  249. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  250. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  251. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  252. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  253. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  254. synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
  255. synth_ai/v0/tracing/upload.py +32 -135
  256. synth_ai/v0/tracing_v3/__init__.py +10 -0
  257. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  258. synth_ai/v0/tracing_v3/decorators.py +3 -0
  259. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  260. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  261. synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
  262. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -262
  263. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
  264. examples/common_old/backend.py +0 -21
  265. examples/evals_old/README.md +0 -98
  266. examples/evals_old/__init__.py +0 -6
  267. examples/evals_old/compare_models.py +0 -1037
  268. examples/evals_old/example_log.md +0 -145
  269. examples/evals_old/run_demo.sh +0 -126
  270. examples/evals_old/trace_analysis.py +0 -270
  271. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  272. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  273. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  274. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
  275. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  276. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  277. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  278. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  279. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  280. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
  281. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  282. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  283. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
  284. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  285. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  286. examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
  287. examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
  288. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  289. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  290. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
  291. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
  292. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
  293. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  294. examples/finetuning_old/synth_qwen_v1/util.py +0 -147
  295. examples/rl_old/task_app.py +0 -962
  296. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  297. examples/warming_up_to_rl/old/notes.md +0 -73
  298. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  299. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  300. synth_ai/experimental/synth_oss.py +0 -446
  301. synth_ai/install_sqld.sh +0 -40
  302. synth_ai/learning/filtering.py +0 -0
  303. synth_ai/learning/offline/dpo.py +0 -0
  304. synth_ai/learning/offline/providers.py +0 -7
  305. synth_ai/learning/offline/sft.py +0 -0
  306. synth_ai/learning/offline/shared.py +0 -0
  307. synth_ai/learning/online/grpo.py +0 -0
  308. synth_ai/learning/online/irft.py +0 -0
  309. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  310. synth_ai/learning/prompts/gepa.py +0 -0
  311. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  312. synth_ai/learning/prompts/mipro.py +0 -289
  313. synth_ai/learning/prompts/random_search.py +0 -246
  314. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  315. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  316. synth_ai/rl/secrets.py +0 -19
  317. synth_ai/scripts/verify_rewards.py +0 -100
  318. synth_ai/tracing/__init__.py +0 -30
  319. synth_ai/tracing_v1/__init__.py +0 -33
  320. synth_ai/tracing_v3/turso/__init__.py +0 -25
  321. synth_ai/tracing_v3/turso/manager.py +0 -774
  322. synth_ai/zyk/__init__.py +0 -30
  323. synth_ai-0.2.9.dev5.dist-info/METADATA +0 -131
  324. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  325. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  326. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  327. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  328. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  329. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  330. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  331. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  332. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  333. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  334. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  335. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  336. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  337. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  338. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  339. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  340. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  341. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  342. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  343. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  344. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  345. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  346. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  347. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  348. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  349. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
  350. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
  351. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,355 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import re
6
+ from dataclasses import asdict, dataclass, field
7
+ from typing import Any
8
+
9
+ from jinja2 import StrictUndefined, Template
10
+ from minisweagent.agents.default import FormatError
11
+
12
+ from .shared import (
13
+ DEFAULT_ACTION_TEMPLATE,
14
+ DEFAULT_INSTANCE_TEMPLATE,
15
+ DEFAULT_SYSTEM_TEMPLATE,
16
+ )
17
+ from .tools import RUN_COMMAND_TOOL, SUBMIT_TOOL, TOOLS_SCHEMA
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ COMMAND_PATTERN = re.compile(r"```(?:bash)?\s*\n(.*?)\n```", re.DOTALL)
22
+
23
+
24
def _render_template(source: str, **kwargs: Any) -> str:
    """Render ``source`` as a Jinja2 template, using ``kwargs`` as the context.

    The template is built with ``StrictUndefined``, so referencing a variable
    that is missing from ``kwargs`` raises instead of rendering as empty text.
    """
    template = Template(source, undefined=StrictUndefined)
    rendered = template.render(**kwargs)
    return rendered
26
+
27
+
28
@dataclass
class MiniSwePolicyConfig:
    """Configuration for the mini-SWE policy: prompt templates plus request options."""

    # Jinja2 template sources for the system, first-user and per-action messages.
    system_template: str = DEFAULT_SYSTEM_TEMPLATE
    instance_template: str = DEFAULT_INSTANCE_TEMPLATE
    action_template: str = DEFAULT_ACTION_TEMPLATE
    # Request options; a value of None is left out of the inference request.
    model: str | None = None
    temperature: float | None = None
    top_p: float | None = None
    max_completion_tokens: int | None = None
    # NOTE(review): tool_choice, step_limit and cost_limit are stored here but
    # are not read by the policy code visible in this module; confirm their
    # consumers before relying on them.
    tool_choice: str = "required"
    use_tools: bool = True
    step_limit: int = 0
    cost_limit: float = 3.0
    # Extra variables made available to the templates at render time.
    extra_template_vars: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_payload(cls, payload: dict[str, Any]) -> MiniSwePolicyConfig:
        """Build a config from a plain dict, keeping defaults for absent keys.

        Args:
            payload: Mapping of option names to values. Unknown keys are
                ignored; ``None`` or an empty mapping yields the defaults.

        Returns:
            A new instance of ``cls`` (so subclasses get their own type).
        """
        payload = payload or {}
        config = cls()
        for key in (
            "system_template",
            "instance_template",
            "action_template",
            "model",
            "temperature",
            "top_p",
            "max_completion_tokens",
            "tool_choice",
            "use_tools",
            "step_limit",
            "cost_limit",
        ):
            if key in payload:
                setattr(config, key, payload[key])
        # Copy the extras so later mutation of the payload does not leak in;
        # a non-dict value is ignored and the empty default is kept.
        extra = payload.get("extra_template_vars") or {}
        if isinstance(extra, dict):
            config.extra_template_vars = dict(extra)
        return config
65
+
66
+
67
class MiniSwePolicy:
    """Mini-SWE policy that mirrors the default agent prompt loop.

    The policy owns the chat transcript, renders the system / instance /
    action templates, and builds the inference request for each turn. It does
    not call the model itself: ``step`` returns the request in its metadata,
    and ``parse_response_to_tool_calls`` turns a model response into tool
    calls.
    """

    name = "swe-mini"

    def __init__(self, *, inference_url: str | None = None, model: str | None = None) -> None:
        """Create a policy with default templates and an empty transcript."""
        self.inference_url = inference_url
        self.config = MiniSwePolicyConfig(model=model)
        self._compile_templates()

        self.messages: list[dict[str, Any]] = []
        self.turn_index = 0
        self.history_messages: list[dict[str, Any]] = []
        self.trajectory_history: list[dict[str, Any]] = []
        self.task: dict[str, Any] | None = None
        self.template_vars: dict[str, Any] = {}

    def _compile_templates(self) -> None:
        """Compile the three prompt templates from the current ``self.config``."""
        self.system_template = Template(self.config.system_template, undefined=StrictUndefined)
        self.instance_template = Template(self.config.instance_template, undefined=StrictUndefined)
        self.action_template = Template(self.config.action_template, undefined=StrictUndefined)

    async def initialize(self, payload: dict[str, Any]) -> None:
        """Apply ``payload`` as the new configuration and reset conversation state.

        A model given to the constructor is kept when ``payload`` does not
        name one, so initialising with a partial payload does not silently
        drop it.
        """
        constructor_model = self.config.model
        cfg = MiniSwePolicyConfig.from_payload(payload or {})
        if not cfg.model and constructor_model:
            cfg.model = constructor_model
        self.config = cfg
        self._compile_templates()
        self.template_vars = dict(cfg.extra_template_vars or {})
        logger.info("Mini-swe policy initialized with model=%s", self.config.model)
        self._reset_state()

    def _reset_state(self) -> None:
        """Clear the transcript, trajectory, turn counter and cached task."""
        self.messages = []
        self.history_messages = []
        self.trajectory_history = []
        self.turn_index = 0
        # The task must be cleared too: _ensure_task_context only renders the
        # system/instance prompts while self.task is None, so a stale task
        # would leave the freshly emptied transcript without those prompts.
        self.task = None

    def _append_user(self, content: str) -> None:
        """Record a user message in both transcripts and advance the turn counter."""
        msg = {"role": "user", "content": content}
        self.messages.append(msg)
        self.history_messages.append(msg)
        self.turn_index += 1

    def _append_assistant(self, content: str) -> None:
        """Record an assistant message in both transcripts."""
        msg = {"role": "assistant", "content": content}
        self.messages.append(msg)
        self.history_messages.append(msg)

    def _apply_previous_cycle(self, metadata: dict[str, Any] | None) -> None:
        """Fold the previous turn's assistant output and env result into state.

        Reads ``prev_assistant_text``, ``prev_inference_response``,
        ``prev_tool_calls`` and ``prev_env_result`` from ``metadata``.
        """
        if not metadata:
            return
        prev_tool_calls = metadata.get("prev_tool_calls")
        prev_response = metadata.get("prev_inference_response")
        prev_env_result = metadata.get("prev_env_result")
        prev_assistant_text = metadata.get("prev_assistant_text")

        # Explicit assistant text wins; otherwise fall back to the text
        # extracted from the raw inference response.
        if prev_assistant_text:
            self._append_assistant(prev_assistant_text)
        elif prev_response:
            text = self._extract_response_text(prev_response)
            if text:
                self._append_assistant(text)

        if prev_tool_calls or prev_env_result:
            self.trajectory_history.append(
                {
                    "turn": self.turn_index,
                    "tool_calls": prev_tool_calls,
                    "env_result": prev_env_result,
                }
            )

    def _ensure_task_context(self, observation: dict[str, Any] | None) -> None:
        """Render the system and instance prompts once, on the first call.

        NOTE(review): the task is cached even when ``observation`` carries no
        ``task``, so later observations never replace an empty one; confirm
        that the first step always includes the task.
        """
        if self.task is not None:
            return
        task = (observation or {}).get("task") or {}
        self.task = dict(task)
        # Values from template_vars take precedence over task-derived defaults.
        render_vars = dict(self.template_vars)
        render_vars.setdefault("task", task)
        render_vars.setdefault("problem_statement", task.get("problem_statement", ""))
        render_vars.setdefault("instructions", task.get("instructions", ""))
        render_vars.setdefault("metadata", task.get("metadata", {}))
        rendered_system = self.system_template.render(**render_vars)
        rendered_user = self.instance_template.render(**render_vars)
        self.messages.append({"role": "system", "content": rendered_system})
        self.history_messages.append({"role": "system", "content": rendered_system})
        self._append_user(rendered_user)

    def _render_action_observation(self, observation: dict[str, Any]) -> str:
        """Render the action template from the observation's ``last`` result."""
        last = observation.get("last") or {}
        output = {
            # The default template takes the length of stdout, so a null
            # value is normalised to an empty string.
            "stdout": last.get("stdout") or "",
            "returncode": last.get("returncode", 0),
        }
        return self.action_template.render(output=output, observation=observation)

    def _extract_response_text(self, response: dict[str, Any]) -> str:
        """Return the text content of the first choice that has any, else ``""``.

        Content may be a plain string or a list of parts; for a list, the
        ``text`` (or ``content``) strings of dict parts are concatenated.
        Malformed input (non-dict response, missing or non-list ``choices``,
        non-dict choices) yields ``""`` rather than raising.
        """
        if not isinstance(response, dict):
            return ""
        choices = response.get("choices")
        if not isinstance(choices, (list, tuple)):
            return ""
        for choice in choices:
            if not isinstance(choice, dict):
                continue
            msg = choice.get("message")
            if not isinstance(msg, dict):
                continue
            content = msg.get("content")
            if isinstance(content, str):
                return content
            if isinstance(content, list):
                parts = []
                for entry in content:
                    if not isinstance(entry, dict):
                        continue
                    txt = entry.get("text") or entry.get("content")
                    if isinstance(txt, str):
                        parts.append(txt)
                if parts:
                    return "".join(parts)
        return ""

    def _build_inference_request(self) -> dict[str, Any]:
        """Assemble the chat request from the transcript and configured options.

        NOTE(review): ``config.tool_choice`` is not consulted here; the tool
        choice is derived from the model name only.
        """
        payload: dict[str, Any] = {"messages": self.messages}
        if self.config.model:
            payload["model"] = self.config.model
        if self.config.temperature is not None:
            payload["temperature"] = self.config.temperature
        if self.config.top_p is not None:
            payload["top_p"] = self.config.top_p
        if self.config.max_completion_tokens is not None:
            payload["max_completion_tokens"] = self.config.max_completion_tokens
        if self.config.use_tools:
            model_name = str(self.config.model or "").lower()
            if "gpt-5" in model_name:
                # GPT-5 models insist on a single tool; keep run_command to avoid shim calls.
                payload["tools"] = [RUN_COMMAND_TOOL]
                payload["tool_choice"] = {
                    "type": "function",
                    "function": {"name": "run_command"},
                }
            else:
                # Groq/Qwen and other OpenAI-compatible models handle both tools under auto mode.
                payload["tools"] = [RUN_COMMAND_TOOL, SUBMIT_TOOL]
                payload["tool_choice"] = "auto"
            payload["parallel_tool_calls"] = False
        return payload

    async def step(
        self,
        observation_text: str,
        state: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
        """Advance one turn and return ``([], meta)`` with the next inference request.

        Args:
            observation_text: Text observation for this turn (may be empty).
            state: Accepted for interface compatibility; not read here.
            metadata: Optional dict; ``raw_observation`` and the ``prev_*``
                keys are read from it when present.

        Returns:
            An empty tool-call list and a metadata dict holding the inference
            request, turn index, history length, tool schema and, when set,
            the inference URL.
        """
        raw_observation: dict[str, Any] | None = None
        if metadata is not None:
            candidate = metadata.get("raw_observation")
            if isinstance(candidate, dict):
                raw_observation = candidate

        self._ensure_task_context(raw_observation)
        self._apply_previous_cycle(metadata)

        message_text = observation_text or ""
        if raw_observation and raw_observation.get("last"):
            rendered = self._render_action_observation(raw_observation)
            message_text = f"{message_text}\n\n{rendered}" if message_text else rendered
        elif not message_text:
            message_text = (
                "Observation: repository ready. Begin by inspecting files and planning next steps."
            )

        self._append_user(message_text)

        inference_request = self._build_inference_request()
        meta = {
            "inference_request": inference_request,
            "turn_index": self.turn_index,
            "history_len": len(self.history_messages),
            "tool_schema": TOOLS_SCHEMA,
        }
        if self.inference_url:
            meta["inference_url"] = self.inference_url

        return [], meta

    @staticmethod
    def _parse_command_from_text(text: str) -> str:
        """Extract the single fenced shell command from ``text``.

        Raises:
            FormatError: If there is not exactly one fenced block, or the
                block is empty after stripping whitespace.
        """
        matches = COMMAND_PATTERN.findall(text or "")
        if len(matches) != 1:
            raise FormatError(
                "Please provide exactly one bash command enclosed in a single ```bash``` block."
            )
        command = matches[0].strip()
        if not command:
            raise FormatError("Command block was empty. Provide a valid shell command.")
        return command

    @staticmethod
    def _parse_structured_tool_calls(response: Any) -> list[dict[str, Any]]:
        """Return tool calls from the first choice that carries any, else ``[]``.

        Accepts both the nested ``{"function": {"name", "arguments"}}`` shape
        and flat ``{"name", "arguments"}`` entries. String arguments are
        JSON-decoded; a string that is not valid JSON is treated as the
        command itself.
        """
        choices = response.get("choices") if isinstance(response, dict) else None
        if not isinstance(choices, (list, tuple)):
            return []
        for choice in choices:
            if not isinstance(choice, dict):
                continue
            msg = choice.get("message")
            tool_calls = msg.get("tool_calls") if isinstance(msg, dict) else None
            if not isinstance(tool_calls, (list, tuple)):
                continue
            parsed: list[dict[str, Any]] = []
            for tool in tool_calls:
                if not isinstance(tool, dict):
                    continue
                name = tool.get("name")
                args = tool.get("arguments")
                function = tool.get("function")
                if isinstance(function, dict):
                    name = function.get("name")
                    args = function.get("arguments")
                if isinstance(args, str):
                    try:
                        args = json.loads(args)
                    except json.JSONDecodeError:
                        args = {"command": args}
                parsed.append({"tool_name": name, "arguments": args})
            if parsed:
                return parsed
        return []

    def parse_response_to_tool_calls(
        self,
        response: dict[str, Any],
        use_tools: bool = True,
    ) -> list[dict[str, Any]]:
        """Convert a model response into a list of tool calls.

        Structured tool calls are preferred when ``use_tools`` is true.
        Otherwise, or when none are present, the response text is parsed for
        a single fenced command. A missing or malformed command falls back to
        a harmless ``echo NOOP`` call and logs a warning.
        """
        if use_tools:
            # Prefer structured tool calls if available.
            structured = self._parse_structured_tool_calls(response)
            if structured:
                return structured

        text = self._extract_response_text(response)
        if not text:
            logger.warning("Model response missing content; defaulting to echo NOOP")
            return [{"tool_name": "run_command", "arguments": {"command": "echo NOOP"}}]

        try:
            command = self._parse_command_from_text(text)
        except FormatError as err:
            logger.warning("Format error parsing command: %s; defaulting to echo NOOP", err)
            return [{"tool_name": "run_command", "arguments": {"command": "echo NOOP"}}]

        return [{"tool_name": "run_command", "arguments": {"command": command}}]

    def state_dict(self) -> dict[str, Any]:
        """Return the policy's configuration and conversation state as a dict."""
        return {
            "config": asdict(self.config),
            "messages": self.messages,
            "history_messages": self.history_messages,
            "trajectory_history": self.trajectory_history,
            "turn_index": self.turn_index,
            "task": self.task,
            "template_vars": self.template_vars,
        }

    def load_state_dict(self, state: dict[str, Any]) -> None:
        """Restore configuration and conversation state from ``state``.

        The configuration is replaced only when ``state`` carries a ``config``
        dict, so loading a partial state keeps the current configuration.
        Lists and dicts are shallow-copied so that later turns do not mutate
        the caller's ``state``.
        """
        config_payload = state.get("config")
        if isinstance(config_payload, dict):
            self.config = MiniSwePolicyConfig.from_payload(config_payload)
        self._compile_templates()
        self.messages = list(state.get("messages") or [])
        self.history_messages = list(state.get("history_messages") or [])
        self.trajectory_history = list(state.get("trajectory_history") or [])
        self.turn_index = int(state.get("turn_index") or 0)
        self.task = state.get("task")
        self.template_vars = dict(state.get("template_vars") or {})

    async def serialize(self) -> dict[str, Any]:
        """Return a snapshot from which ``deserialize`` can rebuild the policy."""
        return {
            "name": self.name,
            "config": asdict(self.config),
            "state": self.state_dict(),
            # Stored at the top level because the config dataclass has no
            # inference_url field; without it the URL is lost on a round trip.
            "inference_url": self.inference_url,
        }

    @classmethod
    async def deserialize(cls, payload: dict[str, Any]) -> MiniSwePolicy:
        """Rebuild a policy from a ``serialize`` snapshot.

        The inference URL is read from the top level of ``payload``, falling
        back to ``config["inference_url"]`` for payloads that carry it there.
        """
        config = payload.get("config") or {}
        state = payload.get("state") or {}
        policy = cls(
            inference_url=payload.get("inference_url") or config.get("inference_url"),
            model=config.get("model"),
        )
        await policy.initialize(config)
        policy.load_state_dict(state)
        return policy

    async def terminate(self) -> None:
        """No-op; the policy holds no resources to release."""
        return None
353
+
354
+
355
+ __all__ = ["MiniSwePolicy"]
@@ -0,0 +1,83 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import textwrap
5
+ from typing import Any
6
+
7
# System prompt: states the assistant's role and the required reply format
# (a THOUGHT section plus exactly one fenced bash command).
DEFAULT_SYSTEM_TEMPLATE = textwrap.dedent(
    """\
    You are a helpful assistant that can interact with a software repository by issuing shell commands.
    Follow the workflow and formatting guidelines exactly. Every response MUST contain a THOUGHT section
    and exactly one bash command enclosed in a single ```bash``` block.
    """
)
14
+
15
# Per-task prompt: the ``problem_statement`` and ``instructions`` placeholders
# are filled in at render time, followed by fixed reminders about the reply
# format and the submission command.
DEFAULT_INSTANCE_TEMPLATE = textwrap.dedent(
    """\
    Please solve this task:

    {{problem_statement}}

    {{instructions}}

    Remember:
    - Explain your reasoning in a THOUGHT section before the command.
    - Provide exactly one bash command wrapped in ```bash``` fences.
    - Use non-interactive flags and prefer deterministic tooling.
    - To finish, run `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached`.
    """
)
30
+
31
# Observation prompt shown after each command: always reports the return code;
# stdout is included whole when shorter than 10000 characters, otherwise only
# its first and last 5000 characters are shown alongside a truncation warning.
DEFAULT_ACTION_TEMPLATE = textwrap.dedent(
    """\
    <returncode>{{ output.returncode }}</returncode>
    {% if output.stdout | length < 10000 %}
    <output>
    {{ output.stdout }}
    </output>
    {% else %}
    <warning>Output truncated ({{ output.stdout | length }} characters)</warning>
    <output_head>{{ output.stdout[:5000] }}</output_head>
    <output_tail>{{ output.stdout[-5000:] }}</output_tail>
    {% endif %}
    """
)
45
+
46
+
47
def summarise_history(history: list[dict[str, Any]], limit: int = 5) -> list[dict[str, Any]]:
    """Return the most recent command history entries, truncated for transport.

    Args:
        history: Command records, oldest first. Each record is read with
            ``.get`` for ``command``, ``returncode``, ``stdout`` and
            ``duration``; other keys are dropped.
        limit: Maximum number of trailing records to keep. Zero or a
            negative value yields an empty list.

    Returns:
        New dicts holding only the four keys above. A ``stdout`` longer than
        4000 characters is reduced to its first and last 2000 characters
        around a truncation marker; a missing or ``None`` ``stdout`` becomes
        an empty string.
    """
    if limit <= 0:
        # ``history[-0:]`` is the whole list, so a non-positive limit must be
        # handled before slicing.
        return []

    def _truncate(text: str, max_len: int = 4000) -> str:
        if len(text) <= max_len:
            return text
        head_len = max_len // 2
        tail_len = max_len - head_len  # head + tail always add up to max_len
        return (
            f"{text[:head_len]}\n... [truncated {len(text) - max_len} chars] ...\n"
            f"{text[-tail_len:]}"
        )

    return [
        {
            "command": item.get("command"),
            "returncode": item.get("returncode"),
            # ``or ""`` also covers records where stdout is present but None.
            "stdout": _truncate(item.get("stdout") or ""),
            "duration": item.get("duration"),
        }
        for item in history[-limit:]
    ]
68
+
69
+
70
def format_observation(observation: dict[str, Any]) -> str:
    """Simple pretty-printer used by tracing/logging.

    Args:
        observation: Mapping read for ``task`` (its ``instance_id``),
            ``step_idx``, ``submitted`` and ``last`` (its ``command`` and
            ``returncode``). ``task`` and ``last`` may be missing or ``None``.

    Returns:
        A JSON object string, indented by two spaces with sorted keys,
        containing ``instance_id``, ``step``, ``submitted``, ``last_command``
        and ``returncode``.
    """
    # ``or {}`` covers both a missing key and a key explicitly set to None,
    # so neither lookup below can fail on a None value.
    task = observation.get("task") or {}
    last = observation.get("last") or {}
    summary = {
        "instance_id": task.get("instance_id"),
        "step": observation.get("step_idx"),
        "submitted": bool(observation.get("submitted")),
        "last_command": last.get("command"),
        "returncode": last.get("returncode"),
    }
    return json.dumps(summary, indent=2, sort_keys=True)
83
+
@@ -0,0 +1,96 @@
1
+ """Tool schema for mini-SWE command execution."""
2
+
3
+ from __future__ import annotations
4
+
5
def _function_tool(name: str, description: str, parameters: dict) -> dict:
    """Wrap a name, description and parameter schema in the function-tool envelope."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": parameters,
        },
    }


# Primary tool: runs one bash command, with an optional per-command timeout.
RUN_COMMAND_TOOL = _function_tool(
    "run_command",
    (
        "Execute a bash command inside the task workspace. Use this for all shell "
        "operations including editing files, running tests, and submitting results."
    ),
    {
        "type": "object",
        "properties": {
            "command": {
                "type": "string",
                "description": "The bash command to execute. Must be non-empty.",
            },
            "timeout": {
                "type": "integer",
                "minimum": 1,
                "maximum": 600,
                "description": (
                    "Optional timeout (seconds) for the command. Defaults to the environment "
                    "timeout if omitted."
                ),
            },
        },
        "required": ["command"],
        "additionalProperties": False,
    },
)

# Submission tool: ends the task; the submission command itself is optional.
SUBMIT_TOOL = _function_tool(
    "submit_patch",
    (
        "Finish the task and submit the final patch. Call this once you believe the "
        "fix is complete and tests pass."
    ),
    {
        "type": "object",
        "properties": {
            "command": {
                "type": "string",
                "description": (
                    "Optional submission command. Defaults to "
                    "`echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached` "
                    "if omitted."
                ),
            }
        },
        "required": [],
        "additionalProperties": False,
    },
)

# Compatibility: some OpenAI reasoning models (e.g., gpt-5) insist on calling
# a generic function (e.g., 'interact' or 'interact_many'). Provide stubs so
# vendor requests do not 400 on unknown function names; the policy will map
# these calls to concrete environment tools.
COMPAT_INTERACT_TOOL = _function_tool(
    "interact",
    "Compatibility shim for models that call a generic 'interact' tool.",
    {
        "type": "object",
        "properties": {},
        "additionalProperties": True,
    },
)

COMPAT_INTERACT_MANY_TOOL = _function_tool(
    "interact_many",
    "Compatibility shim for models that call 'interact_many'.",
    {
        "type": "object",
        "properties": {},
        "additionalProperties": True,
    },
)

# Compatibility tools come last so the preferred tools remain first in the list.
TOOLS_SCHEMA = [
    RUN_COMMAND_TOOL,
    SUBMIT_TOOL,
    COMPAT_INTERACT_TOOL,
    COMPAT_INTERACT_MANY_TOOL,
]
96
+