synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (323) hide show
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +8 -11
  3. examples/dev/qwen3_32b_qlora_4xh100.toml +40 -0
  4. examples/multi_step/crafter_rl_lora.md +29 -0
  5. examples/qwen_coder/README.md +102 -0
  6. examples/qwen_coder/_shared.py +113 -0
  7. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  8. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  9. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  10. examples/qwen_coder/generate_dataset.py +98 -0
  11. examples/qwen_coder/infer_ft_smoke.py +65 -0
  12. examples/qwen_coder/infer_prod_proxy.py +73 -0
  13. examples/qwen_coder/infer_via_synth.py +87 -0
  14. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  15. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  16. examples/qwen_coder/sft_full_17b.py +103 -0
  17. examples/qwen_coder/sft_lora_30b.py +110 -0
  18. examples/qwen_coder/subset_jsonl.py +39 -0
  19. examples/qwen_coder/todos.md +38 -0
  20. examples/qwen_coder/validate_jsonl.py +60 -0
  21. examples/rl/run_eval.py +36 -37
  22. examples/rl/run_rl_and_save.py +5 -5
  23. examples/rl/task_app/math_single_step.py +65 -43
  24. examples/rl/task_app/math_task_app.py +3 -3
  25. examples/sft/README.md +139 -0
  26. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  27. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  28. examples/sft/evaluate.py +117 -0
  29. examples/sft/export_dataset.py +117 -0
  30. examples/sft/generate_traces.py +162 -0
  31. examples/swe/__init__.py +12 -0
  32. examples/swe/task_app/README.md +105 -0
  33. examples/swe/task_app/__init__.py +2 -0
  34. examples/swe/task_app/grpo_swe_mini.py +571 -0
  35. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  36. examples/swe/task_app/hosted/README.md +173 -0
  37. examples/swe/task_app/hosted/__init__.py +5 -0
  38. examples/swe/task_app/hosted/branching.py +143 -0
  39. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  40. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  41. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  42. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  43. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  44. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  45. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  46. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  47. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  50. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  51. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  52. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  53. examples/swe/task_app/hosted/hosted_app.py +204 -0
  54. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  55. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  56. examples/swe/task_app/hosted/main.py +100 -0
  57. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  58. examples/swe/task_app/hosted/registry.py +195 -0
  59. examples/swe/task_app/hosted/rollout.py +1869 -0
  60. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  61. examples/swe/task_app/hosted/storage/volume.py +211 -0
  62. examples/swe/task_app/hosted/test_agents.py +161 -0
  63. examples/swe/task_app/hosted/test_service.py +137 -0
  64. examples/swe/task_app/hosted/utils.py +62 -0
  65. examples/vlm/PROPOSAL.md +53 -0
  66. examples/vlm/README.md +68 -0
  67. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  68. examples/vlm/crafter_image_only_agent.py +207 -0
  69. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  70. examples/vlm/filter_image_rows.py +63 -0
  71. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  72. examples/warming_up_to_rl/analyze_trace_db.py +5 -5
  73. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  74. examples/warming_up_to_rl/export_trace_sft.py +78 -21
  75. examples/warming_up_to_rl/groq_test.py +4 -4
  76. examples/warming_up_to_rl/manage_secrets.py +13 -18
  77. examples/warming_up_to_rl/run_eval.py +42 -44
  78. examples/warming_up_to_rl/run_fft_and_save.py +11 -16
  79. examples/warming_up_to_rl/run_local_rollout.py +1 -3
  80. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
  81. examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
  82. examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
  83. examples/warming_up_to_rl/run_rl_and_save.py +5 -6
  84. examples/warming_up_to_rl/run_rollout_remote.py +8 -10
  85. examples/warming_up_to_rl/task_app/README.md +6 -2
  86. examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
  87. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  105. synth_ai/api/models/supported.py +376 -0
  106. synth_ai/api/train/builders.py +128 -21
  107. synth_ai/api/train/cli.py +80 -64
  108. synth_ai/api/train/config_finder.py +7 -2
  109. synth_ai/api/train/env_resolver.py +1 -1
  110. synth_ai/api/train/pollers.py +2 -1
  111. synth_ai/api/train/supported_algos.py +139 -0
  112. synth_ai/api/train/task_app.py +1 -2
  113. synth_ai/api/train/utils.py +13 -44
  114. synth_ai/cli/__init__.py +8 -0
  115. synth_ai/cli/_modal_wrapper.py +28 -0
  116. synth_ai/cli/_typer_patch.py +49 -0
  117. synth_ai/cli/balance.py +1 -2
  118. synth_ai/cli/calc.py +1 -1
  119. synth_ai/cli/demo.py +2 -1
  120. synth_ai/cli/recent.py +2 -2
  121. synth_ai/cli/rl_demo.py +2 -1
  122. synth_ai/cli/root.py +11 -13
  123. synth_ai/cli/status.py +2 -2
  124. synth_ai/cli/task_apps.py +529 -179
  125. synth_ai/cli/traces.py +6 -4
  126. synth_ai/cli/watch.py +12 -18
  127. synth_ai/demo_registry.py +1 -1
  128. synth_ai/demos/core/cli.py +36 -43
  129. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  130. synth_ai/demos/demo_task_apps/core.py +17 -25
  131. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
  132. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  133. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
  134. synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
  135. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  136. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  137. synth_ai/environments/reproducibility/tree.py +2 -5
  138. synth_ai/environments/service/app.py +11 -12
  139. synth_ai/environments/service/core_routes.py +4 -7
  140. synth_ai/environments/stateful/engine.py +1 -1
  141. synth_ai/environments/tasks/core.py +1 -0
  142. synth_ai/environments/tasks/filters.py +5 -6
  143. synth_ai/environments/tasks/utils.py +4 -5
  144. synth_ai/handshake.py +9 -9
  145. synth_ai/http.py +1 -1
  146. synth_ai/http_client.py +18 -10
  147. synth_ai/inference/client.py +15 -5
  148. synth_ai/jobs/client.py +78 -83
  149. synth_ai/learning/__init__.py +41 -6
  150. synth_ai/learning/algorithms.py +14 -0
  151. synth_ai/learning/client.py +91 -24
  152. synth_ai/learning/config.py +2 -38
  153. synth_ai/learning/ft_client.py +4 -59
  154. synth_ai/learning/health.py +5 -6
  155. synth_ai/learning/jobs.py +31 -47
  156. synth_ai/{rl → learning/rl}/__init__.py +14 -4
  157. synth_ai/learning/rl/client.py +267 -0
  158. synth_ai/learning/rl/config.py +31 -0
  159. synth_ai/{rl → learning/rl}/contracts.py +5 -8
  160. synth_ai/{rl → learning/rl}/env_keys.py +39 -15
  161. synth_ai/learning/rl/secrets.py +13 -0
  162. synth_ai/learning/rl_client.py +2 -281
  163. synth_ai/learning/sft/__init__.py +29 -0
  164. synth_ai/learning/sft/client.py +68 -0
  165. synth_ai/learning/sft/config.py +270 -0
  166. synth_ai/learning/sft/data.py +295 -0
  167. synth_ai/learning/sse.py +25 -24
  168. synth_ai/learning/validators.py +25 -28
  169. synth_ai/lm/__init__.py +21 -47
  170. synth_ai/task/__init__.py +25 -27
  171. synth_ai/task/apps/__init__.py +7 -8
  172. synth_ai/task/auth.py +8 -8
  173. synth_ai/task/client.py +14 -14
  174. synth_ai/task/contracts.py +36 -35
  175. synth_ai/task/datasets.py +6 -5
  176. synth_ai/task/errors.py +10 -10
  177. synth_ai/task/health.py +17 -9
  178. synth_ai/task/json.py +58 -23
  179. synth_ai/task/proxy.py +13 -9
  180. synth_ai/task/rubrics.py +16 -15
  181. synth_ai/task/server.py +12 -12
  182. synth_ai/task/tracing_utils.py +4 -4
  183. synth_ai/task/vendors.py +5 -6
  184. synth_ai/tracing_v3/__init__.py +2 -0
  185. synth_ai/tracing_v3/abstractions.py +21 -4
  186. synth_ai/tracing_v3/decorators.py +18 -16
  187. synth_ai/tracing_v3/hooks.py +5 -5
  188. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  189. synth_ai/tracing_v3/session_tracer.py +40 -14
  190. synth_ai/tracing_v3/storage/base.py +85 -0
  191. synth_ai/tracing_v3/storage/config.py +21 -8
  192. synth_ai/tracing_v3/storage/factory.py +10 -7
  193. synth_ai/tracing_v3/storage/utils.py +4 -2
  194. synth_ai/tracing_v3/turso/daemon.py +7 -2
  195. synth_ai/tracing_v3/turso/models.py +2 -2
  196. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  197. synth_ai/tracing_v3/utils.py +4 -4
  198. synth_ai/v0/api/__init__.py +8 -0
  199. synth_ai/v0/api/models/__init__.py +8 -0
  200. synth_ai/v0/api/models/supported.py +8 -0
  201. synth_ai/v0/config/__init__.py +15 -0
  202. synth_ai/v0/config/base_url.py +12 -0
  203. synth_ai/v0/lm/__init__.py +51 -0
  204. synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
  205. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  206. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  207. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  208. synth_ai/{lm → v0/lm}/config.py +6 -1
  209. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  210. synth_ai/{lm → v0/lm}/core/main.py +6 -6
  211. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  212. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
  213. synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
  214. synth_ai/{lm → v0/lm}/overrides.py +2 -2
  215. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  216. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  217. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  218. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  219. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
  220. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  221. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  222. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
  223. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
  224. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
  225. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
  226. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  227. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  228. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  229. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  230. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  231. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  232. synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
  233. synth_ai/v0/tracing_v3/__init__.py +10 -0
  234. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  235. synth_ai/v0/tracing_v3/decorators.py +3 -0
  236. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  237. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  238. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/METADATA +10 -7
  239. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/RECORD +269 -233
  240. examples/common_old/backend.py +0 -20
  241. examples/evals_old/README.md +0 -98
  242. examples/evals_old/__init__.py +0 -6
  243. examples/evals_old/compare_models.py +0 -1038
  244. examples/evals_old/example_log.md +0 -145
  245. examples/evals_old/run_demo.sh +0 -126
  246. examples/evals_old/trace_analysis.py +0 -270
  247. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  248. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  249. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  250. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
  251. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  252. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  253. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  254. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  255. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  256. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
  257. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  258. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  259. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
  260. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  261. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  262. examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
  263. examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
  264. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  265. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  266. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
  267. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
  268. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
  269. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  270. examples/finetuning_old/synth_qwen_v1/util.py +0 -152
  271. examples/rl_old/task_app.py +0 -1131
  272. synth_ai/experimental/synth_oss.py +0 -445
  273. synth_ai/learning/filtering.py +0 -0
  274. synth_ai/learning/offline/dpo.py +0 -0
  275. synth_ai/learning/offline/providers.py +0 -7
  276. synth_ai/learning/offline/sft.py +0 -0
  277. synth_ai/learning/offline/shared.py +0 -0
  278. synth_ai/learning/online/grpo.py +0 -0
  279. synth_ai/learning/online/irft.py +0 -0
  280. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  281. synth_ai/learning/prompts/gepa.py +0 -0
  282. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
  283. synth_ai/learning/prompts/mipro.py +0 -289
  284. synth_ai/learning/prompts/random_search.py +0 -249
  285. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  286. synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
  287. synth_ai/rl/secrets.py +0 -19
  288. synth_ai/scripts/verify_rewards.py +0 -100
  289. synth_ai/tracing/__init__.py +0 -30
  290. synth_ai/tracing_v1/__init__.py +0 -33
  291. synth_ai/tracing_v3/turso/__init__.py +0 -25
  292. synth_ai/tracing_v3/turso/manager.py +0 -838
  293. synth_ai/zyk/__init__.py +0 -30
  294. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  295. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  296. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  297. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  298. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  299. /synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
  300. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  301. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  302. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  303. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  304. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  305. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  306. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  307. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  308. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  309. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  310. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  311. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  312. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  313. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  314. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  315. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  316. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  317. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  318. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  319. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  320. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/WHEEL +0 -0
  321. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/entry_points.txt +0 -0
  322. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/licenses/LICENSE +0 -0
  323. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,355 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import re
6
+ from dataclasses import asdict, dataclass, field
7
+ from typing import Any
8
+
9
+ from jinja2 import StrictUndefined, Template
10
+ from minisweagent.agents.default import FormatError
11
+
12
+ from .shared import (
13
+ DEFAULT_ACTION_TEMPLATE,
14
+ DEFAULT_INSTANCE_TEMPLATE,
15
+ DEFAULT_SYSTEM_TEMPLATE,
16
+ )
17
+ from .tools import RUN_COMMAND_TOOL, SUBMIT_TOOL, TOOLS_SCHEMA
18
+
19
# Module-level logger, named after this module via __name__.
+ logger = logging.getLogger(__name__)
20
+
21
# Matches one fenced code block: three backticks, an optional "bash" tag,
# optional whitespace and a newline, then the lazily captured body, then a
# newline and the closing three backticks. re.DOTALL lets the captured body
# span several lines.
+ COMMAND_PATTERN = re.compile(r"```(?:bash)?\s*\n(.*?)\n```", re.DOTALL)
22
+
23
+
24
def _render_template(source: str, **kwargs: Any) -> str:
    """Render Jinja2 template text *source* with *kwargs* as its variables.

    The template is built with ``StrictUndefined`` so that a variable missing
    from *kwargs* is reported by Jinja2 instead of rendering as empty text.
    """
    template = Template(source, undefined=StrictUndefined)
    rendered = template.render(**kwargs)
    return rendered
26
+
27
+
28
@dataclass
class MiniSwePolicyConfig:
    """Settings that control how ``MiniSwePolicy`` builds prompts and requests."""

    # Jinja2 template sources for the system prompt, the task prompt and the
    # per-step command-result message.
    system_template: str = DEFAULT_SYSTEM_TEMPLATE
    instance_template: str = DEFAULT_INSTANCE_TEMPLATE
    action_template: str = DEFAULT_ACTION_TEMPLATE
    # Sampling options; each is copied into the inference request only when set.
    model: str | None = None
    temperature: float | None = None
    top_p: float | None = None
    max_completion_tokens: int | None = None
    # NOTE(review): stored and serialised, but the request builder in this
    # module picks its own tool_choice and does not read this field.
    tool_choice: str = "required"
    # When true, tool schemas are attached to every inference request.
    use_tools: bool = True
    # NOTE(review): step_limit / cost_limit are not consulted by the policy
    # code in this module; presumably enforced by the caller - confirm.
    step_limit: int = 0
    cost_limit: float = 3.0
    # Extra variables made available to the system and instance templates.
    extra_template_vars: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_payload(cls, payload: dict[str, Any]) -> "MiniSwePolicyConfig":
        """Build a config from a plain dict, keeping defaults for absent keys.

        Args:
            payload: Mapping of field names to values. Unknown keys are
                ignored; ``None`` is treated like an empty mapping.

        Returns:
            A new instance of ``cls`` (so subclasses get their own type back).
        """
        source = payload or {}
        config = cls()
        scalar_keys = (
            "system_template",
            "instance_template",
            "action_template",
            "model",
            "temperature",
            "top_p",
            "max_completion_tokens",
            "tool_choice",
            "use_tools",
            "step_limit",
            "cost_limit",
        )
        for key in scalar_keys:
            if key in source:
                setattr(config, key, source[key])
        extra = source.get("extra_template_vars") or {}
        # Anything that is not a dict is ignored; the dict is copied so the
        # config never shares a mutable mapping with the caller's payload.
        if isinstance(extra, dict):
            config.extra_template_vars = dict(extra)
        return config
65
+
66
+
67
+ class MiniSwePolicy:
68
+ """Mini-SWE policy that mirrors the default agent prompt loop."""
69
+
70
+ name = "swe-mini"
71
+
72
+ def __init__(self, *, inference_url: str | None = None, model: str | None = None) -> None:
73
+ self.inference_url = inference_url
74
+ self.config = MiniSwePolicyConfig(model=model)
75
+ self.system_template = Template(self.config.system_template, undefined=StrictUndefined)
76
+ self.instance_template = Template(self.config.instance_template, undefined=StrictUndefined)
77
+ self.action_template = Template(self.config.action_template, undefined=StrictUndefined)
78
+
79
+ self.messages: list[dict[str, Any]] = []
80
+ self.turn_index = 0
81
+ self.history_messages: list[dict[str, Any]] = []
82
+ self.trajectory_history: list[dict[str, Any]] = []
83
+ self.task: dict[str, Any] | None = None
84
+ self.template_vars: dict[str, Any] = {}
85
+
86
+ async def initialize(self, payload: dict[str, Any]) -> None:
87
+ cfg = MiniSwePolicyConfig.from_payload(payload or {})
88
+ self.config = cfg
89
+ self.system_template = Template(cfg.system_template, undefined=StrictUndefined)
90
+ self.instance_template = Template(cfg.instance_template, undefined=StrictUndefined)
91
+ self.action_template = Template(cfg.action_template, undefined=StrictUndefined)
92
+ if cfg.model:
93
+ self.config.model = cfg.model
94
+ self.template_vars = dict(cfg.extra_template_vars or {})
95
+ logger.info("Mini-swe policy initialized with model=%s", self.config.model)
96
+ self._reset_state()
97
+
98
+ def _reset_state(self) -> None:
99
+ self.messages = []
100
+ self.history_messages = []
101
+ self.trajectory_history = []
102
+ self.turn_index = 0
103
+
104
+ def _append_user(self, content: str) -> None:
105
+ msg = {"role": "user", "content": content}
106
+ self.messages.append(msg)
107
+ self.history_messages.append(msg)
108
+ self.turn_index += 1
109
+
110
+ def _append_assistant(self, content: str) -> None:
111
+ msg = {"role": "assistant", "content": content}
112
+ self.messages.append(msg)
113
+ self.history_messages.append(msg)
114
+
115
+ def _apply_previous_cycle(self, metadata: dict[str, Any] | None) -> None:
116
+ if not metadata:
117
+ return
118
+ prev_tool_calls = metadata.get("prev_tool_calls")
119
+ prev_response = metadata.get("prev_inference_response")
120
+ prev_env_result = metadata.get("prev_env_result")
121
+ prev_assistant_text = metadata.get("prev_assistant_text")
122
+
123
+ if prev_assistant_text:
124
+ self._append_assistant(prev_assistant_text)
125
+ elif prev_response:
126
+ text = self._extract_response_text(prev_response)
127
+ if text:
128
+ self._append_assistant(text)
129
+
130
+ if prev_tool_calls or prev_env_result:
131
+ record = {
132
+ "turn": self.turn_index,
133
+ "tool_calls": prev_tool_calls,
134
+ "env_result": prev_env_result,
135
+ }
136
+ self.trajectory_history.append(record)
137
+
138
+ def _ensure_task_context(self, observation: dict[str, Any] | None) -> None:
139
+ if self.task is not None:
140
+ return
141
+ task = (observation or {}).get("task") or {}
142
+ self.task = dict(task)
143
+ render_vars = dict(self.template_vars)
144
+ render_vars.setdefault("task", task)
145
+ render_vars.setdefault("problem_statement", task.get("problem_statement", ""))
146
+ render_vars.setdefault("instructions", task.get("instructions", ""))
147
+ render_vars.setdefault("metadata", task.get("metadata", {}))
148
+ rendered_system = self.system_template.render(**render_vars)
149
+ rendered_user = self.instance_template.render(**render_vars)
150
+ self.messages.append({"role": "system", "content": rendered_system})
151
+ self.history_messages.append({"role": "system", "content": rendered_system})
152
+ self._append_user(rendered_user)
153
+
154
+ def _render_action_observation(self, observation: dict[str, Any]) -> str:
155
+ last = observation.get("last") or {}
156
+ output = {
157
+ "stdout": last.get("stdout", ""),
158
+ "returncode": last.get("returncode", 0),
159
+ }
160
+ template_input = {"output": output, "observation": observation}
161
+ return self.action_template.render(**template_input)
162
+
163
+ def _extract_response_text(self, response: dict[str, Any]) -> str:
164
+ try:
165
+ choices = response.get("choices") or []
166
+ for choice in choices:
167
+ msg = choice.get("message") or {}
168
+ content = msg.get("content")
169
+ if isinstance(content, str):
170
+ return content
171
+ if isinstance(content, list):
172
+ parts = []
173
+ for entry in content:
174
+ if isinstance(entry, dict):
175
+ txt = entry.get("text") or entry.get("content")
176
+ if isinstance(txt, str):
177
+ parts.append(txt)
178
+ if parts:
179
+ return "".join(parts)
180
+ except Exception:
181
+ pass
182
+ return ""
183
+
184
+ def _build_inference_request(self) -> dict[str, Any]:
185
+ payload: dict[str, Any] = {"messages": self.messages}
186
+ if self.config.model:
187
+ payload["model"] = self.config.model
188
+ if self.config.temperature is not None:
189
+ payload["temperature"] = self.config.temperature
190
+ if self.config.top_p is not None:
191
+ payload["top_p"] = self.config.top_p
192
+ if self.config.max_completion_tokens is not None:
193
+ payload["max_completion_tokens"] = self.config.max_completion_tokens
194
+ if self.config.use_tools:
195
+ model_name = str(self.config.model or "").lower()
196
+ if "gpt-5" in model_name:
197
+ # GPT-5 models insist on a single tool; keep run_command to avoid shim calls.
198
+ tool_list: list[dict[str, Any]] = [RUN_COMMAND_TOOL]
199
+ payload["tools"] = tool_list
200
+ payload["tool_choice"] = {
201
+ "type": "function",
202
+ "function": {"name": "run_command"},
203
+ }
204
+ payload["parallel_tool_calls"] = False
205
+ else:
206
+ # Groq/Qwen and other OpenAI-compatible models handle both tools under auto mode.
207
+ tool_list = [RUN_COMMAND_TOOL, SUBMIT_TOOL]
208
+ payload["tools"] = tool_list
209
+ payload["tool_choice"] = "auto"
210
+ payload["parallel_tool_calls"] = False
211
+ return payload
212
+
213
+ async def step(
214
+ self,
215
+ observation_text: str,
216
+ state: dict[str, Any] | None = None,
217
+ metadata: dict[str, Any] | None = None,
218
+ ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
219
+ raw_observation: dict[str, Any] | None = None
220
+ if metadata is not None:
221
+ candidate = metadata.get("raw_observation")
222
+ if isinstance(candidate, dict):
223
+ raw_observation = candidate
224
+
225
+ self._ensure_task_context(raw_observation)
226
+ self._apply_previous_cycle(metadata)
227
+
228
+ message_text = observation_text or ""
229
+ if raw_observation and raw_observation.get("last"):
230
+ rendered = self._render_action_observation(raw_observation)
231
+ message_text = f"{message_text}\n\n{rendered}" if message_text else rendered
232
+ elif not message_text:
233
+ message_text = (
234
+ "Observation: repository ready. Begin by inspecting files and planning next steps."
235
+ )
236
+
237
+ self._append_user(message_text)
238
+
239
+ inference_request = self._build_inference_request()
240
+ meta = {
241
+ "inference_request": inference_request,
242
+ "turn_index": self.turn_index,
243
+ "history_len": len(self.history_messages),
244
+ "tool_schema": TOOLS_SCHEMA,
245
+ }
246
+ if self.inference_url:
247
+ meta["inference_url"] = self.inference_url
248
+
249
+ return [], meta
250
+
251
+ @staticmethod
252
+ def _parse_command_from_text(text: str) -> str:
253
+ matches = COMMAND_PATTERN.findall(text or "")
254
+ if len(matches) != 1:
255
+ raise FormatError(
256
+ "Please provide exactly one bash command enclosed in a single ```bash``` block."
257
+ )
258
+ command = matches[0].strip()
259
+ if not command:
260
+ raise FormatError("Command block was empty. Provide a valid shell command.")
261
+ return command
262
+
263
+ def parse_response_to_tool_calls(
264
+ self,
265
+ response: dict[str, Any],
266
+ use_tools: bool = True,
267
+ ) -> list[dict[str, Any]]:
268
+ if use_tools:
269
+ # Prefer structured tool calls if available.
270
+ for choice in response.get("choices", []):
271
+ msg = choice.get("message") or {}
272
+ tool_calls = msg.get("tool_calls")
273
+ if tool_calls:
274
+ parsed: list[dict[str, Any]] = []
275
+ for tool in tool_calls:
276
+ if not isinstance(tool, dict):
277
+ continue
278
+ name = tool.get("name")
279
+ args = tool.get("arguments")
280
+ if "function" in tool:
281
+ name = tool["function"].get("name")
282
+ args = tool["function"].get("arguments")
283
+ if isinstance(args, str):
284
+ try:
285
+ args = json.loads(args)
286
+ except json.JSONDecodeError:
287
+ args = {"command": args}
288
+ parsed.append({"tool_name": name, "arguments": args})
289
+ if parsed:
290
+ return parsed
291
+
292
+ text = self._extract_response_text(response)
293
+ if not text:
294
+ logger.warning("Model response missing content; defaulting to echo NOOP")
295
+ return [{"tool_name": "run_command", "arguments": {"command": "echo NOOP"}}]
296
+
297
+ try:
298
+ command = self._parse_command_from_text(text)
299
+ except FormatError as err:
300
+ logger.warning("Format error parsing command: %s; defaulting to echo NOOP", err)
301
+ return [{"tool_name": "run_command", "arguments": {"command": "echo NOOP"}}]
302
+
303
+ return [{"tool_name": "run_command", "arguments": {"command": command}}]
304
+
305
+ def state_dict(self) -> dict[str, Any]:
306
+ return {
307
+ "config": asdict(self.config),
308
+ "messages": self.messages,
309
+ "history_messages": self.history_messages,
310
+ "trajectory_history": self.trajectory_history,
311
+ "turn_index": self.turn_index,
312
+ "task": self.task,
313
+ "template_vars": self.template_vars,
314
+ }
315
+
316
+ def load_state_dict(self, state: dict[str, Any]) -> None:
317
+ self.config = MiniSwePolicyConfig.from_payload(state.get("config", {}))
318
+ self.system_template = Template(
319
+ self.config.system_template, undefined=StrictUndefined
320
+ )
321
+ self.instance_template = Template(
322
+ self.config.instance_template, undefined=StrictUndefined
323
+ )
324
+ self.action_template = Template(self.config.action_template, undefined=StrictUndefined)
325
+ self.messages = state.get("messages", [])
326
+ self.history_messages = state.get("history_messages", [])
327
+ self.trajectory_history = state.get("trajectory_history", [])
328
+ self.turn_index = int(state.get("turn_index", 0))
329
+ self.task = state.get("task")
330
+ self.template_vars = state.get("template_vars", {})
331
+
332
+ async def serialize(self) -> dict[str, Any]:
333
+ return {
334
+ "name": self.name,
335
+ "config": asdict(self.config),
336
+ "state": self.state_dict(),
337
+ }
338
+
339
+ @classmethod
340
+ async def deserialize(cls, payload: dict[str, Any]) -> MiniSwePolicy:
341
+ config = payload.get("config") or {}
342
+ state = payload.get("state") or {}
343
+ policy = cls(
344
+ inference_url=config.get("inference_url"),
345
+ model=config.get("model"),
346
+ )
347
+ await policy.initialize(config)
348
+ policy.load_state_dict(state)
349
+ return policy
350
+
351
+ async def terminate(self) -> None:
352
+ return None
353
+
354
+
355
# Names exported by a star-import of this module: only the policy class.
+ __all__ = ["MiniSwePolicy"]
@@ -0,0 +1,83 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import textwrap
5
+ from typing import Any
6
+
7
# Default system prompt: tells the model to answer with a THOUGHT section and
# exactly one fenced bash command per response.
DEFAULT_SYSTEM_TEMPLATE = textwrap.dedent("""\
    You are a helpful assistant that can interact with a software repository by issuing shell commands.
    Follow the workflow and formatting guidelines exactly. Every response MUST contain a THOUGHT section
    and exactly one bash command enclosed in a single ```bash``` block.
    """)
14
+
15
# Default per-task prompt. ``problem_statement`` and ``instructions`` are
# template placeholders filled in at render time.
DEFAULT_INSTANCE_TEMPLATE = textwrap.dedent("""\
    Please solve this task:

    {{problem_statement}}

    {{instructions}}

    Remember:
    - Explain your reasoning in a THOUGHT section before the command.
    - Provide exactly one bash command wrapped in ```bash``` fences.
    - Use non-interactive flags and prefer deterministic tooling.
    - To finish, run `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached`.
    """)
30
+
31
# Default template for reporting a command result back to the model. Outputs
# of 10000 characters or more are shown as a 5000-character head and tail.
DEFAULT_ACTION_TEMPLATE = textwrap.dedent("""\
    <returncode>{{ output.returncode }}</returncode>
    {% if output.stdout | length < 10000 %}
    <output>
    {{ output.stdout }}
    </output>
    {% else %}
    <warning>Output truncated ({{ output.stdout | length }} characters)</warning>
    <output_head>{{ output.stdout[:5000] }}</output_head>
    <output_tail>{{ output.stdout[-5000:] }}</output_tail>
    {% endif %}
    """)
45
+
46
+
47
def summarise_history(history: list[dict[str, Any]], limit: int = 5) -> list[dict[str, Any]]:
    """Return the most recent command history entries, truncated for transport.

    Args:
        history: Command records; the trailing entries are treated as the
            most recent ones. Each record may carry ``command``,
            ``returncode``, ``stdout`` and ``duration`` keys.
        limit: Maximum number of trailing entries to keep. Zero or a
            negative value yields an empty list.

    Returns:
        A new list of dicts holding only ``command``, ``returncode``,
        ``stdout`` and ``duration``. Missing keys become ``None``, except
        ``stdout``, which becomes ``""`` when missing or ``None`` and is
        shortened to roughly 4000 characters when longer.
    """

    def _truncate(text: str, max_len: int = 4000) -> str:
        """Keep the head and tail of *text*, eliding the middle if too long."""
        if len(text) <= max_len:
            return text
        head = text[: max_len // 2]
        # Unary minus binds tighter than ``//``, so this is ``(-max_len) // 2``.
        tail = text[-max_len // 2 :]
        return f"{head}\n... [truncated {len(text) - max_len} chars] ...\n{tail}"

    # ``history[-0:]`` is the *whole* list, so a non-positive limit has to be
    # handled explicitly rather than through the slice.
    recent = history[-limit:] if limit > 0 else []

    return [
        {
            "command": item.get("command"),
            "returncode": item.get("returncode"),
            # ``stdout`` may be present but ``None``; treat that like missing.
            "stdout": _truncate(item.get("stdout") or ""),
            "duration": item.get("duration"),
        }
        for item in recent
    ]
68
+
69
+
70
def format_observation(observation: dict[str, Any]) -> str:
    """Simple pretty-printer used by tracing/logging.

    Args:
        observation: Mapping that may carry ``task``, ``last``, ``step_idx``
            and ``submitted`` entries. ``task`` and ``last`` may each be
            missing or ``None``.

    Returns:
        A JSON string (2-space indent, sorted keys) with ``instance_id``,
        ``step``, ``submitted``, ``last_command`` and ``returncode``.
        Absent values are rendered as ``null``; ``submitted`` is always a
        boolean.
    """
    # ``or {}`` covers both a missing key and an explicit ``None`` value, so
    # neither lookup below can fail on ``None.get``.
    task = observation.get("task") or {}
    last = observation.get("last") or {}

    summary = {
        "instance_id": task.get("instance_id"),
        "step": observation.get("step_idx"),
        "submitted": bool(observation.get("submitted")),
        "last_command": last.get("command"),
        "returncode": last.get("returncode"),
    }
    return json.dumps(summary, indent=2, sort_keys=True)
83
+
@@ -0,0 +1,96 @@
1
+ """Tool schema for mini-SWE command execution."""
2
+
3
+ from __future__ import annotations
4
+
5
# JSON-schema description of the arguments accepted by ``run_command``:
# a required ``command`` string and an optional ``timeout`` in seconds.
_RUN_COMMAND_PARAMETERS = {
    "type": "object",
    "properties": {
        "command": {
            "type": "string",
            "description": "The bash command to execute. Must be non-empty.",
        },
        "timeout": {
            "type": "integer",
            "minimum": 1,
            "maximum": 600,
            "description": (
                "Optional timeout (seconds) for the command. "
                "Defaults to the environment timeout if omitted."
            ),
        },
    },
    "required": ["command"],
    "additionalProperties": False,
}

# Function-tool definition for executing a shell command in the workspace.
RUN_COMMAND_TOOL = {
    "type": "function",
    "function": {
        "name": "run_command",
        "description": (
            "Execute a bash command inside the task workspace. "
            "Use this for all shell operations including editing files, "
            "running tests, and submitting results."
        ),
        "parameters": _RUN_COMMAND_PARAMETERS,
    },
}
35
+
36
# JSON-schema description of the arguments accepted by ``submit_patch``:
# a single optional ``command`` string, nothing required.
_SUBMIT_PARAMETERS = {
    "type": "object",
    "properties": {
        "command": {
            "type": "string",
            "description": (
                "Optional submission command. Defaults to "
                "`echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached` "
                "if omitted."
            ),
        },
    },
    "required": [],
    "additionalProperties": False,
}

# Function-tool definition for finishing the task and submitting the patch.
SUBMIT_TOOL = {
    "type": "function",
    "function": {
        "name": "submit_patch",
        "description": (
            "Finish the task and submit the final patch. "
            "Call this once you believe the fix is complete and tests pass."
        ),
        "parameters": _SUBMIT_PARAMETERS,
    },
}
61
+
62
def _compat_tool(name, description):
    """Build a permissive function-tool stub that accepts arbitrary arguments."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": {},
                "additionalProperties": True,
            },
        },
    }


# Compatibility: some OpenAI reasoning models (e.g., gpt-5) insist on calling
# a generic function (e.g., 'interact' or 'interact_many'). Provide stubs so
# vendor requests do not 400 on unknown function names; the policy will map
# these calls to concrete environment tools.
COMPAT_INTERACT_TOOL = _compat_tool(
    "interact",
    "Compatibility shim for models that call a generic 'interact' tool.",
)

COMPAT_INTERACT_MANY_TOOL = _compat_tool(
    "interact_many",
    "Compatibility shim for models that call 'interact_many'.",
)

# Preferred tools come first; the compatibility shims are listed last.
TOOLS_SCHEMA = [
    RUN_COMMAND_TOOL,
    SUBMIT_TOOL,
    COMPAT_INTERACT_TOOL,
    COMPAT_INTERACT_MANY_TOOL,
]
96
+