synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (327) hide show
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +8 -11
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/run_eval.py +36 -37
  19. examples/rl/run_rl_and_save.py +5 -5
  20. examples/rl/task_app/math_single_step.py +65 -43
  21. examples/rl/task_app/math_task_app.py +3 -3
  22. examples/sft/README.md +139 -0
  23. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  24. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  25. examples/sft/evaluate.py +117 -0
  26. examples/sft/export_dataset.py +117 -0
  27. examples/sft/generate_traces.py +162 -0
  28. examples/swe/__init__.py +12 -0
  29. examples/swe/task_app/README.md +105 -0
  30. examples/swe/task_app/__init__.py +2 -0
  31. examples/swe/task_app/grpo_swe_mini.py +571 -0
  32. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  33. examples/swe/task_app/hosted/README.md +173 -0
  34. examples/swe/task_app/hosted/__init__.py +5 -0
  35. examples/swe/task_app/hosted/branching.py +143 -0
  36. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  37. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  38. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  39. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  40. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  41. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  42. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  43. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  44. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  45. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  46. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  47. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  50. examples/swe/task_app/hosted/hosted_app.py +204 -0
  51. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  52. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  53. examples/swe/task_app/hosted/main.py +100 -0
  54. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  55. examples/swe/task_app/hosted/registry.py +195 -0
  56. examples/swe/task_app/hosted/rollout.py +1869 -0
  57. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  58. examples/swe/task_app/hosted/storage/volume.py +211 -0
  59. examples/swe/task_app/hosted/test_agents.py +161 -0
  60. examples/swe/task_app/hosted/test_service.py +137 -0
  61. examples/swe/task_app/hosted/utils.py +62 -0
  62. examples/vlm/README.md +68 -0
  63. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  64. examples/vlm/crafter_image_only_agent.py +207 -0
  65. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  66. examples/vlm/filter_image_rows.py +63 -0
  67. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  68. examples/warming_up_to_rl/analyze_trace_db.py +5 -5
  69. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  70. examples/warming_up_to_rl/export_trace_sft.py +78 -21
  71. examples/warming_up_to_rl/groq_test.py +4 -4
  72. examples/warming_up_to_rl/manage_secrets.py +13 -18
  73. examples/warming_up_to_rl/run_eval.py +42 -44
  74. examples/warming_up_to_rl/run_fft_and_save.py +11 -16
  75. examples/warming_up_to_rl/run_local_rollout.py +1 -3
  76. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
  77. examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
  78. examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
  79. examples/warming_up_to_rl/run_rl_and_save.py +5 -6
  80. examples/warming_up_to_rl/run_rollout_remote.py +8 -10
  81. examples/warming_up_to_rl/task_app/README.md +6 -2
  82. examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
  83. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  101. synth/__init__.py +14 -0
  102. synth_ai/__init__.py +26 -4
  103. synth_ai/api/models/supported.py +376 -0
  104. synth_ai/api/train/builders.py +128 -21
  105. synth_ai/api/train/cli.py +80 -64
  106. synth_ai/api/train/config_finder.py +7 -2
  107. synth_ai/api/train/env_resolver.py +1 -1
  108. synth_ai/api/train/pollers.py +2 -1
  109. synth_ai/api/train/supported_algos.py +139 -0
  110. synth_ai/api/train/task_app.py +1 -2
  111. synth_ai/api/train/utils.py +13 -44
  112. synth_ai/cli/__init__.py +8 -0
  113. synth_ai/cli/_modal_wrapper.py +28 -0
  114. synth_ai/cli/_typer_patch.py +49 -0
  115. synth_ai/cli/balance.py +1 -2
  116. synth_ai/cli/calc.py +1 -1
  117. synth_ai/cli/demo.py +2 -1
  118. synth_ai/cli/recent.py +2 -2
  119. synth_ai/cli/rl_demo.py +2 -1
  120. synth_ai/cli/root.py +11 -13
  121. synth_ai/cli/status.py +2 -2
  122. synth_ai/cli/task_apps.py +529 -179
  123. synth_ai/cli/traces.py +6 -4
  124. synth_ai/cli/watch.py +12 -18
  125. synth_ai/demo_registry.py +1 -1
  126. synth_ai/demos/core/cli.py +36 -43
  127. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  128. synth_ai/demos/demo_task_apps/core.py +17 -25
  129. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
  130. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  131. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
  132. synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
  133. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  134. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  135. synth_ai/environments/reproducibility/tree.py +2 -5
  136. synth_ai/environments/service/app.py +11 -12
  137. synth_ai/environments/service/core_routes.py +4 -7
  138. synth_ai/environments/stateful/engine.py +1 -1
  139. synth_ai/environments/tasks/core.py +1 -0
  140. synth_ai/environments/tasks/filters.py +5 -6
  141. synth_ai/environments/tasks/utils.py +4 -5
  142. synth_ai/handshake.py +9 -9
  143. synth_ai/http.py +1 -1
  144. synth_ai/http_client.py +18 -10
  145. synth_ai/inference/client.py +15 -5
  146. synth_ai/jobs/client.py +78 -83
  147. synth_ai/learning/__init__.py +41 -6
  148. synth_ai/learning/algorithms.py +14 -0
  149. synth_ai/learning/client.py +91 -24
  150. synth_ai/learning/config.py +2 -38
  151. synth_ai/learning/ft_client.py +4 -59
  152. synth_ai/learning/health.py +5 -6
  153. synth_ai/learning/jobs.py +31 -47
  154. synth_ai/{rl → learning/rl}/__init__.py +14 -4
  155. synth_ai/learning/rl/client.py +267 -0
  156. synth_ai/learning/rl/config.py +31 -0
  157. synth_ai/{rl → learning/rl}/contracts.py +5 -8
  158. synth_ai/{rl → learning/rl}/env_keys.py +39 -15
  159. synth_ai/learning/rl/secrets.py +13 -0
  160. synth_ai/learning/rl_client.py +2 -281
  161. synth_ai/learning/sft/__init__.py +29 -0
  162. synth_ai/learning/sft/client.py +68 -0
  163. synth_ai/learning/sft/config.py +270 -0
  164. synth_ai/learning/sft/data.py +295 -0
  165. synth_ai/learning/sse.py +25 -24
  166. synth_ai/learning/validators.py +25 -28
  167. synth_ai/lm/__init__.py +21 -47
  168. synth_ai/main.py +4 -0
  169. synth_ai/task/__init__.py +25 -27
  170. synth_ai/task/apps/__init__.py +7 -8
  171. synth_ai/task/auth.py +8 -8
  172. synth_ai/task/client.py +14 -14
  173. synth_ai/task/contracts.py +36 -35
  174. synth_ai/task/datasets.py +6 -5
  175. synth_ai/task/errors.py +10 -10
  176. synth_ai/task/health.py +17 -9
  177. synth_ai/task/json.py +58 -23
  178. synth_ai/task/proxy.py +13 -9
  179. synth_ai/task/rubrics.py +16 -15
  180. synth_ai/task/server.py +12 -12
  181. synth_ai/task/tracing_utils.py +4 -4
  182. synth_ai/task/vendors.py +5 -6
  183. synth_ai/tracing_v3/__init__.py +2 -0
  184. synth_ai/tracing_v3/abstractions.py +21 -4
  185. synth_ai/tracing_v3/decorators.py +18 -16
  186. synth_ai/tracing_v3/hooks.py +5 -5
  187. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  188. synth_ai/tracing_v3/session_tracer.py +40 -14
  189. synth_ai/tracing_v3/storage/base.py +85 -0
  190. synth_ai/tracing_v3/storage/config.py +21 -8
  191. synth_ai/tracing_v3/storage/factory.py +10 -7
  192. synth_ai/tracing_v3/storage/utils.py +4 -2
  193. synth_ai/tracing_v3/turso/daemon.py +7 -2
  194. synth_ai/tracing_v3/turso/models.py +2 -2
  195. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  196. synth_ai/tracing_v3/utils.py +4 -4
  197. synth_ai/v0/api/__init__.py +8 -0
  198. synth_ai/v0/api/models/__init__.py +8 -0
  199. synth_ai/v0/api/models/supported.py +8 -0
  200. synth_ai/v0/config/__init__.py +15 -0
  201. synth_ai/v0/config/base_url.py +12 -0
  202. synth_ai/v0/lm/__init__.py +51 -0
  203. synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
  204. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  205. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  206. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  207. synth_ai/{lm → v0/lm}/config.py +6 -1
  208. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  209. synth_ai/{lm → v0/lm}/core/main.py +6 -6
  210. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  211. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
  212. synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
  213. synth_ai/{lm → v0/lm}/overrides.py +2 -2
  214. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  215. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  216. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  217. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  218. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
  219. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  220. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  221. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
  222. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
  223. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
  224. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
  225. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  226. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  227. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  228. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  229. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  230. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  231. synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
  232. synth_ai/v0/tracing_v3/__init__.py +10 -0
  233. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  234. synth_ai/v0/tracing_v3/decorators.py +3 -0
  235. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  236. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  237. synth_ai-0.2.9.dev8.dist-info/METADATA +191 -0
  238. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/RECORD +268 -238
  239. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/top_level.txt +1 -0
  240. examples/common_old/backend.py +0 -20
  241. examples/evals_old/README.md +0 -98
  242. examples/evals_old/__init__.py +0 -6
  243. examples/evals_old/compare_models.py +0 -1038
  244. examples/evals_old/example_log.md +0 -145
  245. examples/evals_old/run_demo.sh +0 -126
  246. examples/evals_old/trace_analysis.py +0 -270
  247. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  248. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  249. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  250. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
  251. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  252. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  253. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  254. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  255. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  256. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
  257. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  258. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  259. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
  260. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  261. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  262. examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
  263. examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
  264. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  265. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  266. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
  267. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
  268. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
  269. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  270. examples/finetuning_old/synth_qwen_v1/util.py +0 -152
  271. examples/rl_old/task_app.py +0 -1131
  272. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  273. examples/warming_up_to_rl/old/notes.md +0 -73
  274. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  275. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  276. synth_ai/experimental/synth_oss.py +0 -445
  277. synth_ai/learning/filtering.py +0 -0
  278. synth_ai/learning/offline/dpo.py +0 -0
  279. synth_ai/learning/offline/providers.py +0 -7
  280. synth_ai/learning/offline/sft.py +0 -0
  281. synth_ai/learning/offline/shared.py +0 -0
  282. synth_ai/learning/online/grpo.py +0 -0
  283. synth_ai/learning/online/irft.py +0 -0
  284. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  285. synth_ai/learning/prompts/gepa.py +0 -0
  286. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
  287. synth_ai/learning/prompts/mipro.py +0 -289
  288. synth_ai/learning/prompts/random_search.py +0 -249
  289. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  290. synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
  291. synth_ai/rl/secrets.py +0 -19
  292. synth_ai/scripts/verify_rewards.py +0 -100
  293. synth_ai/tracing/__init__.py +0 -30
  294. synth_ai/tracing_v1/__init__.py +0 -33
  295. synth_ai/tracing_v3/turso/__init__.py +0 -25
  296. synth_ai/tracing_v3/turso/manager.py +0 -838
  297. synth_ai/zyk/__init__.py +0 -30
  298. synth_ai-0.2.9.dev7.dist-info/METADATA +0 -131
  299. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  300. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  301. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  302. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  303. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  304. /synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
  305. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  306. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  307. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  308. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  309. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  310. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  311. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  312. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  313. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  314. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  315. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  316. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  317. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  318. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  319. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  320. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  321. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  322. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  323. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  324. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  325. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/WHEEL +0 -0
  326. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/entry_points.txt +0 -0
  327. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,105 @@
1
+ # mini-SWE Task App
2
+
3
+ This directory contains an example task app that exposes the
4
+ [mini-swe-agent](https://github.com/SWE-agent/mini-swe-agent) workflow through
5
+ the Synth AI task app interface. The goal is to provide a Crafter-like workflow
6
+ for SWE tasks: you can serve the task app locally, point RL training at it,
7
+ collect rollouts with tracing, and run vendor inference via the standard proxy
8
+ endpoints.
9
+
10
+ > **Status:** The implementation focuses on a minimal, hackable integration.
11
+ > It supports local/docker environments, step-wise command execution, tracing
12
+ > hooks, and rollouts. By default it streams SWE-Bench Verified tasks from
13
+ > Hugging Face; you can point the loader at your own dataset (or the bundled
14
+ > sample JSON file) via environment variables (see "Configure dataset and
15
+ > execution" below).
16
+
17
+ ## Layout
18
+
19
+ - `grpo_swe_mini.py` – main task-app configuration (dataset, rollout executor,
20
+ tracing, Modal metadata, registration).
21
+ - `grpo_swe_mini_task_app.py` – backwards-compatible FastAPI wrapper that
22
+ allows running the module directly (mirrors `grpo_crafter_task_app.py`).
23
+ - `hosted/envs/mini_swe` – environment/policy adapters that wrap `mini-swe-agent`
24
+ inside a hosted FastAPI service.
25
+ - `data/sample_instances.json` – optional curated subset for quick smoke tests
26
+ (no longer the default dataset).
27
+
28
+ ## Using the task app
29
+
30
+ ```
31
+ uvx synth-ai serve swe-mini --port 8020
32
+ ```
33
+
34
+ ### Recommended: non-interactive serve + .env
35
+
36
+ ```bash
37
+ uvx synth-ai serve swe-mini \
38
+ --port 8020 \
39
+ --env-file .env \
40
+ --trace traces/v3 \
41
+ --trace-db traces/v3/synth_ai.db
42
+ ```
43
+
44
+ This avoids interactive prompts (useful for CI) and loads `ENVIRONMENT_API_KEY`, `OPENAI_API_KEY`, etc. from `.env`.
45
+
46
+ ### Configure dataset and execution
47
+
48
+ Set `SWE_MINI_DATASET` to control what tasks the environment loads (defaults to
49
+ `hf://princeton-nlp/SWE-Bench_Verified:test`):
50
+
51
+ - `file://path/to/tasks.jsonl` – a JSONL file (one object per line) or a JSON
52
+ array of objects, each with `instance_id`, `problem_statement`, and optional
53
+ docker metadata (`image_name`, `repo`, …).
54
+ - `hf://namespace/dataset:split` – lazily stream from Hugging Face (requires
55
+ `datasets` and network).
56
+ For quick local smoke tests you can point at
57
+ `file://$REPO/examples/swe/task_app/data/sample_instances.json`.
58
+
59
+ Execution is handled by mini-swe's environment classes. Configure execution via
60
+ `SWE_MINI_ENVIRONMENT_CLASS` (`local`, `docker`, `singularity`, …) and pass
61
+ additional keyword arguments with `SWE_MINI_ENVIRONMENT_KWARGS` (JSON).
62
+
63
+ ### Tracing & SFT
64
+
65
+ Tracing works the same as Crafter; pass `--trace` / `--trace-db` to the CLI or
66
+ set `TASKAPP_TRACING_ENABLED=1`. The task app writes JSONL snippets for SFT and
67
+ records decision traces in the configured SQLite/Postgres database.
68
+
69
+ ## Next steps
70
+
71
+ - `docs/examples/swe/mini_swe_task_app.md` – end-to-end walkthrough
72
+ - `examples/swe/task_app/grpo_swe_mini.py` – main entrypoint
73
+ - `examples/swe/task_app/hosted` – shared host scaffolding for the Mini-SWE task app
74
+
75
+ Pull requests welcome – especially for better dataset loaders, richer metrics,
76
+ and robust docker support.
77
+
78
+ ### Example rollout configs
79
+
80
+ - OpenAI gpt-4o-mini (works out-of-the-box):
81
+
82
+ ```json
83
+ {
84
+ "run_id": "example-$(date +%s)",
85
+ "policy": {
86
+ "policy_name": "swe-mini-react",
87
+ "config": {
88
+ "model": "gpt-4o-mini",
89
+ "inference_url": "https://api.openai.com",
90
+ "temperature": 0.0,
91
+ "max_completion_tokens": 256,
92
+ "use_tools": false,
93
+ "response_format": { "type": "text" },
94
+ "system_template": "You are participating in a software engineering evaluation. Provide exactly one bash command enclosed in a single ```bash``` block. No THOUGHT. No extra text. If unsure, output ```bash\necho NOOP\n```.",
95
+ "instance_template": "{{problem_statement}}\n\n{{instructions}}",
96
+ "action_template": "{{ output.stdout }}"
97
+ }
98
+ },
99
+ "env": { "env_name": "swe-mini" },
100
+ "ops": ["agent","env","agent","env","agent","env"],
101
+ "record": {"trajectories": true, "return_trace": true, "trace_format": "compact"}
102
+ }
103
+ ```
104
+
105
+ - OpenAI gpt-5-mini (experimental): remove reasoning flags and constrain output. If responses are empty, retry without `stop` and consider switching to `gpt-4o-mini`.
@@ -0,0 +1,2 @@
1
+ """Task app configuration for the mini-SWE integration."""
2
+
@@ -0,0 +1,571 @@
1
+ """Task app configuration for the mini-SWE agent integration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import os
8
+ from collections.abc import Iterable, Sequence
9
+ from dataclasses import dataclass
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
14
+ from synth_ai.task.contracts import RolloutMetrics, RolloutRequest, RolloutResponse, TaskInfo
15
+ from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
16
+ from synth_ai.task.rubrics import load_rubric
17
+ from synth_ai.task.server import ProxyConfig, RubricBundle, TaskAppConfig
18
+ from synth_ai.task.tracing_utils import (
19
+ build_tracer_factory,
20
+ resolve_sft_output_dir,
21
+ resolve_tracing_db_url,
22
+ tracing_env_enabled,
23
+ )
24
+ from synth_ai.tracing_v3.session_tracer import SessionTracer
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ try:
30
+ from examples.swe.task_app.hosted.branching import ( # type: ignore
31
+ router as branching_router,
32
+ )
33
+ from examples.swe.task_app.hosted.environment_routes import ( # type: ignore # noqa: E501
34
+ router as environment_router,
35
+ )
36
+ from examples.swe.task_app.hosted.policy_routes import ( # type: ignore
37
+ router as policy_router,
38
+ )
39
+ from examples.swe.task_app.hosted.rollout import ( # type: ignore
40
+ RolloutEnvSpec as LegacyRolloutEnvSpec,
41
+ )
42
+ from examples.swe.task_app.hosted.rollout import (
43
+ RolloutPolicySpec as LegacyRolloutPolicySpec,
44
+ )
45
+ from examples.swe.task_app.hosted.rollout import (
46
+ RolloutRecordConfig as LegacyRolloutRecordConfig,
47
+ )
48
+ from examples.swe.task_app.hosted.rollout import (
49
+ RolloutRequest as LegacyRolloutRequest,
50
+ )
51
+ from examples.swe.task_app.hosted.rollout import (
52
+ RolloutResponse as LegacyRolloutResponse,
53
+ )
54
+ from examples.swe.task_app.hosted.rollout import (
55
+ RolloutSafetyConfig as LegacyRolloutSafetyConfig,
56
+ )
57
+ from examples.swe.task_app.hosted.rollout import (
58
+ execute_rollout as legacy_execute_rollout,
59
+ )
60
+ HAS_HOSTED = True
61
+ except Exception:
62
+ try: # pragma: no cover - optional dependency path
63
+ from examples.warming_up_to_rl.task_app.synth_envs_hosted.branching import ( # type: ignore
64
+ router as branching_router,
65
+ )
66
+ from examples.warming_up_to_rl.task_app.synth_envs_hosted.environment_routes import ( # type: ignore # noqa: E501
67
+ router as environment_router,
68
+ )
69
+ from examples.warming_up_to_rl.task_app.synth_envs_hosted.policy_routes import ( # type: ignore
70
+ router as policy_router,
71
+ )
72
+ from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import ( # type: ignore
73
+ RolloutEnvSpec as LegacyRolloutEnvSpec,
74
+ )
75
+ from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
76
+ RolloutPolicySpec as LegacyRolloutPolicySpec,
77
+ )
78
+ from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
79
+ RolloutRecordConfig as LegacyRolloutRecordConfig,
80
+ )
81
+ from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
82
+ RolloutRequest as LegacyRolloutRequest,
83
+ )
84
+ from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
85
+ RolloutResponse as LegacyRolloutResponse,
86
+ )
87
+ from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
88
+ RolloutSafetyConfig as LegacyRolloutSafetyConfig,
89
+ )
90
+ from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
91
+ execute_rollout as legacy_execute_rollout,
92
+ )
93
+ HAS_HOSTED = True
94
+ except Exception as exc: # pragma: no cover - optional dependency path
95
+ logger.warning(
96
+ "mini-swe task app running without hosted environment support: %s", exc
97
+ )
98
+ HAS_HOSTED = False
99
+
100
+
101
+ _HERE = Path(__file__).resolve()
102
+
103
+
104
def _resolve_repo_root() -> Path:
    """Locate the repository root used by this task app.

    Candidates are probed in priority order and the first one containing a
    ``pyproject.toml`` file or a ``synth_ai`` directory wins:

    1. ``SYNTH_AI_REPO_ROOT`` -- only when the variable is set and non-blank.
    2. The third and second ancestors of this file (when the path is that deep).
    3. The current working directory.

    Returns:
        The resolved root directory. When no candidate matches, falls back to
        the third ancestor of this file (or the highest ancestor available).
    """
    here = Path(__file__).resolve()
    ancestors = here.parents

    candidates: list[Path] = []
    # ``Path("")`` is ``Path(".")`` and Path objects are always truthy, so an
    # unset variable must be filtered on the *string*; otherwise the current
    # working directory silently outranks the file-relative candidates.
    override = os.getenv("SYNTH_AI_REPO_ROOT", "").strip()
    if override:
        candidates.append(Path(override).expanduser())
    # Guard the ancestor lookups so a shallow install path cannot raise IndexError.
    candidates.extend(ancestors[depth] for depth in (3, 2) if depth < len(ancestors))
    candidates.append(Path.cwd())

    for candidate in candidates:
        try:
            resolved = candidate.resolve()
        except (OSError, RuntimeError):
            # Unresolvable path (e.g. permission problem or symlink loop): try the next one.
            continue
        if (resolved / "pyproject.toml").exists() or (resolved / "synth_ai").is_dir():
            return resolved

    return ancestors[min(3, len(ancestors) - 1)]
123
+
124
+
125
+ REPO_ROOT = _resolve_repo_root()
126
def _load_hf_dataset(source: str) -> list[dict[str, Any]]:
    """Load every record of a Hugging Face dataset named by an ``hf://`` source.

    Args:
        source: String of the form ``hf://namespace/dataset:split``. When the
            ``:split`` suffix is absent the ``train`` split is requested.

    Returns:
        One plain ``dict`` per dataset record.

    Raises:
        RuntimeError: If the optional ``datasets`` package cannot be imported.
    """
    try:
        from datasets import load_dataset  # type: ignore
    except Exception as exc:  # pragma: no cover - optional dependency
        raise RuntimeError(
            "datasets package is required for hf:// dataset sources. "
            "pip install datasets or set SWE_MINI_DATASET=file://<path>."
        ) from exc

    # Drop the scheme, then split "<dataset>[:<split>]" on the first colon only.
    spec = source[len("hf://") :]
    repo_id, separator, split_name = spec.partition(":")
    if not separator:
        split_name = "train"
    logger.info("Loading HuggingFace dataset %s split=%s", repo_id, split_name)
    rows = load_dataset(repo_id, split=split_name)
    return list(map(dict, rows))
144
+
145
+
146
def _parse_records(text: str) -> list[dict[str, Any]]:
    """Parse dataset file contents into a list of records.

    Two layouts are accepted: a JSON array (content starting with ``[``) or
    JSONL with one JSON value per non-blank line.

    Args:
        text: Raw file contents. A leading UTF-8 byte-order mark is ignored.

    Returns:
        The parsed records; an empty list when the content is blank.

    Raises:
        json.JSONDecodeError: On malformed JSON. For JSONL input the message
            is prefixed with ``dataset line <n>`` (1-based line in ``text``);
            the trailing line/column refers to the offending record itself.
        ValueError: If array-style content does not decode to a list.
    """
    # Files saved as "UTF-8 with BOM" keep U+FEFF after decoding; it is not
    # whitespace, so it would defeat both the "[" check and json.loads.
    text = text.lstrip("\ufeff")
    body = text.strip()
    if not body:
        return []
    if body.startswith("["):
        parsed = json.loads(body)
        if isinstance(parsed, list):
            return [dict(item) for item in parsed]
        raise ValueError("Expected list JSON for dataset file")

    records = []
    # Enumerate the unstripped text so reported numbers match the real file.
    for lineno, raw_line in enumerate(text.splitlines(), start=1):
        line = raw_line.strip()
        if not line:
            continue
        try:
            records.append(json.loads(line))
        except json.JSONDecodeError as exc:
            # Same exception type as before, now carrying the file line number.
            raise json.JSONDecodeError(
                f"dataset line {lineno}: {exc.msg}", line, exc.pos
            ) from exc
    return records
162
+
163
+
164
def _normalize_instance(raw: dict[str, Any]) -> dict[str, Any]:
    """Convert one raw dataset record into the task app's instance shape.

    Args:
        raw: Dataset record. The identifier is read from ``instance_id`` or
            ``id``, the problem text from ``problem_statement`` or ``prompt``,
            and the instructions from ``instructions`` or ``extra``.

    Returns:
        A dict with ``instance_id``, ``problem_statement``, ``instructions``
        and ``metadata``. ``metadata`` starts from a copy of ``raw["metadata"]``
        and is topped up (without overriding existing keys) with the remaining
        top-level fields of ``raw``, a ``raw_instance`` reference to ``raw``,
        ``instance_id`` and a derived ``image_name``.

    Raises:
        ValueError: If the record has no truthy ``instance_id`` / ``id``.
    """
    instance_id = raw.get("instance_id") or raw.get("id")
    if not instance_id:
        raise ValueError(f"Dataset entry missing instance_id: {raw}")
    problem_statement = raw.get("problem_statement") or raw.get("prompt") or ""
    instructions = raw.get("instructions") or raw.get("extra") or ""

    source_metadata = raw.get("metadata") or {}
    if isinstance(source_metadata, dict):
        # Work on a shallow copy: writing into the caller's dict would mutate
        # the input and, together with "raw_instance" below, create the
        # reference cycle raw -> metadata -> raw.
        metadata = dict(source_metadata)
    else:
        metadata = {"raw_metadata": source_metadata}

    # Fields that are surfaced at the top level of the result are not mirrored.
    promoted = {"metadata", "instructions", "problem_statement"}
    for key, value in raw.items():
        if key not in promoted:
            metadata.setdefault(key, value)
    metadata.setdefault("raw_instance", raw)
    metadata.setdefault("instance_id", instance_id)
    if "image_name" not in metadata:
        # Derive a default image reference from the instance id ("__" is
        # rewritten to "_1776_" and the whole reference is lower-cased).
        iid = str(instance_id).replace("__", "_1776_")
        metadata["image_name"] = f"docker.io/swebench/sweb.eval.x86_64.{iid}:latest".lower()
    return {
        "instance_id": instance_id,
        "problem_statement": problem_statement,
        "instructions": instructions,
        "metadata": metadata,
    }
188
+
189
+
190
def _load_instances() -> list[dict[str, Any]]:
    """Load and normalise the dataset selected by ``SWE_MINI_DATASET``.

    The variable may hold an ``hf://`` source, a ``file://`` path or a bare
    filesystem path; when unset or blank the SWE-Bench Verified test split is
    used. Entries that fail normalisation are logged and skipped.

    Returns:
        The normalised instances (never empty).

    Raises:
        FileNotFoundError: If a ``file://`` path does not exist.
        ValueError: If the value is neither a known scheme nor an existing path.
        RuntimeError: If no entry survives normalisation.
    """
    fallback = "hf://princeton-nlp/SWE-Bench_Verified:test"
    location = os.getenv("SWE_MINI_DATASET", fallback).strip() or fallback

    if location.startswith("hf://"):
        raw_entries = _load_hf_dataset(location)
    else:
        is_file_uri = location.startswith("file://")
        target = Path(location[len("file://") :] if is_file_uri else location).expanduser()
        if not target.exists():
            # A missing explicit file:// target and an unrecognised bare value
            # are reported with different exception types.
            if is_file_uri:
                raise FileNotFoundError(f"SWE_MINI_DATASET file not found: {target}")
            raise ValueError(
                f"Unsupported SWE_MINI_DATASET value '{location}'. "
                "Use file://..., or hf://dataset:split."
            )
        raw_entries = _parse_records(target.read_text(encoding="utf-8"))

    valid = []
    for entry in raw_entries:
        try:
            item = _normalize_instance(entry)
        except Exception as exc:
            # Best-effort load: one bad record must not discard the dataset.
            logger.warning("Skipping invalid dataset entry: %s", exc)
        else:
            valid.append(item)
    if valid:
        return valid
    raise RuntimeError("No valid mini-swe dataset entries found.")
224
+
225
+
226
@dataclass
class MiniSweDataset:
    """In-memory collection of normalised mini-SWE instances.

    Each element of ``instances`` is a dict that carries at least an
    ``instance_id`` key; ``by_id`` (built after initialisation) maps that id to
    its instance for direct lookup.
    """

    # Instances in dataset order.
    instances: list[dict[str, Any]]

    def __post_init__(self) -> None:
        # Lookup table keyed by instance id; for a repeated id the entry that
        # appears later in ``instances`` is the one kept.
        self.by_id: dict[str, dict[str, Any]] = {
            item["instance_id"]: item for item in self.instances
        }

    def ids(self) -> list[str]:
        """Return all instance ids in dataset order."""
        return [item["instance_id"] for item in self.instances]

    def get(self, instance_id: str) -> dict[str, Any]:
        """Return the instance registered under ``instance_id``.

        Raises:
            KeyError: If the id is unknown.
        """
        try:
            return self.by_id[instance_id]
        except KeyError:
            # Re-raise with a descriptive message; the bare lookup error adds nothing.
            raise KeyError(f"Unknown mini-swe instance_id: {instance_id}") from None

    def sample_by_index(self, index: int) -> dict[str, Any]:
        """Return the instance at ``index``, wrapping around the dataset length.

        Raises:
            RuntimeError: If the dataset holds no instances.
        """
        if not self.instances:
            raise RuntimeError("Mini-swe dataset is empty")
        return self.instances[index % len(self.instances)]
245
+
246
+
247
# Descriptor for the mini-swe dataset. build_dataset() registers it, and
# _base_task_info() / describe_taskset() embed its model_dump() output.
DATASET_SPEC = TaskDatasetSpec(
    id="mini_swe_sample",
    name="mini-SWE Tasks",
    version="0.1.0",
    splits=["train"],
    default_split="train",
    description="Interactive SWE tasks executed via mini-swe-agent environments.",
)
255
+
256
+
257
def build_dataset() -> tuple[TaskDatasetRegistry, MiniSweDataset]:
    """Load the mini-swe instances and register them under ``DATASET_SPEC``.

    Returns:
        ``(registry, dataset)``: a new ``TaskDatasetRegistry`` and the
        ``MiniSweDataset`` that its registered loader returns.
    """
    registry = TaskDatasetRegistry()
    dataset = MiniSweDataset(_load_instances())

    def _loader(_spec):
        # The dataset is already loaded; the spec argument is ignored.
        return dataset

    registry.register(DATASET_SPEC, _loader, cache=True)
    return registry, dataset
262
+
263
+
264
def _base_task_info(dataset: MiniSweDataset) -> TaskInfo:
    """Build the ``TaskInfo`` template for the swe-mini app.

    Everything is a fixed literal except the ``dataset`` section, which merges
    ``DATASET_SPEC.model_dump()`` with the first 50 ids from ``dataset``.
    """
    action_space = {
        "type": "tool",
        "tools": ["run_command", "submit_patch"],
        "description": "Issue bash commands or submit the final patch.",
    }
    observation = {
        "summary": "Step-wise command output and submission status.",
        "keys": ["task", "history", "last", "submitted"],
    }
    dataset_section = {
        **DATASET_SPEC.model_dump(),
        # Only a capped preview of the ids is embedded here.
        "instances": dataset.ids()[:50],
    }
    rubric = {
        "version": "1",
        "criteria_count": 2,
        "source": "inline",
        "aggregation": "weighted_sum",
    }
    inference = {
        "supports_proxy": True,
        "endpoints": {
            "openai": "/proxy/v1/chat/completions",
            "groq": "/proxy/groq/v1/chat/completions",
        },
        "tool": {"name": "run_command", "parallel_tool_calls": False},
    }
    capabilities = {
        "supports_rollout": True,
        "supports_env_lifecycle": True,
        "requires_api_key_header": True,
    }

    return TaskInfo(
        task={"id": "swe_mini", "name": "mini-SWE Tasks", "version": "0.1.0"},
        environments=["swe-mini"],
        action_space=action_space,
        observation=observation,
        dataset=dataset_section,
        rubric=rubric,
        inference=inference,
        capabilities=capabilities,
        limits={"max_ops": 2000, "max_time_s": 7200},
    )
302
+
303
+
304
# Outcome rubric: two equally weighted criteria combined by weighted sum.
# build_config() passes it as the ``outcome`` member of the RubricBundle.
OUTCOME_RUBRIC = load_rubric(
    {
        "version": "1",
        "goal_text": "Complete the software engineering task and ensure tests pass.",
        "aggregation": "weighted_sum",
        "criteria": [
            {
                "id": "functional",
                "description": "All acceptance tests and lint checks succeed.",
                "weight": 1.0,
            },
            {
                "id": "quality",
                "description": "Code changes follow project conventions and include required updates.",
                "weight": 1.0,
            },
        ],
    }
)
323
+
324
# Events rubric: a single criterion combined by weighted sum.
# build_config() passes it as the ``events`` member of the RubricBundle.
EVENTS_RUBRIC = load_rubric(
    {
        "version": "1",
        "goal_text": "Encourage deliberate, well-scoped shell interactions.",
        "aggregation": "weighted_sum",
        "criteria": [
            {
                "id": "productive_steps",
                "description": "Commands meaningfully progress the task (editing files, running tests, inspecting context).",
                "weight": 1.0,
            }
        ],
    }
)
338
+
339
+
340
def describe_taskset(dataset: MiniSweDataset) -> dict[str, Any]:
    """Return the dumped ``DATASET_SPEC`` fields plus every instance id.

    The ids are stored under ``"instance_ids"``, in the order given by
    ``dataset.ids()``.
    """
    description = dict(DATASET_SPEC.model_dump())
    description["instance_ids"] = dataset.ids()
    return description
345
+
346
+
347
def provide_task_instances(
    dataset: MiniSweDataset, base_info: TaskInfo, seeds: Sequence[int]
) -> Iterable[TaskInfo]:
    """Build one ``TaskInfo`` per seed from the shared ``base_info``.

    Each seed is converted with ``int`` and resolved through
    ``dataset.sample_by_index``. The chosen record's ``"instance_id"`` is
    added to copies of the ``observation`` and ``dataset`` sections; all other
    sections are taken from ``base_info`` unchanged.

    Returns:
        A list of ``TaskInfo`` objects in the same order as ``seeds``.
    """

    def _info_for(seed: int) -> TaskInfo:
        chosen_id = dataset.sample_by_index(int(seed))["instance_id"]
        return TaskInfo(
            task=base_info.task,
            environments=base_info.environments,
            action_space=base_info.action_space,
            observation={**base_info.observation, "instance_id": chosen_id},
            dataset={**base_info.dataset, "instance_id": chosen_id},
            rubric=base_info.rubric,
            inference=base_info.inference,
            capabilities=base_info.capabilities,
            limits=base_info.limits,
        )

    return [_info_for(seed) for seed in seeds]
367
+
368
+
369
def _ensure_env_has_task(
    dataset: MiniSweDataset, env_spec: LegacyRolloutEnvSpec
) -> LegacyRolloutEnvSpec:
    """Return a copy of ``env_spec`` whose config carries a ``"task"`` entry.

    A config that already has ``"task"`` is copied as-is. Otherwise the task
    is looked up with ``dataset.get`` using ``config["instance_id"]``.

    Raises:
        ValueError: The config has neither ``"task"`` nor a truthy
            ``"instance_id"``.
    """
    # Work on a copy so the caller's config mapping is never mutated.
    updated = dict(env_spec.config or {})
    task_missing = "task" not in updated
    requested_id = updated.get("instance_id") if task_missing else None

    if task_missing and not requested_id:
        raise ValueError("mini-swe rollout request requires env.config.instance_id")
    if task_missing:
        updated["task"] = dataset.get(requested_id)

    return env_spec.model_copy(update={"config": updated})
379
+
380
+
381
def build_config() -> TaskAppConfig:
    """Assemble the ``TaskAppConfig`` for the swe-mini task app.

    Steps, in order: build the dataset and base task info, resolve the tracing
    and SFT settings, optionally instantiate the hosted task app (only when
    ``HAS_HOSTED`` is true), populate ``app_state``, and wire the rollout
    callable. Without hosted support every rollout returns an empty, aborted
    ``RolloutResponse`` and no routers are attached.
    """
    registry, dataset = build_dataset()
    base_info = _base_task_info(dataset)

    tracing_enabled = tracing_env_enabled()
    tracing_db_url = resolve_tracing_db_url()
    tracer_factory = build_tracer_factory(
        SessionTracer, enabled=tracing_enabled, db_url=tracing_db_url
    )
    sft_output_dir = resolve_sft_output_dir()

    def _import_hosted_task_app_cls():
        """Return the hosted ``TaskApp`` class, or ``None`` if both imports fail."""
        try:
            from examples.swe.task_app.hosted.hosted_app import (  # type: ignore
                TaskApp as HostedTaskApp,
            )

            return HostedTaskApp
        except Exception:
            # Fall back to the copy shipped with the warming_up_to_rl example.
            try:
                from examples.warming_up_to_rl.task_app.synth_envs_hosted.hosted_app import (  # type: ignore
                    TaskApp as HostedTaskApp,
                )

                return HostedTaskApp
            except Exception as exc:  # pragma: no cover - optional dependency path
                logger.warning("Unable to import HostedTaskApp for swe-mini: %s", exc)
        return None

    hosted_task_app = None
    if HAS_HOSTED:
        hosted_cls = _import_hosted_task_app_cls()
        if hosted_cls is not None:
            hosted_task_app = hosted_cls(
                service_base_url=os.getenv("SWE_MINI_SERVICE_BASE_URL"),
                vllm_base_url=os.getenv(
                    "SWE_MINI_VLLM_BASE_URL",
                    "http://localhost:8020/proxy",
                ),
                default_model=os.getenv("SWE_MINI_DEFAULT_MODEL"),
            )

    app_state: dict[str, Any] = {
        "dataset": dataset,
        "allowed_environments": ["swe-mini"],
        "tracing_enabled": tracing_enabled,
    }
    # Optional entries are added only when their value is actually available.
    optional_state = (
        ("session_tracer_factory", tracer_factory, tracer_factory is not None),
        ("sft_output_dir", sft_output_dir, bool(sft_output_dir)),
        ("task_app", hosted_task_app, hosted_task_app is not None),
    )
    for state_key, state_value, present in optional_state:
        if present:
            app_state[state_key] = state_value

    if not tracing_enabled:
        logger.info("[swe-mini:tracing] disabled")
    else:
        logger.info("[swe-mini:tracing] enabled (db=%s)", tracing_db_url or "default")
    if sft_output_dir:
        logger.info("[swe-mini:sft] writing JSONL to %s", sft_output_dir)

    routers: tuple = ()
    if HAS_HOSTED:
        routers = (environment_router, policy_router, branching_router)

    def _aborted_response(request: RolloutRequest) -> RolloutResponse:
        """Build the empty, aborted response used when hosted support is absent."""
        return RolloutResponse(
            run_id=request.run_id,
            trajectories=[],
            branches={},
            metrics=RolloutMetrics(
                episode_returns=[],
                mean_return=0.0,
                num_steps=0,
                num_episodes=0,
            ),
            aborted=True,
            ops_executed=0,
            trace=None,
        )

    async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
        """Translate the request to the legacy schema and run the hosted rollout."""
        if not HAS_HOSTED:
            return _aborted_response(request)

        env_spec = _ensure_env_has_task(dataset, request.env)

        legacy_env = LegacyRolloutEnvSpec(
            env_id=request.env.env_id,
            env_name=env_spec.env_name or "swe-mini",
            config=env_spec.config,
            seed=request.env.seed,
        )
        legacy_policy = LegacyRolloutPolicySpec(
            policy_id=request.policy.policy_id,
            policy_name=request.policy.policy_name or "swe-mini-react",
            config=request.policy.config,
        )
        legacy_request = LegacyRolloutRequest(
            run_id=request.run_id,
            env=legacy_env,
            policy=legacy_policy,
            ops=request.ops,
            record=LegacyRolloutRecordConfig(**request.record.model_dump()),
            on_done=request.on_done,
            branch=getattr(request, "branch", None),
            safety=LegacyRolloutSafetyConfig(**request.safety.model_dump()),
            training_session_id=request.training_session_id,
            synth_base_url=request.synth_base_url,
        )

        legacy_response: LegacyRolloutResponse = await legacy_execute_rollout(
            legacy_request, fastapi_request
        )
        payload = legacy_response.model_dump()
        # Guarantee a metrics dict with a "details" key before re-validating.
        metrics = payload.get("metrics", {}) or {}
        metrics.setdefault("details", {})
        payload["metrics"] = metrics
        return RolloutResponse.model_validate(payload)

    async def _noop_rollout(request: RolloutRequest, fastapi_request) -> RolloutResponse:
        """Rollout stand-in that always reports an aborted run."""
        return _aborted_response(request)

    rollout_callable = rollout_executor if HAS_HOSTED else _noop_rollout

    return TaskAppConfig(
        app_id="swe-mini",
        name="mini-SWE Task App",
        description="Interactive SWE tasks executed via mini-swe-agent environments.",
        base_task_info=base_info,
        describe_taskset=lambda: describe_taskset(dataset),
        provide_task_instances=lambda seeds: provide_task_instances(dataset, base_info, seeds),
        rollout=rollout_callable,
        dataset_registry=registry,
        rubrics=RubricBundle(outcome=OUTCOME_RUBRIC, events=EVENTS_RUBRIC),
        proxy=ProxyConfig(enable_openai=True, enable_groq=True),
        routers=routers,
        app_state=app_state,
        cors_origins=["*"],
    )
521
+
522
+
523
# Register the swe-mini app. This is a module-level call, so it runs when the
# module is imported. build_config is passed as the config factory; the Modal
# section lists the packages, local directories, secrets and resources.
register_task_app(
    entry=TaskAppEntry(
        app_id="swe-mini",
        description="mini-swe-agent task app with rollout + proxy endpoints",
        config_factory=build_config,
        aliases=("mini-swe", "swe-mini-task"),
        env_files=(str(REPO_ROOT / "backend" / ".env.dev"),),
        modal=ModalDeploymentConfig(
            app_name="swe-mini-task-app",
            python_version="3.11",
            pip_packages=(
                "fastapi>=0.109.0",
                "uvicorn>=0.23.0",
                "pydantic>=2.7.0",
                "numpy>=1.24.0",
                "aiohttp>=3.8.0",
                "httpx>=0.24.0",
                "python-dotenv>=1.0.1",
                "sqlalchemy>=2.0.42",
                "aiosqlite>=0.21.0",
                "greenlet>=3.0.3",
                "modal>=0.63.0",
                "tenacity>=8.2.3",
                "swebench[modal]>=1.1.0",
                "swe-rex[modal]>=1.4.0",
                "mini-swe-agent>=1.14.2",
                "datasets>=2.18.0",
                "litellm>=1.75.5",
                "rich>=13.7.0",
                "jinja2>=3.1.3",
            ),
            # (local directory, path inside the image) pairs.
            extra_local_dirs=(
                (str(REPO_ROOT / "synth_ai"), "/opt/synth_ai_repo/synth_ai"),
                (
                    str(REPO_ROOT / "examples" / "swe" / "task_app" / "hosted"),
                    "/opt/synth_ai_repo/examples/swe/task_app/hosted",
                ),
                (
                    str(_HERE.parent),
                    "/opt/synth_ai_repo/examples/swe/task_app",
                ),
            ),
            secret_names=("swe-mini-environment", "groq-api-key", "openai-api-key"),
            memory=32768,
            cpu=6.0,
            max_containers=10,
        ),
    )
)