synth-ai 0.2.14__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (1086)
  1. synth_ai/__init__.py +25 -46
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +98 -72
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/demo/__init__.py +3 -0
  25. synth_ai/cli/commands/demo/core.py +153 -0
  26. synth_ai/cli/commands/eval/__init__.py +10 -0
  27. synth_ai/cli/commands/eval/config.py +338 -0
  28. synth_ai/cli/commands/eval/core.py +258 -0
  29. synth_ai/cli/commands/eval/runner.py +704 -0
  30. synth_ai/cli/commands/eval/validation.py +60 -0
  31. synth_ai/cli/commands/filter/__init__.py +12 -0
  32. synth_ai/cli/commands/filter/core.py +424 -0
  33. synth_ai/cli/commands/filter/errors.py +55 -0
  34. synth_ai/cli/commands/filter/validation.py +77 -0
  35. synth_ai/cli/commands/help/__init__.py +185 -0
  36. synth_ai/cli/commands/help/core.py +72 -0
  37. synth_ai/cli/commands/scan/__init__.py +19 -0
  38. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  39. synth_ai/cli/commands/scan/core.py +344 -0
  40. synth_ai/cli/commands/scan/health_checker.py +242 -0
  41. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  42. synth_ai/cli/commands/scan/models.py +83 -0
  43. synth_ai/cli/commands/smoke/__init__.py +7 -0
  44. synth_ai/cli/commands/smoke/core.py +1428 -0
  45. synth_ai/cli/commands/status/__init__.py +3 -0
  46. synth_ai/cli/commands/status/client.py +91 -0
  47. synth_ai/cli/commands/status/config.py +12 -0
  48. synth_ai/cli/commands/status/errors.py +11 -0
  49. synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
  50. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  51. synth_ai/cli/commands/status/subcommands/files.py +34 -0
  52. synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
  53. synth_ai/cli/commands/status/subcommands/models.py +35 -0
  54. synth_ai/cli/commands/status/subcommands/runs.py +34 -0
  55. synth_ai/cli/commands/status/subcommands/session.py +77 -0
  56. synth_ai/cli/commands/status/subcommands/summary.py +39 -0
  57. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  58. synth_ai/cli/commands/status/utils.py +23 -0
  59. synth_ai/cli/commands/train/__init__.py +51 -0
  60. synth_ai/cli/commands/train/core.py +22 -0
  61. synth_ai/cli/commands/train/errors.py +117 -0
  62. synth_ai/cli/commands/train/prompt_learning_validation.py +632 -0
  63. synth_ai/cli/commands/train/validation.py +392 -0
  64. synth_ai/cli/commands/train/verifier_schemas.py +200 -0
  65. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  66. synth_ai/cli/demo_apps/__init__.py +10 -0
  67. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  68. synth_ai/cli/demo_apps/core/cli.py +1735 -0
  69. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  70. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  71. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  72. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  73. synth_ai/cli/demo_apps/demo_task_apps/core.py +440 -0
  74. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  75. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  76. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  79. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  80. synth_ai/cli/demo_apps/math/_common.py +16 -0
  81. synth_ai/cli/demo_apps/math/app.py +38 -0
  82. synth_ai/cli/demo_apps/math/config.toml +75 -0
  83. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  84. synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
  85. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  86. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  87. synth_ai/cli/demo_apps/mipro/task_app.py +911 -0
  88. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  89. synth_ai/cli/demos/__init__.py +12 -0
  90. synth_ai/cli/demos/demo.py +32 -0
  91. synth_ai/cli/demos/rl_demo.py +254 -0
  92. synth_ai/cli/deploy.py +216 -0
  93. synth_ai/cli/infra/__init__.py +14 -0
  94. synth_ai/cli/infra/balance.py +216 -0
  95. synth_ai/cli/infra/mcp.py +35 -0
  96. synth_ai/cli/infra/modal_app.py +36 -0
  97. synth_ai/cli/infra/setup.py +69 -0
  98. synth_ai/cli/infra/status.py +16 -0
  99. synth_ai/cli/infra/turso.py +77 -0
  100. synth_ai/cli/lib/__init__.py +10 -0
  101. synth_ai/cli/lib/agents.py +76 -0
  102. synth_ai/cli/lib/apps/modal_app.py +101 -0
  103. synth_ai/cli/lib/apps/task_app.py +642 -0
  104. synth_ai/cli/lib/bin.py +39 -0
  105. synth_ai/cli/lib/env.py +375 -0
  106. synth_ai/cli/lib/errors.py +85 -0
  107. synth_ai/cli/lib/modal.py +315 -0
  108. synth_ai/cli/lib/plotting.py +126 -0
  109. synth_ai/cli/lib/prompt_args.py +39 -0
  110. synth_ai/cli/lib/prompts.py +284 -0
  111. synth_ai/cli/lib/sqld.py +122 -0
  112. synth_ai/cli/lib/task_app_discovery.py +884 -0
  113. synth_ai/cli/lib/task_app_env.py +295 -0
  114. synth_ai/cli/lib/train_cfgs.py +300 -0
  115. synth_ai/cli/lib/tunnel_records.py +207 -0
  116. synth_ai/cli/local/__init__.py +14 -0
  117. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  118. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  119. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  120. synth_ai/cli/local/experiment_queue/config.py +128 -0
  121. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  122. synth_ai/cli/local/experiment_queue/database.py +175 -0
  123. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  124. synth_ai/cli/local/experiment_queue/models.py +231 -0
  125. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  126. synth_ai/cli/local/experiment_queue/results.py +373 -0
  127. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  128. synth_ai/cli/local/experiment_queue/service.py +344 -0
  129. synth_ai/cli/local/experiment_queue/status.py +372 -0
  130. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  131. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  132. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  133. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  134. synth_ai/cli/local/session/__init__.py +92 -0
  135. synth_ai/cli/local/session/client.py +383 -0
  136. synth_ai/cli/local/session/constants.py +63 -0
  137. synth_ai/cli/local/session/exceptions.py +105 -0
  138. synth_ai/cli/local/session/manager.py +139 -0
  139. synth_ai/cli/local/session/models.py +89 -0
  140. synth_ai/cli/local/session/query.py +110 -0
  141. synth_ai/cli/root.py +30 -6
  142. synth_ai/cli/task_apps/__init__.py +37 -0
  143. synth_ai/cli/task_apps/commands.py +3145 -0
  144. synth_ai/cli/task_apps/deploy.py +7 -0
  145. synth_ai/cli/task_apps/list.py +26 -0
  146. synth_ai/cli/task_apps/main.py +36 -0
  147. synth_ai/cli/task_apps/modal_serve.py +11 -0
  148. synth_ai/cli/task_apps/serve.py +11 -0
  149. synth_ai/cli/training/__init__.py +8 -0
  150. synth_ai/cli/training/train.py +5 -0
  151. synth_ai/cli/training/train_cfg.py +34 -0
  152. synth_ai/cli/training/watch.py +506 -0
  153. synth_ai/cli/turso.py +34 -55
  154. synth_ai/cli/utils/__init__.py +8 -0
  155. synth_ai/cli/utils/experiments.py +235 -0
  156. synth_ai/cli/utils/queue.py +504 -0
  157. synth_ai/cli/utils/recent.py +133 -0
  158. synth_ai/cli/utils/traces.py +164 -0
  159. synth_ai/contracts/__init__.py +67 -0
  160. synth_ai/core/__init__.py +100 -0
  161. synth_ai/core/_utils/__init__.py +54 -0
  162. synth_ai/core/_utils/base_url.py +10 -0
  163. synth_ai/core/_utils/http.py +10 -0
  164. synth_ai/core/_utils/prompts.py +14 -0
  165. synth_ai/core/_utils/task_app_state.py +12 -0
  166. synth_ai/core/_utils/user_config.py +10 -0
  167. synth_ai/core/apps/common.py +116 -0
  168. synth_ai/core/auth.py +95 -0
  169. synth_ai/core/cfgs.py +240 -0
  170. synth_ai/core/config/__init__.py +16 -0
  171. synth_ai/core/config/base.py +168 -0
  172. synth_ai/core/config/resolver.py +89 -0
  173. synth_ai/core/env.py +231 -0
  174. synth_ai/core/errors.py +125 -0
  175. synth_ai/core/http.py +230 -0
  176. synth_ai/core/integrations/__init__.py +11 -0
  177. synth_ai/core/integrations/cloudflare.py +1886 -0
  178. synth_ai/core/integrations/mcp/__init__.py +6 -0
  179. synth_ai/core/integrations/mcp/__main__.py +8 -0
  180. synth_ai/core/integrations/mcp/claude.py +36 -0
  181. synth_ai/core/integrations/mcp/main.py +254 -0
  182. synth_ai/core/integrations/mcp/setup.py +100 -0
  183. synth_ai/core/integrations/modal.py +277 -0
  184. synth_ai/core/json.py +72 -0
  185. synth_ai/core/log_filter.py +99 -0
  186. synth_ai/core/logging.py +82 -0
  187. synth_ai/core/paths.py +107 -0
  188. synth_ai/core/pricing.py +109 -0
  189. synth_ai/core/process.py +233 -0
  190. synth_ai/core/ssl.py +25 -0
  191. synth_ai/core/storage/__init__.py +71 -0
  192. synth_ai/core/task_app_state.py +318 -0
  193. synth_ai/core/telemetry.py +282 -0
  194. synth_ai/core/tracing_v3/__init__.py +99 -0
  195. synth_ai/core/tracing_v3/abstractions.py +348 -0
  196. synth_ai/core/tracing_v3/config.py +229 -0
  197. synth_ai/core/tracing_v3/constants.py +21 -0
  198. synth_ai/core/tracing_v3/db_config.py +182 -0
  199. synth_ai/core/tracing_v3/decorators.py +401 -0
  200. synth_ai/core/tracing_v3/llm_call_record_helpers.py +437 -0
  201. synth_ai/core/tracing_v3/migration_helper.py +119 -0
  202. synth_ai/core/tracing_v3/session_tracer.py +542 -0
  203. synth_ai/core/tracing_v3/storage/base.py +211 -0
  204. synth_ai/core/tracing_v3/storage/config.py +109 -0
  205. synth_ai/core/tracing_v3/storage/factory.py +39 -0
  206. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  207. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  208. synth_ai/core/tracing_v3/turso/models.py +470 -0
  209. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  210. synth_ai/core/tracing_v3/utils.py +108 -0
  211. synth_ai/core/urls.py +18 -0
  212. synth_ai/core/user_config.py +137 -0
  213. synth_ai/core/uvicorn.py +222 -0
  214. synth_ai/data/__init__.py +83 -0
  215. synth_ai/data/enums.py +122 -0
  216. synth_ai/data/rewards.py +249 -0
  217. synth_ai/data/traces.py +35 -0
  218. synth_ai/products/__init__.py +6 -0
  219. synth_ai/products/graph_evolve/__init__.py +45 -0
  220. synth_ai/products/graph_evolve/client.py +226 -0
  221. synth_ai/products/graph_evolve/config.py +591 -0
  222. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  223. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  224. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  225. synth_ai/products/graph_evolve/run.py +222 -0
  226. synth_ai/products/graph_gepa/__init__.py +23 -0
  227. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  228. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  229. synth_ai/sdk/__init__.py +129 -0
  230. synth_ai/sdk/api/__init__.py +1 -0
  231. synth_ai/sdk/api/eval/__init__.py +33 -0
  232. synth_ai/sdk/api/eval/job.py +732 -0
  233. synth_ai/sdk/api/models/supported.py +514 -0
  234. synth_ai/sdk/api/research_agent/__init__.py +296 -0
  235. synth_ai/sdk/api/train/__init__.py +85 -0
  236. synth_ai/sdk/api/train/builders.py +1076 -0
  237. synth_ai/sdk/api/train/cli.py +2196 -0
  238. synth_ai/sdk/api/train/config_finder.py +267 -0
  239. synth_ai/sdk/api/train/configs/__init__.py +67 -0
  240. synth_ai/sdk/api/train/configs/prompt_learning.py +1800 -0
  241. synth_ai/sdk/api/train/configs/rl.py +436 -0
  242. synth_ai/sdk/api/train/configs/sft.py +263 -0
  243. synth_ai/sdk/api/train/configs/shared.py +81 -0
  244. synth_ai/sdk/api/train/context_learning.py +312 -0
  245. synth_ai/sdk/api/train/env_resolver.py +418 -0
  246. synth_ai/sdk/api/train/graph_validators.py +216 -0
  247. synth_ai/sdk/api/train/graphgen.py +1102 -0
  248. synth_ai/sdk/api/train/graphgen_models.py +873 -0
  249. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  250. synth_ai/sdk/api/train/local_api.py +10 -0
  251. synth_ai/sdk/api/train/pollers.py +160 -0
  252. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  253. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  254. synth_ai/sdk/api/train/progress/events.py +326 -0
  255. synth_ai/sdk/api/train/progress/results.py +428 -0
  256. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  257. synth_ai/sdk/api/train/prompt_learning.py +800 -0
  258. synth_ai/sdk/api/train/rl.py +478 -0
  259. synth_ai/sdk/api/train/sft.py +398 -0
  260. synth_ai/sdk/api/train/summary.py +522 -0
  261. synth_ai/sdk/api/train/supported_algos.py +147 -0
  262. synth_ai/sdk/api/train/task_app.py +351 -0
  263. synth_ai/sdk/api/train/utils.py +279 -0
  264. synth_ai/sdk/api/train/validators.py +2424 -0
  265. synth_ai/sdk/graphs/__init__.py +15 -0
  266. synth_ai/sdk/graphs/completions.py +776 -0
  267. synth_ai/sdk/graphs/verifier_schemas.py +222 -0
  268. synth_ai/sdk/inference/__init__.py +6 -0
  269. synth_ai/sdk/inference/client.py +128 -0
  270. synth_ai/sdk/jobs/__init__.py +16 -0
  271. synth_ai/sdk/jobs/client.py +371 -0
  272. synth_ai/sdk/learning/__init__.py +99 -0
  273. synth_ai/sdk/learning/client.py +240 -0
  274. synth_ai/sdk/learning/context_learning_client.py +531 -0
  275. synth_ai/sdk/learning/context_learning_types.py +294 -0
  276. synth_ai/sdk/learning/ft_client.py +7 -0
  277. synth_ai/sdk/learning/health.py +49 -0
  278. synth_ai/sdk/learning/jobs.py +202 -0
  279. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  280. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  281. synth_ai/sdk/learning/prompt_learning_types.py +186 -0
  282. synth_ai/sdk/learning/rl/__init__.py +35 -0
  283. synth_ai/sdk/learning/rl/client.py +268 -0
  284. synth_ai/sdk/learning/rl/contracts.py +23 -0
  285. synth_ai/sdk/learning/rl/env_keys.py +166 -0
  286. synth_ai/sdk/learning/rl/secrets.py +13 -0
  287. synth_ai/sdk/learning/sft/client.py +95 -0
  288. synth_ai/sdk/learning/sft/config.py +270 -0
  289. synth_ai/sdk/learning/sft/data.py +698 -0
  290. synth_ai/sdk/learning/validators.py +52 -0
  291. synth_ai/sdk/localapi/__init__.py +40 -0
  292. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  293. synth_ai/sdk/localapi/client.py +10 -0
  294. synth_ai/sdk/localapi/contracts.py +10 -0
  295. synth_ai/sdk/localapi/helpers.py +519 -0
  296. synth_ai/sdk/localapi/rollouts.py +93 -0
  297. synth_ai/sdk/localapi/server.py +29 -0
  298. synth_ai/sdk/localapi/template.py +49 -0
  299. synth_ai/sdk/streaming/__init__.py +35 -0
  300. synth_ai/sdk/streaming/config.py +94 -0
  301. synth_ai/sdk/streaming/handlers.py +1997 -0
  302. synth_ai/sdk/streaming/streamer.py +708 -0
  303. synth_ai/sdk/streaming/types.py +112 -0
  304. synth_ai/sdk/task/__init__.py +164 -0
  305. synth_ai/sdk/task/apps/__init__.py +169 -0
  306. synth_ai/sdk/task/client.py +175 -0
  307. synth_ai/sdk/task/config.py +256 -0
  308. synth_ai/sdk/task/contracts.py +340 -0
  309. synth_ai/sdk/task/datasets.py +108 -0
  310. synth_ai/sdk/task/in_process.py +1200 -0
  311. synth_ai/sdk/task/in_process_runner.py +314 -0
  312. synth_ai/sdk/task/inference_api.py +299 -0
  313. synth_ai/sdk/task/proxy.py +287 -0
  314. synth_ai/sdk/task/rubrics/__init__.py +54 -0
  315. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  316. synth_ai/sdk/task/rubrics/strict.py +148 -0
  317. synth_ai/sdk/task/rubrics.py +219 -0
  318. synth_ai/sdk/task/server.py +640 -0
  319. synth_ai/sdk/task/trace_correlation_helpers.py +557 -0
  320. synth_ai/sdk/task/tracing_utils.py +95 -0
  321. synth_ai/sdk/task/validators.py +441 -0
  322. synth_ai/sdk/training/__init__.py +93 -0
  323. synth_ai/sdk/tunnels/__init__.py +118 -0
  324. synth_ai/sdk/tunnels/cleanup.py +83 -0
  325. synth_ai/sdk/tunnels/ports.py +120 -0
  326. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  327. synth_ai/utils/__init__.py +213 -0
  328. synth_ai-0.4.4.dist-info/METADATA +262 -0
  329. synth_ai-0.4.4.dist-info/RECORD +369 -0
  330. synth_ai-0.4.4.dist-info/top_level.txt +1 -0
  331. examples/__init__.py +0 -16
  332. examples/analyze_semantic_words.sh +0 -17
  333. examples/crafter_debug_render.py +0 -186
  334. examples/dev/qwen3_32b_qlora_4xh100.toml +0 -40
  335. examples/multi_step/configs/README_verilog_rl.md +0 -77
  336. examples/multi_step/configs/VERILOG_REWARDS.md +0 -90
  337. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +0 -183
  338. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +0 -35
  339. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +0 -36
  340. examples/multi_step/configs/crafter_rl_outcome.toml +0 -74
  341. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +0 -187
  342. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +0 -83
  343. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +0 -78
  344. examples/multi_step/configs/crafter_synth_backend.md +0 -40
  345. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +0 -31
  346. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +0 -33
  347. examples/multi_step/configs/verilog_rl_lora.toml +0 -190
  348. examples/multi_step/crafter_rl_lora.md +0 -70
  349. examples/multi_step/judges/crafter_backend_judge.py +0 -220
  350. examples/multi_step/judges/verilog_backend_judge.py +0 -234
  351. examples/multi_step/readme.md +0 -48
  352. examples/multi_step/sse_metrics_streaming_notes.md +0 -357
  353. examples/multi_step/task_app_config_notes.md +0 -494
  354. examples/multi_step/verilog_rl_lora.md +0 -218
  355. examples/qwen_coder/README.md +0 -102
  356. examples/qwen_coder/_shared.py +0 -113
  357. examples/qwen_coder/configs/coder_lora_30b.toml +0 -61
  358. examples/qwen_coder/configs/coder_lora_4b.toml +0 -57
  359. examples/qwen_coder/configs/coder_lora_small.toml +0 -58
  360. examples/qwen_coder/generate_dataset.py +0 -98
  361. examples/qwen_coder/infer_ft_smoke.py +0 -65
  362. examples/qwen_coder/infer_prod_proxy.py +0 -73
  363. examples/qwen_coder/infer_via_synth.py +0 -87
  364. examples/qwen_coder/scripts/infer_coder.sh +0 -19
  365. examples/qwen_coder/scripts/train_coder_30b.sh +0 -22
  366. examples/qwen_coder/sft_full_17b.py +0 -103
  367. examples/qwen_coder/sft_lora_30b.py +0 -110
  368. examples/qwen_coder/subset_jsonl.py +0 -39
  369. examples/qwen_coder/todos.md +0 -38
  370. examples/qwen_coder/validate_jsonl.py +0 -60
  371. examples/rl/README.md +0 -169
  372. examples/rl/download_dataset.py +0 -80
  373. examples/run_crafter_demo.sh +0 -10
  374. examples/sft/README.md +0 -139
  375. examples/sft/configs/crafter_fft_qwen0p6b.toml +0 -44
  376. examples/sft/configs/crafter_lora_qwen0p6b.toml +0 -45
  377. examples/sft/evaluate.py +0 -119
  378. examples/sft/export_dataset.py +0 -117
  379. examples/sft/generate_traces.py +0 -164
  380. examples/swe/__init__.py +0 -12
  381. examples/swe/task_app/README.md +0 -105
  382. examples/swe/task_app/__init__.py +0 -2
  383. examples/swe/task_app/grpo_swe_mini.py +0 -601
  384. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -136
  385. examples/swe/task_app/hosted/README.md +0 -173
  386. examples/swe/task_app/hosted/__init__.py +0 -5
  387. examples/swe/task_app/hosted/branching.py +0 -143
  388. examples/swe/task_app/hosted/environment_routes.py +0 -1289
  389. examples/swe/task_app/hosted/envs/__init__.py +0 -1
  390. examples/swe/task_app/hosted/envs/crafter/__init__.py +0 -6
  391. examples/swe/task_app/hosted/envs/crafter/app.py +0 -1
  392. examples/swe/task_app/hosted/envs/crafter/environment.py +0 -522
  393. examples/swe/task_app/hosted/envs/crafter/policy.py +0 -478
  394. examples/swe/task_app/hosted/envs/crafter/react_agent.py +0 -108
  395. examples/swe/task_app/hosted/envs/crafter/shared.py +0 -305
  396. examples/swe/task_app/hosted/envs/crafter/tools.py +0 -47
  397. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +0 -8
  398. examples/swe/task_app/hosted/envs/mini_swe/environment.py +0 -1164
  399. examples/swe/task_app/hosted/envs/mini_swe/policy.py +0 -355
  400. examples/swe/task_app/hosted/envs/mini_swe/shared.py +0 -83
  401. examples/swe/task_app/hosted/envs/mini_swe/tools.py +0 -96
  402. examples/swe/task_app/hosted/hosted_app.py +0 -204
  403. examples/swe/task_app/hosted/inference/__init__.py +0 -5
  404. examples/swe/task_app/hosted/inference/openai_client.py +0 -618
  405. examples/swe/task_app/hosted/main.py +0 -100
  406. examples/swe/task_app/hosted/policy_routes.py +0 -1079
  407. examples/swe/task_app/hosted/registry.py +0 -195
  408. examples/swe/task_app/hosted/rollout.py +0 -1911
  409. examples/swe/task_app/hosted/storage/__init__.py +0 -5
  410. examples/swe/task_app/hosted/storage/volume.py +0 -211
  411. examples/swe/task_app/hosted/test_agents.py +0 -161
  412. examples/swe/task_app/hosted/test_service.py +0 -136
  413. examples/swe/task_app/hosted/utils.py +0 -62
  414. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +0 -258
  415. examples/task_apps/TESTING.md +0 -275
  416. examples/task_apps/crafter/CREATE_SFT_DATASET.md +0 -273
  417. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +0 -152
  418. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +0 -174
  419. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +0 -268
  420. examples/task_apps/crafter/QUERY_EXAMPLES.md +0 -203
  421. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +0 -316
  422. examples/task_apps/crafter/__init__.py +0 -0
  423. examples/task_apps/crafter/eval_image_only_gpt4o.toml +0 -28
  424. examples/task_apps/crafter/eval_text_only_groq_llama.toml +0 -36
  425. examples/task_apps/crafter/filter_sft_dataset.toml +0 -16
  426. examples/task_apps/crafter/task_app/README.md +0 -42
  427. examples/task_apps/crafter/task_app/__init__.py +0 -5
  428. examples/task_apps/crafter/task_app/grpo_crafter.py +0 -973
  429. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +0 -146
  430. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +0 -173
  431. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +0 -5
  432. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +0 -143
  433. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +0 -1226
  434. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +0 -1
  435. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -6
  436. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +0 -1
  437. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -532
  438. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +0 -547
  439. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -123
  440. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -305
  441. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -47
  442. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +0 -204
  443. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +0 -5
  444. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +0 -704
  445. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +0 -100
  446. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +0 -1152
  447. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +0 -195
  448. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +0 -2160
  449. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +0 -5
  450. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +0 -211
  451. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +0 -161
  452. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +0 -136
  453. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +0 -218
  454. examples/task_apps/dev/pokemon_emerald/__init__.py +0 -2
  455. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +0 -811
  456. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +0 -120
  457. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +0 -160
  458. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +0 -155
  459. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +0 -69
  460. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +0 -96
  461. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +0 -1502
  462. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +0 -4
  463. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +0 -68
  464. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +0 -216
  465. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +0 -35
  466. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +0 -631
  467. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +0 -1544
  468. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +0 -1428
  469. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +0 -4848
  470. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +0 -41
  471. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +0 -298
  472. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +0 -95
  473. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +0 -204
  474. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  475. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +0 -2152
  476. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +0 -429
  477. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +0 -155
  478. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +0 -78
  479. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  480. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +0 -122
  481. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +0 -76
  482. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +0 -413
  483. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +0 -204
  484. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +0 -133
  485. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +0 -229
  486. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +0 -300
  487. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +0 -205
  488. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +0 -200
  489. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +0 -284
  490. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +0 -468
  491. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +0 -575
  492. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +0 -311
  493. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +0 -259
  494. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  495. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +0 -372
  496. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +0 -296
  497. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +0 -275
  498. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +0 -22
  499. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +0 -44
  500. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +0 -514
  501. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +0 -415
  502. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +0 -1763
  503. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +0 -33
  504. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +0 -106
  505. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +0 -334
  506. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +0 -1020
  507. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +0 -188
  508. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +0 -1481
  509. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +0 -862
  510. examples/task_apps/dev/pokemon_emerald/modal_app.py +0 -114
  511. examples/task_apps/dev/pokemon_emerald/task_app/README.md +0 -81
  512. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +0 -6
  513. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +0 -685
  514. examples/task_apps/enron/__init__.py +0 -1
  515. examples/task_apps/enron/eval_groq_qwen32.toml +0 -16
  516. examples/task_apps/enron/filter_sft.toml +0 -5
  517. examples/task_apps/enron/task_app/README.md +0 -14
  518. examples/task_apps/enron/task_app/__init__.py +0 -1
  519. examples/task_apps/enron/task_app/grpo_enron.py +0 -906
  520. examples/task_apps/enron/task_app/grpo_enron_task_app.py +0 -146
  521. examples/task_apps/enron/tests/__init__.py +0 -4
  522. examples/task_apps/enron/tests/conftest.py +0 -115
  523. examples/task_apps/enron/tests/integration/__init__.py +0 -4
  524. examples/task_apps/enron/tests/integration/test_enron_eval.py +0 -179
  525. examples/task_apps/enron/tests/integration/test_enron_rollout.py +0 -135
  526. examples/task_apps/enron/tests/unit/__init__.py +0 -4
  527. examples/task_apps/enron/tests/unit/test_enron_environment.py +0 -126
  528. examples/task_apps/math/README.md +0 -22
  529. examples/task_apps/math/__init__.py +0 -0
  530. examples/task_apps/math/math_single_step.py +0 -1000
  531. examples/task_apps/math/math_task_app.py +0 -115
  532. examples/task_apps/pokemon_battle/__init__.py +0 -2
  533. examples/task_apps/pokemon_battle/modal_app.py +0 -104
  534. examples/task_apps/pokemon_battle/task_app/README.md +0 -68
  535. examples/task_apps/pokemon_battle/task_app/__init__.py +0 -6
  536. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +0 -932
  537. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +0 -283
  538. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +0 -155
  539. examples/task_apps/pokemon_red/README.md +0 -357
  540. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +0 -415
  541. examples/task_apps/pokemon_red/__init__.py +0 -3
  542. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +0 -29
  543. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +0 -225
  544. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +0 -75
  545. examples/task_apps/pokemon_red/task_app.py +0 -799
  546. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +0 -193
  547. examples/task_apps/sokoban/README.md +0 -307
  548. examples/task_apps/sokoban/__init__.py +0 -3
  549. examples/task_apps/sokoban/eval_groq_qwen32.toml +0 -16
  550. examples/task_apps/sokoban/eval_openai_gpt5.toml +0 -16
  551. examples/task_apps/sokoban/filter_sft.toml +0 -5
  552. examples/task_apps/sokoban/task_app.py +0 -1058
  553. examples/task_apps/sokoban/tests/__init__.py +0 -4
  554. examples/task_apps/sokoban/tests/conftest.py +0 -113
  555. examples/task_apps/sokoban/tests/integration/__init__.py +0 -4
  556. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +0 -57
  557. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +0 -198
  558. examples/task_apps/sokoban/tests/unit/__init__.py +0 -4
  559. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +0 -114
  560. examples/task_apps/verilog/__init__.py +0 -1
  561. examples/task_apps/verilog/eval_groq_qwen32b.toml +0 -24
  562. examples/task_apps/verilog/filter_sft.toml +0 -5
  563. examples/task_apps/verilog/task_app/README.md +0 -12
  564. examples/task_apps/verilog/task_app/__init__.py +0 -1
  565. examples/task_apps/verilog/task_app/grpo_verilog.py +0 -1166
  566. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +0 -145
  567. examples/task_apps/verilog/tests/__init__.py +0 -4
  568. examples/task_apps/verilog/tests/conftest.py +0 -115
  569. examples/task_apps/verilog/tests/integration/__init__.py +0 -4
  570. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +0 -181
  571. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +0 -55
  572. examples/task_apps/verilog/tests/unit/__init__.py +0 -4
  573. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +0 -118
  574. examples/vlm/PROPOSAL.md +0 -53
  575. examples/vlm/README.md +0 -68
  576. examples/vlm/configs/crafter_vlm_gpt4o.toml +0 -44
  577. examples/vlm/crafter_image_only_agent.py +0 -207
  578. examples/vlm/crafter_openai_vlm_agent.py +0 -277
  579. examples/vlm/filter_image_rows.py +0 -63
  580. examples/vlm/run_crafter_vlm_benchmark.py +0 -316
  581. examples/warming_up_to_rl/analyze_trace_db.py +0 -422
  582. examples/warming_up_to_rl/configs/crafter_fft.toml +0 -48
  583. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -54
  584. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +0 -20
  585. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +0 -13
  586. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +0 -23
  587. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +0 -35
  588. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +0 -26
  589. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +0 -36
  590. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +0 -32
  591. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +0 -83
  592. examples/warming_up_to_rl/configs/rl_from_ft.toml +0 -56
  593. examples/warming_up_to_rl/export_trace_sft.py +0 -723
  594. examples/warming_up_to_rl/groq_test.py +0 -97
  595. examples/warming_up_to_rl/manage_secrets.py +0 -131
  596. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  597. examples/warming_up_to_rl/old/notes.md +0 -73
  598. examples/warming_up_to_rl/readme.md +0 -179
  599. examples/warming_up_to_rl/run_eval.py +0 -736
  600. examples/warming_up_to_rl/run_fft_and_save.py +0 -380
  601. examples/warming_up_to_rl/run_local_rollout.py +0 -239
  602. examples/warming_up_to_rl/run_local_rollout_modal.py +0 -248
  603. examples/warming_up_to_rl/run_local_rollout_parallel.py +0 -405
  604. examples/warming_up_to_rl/run_local_rollout_traced.py +0 -477
  605. examples/warming_up_to_rl/run_rl_and_save.py +0 -124
  606. examples/warming_up_to_rl/run_rollout_remote.py +0 -156
  607. examples/workflows/__init__.py +0 -0
  608. examples/workflows/math_rl/__init__.py +0 -0
  609. examples/workflows/math_rl/configs/eval_base_qwen.toml +0 -15
  610. examples/workflows/math_rl/configs/eval_rl_qwen.toml +0 -11
  611. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +0 -35
  612. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +0 -74
  613. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +0 -35
  614. examples/workflows/math_rl/download_dataset.py +0 -80
  615. examples/workflows/math_rl/run_eval.py +0 -436
  616. examples/workflows/math_rl/run_rl_and_save.py +0 -111
  617. synth_ai/api/models/supported.py +0 -377
  618. synth_ai/api/train/__init__.py +0 -5
  619. synth_ai/api/train/builders.py +0 -351
  620. synth_ai/api/train/cli.py +0 -635
  621. synth_ai/api/train/config_finder.py +0 -228
  622. synth_ai/api/train/configs/__init__.py +0 -44
  623. synth_ai/api/train/configs/rl.py +0 -134
  624. synth_ai/api/train/configs/sft.py +0 -95
  625. synth_ai/api/train/configs/shared.py +0 -24
  626. synth_ai/api/train/env_resolver.py +0 -349
  627. synth_ai/api/train/pollers.py +0 -75
  628. synth_ai/api/train/supported_algos.py +0 -147
  629. synth_ai/api/train/task_app.py +0 -195
  630. synth_ai/api/train/utils.py +0 -225
  631. synth_ai/cli/_modal_wrapper.py +0 -29
  632. synth_ai/cli/_storage.py +0 -20
  633. synth_ai/cli/_typer_patch.py +0 -49
  634. synth_ai/cli/_validate_task_app.py +0 -11
  635. synth_ai/cli/balance.py +0 -216
  636. synth_ai/cli/calc.py +0 -84
  637. synth_ai/cli/demo.py +0 -165
  638. synth_ai/cli/legacy_root_backup.py +0 -468
  639. synth_ai/cli/man.py +0 -106
  640. synth_ai/cli/recent.py +0 -132
  641. synth_ai/cli/rl_demo.py +0 -254
  642. synth_ai/cli/status.py +0 -134
  643. synth_ai/cli/task_apps.py +0 -4523
  644. synth_ai/cli/traces.py +0 -164
  645. synth_ai/cli/tui.py +0 -57
  646. synth_ai/cli/watch.py +0 -506
  647. synth_ai/compound/cais.py +0 -0
  648. synth_ai/config/base_url.py +0 -107
  649. synth_ai/core/experiment.py +0 -13
  650. synth_ai/core/system.py +0 -15
  651. synth_ai/demo_registry.py +0 -295
  652. synth_ai/demos/core/__init__.py +0 -1
  653. synth_ai/demos/core/cli.py +0 -1718
  654. synth_ai/demos/demo_task_apps/core.py +0 -440
  655. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +0 -184
  656. synth_ai/demos/demo_task_apps/math/config.toml +0 -74
  657. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  658. synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -739
  659. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -37
  660. synth_ai/environments/__init__.py +0 -31
  661. synth_ai/environments/environment/__init__.py +0 -1
  662. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  663. synth_ai/environments/environment/artifacts/base.py +0 -52
  664. synth_ai/environments/environment/core.py +0 -67
  665. synth_ai/environments/environment/db/__init__.py +0 -1
  666. synth_ai/environments/environment/db/sqlite.py +0 -45
  667. synth_ai/environments/environment/registry.py +0 -233
  668. synth_ai/environments/environment/resources/sqlite.py +0 -45
  669. synth_ai/environments/environment/results.py +0 -1
  670. synth_ai/environments/environment/rewards/__init__.py +0 -1
  671. synth_ai/environments/environment/rewards/core.py +0 -29
  672. synth_ai/environments/environment/shared_engine.py +0 -26
  673. synth_ai/environments/environment/tools/__init__.py +0 -200
  674. synth_ai/environments/examples/__init__.py +0 -1
  675. synth_ai/environments/examples/bandit/__init__.py +0 -33
  676. synth_ai/environments/examples/bandit/engine.py +0 -302
  677. synth_ai/environments/examples/bandit/environment.py +0 -194
  678. synth_ai/environments/examples/bandit/taskset.py +0 -200
  679. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  680. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  681. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  682. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  683. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  684. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  685. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  686. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  687. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  688. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  689. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  690. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  691. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  692. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  693. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  694. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  695. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  696. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  697. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  698. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  699. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  700. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  701. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  702. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  703. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  704. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  705. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  706. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  707. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  708. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  709. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  710. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  711. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  712. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  713. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  714. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  715. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  716. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  717. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  718. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  719. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  720. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  721. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  722. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  723. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  724. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  725. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  726. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  727. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  728. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  729. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  730. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  731. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  732. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  733. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  734. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  735. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  736. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  737. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  738. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  739. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  740. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  741. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  742. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  743. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  744. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  745. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  746. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  747. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  748. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  749. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  750. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  751. synth_ai/environments/examples/crafter_classic/environment.py +0 -495
  752. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  753. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  754. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  755. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  756. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  757. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  758. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  759. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  760. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  761. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  762. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  763. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  764. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  765. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  766. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  767. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  768. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  769. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  770. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  771. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  772. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  773. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  774. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  775. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  776. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  777. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  778. synth_ai/environments/examples/enron/engine.py +0 -300
  779. synth_ai/environments/examples/enron/environment.py +0 -234
  780. synth_ai/environments/examples/enron/taskset.py +0 -112
  781. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  782. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  783. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  784. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  785. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  786. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  787. synth_ai/environments/examples/minigrid/engine.py +0 -589
  788. synth_ai/environments/examples/minigrid/environment.py +0 -274
  789. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  790. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  791. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  792. synth_ai/environments/examples/nethack/__init__.py +0 -7
  793. synth_ai/environments/examples/nethack/achievements.py +0 -337
  794. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  795. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  796. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  797. synth_ai/environments/examples/nethack/engine.py +0 -739
  798. synth_ai/environments/examples/nethack/environment.py +0 -256
  799. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  800. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  801. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  802. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  803. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  804. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  805. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  806. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  807. synth_ai/environments/examples/nethack/taskset.py +0 -323
  808. synth_ai/environments/examples/red/__init__.py +0 -7
  809. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  810. synth_ai/environments/examples/red/config_logging.py +0 -110
  811. synth_ai/environments/examples/red/engine.py +0 -721
  812. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  813. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -35
  814. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  815. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  816. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  817. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  818. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  819. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  820. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  821. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  822. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  823. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +0 -477
  824. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  825. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  826. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  827. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  828. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  829. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -172
  830. synth_ai/environments/examples/red/environment.py +0 -298
  831. synth_ai/environments/examples/red/taskset.py +0 -79
  832. synth_ai/environments/examples/red/units/__init__.py +0 -1
  833. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  834. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  835. synth_ai/environments/examples/sokoban/engine.py +0 -678
  836. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  837. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  838. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  839. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  840. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  841. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  842. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  843. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  844. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  845. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  846. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  847. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  848. synth_ai/environments/examples/sokoban/environment.py +0 -229
  849. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  850. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  851. synth_ai/environments/examples/sokoban/taskset.py +0 -544
  852. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  853. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  854. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  855. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  856. synth_ai/environments/examples/verilog/__init__.py +0 -10
  857. synth_ai/environments/examples/verilog/engine.py +0 -421
  858. synth_ai/environments/examples/verilog/environment.py +0 -350
  859. synth_ai/environments/examples/verilog/taskset.py +0 -420
  860. synth_ai/environments/examples/wordle/__init__.py +0 -29
  861. synth_ai/environments/examples/wordle/engine.py +0 -398
  862. synth_ai/environments/examples/wordle/environment.py +0 -159
  863. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  864. synth_ai/environments/examples/wordle/taskset.py +0 -230
  865. synth_ai/environments/reproducibility/core.py +0 -42
  866. synth_ai/environments/reproducibility/helpers.py +0 -0
  867. synth_ai/environments/reproducibility/tree.py +0 -363
  868. synth_ai/environments/service/app.py +0 -97
  869. synth_ai/environments/service/core_routes.py +0 -1021
  870. synth_ai/environments/service/external_registry.py +0 -56
  871. synth_ai/environments/service/registry.py +0 -9
  872. synth_ai/environments/stateful/__init__.py +0 -1
  873. synth_ai/environments/stateful/core.py +0 -163
  874. synth_ai/environments/stateful/engine.py +0 -21
  875. synth_ai/environments/stateful/state.py +0 -7
  876. synth_ai/environments/tasks/api.py +0 -19
  877. synth_ai/environments/tasks/core.py +0 -81
  878. synth_ai/environments/tasks/filters.py +0 -40
  879. synth_ai/environments/tasks/utils.py +0 -90
  880. synth_ai/environments/v0_observability/history.py +0 -3
  881. synth_ai/environments/v0_observability/log.py +0 -2
  882. synth_ai/evals/__init__.py +0 -15
  883. synth_ai/evals/base.py +0 -13
  884. synth_ai/evals/client.py +0 -82
  885. synth_ai/evals/types.py +0 -42
  886. synth_ai/handshake.py +0 -109
  887. synth_ai/http.py +0 -26
  888. synth_ai/http_client.py +0 -136
  889. synth_ai/inference/__init__.py +0 -5
  890. synth_ai/inference/client.py +0 -34
  891. synth_ai/jobs/client.py +0 -295
  892. synth_ai/judge_schemas.py +0 -127
  893. synth_ai/learning/__init__.py +0 -59
  894. synth_ai/learning/client.py +0 -241
  895. synth_ai/learning/ft_client.py +0 -7
  896. synth_ai/learning/health.py +0 -49
  897. synth_ai/learning/jobs.py +0 -201
  898. synth_ai/learning/rl/__init__.py +0 -39
  899. synth_ai/learning/rl/client.py +0 -267
  900. synth_ai/learning/rl/contracts.py +0 -27
  901. synth_ai/learning/rl/env_keys.py +0 -166
  902. synth_ai/learning/rl/secrets.py +0 -13
  903. synth_ai/learning/sft/client.py +0 -68
  904. synth_ai/learning/sft/config.py +0 -270
  905. synth_ai/learning/sft/data.py +0 -295
  906. synth_ai/learning/validators.py +0 -49
  907. synth_ai/lm/__init__.py +0 -25
  908. synth_ai/task/__init__.py +0 -121
  909. synth_ai/task/apps/__init__.py +0 -129
  910. synth_ai/task/client.py +0 -167
  911. synth_ai/task/config.py +0 -257
  912. synth_ai/task/contracts.py +0 -236
  913. synth_ai/task/datasets.py +0 -108
  914. synth_ai/task/proxy.py +0 -251
  915. synth_ai/task/rubrics/__init__.py +0 -56
  916. synth_ai/task/rubrics/loaders.py +0 -152
  917. synth_ai/task/rubrics/strict.py +0 -149
  918. synth_ai/task/server.py +0 -432
  919. synth_ai/task/trace_correlation_helpers.py +0 -315
  920. synth_ai/task/tracing_utils.py +0 -84
  921. synth_ai/task/validators.py +0 -418
  922. synth_ai/tracing_v3/__init__.py +0 -97
  923. synth_ai/tracing_v3/abstractions.py +0 -302
  924. synth_ai/tracing_v3/config.py +0 -84
  925. synth_ai/tracing_v3/db_config.py +0 -194
  926. synth_ai/tracing_v3/decorators.py +0 -398
  927. synth_ai/tracing_v3/llm_call_record_helpers.py +0 -391
  928. synth_ai/tracing_v3/migration_helper.py +0 -120
  929. synth_ai/tracing_v3/session_tracer.py +0 -540
  930. synth_ai/tracing_v3/storage/base.py +0 -210
  931. synth_ai/tracing_v3/storage/config.py +0 -75
  932. synth_ai/tracing_v3/storage/factory.py +0 -39
  933. synth_ai/tracing_v3/trace_utils.py +0 -317
  934. synth_ai/tracing_v3/turso/daemon.py +0 -151
  935. synth_ai/tracing_v3/turso/models.py +0 -469
  936. synth_ai/tracing_v3/turso/native_manager.py +0 -1209
  937. synth_ai/tracing_v3/utils.py +0 -108
  938. synth_ai/tui/__init__.py +0 -5
  939. synth_ai/tui/__main__.py +0 -13
  940. synth_ai/tui/cli/__init__.py +0 -1
  941. synth_ai/tui/cli/query_experiments.py +0 -164
  942. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  943. synth_ai/tui/dashboard.py +0 -906
  944. synth_ai/v0/api/__init__.py +0 -8
  945. synth_ai/v0/api/models/__init__.py +0 -8
  946. synth_ai/v0/api/models/supported.py +0 -8
  947. synth_ai/v0/config/__init__.py +0 -15
  948. synth_ai/v0/config/base_url.py +0 -12
  949. synth_ai/v0/lm/__init__.py +0 -51
  950. synth_ai/v0/lm/caching/__init__.py +0 -0
  951. synth_ai/v0/lm/caching/constants.py +0 -6
  952. synth_ai/v0/lm/caching/dbs.py +0 -0
  953. synth_ai/v0/lm/caching/ephemeral.py +0 -100
  954. synth_ai/v0/lm/caching/handler.py +0 -137
  955. synth_ai/v0/lm/caching/initialize.py +0 -11
  956. synth_ai/v0/lm/caching/persistent.py +0 -114
  957. synth_ai/v0/lm/config.py +0 -115
  958. synth_ai/v0/lm/constants.py +0 -32
  959. synth_ai/v0/lm/core/__init__.py +0 -8
  960. synth_ai/v0/lm/core/all.py +0 -73
  961. synth_ai/v0/lm/core/exceptions.py +0 -5
  962. synth_ai/v0/lm/core/main.py +0 -331
  963. synth_ai/v0/lm/core/main_v3.py +0 -594
  964. synth_ai/v0/lm/core/synth_models.py +0 -35
  965. synth_ai/v0/lm/core/vendor_clients.py +0 -190
  966. synth_ai/v0/lm/cost/__init__.py +0 -0
  967. synth_ai/v0/lm/cost/monitor.py +0 -1
  968. synth_ai/v0/lm/cost/statefulness.py +0 -1
  969. synth_ai/v0/lm/injection.py +0 -80
  970. synth_ai/v0/lm/overrides.py +0 -206
  971. synth_ai/v0/lm/provider_support/__init__.py +0 -8
  972. synth_ai/v0/lm/provider_support/anthropic.py +0 -972
  973. synth_ai/v0/lm/provider_support/openai.py +0 -1139
  974. synth_ai/v0/lm/provider_support/suppress_logging.py +0 -31
  975. synth_ai/v0/lm/structured_outputs/__init__.py +0 -0
  976. synth_ai/v0/lm/structured_outputs/handler.py +0 -440
  977. synth_ai/v0/lm/structured_outputs/inject.py +0 -297
  978. synth_ai/v0/lm/structured_outputs/rehabilitate.py +0 -185
  979. synth_ai/v0/lm/tools/__init__.py +0 -3
  980. synth_ai/v0/lm/tools/base.py +0 -172
  981. synth_ai/v0/lm/unified_interface.py +0 -202
  982. synth_ai/v0/lm/vendors/__init__.py +0 -0
  983. synth_ai/v0/lm/vendors/base.py +0 -81
  984. synth_ai/v0/lm/vendors/core/__init__.py +0 -0
  985. synth_ai/v0/lm/vendors/core/anthropic_api.py +0 -387
  986. synth_ai/v0/lm/vendors/core/gemini_api.py +0 -292
  987. synth_ai/v0/lm/vendors/core/mistral_api.py +0 -322
  988. synth_ai/v0/lm/vendors/core/openai_api.py +0 -227
  989. synth_ai/v0/lm/vendors/core/synth_dev_api.py +0 -0
  990. synth_ai/v0/lm/vendors/local/__init__.py +0 -0
  991. synth_ai/v0/lm/vendors/local/ollama.py +0 -0
  992. synth_ai/v0/lm/vendors/openai_standard.py +0 -782
  993. synth_ai/v0/lm/vendors/openai_standard_responses.py +0 -259
  994. synth_ai/v0/lm/vendors/retries.py +0 -22
  995. synth_ai/v0/lm/vendors/supported/__init__.py +0 -0
  996. synth_ai/v0/lm/vendors/supported/custom_endpoint.py +0 -415
  997. synth_ai/v0/lm/vendors/supported/deepseek.py +0 -69
  998. synth_ai/v0/lm/vendors/supported/grok.py +0 -75
  999. synth_ai/v0/lm/vendors/supported/groq.py +0 -16
  1000. synth_ai/v0/lm/vendors/supported/ollama.py +0 -15
  1001. synth_ai/v0/lm/vendors/supported/openrouter.py +0 -74
  1002. synth_ai/v0/lm/vendors/supported/together.py +0 -11
  1003. synth_ai/v0/lm/vendors/synth_client.py +0 -835
  1004. synth_ai/v0/lm/warmup.py +0 -186
  1005. synth_ai/v0/tracing/__init__.py +0 -0
  1006. synth_ai/v0/tracing/abstractions.py +0 -224
  1007. synth_ai/v0/tracing/base_client.py +0 -91
  1008. synth_ai/v0/tracing/client_manager.py +0 -131
  1009. synth_ai/v0/tracing/config.py +0 -142
  1010. synth_ai/v0/tracing/context.py +0 -146
  1011. synth_ai/v0/tracing/decorators.py +0 -682
  1012. synth_ai/v0/tracing/events/__init__.py +0 -0
  1013. synth_ai/v0/tracing/events/manage.py +0 -147
  1014. synth_ai/v0/tracing/events/scope.py +0 -86
  1015. synth_ai/v0/tracing/events/store.py +0 -228
  1016. synth_ai/v0/tracing/immediate_client.py +0 -151
  1017. synth_ai/v0/tracing/local.py +0 -18
  1018. synth_ai/v0/tracing/log_client_base.py +0 -73
  1019. synth_ai/v0/tracing/retry_queue.py +0 -186
  1020. synth_ai/v0/tracing/trackers.py +0 -515
  1021. synth_ai/v0/tracing/upload.py +0 -409
  1022. synth_ai/v0/tracing/utils.py +0 -9
  1023. synth_ai/v0/tracing_v1/__init__.py +0 -16
  1024. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  1025. synth_ai/v0/tracing_v1/base_client.py +0 -91
  1026. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  1027. synth_ai/v0/tracing_v1/config.py +0 -142
  1028. synth_ai/v0/tracing_v1/context.py +0 -146
  1029. synth_ai/v0/tracing_v1/decorators.py +0 -703
  1030. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  1031. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  1032. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  1033. synth_ai/v0/tracing_v1/events/store.py +0 -228
  1034. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  1035. synth_ai/v0/tracing_v1/local.py +0 -18
  1036. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  1037. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  1038. synth_ai/v0/tracing_v1/trackers.py +0 -515
  1039. synth_ai/v0/tracing_v1/upload.py +0 -527
  1040. synth_ai/v0/tracing_v1/utils.py +0 -9
  1041. synth_ai/v0/tracing_v3/__init__.py +0 -10
  1042. synth_ai/v0/tracing_v3/abstractions.py +0 -3
  1043. synth_ai/v0/tracing_v3/decorators.py +0 -3
  1044. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +0 -3
  1045. synth_ai/v0/tracing_v3/session_tracer.py +0 -3
  1046. synth_ai-0.2.14.dist-info/METADATA +0 -139
  1047. synth_ai-0.2.14.dist-info/RECORD +0 -762
  1048. synth_ai-0.2.14.dist-info/top_level.txt +0 -2
  1049. /synth_ai/{demos/demo_task_apps → cli/demo_apps}/crafter/__init__.py +0 -0
  1050. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/__init__.py +0 -0
  1051. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/crafter_fft_4b.toml +0 -0
  1052. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +0 -0
  1053. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  1054. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +0 -0
  1055. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +0 -0
  1056. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +0 -0
  1057. {examples/task_apps → synth_ai/core/apps}/__init__.py +0 -0
  1058. /synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +0 -0
  1059. /synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +0 -0
  1060. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  1061. /synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +0 -0
  1062. /synth_ai/{tracing_v3 → core/tracing_v3}/serialization.py +0 -0
  1063. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  1064. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  1065. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  1066. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +0 -0
  1067. /synth_ai/{tracing_v3 → core/tracing_v3}/turso/__init__.py +0 -0
  1068. /synth_ai/{learning → sdk/learning}/algorithms.py +0 -0
  1069. /synth_ai/{learning → sdk/learning}/config.py +0 -0
  1070. /synth_ai/{learning → sdk/learning}/constants.py +0 -0
  1071. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  1072. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  1073. /synth_ai/{learning → sdk/learning}/rl/config.py +0 -0
  1074. /synth_ai/{learning → sdk/learning}/rl_client.py +0 -0
  1075. /synth_ai/{learning → sdk/learning}/sft/__init__.py +0 -0
  1076. /synth_ai/{learning → sdk/learning}/sse.py +0 -0
  1077. /synth_ai/{task → sdk/task}/auth.py +0 -0
  1078. /synth_ai/{task → sdk/task}/errors.py +0 -0
  1079. /synth_ai/{task → sdk/task}/health.py +0 -0
  1080. /synth_ai/{task → sdk/task}/json.py +0 -0
  1081. /synth_ai/{task → sdk/task}/rubrics/models.py +0 -0
  1082. /synth_ai/{task → sdk/task}/rubrics/scoring.py +0 -0
  1083. /synth_ai/{task → sdk/task}/vendors.py +0 -0
  1084. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  1085. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  1086. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
@@ -1,1164 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import asyncio
4
- import contextlib
5
- import json
6
- import logging
7
- import os
8
- import shlex
9
- import shutil
10
- import subprocess
11
- import threading
12
- import time
13
- import uuid
14
- from dataclasses import dataclass, field
15
- from pathlib import Path
16
- from typing import Any
17
-
18
- from minisweagent.environments import get_environment
19
- from synth_ai.environments.environment.tools import EnvToolCall
20
-
21
- from .shared import summarise_history
22
- from .tools import TOOLS_SCHEMA
23
-
24
- logger = logging.getLogger(__name__)
25
-
26
-
27
- def _environment_type_from_config(config: dict[str, Any]) -> str:
28
- value = (config or {}).get("environment_class") or os.getenv(
29
- "SWE_MINI_ENVIRONMENT_CLASS", "local"
30
- )
31
- return str(value).strip() or "local"
32
-
33
-
34
- def _environment_kwargs_from_config(config: dict[str, Any]) -> dict[str, Any]:
35
- kwargs = dict(config or {}).get("environment_kwargs") or {}
36
- if not kwargs and (raw := os.getenv("SWE_MINI_ENVIRONMENT_KWARGS")):
37
- try:
38
- kwargs = json.loads(raw)
39
- except Exception: # pragma: no cover - environment var malformed
40
- logger.warning("Failed to parse SWE_MINI_ENVIRONMENT_KWARGS; ignoring")
41
- kwargs = {}
42
- if not isinstance(kwargs, dict):
43
- logger.warning("environment_kwargs must be a mapping, got %r", type(kwargs))
44
- kwargs = {}
45
- return kwargs
46
-
47
-
48
- def _default_submit_command() -> str:
49
- return "echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached"
50
-
51
-
52
- @dataclass
53
- class MiniSweEnvironmentState:
54
- """Serializable environment state used for snapshots."""
55
-
56
- task: dict[str, Any]
57
- history: list[dict[str, Any]] = field(default_factory=list)
58
- step_idx: int = 0
59
- submitted: bool = False
60
- submission_success: bool | None = None
61
-
62
-
63
- class MiniSweEnvironmentWrapper:
64
- """Wrapper around mini-swe-agent environments exposing Synth task-app semantics."""
65
-
66
- name = "swe-mini"
67
-
68
- def __init__(
69
- self,
70
- *,
71
- task: dict[str, Any],
72
- env_config: dict[str, Any] | None = None,
73
- submit_command: str | None = None,
74
- ) -> None:
75
- self.task = dict(task)
76
- self.env_config = dict(env_config or {})
77
- self.submit_command = submit_command or _default_submit_command()
78
- self.environment_type = _environment_type_from_config(self.env_config)
79
- kwargs = _environment_kwargs_from_config(self.env_config)
80
-
81
- self.instance_id = str(
82
- self.task.get("instance_id") or f"swe-mini-{uuid.uuid4().hex[:8]}"
83
- )
84
- self.metadata = dict(self.task.get("metadata") or {})
85
- self.repo_url = self._resolve_repo_url(self.metadata)
86
- self.base_commit = (
87
- self.metadata.get("base_commit")
88
- or self.metadata.get("environment_setup_commit")
89
- or None
90
- )
91
- self._local_workspace_dir: Path | None = None
92
- self._remote_workspace: str | None = None
93
- self._cleanup_workspace = False
94
-
95
- if self.environment_type == "local":
96
- workspace = self._prepare_local_workspace(kwargs)
97
- kwargs.setdefault("cwd", str(workspace))
98
- kwargs.setdefault("timeout", int(self.env_config.get("timeout", 60)))
99
- # Merge custom env vars with defaults expected by mini-swe
100
- merged_env = dict(kwargs.get("env") or {})
101
- merged_env.setdefault("PAGER", "cat")
102
- merged_env.setdefault("MANPAGER", "cat")
103
- merged_env.setdefault("LESS", "-R")
104
- merged_env.setdefault("PIP_PROGRESS_BAR", "off")
105
- merged_env.setdefault("TQDM_DISABLE", "1")
106
- merged_env.setdefault("GIT_TERMINAL_PROMPT", "0")
107
- kwargs["env"] = merged_env
108
- self._local_workspace_dir = workspace
109
- self._cleanup_workspace = True
110
- else:
111
- remote_cwd = kwargs.get("cwd")
112
- if not remote_cwd:
113
- base_remote = os.getenv("SWE_MINI_REMOTE_WORKSPACE_BASE", "/workspace")
114
- remote_cwd = f"{base_remote.rstrip('/')}/{self.instance_id}"
115
- kwargs["cwd"] = remote_cwd
116
- self._remote_workspace = kwargs["cwd"]
117
- timeout = self.env_config.get("timeout")
118
- if timeout and "timeout" not in kwargs:
119
- kwargs["timeout"] = int(timeout)
120
- if self.repo_url and "image" not in kwargs:
121
- image = self.metadata.get("image_name") or os.getenv("SWE_MINI_DOCKER_IMAGE")
122
- if image:
123
- kwargs["image"] = image
124
- if self.environment_type in {"docker", "bubblewrap"}:
125
- remote_env = dict(kwargs.get("env") or {})
126
- remote_env.setdefault("GIT_TERMINAL_PROMPT", "0")
127
- kwargs["env"] = remote_env
128
-
129
- logger.info(
130
- "Initialising mini-swe environment: type=%s kwargs=%s",
131
- self.environment_type,
132
- kwargs,
133
- )
134
- self.env = get_environment(
135
- {
136
- "environment_class": self.environment_type,
137
- **kwargs,
138
- },
139
- default_type="local",
140
- )
141
-
142
- if self.environment_type != "local":
143
- self._bootstrap_remote_workspace()
144
-
145
- self.state = MiniSweEnvironmentState(task=self.task)
146
- self.last_result: dict[str, Any] | None = None
147
- self.last_submission: dict[str, Any] | None = None
148
-
149
- async def initialize(self) -> dict[str, Any]:
150
- """Return initial observation."""
151
- logger.info(
152
- "Mini-swe task initialised: instance=%s",
153
- self.task.get("instance_id"),
154
- )
155
- return self._build_response(observation=self._build_observation(None), step_idx=0)
156
-
157
- async def terminate(self) -> dict[str, Any]:
158
- """Terminate the environment, returning the final observation."""
159
- logger.info(
160
- "Terminating mini-swe environment instance=%s submitted=%s",
161
- self.task.get("instance_id"),
162
- self.state.submitted,
163
- )
164
- response = self._build_response(
165
- observation=self._build_observation(self.last_result),
166
- step_idx=self.state.step_idx,
167
- )
168
- self._cleanup_workspaces()
169
- return response
170
-
171
- def _cleanup_workspaces(self) -> None:
172
- if self._cleanup_workspace and self._local_workspace_dir:
173
- with contextlib.suppress(Exception):
174
- shutil.rmtree(self._local_workspace_dir)
175
- self._local_workspace_dir = None
176
- self._cleanup_workspace = False
177
- if (
178
- self._remote_workspace
179
- and os.getenv("SWE_MINI_CLEANUP_REMOTE_WORKSPACE", "1") not in {"0", "false", "False"}
180
- ):
181
- with contextlib.suppress(Exception):
182
- self.env.execute(f"rm -rf {shlex.quote(self._remote_workspace)}")
183
- self._remote_workspace = None
184
-
185
- def _resolve_repo_url(self, metadata: dict[str, Any]) -> str | None:
186
- candidates = [
187
- metadata.get("repo_url"),
188
- metadata.get("repo"),
189
- metadata.get("repository"),
190
- ]
191
- for value in candidates:
192
- if not value:
193
- continue
194
- repo = str(value).strip()
195
- if not repo:
196
- continue
197
- if repo.startswith("http://") or repo.startswith("https://"):
198
- url = repo
199
- else:
200
- repo = repo.removesuffix(".git")
201
- url = f"https://github.com/{repo}.git"
202
- if not url.endswith(".git"):
203
- url = f"{url}.git"
204
- return url
205
- return None
206
-
207
- def _prepare_local_workspace(self, kwargs: dict[str, Any]) -> Path:
208
- if not self.repo_url:
209
- fallback = Path(kwargs.get("cwd") or self.env_config.get("cwd") or os.getcwd())
210
- fallback.mkdir(parents=True, exist_ok=True)
211
- logger.warning(
212
- "No repo URL provided for swe-mini instance %s; using cwd=%s",
213
- self.instance_id,
214
- fallback,
215
- )
216
- return fallback
217
-
218
- root = Path(
219
- os.getenv("SWE_MINI_LOCAL_WORKSPACE_ROOT")
220
- or Path.home() / ".cache" / "synth-ai" / "swe-mini" / "workspaces"
221
- )
222
- workspace = root / self.instance_id
223
- if workspace.exists():
224
- shutil.rmtree(workspace, ignore_errors=True)
225
- workspace.parent.mkdir(parents=True, exist_ok=True)
226
-
227
- self._run_local_cmd(
228
- [
229
- "git",
230
- "clone",
231
- "--filter=blob:none",
232
- "--no-tags",
233
- self.repo_url,
234
- str(workspace),
235
- ],
236
- description="clone repository",
237
- )
238
- if self.base_commit:
239
- self._run_local_cmd(
240
- ["git", "-C", str(workspace), "checkout", self.base_commit],
241
- description="checkout base commit",
242
- )
243
- self._run_local_cmd(
244
- ["git", "-C", str(workspace), "reset", "--hard"],
245
- description="reset working tree",
246
- )
247
- self._run_local_cmd(
248
- ["git", "-C", str(workspace), "clean", "-ffd"],
249
- description="clean working tree",
250
- )
251
- logger.info(
252
- "Prepared local workspace for %s at %s (repo=%s, commit=%s)",
253
- self.instance_id,
254
- workspace,
255
- self.repo_url,
256
- self.base_commit,
257
- )
258
- return workspace
259
-
260
- def _bootstrap_remote_workspace(self) -> None:
261
- if not self.repo_url or not self._remote_workspace:
262
- logger.warning(
263
- "Skipping remote workspace bootstrap for instance %s (repo=%s workspace=%s)",
264
- self.instance_id,
265
- self.repo_url,
266
- self._remote_workspace,
267
- )
268
- return
269
-
270
- workspace = self._remote_workspace.rstrip("/")
271
- base_dir = os.path.dirname(workspace) or "/"
272
- self._execute_bootstrap_command(f"mkdir -p {shlex.quote(base_dir)}")
273
- self._execute_bootstrap_command(f"rm -rf {shlex.quote(workspace)}")
274
- clone_cmd = (
275
- f"git clone --filter=blob:none --no-tags {shlex.quote(self.repo_url)} {shlex.quote(workspace)}"
276
- )
277
- self._execute_bootstrap_command(clone_cmd, timeout=900, description="clone repository")
278
- if self.base_commit:
279
- checkout_cmd = (
280
- f"cd {shlex.quote(workspace)} && git checkout {shlex.quote(self.base_commit)}"
281
- )
282
- self._execute_bootstrap_command(checkout_cmd, timeout=300, description="checkout commit")
283
- self._execute_bootstrap_command(
284
- f"cd {shlex.quote(workspace)} && git reset --hard",
285
- description="reset working tree",
286
- )
287
- self._execute_bootstrap_command(
288
- f"cd {shlex.quote(workspace)} && git clean -ffd",
289
- description="clean working tree",
290
- )
291
- logger.info(
292
- "Prepared remote workspace for %s at %s (repo=%s, commit=%s)",
293
- self.instance_id,
294
- workspace,
295
- self.repo_url,
296
- self.base_commit,
297
- )
298
-
299
- def _run_local_cmd(
300
- self, args: list[str], *, cwd: Path | None = None, description: str | None = None
301
- ) -> None:
302
- logger.debug(
303
- "Preparing workspace %s: running local command %s",
304
- self.instance_id,
305
- " ".join(args),
306
- )
307
- proc = subprocess.run(
308
- args,
309
- cwd=str(cwd) if cwd else None,
310
- text=True,
311
- capture_output=True,
312
- )
313
- if proc.returncode != 0:
314
- desc = description or "command"
315
- raise RuntimeError(
316
- f"Failed to {desc} (cmd={' '.join(args)}): {proc.stdout or ''}{proc.stderr or ''}"
317
- )
318
-
319
- def _execute_bootstrap_command(
320
- self, command: str, *, timeout: int | None = None, description: str | None = None
321
- ) -> None:
322
- logger.debug(
323
- "Preparing workspace %s: running remote command %s",
324
- self.instance_id,
325
- command,
326
- )
327
- result = self.env.execute(command, timeout=timeout)
328
- if result.get("returncode"):
329
- desc = description or command
330
- raise RuntimeError(
331
- f"Failed to {desc}: rc={result.get('returncode')} output={result.get('output')}"
332
- )
333
-
334
- def _normalize_tool_call(self, tool_call: EnvToolCall | dict[str, Any]) -> EnvToolCall:
335
- if isinstance(tool_call, EnvToolCall):
336
- return tool_call
337
- tool = tool_call.get("tool") or tool_call.get("tool_name")
338
- if not tool:
339
- raise ValueError(f"Tool call missing tool name: {tool_call}")
340
- args = tool_call.get("args") or tool_call.get("arguments") or {}
341
- if isinstance(args, str):
342
- try:
343
- args = json.loads(args)
344
- except Exception:
345
- args = {}
346
- return EnvToolCall(tool=str(tool), args=dict(args))
347
-
348
- async def step(self, tool_calls: list[EnvToolCall] | list[dict[str, Any]]) -> dict[str, Any]:
349
- """Execute run_command or submit_patch tool calls."""
350
- if not tool_calls:
351
- raise ValueError("MiniSweEnvironmentWrapper.step requires at least one tool call")
352
-
353
- responses: list[dict[str, Any]] = []
354
- for raw_call in tool_calls:
355
- call = self._normalize_tool_call(raw_call)
356
- tool = call.tool
357
- if tool == "run_command":
358
- responses.append(self._run_command(call))
359
- elif tool == "submit_patch":
360
- responses.append(self._submit(call))
361
- else:
362
- raise ValueError(f"Unsupported tool '{tool}' for swe-mini environment")
363
-
364
- last_result = responses[-1] if responses else None
365
- self.last_result = last_result
366
- observation = self._build_observation(last_result)
367
- done = bool(self.state.submitted)
368
- reward = 0.0
369
- if done:
370
- reward = 1.0 if self.state.submission_success else 0.0
371
- return self._build_response(
372
- observation=observation,
373
- step_idx=self.state.step_idx,
374
- done=done,
375
- reward=reward,
376
- info={"responses": responses},
377
- )
378
-
379
- def _run_command(self, call: EnvToolCall) -> dict[str, Any]:
380
- command = str(call.args.get("command") or "").strip()
381
- if not command:
382
- raise ValueError("run_command requires a non-empty 'command' argument")
383
- timeout = call.args.get("timeout")
384
- timeout = int(timeout) if timeout is not None else None
385
-
386
- started_at = time.time()
387
- result = self.env.execute(command, timeout=timeout)
388
- duration = time.time() - started_at
389
-
390
- record = {
391
- "command": command,
392
- "returncode": result.get("returncode"),
393
- "stdout": result.get("output") or "",
394
- "duration": duration,
395
- "timestamp": started_at,
396
- }
397
- self.state.history.append(record)
398
- self.state.step_idx += 1
399
- logger.info(
400
- "Executed command step=%s rc=%s",
401
- self.state.step_idx,
402
- record["returncode"],
403
- )
404
- return record
405
-
406
- def _submit(self, call: EnvToolCall) -> dict[str, Any]:
407
- if self.state.submitted:
408
- logger.info("Submit called again; ignoring additional submission.")
409
- return {
410
- "submitted": True,
411
- "command": None,
412
- "returncode": 0,
413
- "stdout": "",
414
- "submission_success": self.state.submission_success,
415
- "evaluation": self.last_submission,
416
- }
417
- command = str(call.args.get("command") or self.submit_command)
418
- result = self.env.execute(command)
419
- record = {
420
- "command": command,
421
- "returncode": result.get("returncode"),
422
- "stdout": result.get("output") or "",
423
- "duration": 0.0,
424
- "timestamp": time.time(),
425
- }
426
- self.state.history.append(record)
427
- self.state.step_idx += 1
428
- diff = self._extract_submission_diff(record["stdout"])
429
-
430
- evaluation: dict[str, Any] | None = None
431
- submission_success = False
432
- if record["returncode"] == 0 and diff is not None:
433
- evaluation = self._evaluate_submission(diff)
434
- submission_success = bool(evaluation.get("resolved")) if evaluation else False
435
- else:
436
- evaluation = {
437
- "completed": False,
438
- "resolved": False,
439
- "error": "submit command failed or diff unavailable",
440
- "returncode": record["returncode"],
441
- }
442
-
443
- self.state.submitted = True
444
- self.state.submission_success = submission_success
445
- self.last_submission = evaluation
446
-
447
- logger.info(
448
- "Submission command executed rc=%s resolved=%s",
449
- record["returncode"],
450
- submission_success,
451
- )
452
-
453
- return {
454
- **record,
455
- "submitted": True,
456
- "submission_success": submission_success,
457
- "diff": diff,
458
- "evaluation": evaluation,
459
- }
460
-
461
- def _extract_submission_diff(self, stdout: str) -> str | None:
462
- if stdout is None:
463
- return None
464
- lines = stdout.splitlines()
465
- if not lines:
466
- return ""
467
- first = lines[0].strip()
468
- sentinel = "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT"
469
- if first.startswith(sentinel):
470
- lines = lines[1:]
471
- diff = "\n".join(lines).strip("\n")
472
- return diff
473
-
474
- def _evaluate_submission(self, diff: str) -> dict[str, Any]:
475
- metadata = dict(self.task.get("metadata") or {})
476
- instance = dict(metadata.get("raw_instance") or {})
477
- instance_id = instance.setdefault("instance_id", self.task.get("instance_id"))
478
-
479
- required_fields = ["repo", "base_commit", "test_patch", "version"]
480
- missing = [field for field in required_fields if not instance.get(field)]
481
- if missing:
482
- msg = (
483
- "Cannot run SWE-bench evaluation; task metadata missing required fields "
484
- f"{missing}. Ensure the dataset preserves full SWE-bench records."
485
- )
486
- logger.error(msg)
487
- return {"completed": False, "resolved": False, "error": msg}
488
-
489
- try:
490
- from swebench.harness.constants import (
491
- KEY_INSTANCE_ID,
492
- KEY_MODEL,
493
- KEY_PREDICTION,
494
- )
495
- except Exception as exc: # pragma: no cover - dependency missing
496
- msg = (
497
- "SWE-bench harness is required for official scoring. "
498
- "Install swebench with evaluation extras."
499
- )
500
- logger.exception("Failed to import swebench harness constants: %s", exc)
501
- return {"completed": False, "resolved": False, "error": f"{msg} ({exc})"}
502
-
503
- backend = self._resolve_evaluation_backend(metadata)
504
-
505
- image_name = str(metadata.get("image_name") or "")
506
- namespace = metadata.get("namespace") or self._namespace_from_image(image_name) or "swebench"
507
- instance_image_tag = metadata.get("instance_image_tag") or self._image_tag_from_name(image_name) or "latest"
508
- env_image_tag = metadata.get("env_image_tag") or "latest"
509
-
510
- model_name = metadata.get("submission_model_name") or metadata.get("model_name") or "synth-ai-agent"
511
- run_id = f"swe_mini_eval_{uuid.uuid4().hex[:12]}"
512
- eval_timeout = self._resolve_eval_timeout(metadata)
513
- rm_image = self._to_bool(metadata.get("eval_rm_image") or os.getenv("SWE_MINI_EVAL_RM_IMAGE", "false"))
514
- force_rebuild = self._to_bool(metadata.get("eval_force_rebuild") or os.getenv("SWE_MINI_EVAL_FORCE_REBUILD", "false"))
515
-
516
- prediction = {
517
- KEY_INSTANCE_ID: instance_id,
518
- KEY_MODEL: model_name,
519
- KEY_PREDICTION: diff or "",
520
- }
521
-
522
- # Ensure log root exists so downstream collection succeeds.
523
- with contextlib.suppress(Exception):
524
- from swebench.harness.constants import RUN_EVALUATION_LOG_DIR
525
-
526
- Path(RUN_EVALUATION_LOG_DIR).mkdir(parents=True, exist_ok=True)
527
-
528
- if backend == "modal_harness":
529
- evaluation_payload = self._run_modal_harness(
530
- instance=instance,
531
- prediction=prediction,
532
- run_id=run_id,
533
- eval_timeout=eval_timeout,
534
- model_name=model_name,
535
- )
536
- elif backend == "swe_rex":
537
- evaluation_payload = self._run_swe_rex(
538
- instance=instance,
539
- prediction=prediction,
540
- run_id=run_id,
541
- eval_timeout=eval_timeout,
542
- namespace=namespace,
543
- instance_image_tag=instance_image_tag,
544
- env_image_tag=env_image_tag,
545
- model_name=model_name,
546
- )
547
- else:
548
- evaluation_payload = self._run_local_harness(
549
- instance=instance,
550
- prediction=prediction,
551
- run_id=run_id,
552
- eval_timeout=eval_timeout,
553
- namespace=namespace,
554
- instance_image_tag=instance_image_tag,
555
- env_image_tag=env_image_tag,
556
- rm_image=rm_image,
557
- force_rebuild=force_rebuild,
558
- model_name=model_name,
559
- )
560
-
561
- evaluation_payload = dict(evaluation_payload or {})
562
- evaluation_payload.setdefault("backend", backend)
563
- evaluation_payload.setdefault("run_id", run_id)
564
- evaluation_payload.setdefault("model_name", model_name)
565
- evaluation_payload.setdefault("instance_id", instance_id)
566
-
567
- artifacts = self._collect_evaluation_artifacts(
568
- run_id=run_id,
569
- model_name=model_name,
570
- instance_id=instance_id,
571
- )
572
- # Merge artifact data without clobbering explicit error/resolution flags.
573
- merged = {**artifacts, **evaluation_payload}
574
- if artifacts.get("completed"):
575
- merged["completed"] = True
576
- else:
577
- merged.setdefault("completed", False)
578
- if artifacts.get("resolved"):
579
- merged["resolved"] = True
580
- else:
581
- merged.setdefault("resolved", False)
582
- merged.setdefault("log_dir", artifacts.get("log_dir"))
583
- merged.setdefault("report_path", artifacts.get("report_path"))
584
- merged.setdefault("test_output_path", artifacts.get("test_output_path"))
585
- if artifacts.get("report") and not merged.get("report"):
586
- merged["report"] = artifacts["report"]
587
- if artifacts.get("error") and not merged.get("error"):
588
- merged["error"] = artifacts["error"]
589
- return merged
590
-
591
- def _resolve_evaluation_backend(self, metadata: dict[str, Any]) -> str:
592
- raw = (
593
- metadata.get("evaluation_backend")
594
- or self.env_config.get("evaluation_backend")
595
- or os.getenv("SWE_MINI_EVALUATION_BACKEND")
596
- or "local"
597
- )
598
- backend = str(raw).strip().lower()
599
- mapping = {
600
- "": "local",
601
- "local": "local",
602
- "docker": "local",
603
- "modal": "modal_harness",
604
- "modal_harness": "modal_harness",
605
- "modal-harness": "modal_harness",
606
- "modal-harnesses": "modal_harness",
607
- "swe_rex": "swe_rex",
608
- "swe-rex": "swe_rex",
609
- "swerex": "swe_rex",
610
- }
611
- return mapping.get(backend, "local")
612
-
613
- def _resolve_eval_timeout(self, metadata: dict[str, Any]) -> int:
614
- raw = (
615
- metadata.get("evaluation_timeout")
616
- or self.env_config.get("evaluation_timeout")
617
- or os.getenv("SWE_MINI_EVALUATION_TIMEOUT")
618
- or 3600
619
- )
620
- try:
621
- value = int(raw)
622
- except (TypeError, ValueError):
623
- return 3600
624
- return max(1, value)
625
-
626
- def _run_local_harness(
627
- self,
628
- *,
629
- instance: dict[str, Any],
630
- prediction: dict[str, Any],
631
- run_id: str,
632
- eval_timeout: int,
633
- namespace: str,
634
- instance_image_tag: str,
635
- env_image_tag: str,
636
- rm_image: bool,
637
- force_rebuild: bool,
638
- model_name: str,
639
- ) -> dict[str, Any]:
640
- try:
641
- from swebench.harness.run_evaluation import run_instance
642
- from swebench.harness.test_spec.test_spec import make_test_spec
643
- except Exception as exc: # pragma: no cover - dependency missing
644
- msg = (
645
- "SWE-bench harness is required for official scoring. "
646
- "Install swebench with evaluation extras."
647
- )
648
- logger.exception("Failed to import swebench harness: %s", exc)
649
- return {"completed": False, "resolved": False, "error": f"{msg} ({exc})", "backend": "local"}
650
-
651
- try:
652
- import docker
653
- except Exception as exc: # pragma: no cover - dependency missing
654
- msg = "Docker SDK for Python is required to run local SWE-bench evaluation."
655
- logger.exception("Failed to import docker SDK: %s", exc)
656
- return {"completed": False, "resolved": False, "error": f"{msg} ({exc})", "backend": "local"}
657
-
658
- instance_id = str(instance["instance_id"])
659
- try:
660
- test_spec = make_test_spec(
661
- instance,
662
- namespace=namespace,
663
- instance_image_tag=instance_image_tag,
664
- env_image_tag=env_image_tag,
665
- )
666
- except Exception as exc:
667
- logger.exception("Failed to build SWE-bench test spec for %s: %s", instance_id, exc)
668
- return {"completed": False, "resolved": False, "error": f"Failed to build test spec: {exc}", "backend": "local"}
669
-
670
- client = None
671
- result: dict[str, Any] = {}
672
- try:
673
- client = docker.from_env()
674
- result = run_instance(
675
- test_spec,
676
- prediction,
677
- rm_image,
678
- force_rebuild,
679
- client,
680
- run_id,
681
- int(eval_timeout),
682
- rewrite_reports=False,
683
- )
684
- except Exception as exc:
685
- logger.exception("Error while running SWE-bench evaluation for %s: %s", instance_id, exc)
686
- return {"completed": False, "resolved": False, "error": f"Evaluation failed: {exc}", "backend": "local"}
687
- finally:
688
- with contextlib.suppress(Exception):
689
- if client is not None:
690
- client.close()
691
-
692
- payload = {
693
- "completed": bool(result.get("completed")),
694
- "resolved": bool(result.get("resolved")),
695
- "backend": "local",
696
- }
697
- return payload
698
-
699
- def _run_modal_harness(
700
- self,
701
- *,
702
- instance: dict[str, Any],
703
- prediction: dict[str, Any],
704
- run_id: str,
705
- eval_timeout: int,
706
- model_name: str,
707
- ) -> dict[str, Any]:
708
- try:
709
- from swebench.harness.modal_eval import run_instances_modal
710
- except Exception as exc: # pragma: no cover - dependency missing
711
- msg = (
712
- "SWE-bench modal extras are required for the modal_harness backend. "
713
- "Install swebench[modal] inside the Modal deployment."
714
- )
715
- logger.exception("Failed to import swebench modal harness: %s", exc)
716
- return {"completed": False, "resolved": False, "error": f"{msg} ({exc})", "backend": "modal_harness"}
717
-
718
- instance_id = str(instance["instance_id"])
719
- predictions = {instance_id: dict(prediction)}
720
- dataset = [instance]
721
- try:
722
- run_instances_modal(
723
- predictions,
724
- dataset,
725
- dataset,
726
- run_id,
727
- int(eval_timeout),
728
- )
729
- except Exception as exc:
730
- logger.exception("Modal SWE-bench evaluation failed for %s: %s", instance_id, exc)
731
- return {"completed": False, "resolved": False, "error": f"Modal evaluation failed: {exc}", "backend": "modal_harness"}
732
-
733
- # run_instances_modal writes reports to RUN_EVALUATION_LOG_DIR; we rely on artifact collection.
734
- return {"backend": "modal_harness"}
735
-
736
def _run_swe_rex(
    self,
    *,
    instance: dict[str, Any],
    prediction: dict[str, Any],
    run_id: str,
    eval_timeout: int,
    namespace: str,
    instance_image_tag: str,
    env_image_tag: str,
    model_name: str,
) -> dict[str, Any]:
    """Evaluate one prediction inside a SWE-ReX Modal deployment.

    Each ``swe_rex_*`` setting is resolved in priority order: the instance
    row, then ``self.env_config``, then a process environment variable, then
    a built-in default. The resolved settings are turned into a
    ``ModalDeploymentConfig`` and handed to ``_run_swe_rex_async``, which is
    driven to completion synchronously.

    Args:
        instance: Dataset row (may carry per-instance ``swe_rex_*`` overrides).
        prediction: Prediction mapping forwarded to the async flow.
        run_id: Evaluation run identifier.
        eval_timeout: Per-evaluation timeout forwarded to the async flow.
        namespace: Forwarded to the async flow.
        instance_image_tag: Forwarded to the async flow.
        env_image_tag: Forwarded to the async flow.
        model_name: Forwarded to the async flow.

    Returns:
        The dict produced by ``_run_swe_rex_async``, or a failure dict with
        ``completed``/``resolved`` False and an ``error`` message when swe-rex
        cannot be imported or the remote run raises.
    """
    try:
        from swerex.deployment.config import ModalDeploymentConfig
        from swerex.runtime.abstract import Command, ReadFileRequest, WriteFileRequest
    except ModuleNotFoundError as exc:  # pragma: no cover - optional dependency
        msg = (
            "SWE-ReX backend requires the swe-rex package. "
            "Install swe-rex (pip install swe-rex[modal]) to enable this backend."
        )
        logger.exception("Failed to import swe-rex: %s", exc)
        return {"completed": False, "resolved": False, "error": f"{msg} ({exc})", "backend": "swe_rex"}
    except Exception as exc:  # pragma: no cover - defensive
        logger.exception("Unexpected swe-rex import failure: %s", exc)
        return {"completed": False, "resolved": False, "error": f"swe-rex import failed: {exc}", "backend": "swe_rex"}

    def _setting(key: str, env_var: str, default: Any = None) -> Any:
        # First truthy value wins; empty strings and None fall through, so an
        # empty environment variable no longer masks the built-in default.
        return (
            instance.get(key)
            or self.env_config.get(key)
            or os.getenv(env_var)
            or default
        )

    image_spec = _setting(
        "swe_rex_image",
        "SWE_REX_MODAL_IMAGE",
        "ghcr.io/swe-agent/swe-rex-modal:latest",
    )

    # A boolean flag cannot use truthiness fallthrough: an explicit False or 0
    # must be honoured instead of being skipped in favour of the "true"
    # default. Only None and "" count as "not configured".
    install_pipx_raw: Any = "true"
    for candidate in (
        instance.get("swe_rex_install_pipx"),
        self.env_config.get("swe_rex_install_pipx"),
        os.getenv("SWE_REX_INSTALL_PIPX"),
    ):
        if candidate is not None and candidate != "":
            install_pipx_raw = candidate
            break
    install_pipx = self._to_bool(install_pipx_raw)

    modal_kwargs_raw = _setting("swe_rex_modal_kwargs", "SWE_REX_MODAL_SANDBOX_KWARGS")
    modal_kwargs: dict[str, Any] = {}
    if isinstance(modal_kwargs_raw, (dict, list)):
        try:
            modal_kwargs = dict(modal_kwargs_raw)
        except (TypeError, ValueError) as exc:
            # A list that is not a sequence of key/value pairs is ignored
            # rather than aborting the whole evaluation.
            logger.warning("Ignoring malformed swe_rex_modal_kwargs %r: %s", modal_kwargs_raw, exc)
    elif isinstance(modal_kwargs_raw, str) and modal_kwargs_raw.strip():
        try:
            modal_kwargs = dict(json.loads(modal_kwargs_raw))
        except Exception as exc:  # pragma: no cover - user input parsing
            logger.warning("Failed to parse SWE_REX_MODAL_SANDBOX_KWARGS=%s: %s", modal_kwargs_raw, exc)

    deployment_config = ModalDeploymentConfig(
        image=image_spec,
        runtime_timeout=float(
            _setting("swe_rex_runtime_timeout", "SWE_REX_RUNTIME_TIMEOUT", 900)
        ),
        deployment_timeout=float(
            _setting("swe_rex_deployment_timeout", "SWE_REX_DEPLOYMENT_TIMEOUT", 3600)
        ),
        modal_sandbox_kwargs=modal_kwargs,
        install_pipx=bool(install_pipx),
    )

    remote_root = _setting("swe_rex_workdir", "SWE_REX_REMOTE_WORKDIR", "/root/swebench_eval")
    remote_root = str(remote_root).rstrip("/")
    dataset_remote_path = f"{remote_root}/dataset.json"
    predictions_remote_path = f"{remote_root}/predictions.json"

    environment_forward_raw = _setting("swe_rex_forward_env", "SWE_REX_FORWARD_ENV")
    forward_env: dict[str, str] | None = None
    if isinstance(environment_forward_raw, dict):
        forward_env = {str(k): str(v) for k, v in environment_forward_raw.items()}
    elif isinstance(environment_forward_raw, str) and environment_forward_raw.strip():
        try:
            parsed = json.loads(environment_forward_raw)
            # Valid JSON that is not an object leaves forward_env as None.
            if isinstance(parsed, dict):
                forward_env = {str(k): str(v) for k, v in parsed.items()}
        except Exception as exc:  # pragma: no cover - parsing failure
            logger.warning("Failed to parse SWE_REX_FORWARD_ENV=%s: %s", environment_forward_raw, exc)

    # Build coroutine for the async swe-rex flow. The swe-rex request classes
    # are passed in because they are only importable inside this method.
    coro = self._run_swe_rex_async(
        deployment_config=deployment_config,
        remote_root=remote_root,
        dataset_remote_path=dataset_remote_path,
        predictions_remote_path=predictions_remote_path,
        forward_env=forward_env,
        instance=instance,
        prediction=prediction,
        run_id=run_id,
        eval_timeout=eval_timeout,
        namespace=namespace,
        instance_image_tag=instance_image_tag,
        env_image_tag=env_image_tag,
        model_name=model_name,
        Command=Command,
        WriteFileRequest=WriteFileRequest,
        ReadFileRequest=ReadFileRequest,
    )
    try:
        return self._run_coroutine_blocking(coro)
    except Exception as exc:  # pragma: no cover - remote execution failure
        logger.exception("SWE-ReX evaluation failed for %s: %s", instance.get("instance_id"), exc)
        return {"completed": False, "resolved": False, "error": f"SWE-ReX evaluation failed: {exc}", "backend": "swe_rex"}
853
-
854
async def _run_swe_rex_async(
    self,
    *,
    deployment_config,
    remote_root: str,
    dataset_remote_path: str,
    predictions_remote_path: str,
    forward_env: dict[str, str] | None,
    instance: dict[str, Any],
    prediction: dict[str, Any],
    run_id: str,
    eval_timeout: int,
    namespace: str,
    instance_image_tag: str,
    env_image_tag: str,
    model_name: str,
    Command,
    WriteFileRequest,
    ReadFileRequest,
) -> dict[str, Any]:
    """Run the SWE-bench evaluation command inside a started deployment.

    Starts the deployment, uploads a one-instance dataset and the prediction
    as JSON, executes ``python -m swebench.harness.run_evaluation`` remotely,
    then tries to copy the run's log files to the local evaluation log
    directory. The deployment is always stopped afterwards; errors raised
    while stopping are suppressed.

    Returns:
        A dict with ``backend``, ``command_exit_code``, ``command_output``
        (at most the last 4000 characters) and ``artifacts``; ``completed``
        is set to True only when the command exits with code 0.
    """
    deployment = deployment_config.get_deployment()
    await deployment.start()
    try:
        runtime = deployment.runtime
        instance_id = str(instance["instance_id"])
        # Log directories are keyed by the prediction's model name with "/"
        # flattened to "__".
        model_dir = prediction["model_name_or_path"].replace("/", "__")

        # Ensure working directory exists.
        mkdir_result = await runtime.execute(
            Command(command=["mkdir", "-p", remote_root], timeout=60, shell=False)
        )
        if mkdir_result.exit_code not in (0, None):
            logger.warning("Failed to ensure remote directory %s (exit=%s)", remote_root, mkdir_result.exit_code)

        # Upload dataset & predictions.
        dataset_json = json.dumps([instance], ensure_ascii=False)
        predictions_json = json.dumps({instance_id: prediction}, ensure_ascii=False)
        await runtime.write_file(WriteFileRequest(path=dataset_remote_path, content=dataset_json))
        await runtime.write_file(WriteFileRequest(path=predictions_remote_path, content=predictions_json))

        # Flag/value pairs are kept together so the command line is easy to audit.
        flag_pairs = (
            ("--dataset_name", dataset_remote_path),
            ("--split", "test"),
            ("--instance_ids", instance_id),
            ("--predictions_path", predictions_remote_path),
            ("-id", run_id),
            ("--modal", "true"),
            ("--timeout", str(eval_timeout)),
            ("--namespace", namespace),
            ("--instance_image_tag", instance_image_tag),
            ("--env_image_tag", env_image_tag),
            ("--max_workers", "1"),
        )
        eval_cmd = ["python", "-m", "swebench.harness.run_evaluation"]
        for flag, flag_value in flag_pairs:
            eval_cmd.extend((flag, flag_value))

        # Allow 900s of slack over the evaluation timeout, never under 1200s.
        command_timeout = max(eval_timeout + 900, 1200)
        response = await runtime.execute(
            Command(
                command=eval_cmd,
                timeout=command_timeout,
                cwd=remote_root,
                env=forward_env,
                shell=False,
                merge_output_streams=True,
            )
        )
        command_output = (response.stdout or "") + (response.stderr or "")
        exit_code = response.exit_code
        if exit_code is None:
            # A missing exit code is reported as -1.
            exit_code = -1

        # Retrieve artifacts back to local disk (best effort).
        artifacts = {}
        try:
            from swebench.harness.constants import RUN_EVALUATION_LOG_DIR

            local_dir = Path(RUN_EVALUATION_LOG_DIR) / run_id / model_dir / instance_id
            local_dir.mkdir(parents=True, exist_ok=True)

            remote_dir = f"{remote_root}/logs/run_evaluation/{run_id}/{model_dir}/{instance_id}"
            for artifact_name in ("report.json", "test_output.txt", "run_instance.log", "patch.diff"):
                remote_file = f"{remote_dir}/{artifact_name}"
                try:
                    fetched = await runtime.read_file(ReadFileRequest(path=remote_file))
                except Exception:
                    # A file that cannot be read is skipped.
                    continue
                if getattr(fetched, "content", None):
                    (local_dir / artifact_name).write_text(fetched.content)

            artifacts = {
                "log_dir": str(local_dir),
            }
        except Exception as exc:  # pragma: no cover - best effort artifact copy
            logger.warning("Failed to copy SWE-ReX artifacts locally: %s", exc)

        payload = {
            "backend": "swe_rex",
            "command_exit_code": exit_code,
            "command_output": "" if not command_output else command_output[-4000:],
            "artifacts": artifacts,
        }
        if exit_code == 0:
            payload["completed"] = True
        return payload
    finally:
        with contextlib.suppress(Exception):
            await deployment.stop()
972
-
973
- def _collect_evaluation_artifacts(
974
- self,
975
- *,
976
- run_id: str,
977
- model_name: str,
978
- instance_id: str,
979
- ) -> dict[str, Any]:
980
- try:
981
- from swebench.harness.constants import (
982
- LOG_REPORT,
983
- LOG_TEST_OUTPUT,
984
- RUN_EVALUATION_LOG_DIR,
985
- )
986
- except Exception: # pragma: no cover - dependency missing
987
- return {
988
- "completed": False,
989
- "resolved": False,
990
- "log_dir": None,
991
- "report_path": None,
992
- "test_output_path": None,
993
- }
994
-
995
- log_model = model_name.replace("/", "__")
996
- log_dir = Path(RUN_EVALUATION_LOG_DIR) / run_id / log_model / instance_id
997
- payload: dict[str, Any] = {
998
- "log_dir": str(log_dir),
999
- "report_path": None,
1000
- "test_output_path": None,
1001
- "report": None,
1002
- "completed": False,
1003
- "resolved": False,
1004
- }
1005
-
1006
- if not log_dir.exists():
1007
- return payload
1008
-
1009
- report_path = log_dir / LOG_REPORT
1010
- if report_path.exists():
1011
- payload["report_path"] = str(report_path)
1012
- try:
1013
- report_blob = json.loads(report_path.read_text())
1014
- per_instance = report_blob.get(instance_id)
1015
- if per_instance is not None:
1016
- payload["report"] = per_instance
1017
- payload["completed"] = True
1018
- payload["resolved"] = bool(per_instance.get("resolved"))
1019
- except Exception as exc: # pragma: no cover - log parsing failure
1020
- logger.exception("Failed to parse SWE-bench report for %s: %s", instance_id, exc)
1021
- payload["error"] = f"Failed to parse report.json: {exc}"
1022
-
1023
- test_output_path = log_dir / LOG_TEST_OUTPUT
1024
- if test_output_path.exists():
1025
- payload["test_output_path"] = str(test_output_path)
1026
-
1027
- return payload
1028
-
1029
- @staticmethod
1030
- def _run_coroutine_blocking(coro):
1031
- try:
1032
- loop = asyncio.get_running_loop()
1033
- except RuntimeError:
1034
- loop = None
1035
-
1036
- if loop and loop.is_running():
1037
- result: dict[str, Any] = {}
1038
- error: dict[str, Exception] = {}
1039
-
1040
- def runner():
1041
- try:
1042
- result["value"] = asyncio.run(coro)
1043
- except Exception as exc: # pragma: no cover - propagate to caller
1044
- error["exc"] = exc
1045
-
1046
- thread = threading.Thread(target=runner, daemon=True)
1047
- thread.start()
1048
- thread.join()
1049
- if error:
1050
- raise error["exc"]
1051
- return result.get("value")
1052
-
1053
- return asyncio.run(coro)
1054
-
1055
- @staticmethod
1056
- def _namespace_from_image(image_name: str) -> str | None:
1057
- if not image_name:
1058
- return None
1059
- parts = image_name.split("/")
1060
- if len(parts) >= 2:
1061
- return parts[-2] if parts[0].endswith(".io") else parts[0]
1062
- return None
1063
-
1064
- @staticmethod
1065
- def _image_tag_from_name(image_name: str) -> str | None:
1066
- if not image_name or ":" not in image_name:
1067
- return None
1068
- return image_name.rsplit(":", 1)[-1] or None
1069
-
1070
- @staticmethod
1071
- def _to_bool(value: Any) -> bool:
1072
- if isinstance(value, bool):
1073
- return value
1074
- if isinstance(value, str):
1075
- return value.strip().lower() in {"1", "true", "yes", "on"}
1076
- if isinstance(value, (int, float)):
1077
- return bool(value)
1078
- return False # pragma: no cover - defensive default
1079
-
1080
def _build_observation(self, last_result: dict[str, Any] | None) -> dict[str, Any]:
    """Assemble the observation dict returned to the agent.

    Always includes the task, the step index, the history as returned by
    ``summarise_history``, the submission flags and the tool schema. ``last``
    is added when ``last_result`` is not None, and ``submission_result`` when
    ``self.last_submission`` is not None.
    """
    condensed_history = summarise_history(self.state.history)
    observation = {
        "task": self.task,
        "step_idx": self.state.step_idx,
        "history": condensed_history,
        "submitted": self.state.submitted,
        "submission_success": self.state.submission_success,
        "tools": TOOLS_SCHEMA,
    }
    # Optional entries are attached only when they carry a value.
    optional_entries = (
        ("last", last_result),
        ("submission_result", self.last_submission),
    )
    for key, entry in optional_entries:
        if entry is not None:
            observation[key] = entry
    return observation
1095
-
1096
- def _build_response(
1097
- self,
1098
- *,
1099
- observation: dict[str, Any],
1100
- step_idx: int,
1101
- done: bool = False,
1102
- reward: float | None = None,
1103
- info: dict[str, Any] | None = None,
1104
- ) -> dict[str, Any]:
1105
- response = {
1106
- "observation": observation,
1107
- "step_idx": step_idx,
1108
- "done": bool(done),
1109
- }
1110
- if reward is not None:
1111
- response["reward"] = reward
1112
- if info is not None:
1113
- response["info"] = info
1114
- return response
1115
-
1116
def state_dict(self) -> dict[str, Any]:
    """Return a snapshot of the environment state as a plain dict.

    The first five keys are read from ``self.state``; the rest are read from
    the wrapper itself. Values are referenced, not copied.
    """
    env_state = self.state
    snapshot: dict[str, Any] = {
        field: getattr(env_state, field)
        for field in ("task", "history", "step_idx", "submitted", "submission_success")
    }
    snapshot.update(
        last_result=self.last_result,
        last_submission=self.last_submission,
        environment_type=self.environment_type,
        env_config=self.env_config,
    )
    return snapshot
1128
-
1129
def load_state_dict(self, payload: dict[str, Any]) -> None:
    """Restore the environment from a dict shaped like ``state_dict()`` output.

    ``task`` is required. The other state fields fall back to an empty
    history, step 0, not submitted, and a ``None`` submission result.
    ``environment_type`` and ``env_config`` keep their current values when
    the payload omits them.

    Raises:
        KeyError: If ``payload`` has no ``"task"`` entry.
    """
    state_fields = {
        "task": payload["task"],
        "history": payload.get("history", []),
        "step_idx": int(payload.get("step_idx", 0)),
        "submitted": bool(payload.get("submitted", False)),
        "submission_success": payload.get("submission_success"),
    }
    self.state = MiniSweEnvironmentState(**state_fields)

    # These two are reset to None when absent from the payload.
    for attr in ("last_result", "last_submission"):
        setattr(self, attr, payload.get(attr))
    # These two retain the wrapper's current value when absent.
    for attr in ("environment_type", "env_config"):
        setattr(self, attr, payload.get(attr, getattr(self, attr)))
1141
-
1142
async def serialize(self) -> dict[str, Any]:
    """Return the wrapper's name, construction config and state as one dict.

    The ``config`` section holds ``env_config`` and ``submit_command``; the
    ``state`` section is the output of ``state_dict()``.
    """
    wrapper_name = self.name
    config_section = {
        "env_config": self.env_config,
        "submit_command": self.submit_command,
    }
    state_section = self.state_dict()
    return {"name": wrapper_name, "config": config_section, "state": state_section}
1151
-
1152
@classmethod
async def deserialize(cls, payload: dict[str, Any]) -> MiniSweEnvironmentWrapper:
    """Rebuild a wrapper from a dict shaped like ``serialize()`` output.

    A missing or falsy ``config`` section is treated as empty, so
    ``env_config`` and ``submit_command`` are passed as ``None`` in that case.
    The saved state is then applied with ``load_state_dict``.

    Raises:
        KeyError: If ``payload`` lacks ``"state"`` or the state lacks ``"task"``.
    """
    saved_config = payload.get("config") or {}
    saved_state = payload["state"]
    wrapper = cls(
        task=saved_state["task"],
        env_config=saved_config.get("env_config"),
        submit_command=saved_config.get("submit_command"),
    )
    wrapper.load_state_dict(saved_state)
    return wrapper
1162
-
1163
-
1164
# Names exported by `from <module> import *`.
__all__ = [
    "MiniSweEnvironmentWrapper",
]