synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (327) hide show
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +8 -11
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/run_eval.py +36 -37
  19. examples/rl/run_rl_and_save.py +5 -5
  20. examples/rl/task_app/math_single_step.py +65 -43
  21. examples/rl/task_app/math_task_app.py +3 -3
  22. examples/sft/README.md +139 -0
  23. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  24. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  25. examples/sft/evaluate.py +117 -0
  26. examples/sft/export_dataset.py +117 -0
  27. examples/sft/generate_traces.py +162 -0
  28. examples/swe/__init__.py +12 -0
  29. examples/swe/task_app/README.md +105 -0
  30. examples/swe/task_app/__init__.py +2 -0
  31. examples/swe/task_app/grpo_swe_mini.py +571 -0
  32. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  33. examples/swe/task_app/hosted/README.md +173 -0
  34. examples/swe/task_app/hosted/__init__.py +5 -0
  35. examples/swe/task_app/hosted/branching.py +143 -0
  36. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  37. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  38. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  39. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  40. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  41. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  42. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  43. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  44. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  45. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  46. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  47. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  50. examples/swe/task_app/hosted/hosted_app.py +204 -0
  51. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  52. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  53. examples/swe/task_app/hosted/main.py +100 -0
  54. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  55. examples/swe/task_app/hosted/registry.py +195 -0
  56. examples/swe/task_app/hosted/rollout.py +1869 -0
  57. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  58. examples/swe/task_app/hosted/storage/volume.py +211 -0
  59. examples/swe/task_app/hosted/test_agents.py +161 -0
  60. examples/swe/task_app/hosted/test_service.py +137 -0
  61. examples/swe/task_app/hosted/utils.py +62 -0
  62. examples/vlm/README.md +68 -0
  63. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  64. examples/vlm/crafter_image_only_agent.py +207 -0
  65. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  66. examples/vlm/filter_image_rows.py +63 -0
  67. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  68. examples/warming_up_to_rl/analyze_trace_db.py +5 -5
  69. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  70. examples/warming_up_to_rl/export_trace_sft.py +78 -21
  71. examples/warming_up_to_rl/groq_test.py +4 -4
  72. examples/warming_up_to_rl/manage_secrets.py +13 -18
  73. examples/warming_up_to_rl/run_eval.py +42 -44
  74. examples/warming_up_to_rl/run_fft_and_save.py +11 -16
  75. examples/warming_up_to_rl/run_local_rollout.py +1 -3
  76. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
  77. examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
  78. examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
  79. examples/warming_up_to_rl/run_rl_and_save.py +5 -6
  80. examples/warming_up_to_rl/run_rollout_remote.py +8 -10
  81. examples/warming_up_to_rl/task_app/README.md +6 -2
  82. examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
  83. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  101. synth/__init__.py +14 -0
  102. synth_ai/__init__.py +26 -4
  103. synth_ai/api/models/supported.py +376 -0
  104. synth_ai/api/train/builders.py +128 -21
  105. synth_ai/api/train/cli.py +80 -64
  106. synth_ai/api/train/config_finder.py +7 -2
  107. synth_ai/api/train/env_resolver.py +1 -1
  108. synth_ai/api/train/pollers.py +2 -1
  109. synth_ai/api/train/supported_algos.py +139 -0
  110. synth_ai/api/train/task_app.py +1 -2
  111. synth_ai/api/train/utils.py +13 -44
  112. synth_ai/cli/__init__.py +8 -0
  113. synth_ai/cli/_modal_wrapper.py +28 -0
  114. synth_ai/cli/_typer_patch.py +49 -0
  115. synth_ai/cli/balance.py +1 -2
  116. synth_ai/cli/calc.py +1 -1
  117. synth_ai/cli/demo.py +2 -1
  118. synth_ai/cli/recent.py +2 -2
  119. synth_ai/cli/rl_demo.py +2 -1
  120. synth_ai/cli/root.py +11 -13
  121. synth_ai/cli/status.py +2 -2
  122. synth_ai/cli/task_apps.py +529 -179
  123. synth_ai/cli/traces.py +6 -4
  124. synth_ai/cli/watch.py +12 -18
  125. synth_ai/demo_registry.py +1 -1
  126. synth_ai/demos/core/cli.py +36 -43
  127. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  128. synth_ai/demos/demo_task_apps/core.py +17 -25
  129. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
  130. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  131. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
  132. synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
  133. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  134. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  135. synth_ai/environments/reproducibility/tree.py +2 -5
  136. synth_ai/environments/service/app.py +11 -12
  137. synth_ai/environments/service/core_routes.py +4 -7
  138. synth_ai/environments/stateful/engine.py +1 -1
  139. synth_ai/environments/tasks/core.py +1 -0
  140. synth_ai/environments/tasks/filters.py +5 -6
  141. synth_ai/environments/tasks/utils.py +4 -5
  142. synth_ai/handshake.py +9 -9
  143. synth_ai/http.py +1 -1
  144. synth_ai/http_client.py +18 -10
  145. synth_ai/inference/client.py +15 -5
  146. synth_ai/jobs/client.py +78 -83
  147. synth_ai/learning/__init__.py +41 -6
  148. synth_ai/learning/algorithms.py +14 -0
  149. synth_ai/learning/client.py +91 -24
  150. synth_ai/learning/config.py +2 -38
  151. synth_ai/learning/ft_client.py +4 -59
  152. synth_ai/learning/health.py +5 -6
  153. synth_ai/learning/jobs.py +31 -47
  154. synth_ai/{rl → learning/rl}/__init__.py +14 -4
  155. synth_ai/learning/rl/client.py +267 -0
  156. synth_ai/learning/rl/config.py +31 -0
  157. synth_ai/{rl → learning/rl}/contracts.py +5 -8
  158. synth_ai/{rl → learning/rl}/env_keys.py +39 -15
  159. synth_ai/learning/rl/secrets.py +13 -0
  160. synth_ai/learning/rl_client.py +2 -281
  161. synth_ai/learning/sft/__init__.py +29 -0
  162. synth_ai/learning/sft/client.py +68 -0
  163. synth_ai/learning/sft/config.py +270 -0
  164. synth_ai/learning/sft/data.py +295 -0
  165. synth_ai/learning/sse.py +25 -24
  166. synth_ai/learning/validators.py +25 -28
  167. synth_ai/lm/__init__.py +21 -47
  168. synth_ai/main.py +6 -0
  169. synth_ai/task/__init__.py +25 -27
  170. synth_ai/task/apps/__init__.py +7 -8
  171. synth_ai/task/auth.py +8 -8
  172. synth_ai/task/client.py +14 -14
  173. synth_ai/task/contracts.py +36 -35
  174. synth_ai/task/datasets.py +6 -5
  175. synth_ai/task/errors.py +10 -10
  176. synth_ai/task/health.py +17 -9
  177. synth_ai/task/json.py +58 -23
  178. synth_ai/task/proxy.py +13 -9
  179. synth_ai/task/rubrics.py +16 -15
  180. synth_ai/task/server.py +12 -12
  181. synth_ai/task/tracing_utils.py +4 -4
  182. synth_ai/task/vendors.py +5 -6
  183. synth_ai/tracing_v3/__init__.py +2 -0
  184. synth_ai/tracing_v3/abstractions.py +21 -4
  185. synth_ai/tracing_v3/decorators.py +18 -16
  186. synth_ai/tracing_v3/hooks.py +5 -5
  187. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  188. synth_ai/tracing_v3/session_tracer.py +40 -14
  189. synth_ai/tracing_v3/storage/base.py +85 -0
  190. synth_ai/tracing_v3/storage/config.py +21 -8
  191. synth_ai/tracing_v3/storage/factory.py +10 -7
  192. synth_ai/tracing_v3/storage/utils.py +4 -2
  193. synth_ai/tracing_v3/turso/daemon.py +7 -2
  194. synth_ai/tracing_v3/turso/models.py +2 -2
  195. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  196. synth_ai/tracing_v3/utils.py +4 -4
  197. synth_ai/v0/api/__init__.py +8 -0
  198. synth_ai/v0/api/models/__init__.py +8 -0
  199. synth_ai/v0/api/models/supported.py +8 -0
  200. synth_ai/v0/config/__init__.py +15 -0
  201. synth_ai/v0/config/base_url.py +12 -0
  202. synth_ai/v0/lm/__init__.py +51 -0
  203. synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
  204. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  205. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  206. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  207. synth_ai/{lm → v0/lm}/config.py +6 -1
  208. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  209. synth_ai/{lm → v0/lm}/core/main.py +6 -6
  210. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  211. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
  212. synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
  213. synth_ai/{lm → v0/lm}/overrides.py +2 -2
  214. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  215. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  216. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  217. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  218. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
  219. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  220. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  221. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
  222. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
  223. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
  224. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
  225. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  226. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  227. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  228. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  229. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  230. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  231. synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
  232. synth_ai/v0/tracing_v3/__init__.py +10 -0
  233. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  234. synth_ai/v0/tracing_v3/decorators.py +3 -0
  235. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  236. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  237. synth_ai-0.2.9.dev9.dist-info/METADATA +191 -0
  238. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/RECORD +268 -238
  239. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/top_level.txt +1 -0
  240. examples/common_old/backend.py +0 -20
  241. examples/evals_old/README.md +0 -98
  242. examples/evals_old/__init__.py +0 -6
  243. examples/evals_old/compare_models.py +0 -1038
  244. examples/evals_old/example_log.md +0 -145
  245. examples/evals_old/run_demo.sh +0 -126
  246. examples/evals_old/trace_analysis.py +0 -270
  247. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  248. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  249. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  250. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
  251. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  252. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  253. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  254. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  255. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  256. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
  257. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  258. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  259. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
  260. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  261. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  262. examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
  263. examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
  264. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  265. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  266. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
  267. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
  268. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
  269. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  270. examples/finetuning_old/synth_qwen_v1/util.py +0 -152
  271. examples/rl_old/task_app.py +0 -1131
  272. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  273. examples/warming_up_to_rl/old/notes.md +0 -73
  274. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  275. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  276. synth_ai/experimental/synth_oss.py +0 -445
  277. synth_ai/learning/filtering.py +0 -0
  278. synth_ai/learning/offline/dpo.py +0 -0
  279. synth_ai/learning/offline/providers.py +0 -7
  280. synth_ai/learning/offline/sft.py +0 -0
  281. synth_ai/learning/offline/shared.py +0 -0
  282. synth_ai/learning/online/grpo.py +0 -0
  283. synth_ai/learning/online/irft.py +0 -0
  284. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  285. synth_ai/learning/prompts/gepa.py +0 -0
  286. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
  287. synth_ai/learning/prompts/mipro.py +0 -289
  288. synth_ai/learning/prompts/random_search.py +0 -249
  289. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  290. synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
  291. synth_ai/rl/secrets.py +0 -19
  292. synth_ai/scripts/verify_rewards.py +0 -100
  293. synth_ai/tracing/__init__.py +0 -30
  294. synth_ai/tracing_v1/__init__.py +0 -33
  295. synth_ai/tracing_v3/turso/__init__.py +0 -25
  296. synth_ai/tracing_v3/turso/manager.py +0 -838
  297. synth_ai/zyk/__init__.py +0 -30
  298. synth_ai-0.2.9.dev7.dist-info/METADATA +0 -131
  299. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  300. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  301. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  302. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  303. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  304. /synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
  305. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  306. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  307. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  308. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  309. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  310. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  311. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  312. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  313. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  314. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  315. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  316. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  317. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  318. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  319. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  320. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  321. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  322. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  323. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  324. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  325. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/WHEEL +0 -0
  326. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/entry_points.txt +0 -0
  327. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,136 @@
1
+ """Compatibility wrapper for the mini-SWE task app."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ from pathlib import Path
7
+
8
+ from fastapi.exceptions import RequestValidationError
9
+ from fastapi.responses import JSONResponse
10
+ from starlette.requests import Request
11
+ from synth_ai.task.apps import ModalDeploymentConfig, registry
12
+ from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
13
+ from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
14
+
15
+ from .grpo_swe_mini import build_config
16
+
17
+ APP_ID = "swe-mini"
18
+
19
+
20
def _build_base_config() -> TaskAppConfig:
    """Return the task app configuration produced by ``build_config``."""
    base_config = build_config()
    return base_config
22
+
23
+
24
# Deployment metadata comes from the registry entry for APP_ID when the lookup
# succeeds; when it fails we keep "no Modal deployment" and "no env files".
MODAL_DEPLOYMENT: ModalDeploymentConfig | None = None
ENV_FILES: tuple[str, ...] = ()
try:
    _REGISTERED_ENTRY = registry.get(APP_ID)
except Exception:  # pragma: no cover - registry unavailable in some contexts
    pass  # keep the defaults assigned above
else:
    MODAL_DEPLOYMENT = _REGISTERED_ENTRY.modal
    ENV_FILES = tuple(_REGISTERED_ENTRY.env_files)
32
+
33
+
34
def build_task_app_config() -> TaskAppConfig:
    """Return a clone of the base configuration for this task app."""
    return _build_base_config().clone()
37
+
38
+
39
def fastapi_app():
    """Build the mini-SWE task app with custom health and validation handlers.

    The app returned by ``create_task_app`` has its ``GET /health`` and
    ``GET /health/rollout`` routes removed and replaced by the handlers defined
    here. A handler for ``RequestValidationError`` is also installed; it prints
    a short snapshot of the request and answers with HTTP 422.

    Returns:
        The configured application object.
    """
    app = create_task_app(build_task_app_config())

    # Drop the stock GET health routes so the replacements below are the ones
    # that get matched.
    replaced_paths = {"/health", "/health/rollout"}
    app.router.routes = [
        route
        for route in app.router.routes
        if not (
            getattr(route, "path", None) in replaced_paths
            and "GET" in (getattr(route, "methods", set()) or set())
        )
    ]

    # Upper bound on how many characters of the key are ever printed/returned.
    max_prefix_chars = 4

    def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
        """Print and return a short prefix of the expected key (None if unset)."""
        if not env_key:
            return None
        # This hint is sent to callers that failed authorization, so it must
        # not reveal a meaningful part of the secret: at most half of the key
        # and never more than ``max_prefix_chars`` characters.
        visible = max(1, min(len(env_key) // 2, max_prefix_chars))
        prefix = env_key[:visible]
        print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
        return prefix

    def _health_response(request: Request, source: str, authorized_body: dict):
        """Shared body of both health endpoints.

        Returns 503 when no environment API key is configured,
        ``authorized_body`` when the request carries an accepted key, and
        otherwise a 200 payload flagged ``authorized: False`` with a short key
        prefix hint.
        """
        env_key = normalize_environment_api_key()
        if not env_key:
            return JSONResponse(
                status_code=503,
                content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
            )
        if is_api_key_header_authorized(request):
            return authorized_body

        prefix = _log_env_key_prefix(source, env_key)
        content: dict[str, object] = {"status": "healthy", "authorized": False}
        if prefix:
            content["expected_api_key_prefix"] = prefix
        return JSONResponse(status_code=200, content=content)

    @app.get("/health")
    async def health(request: Request):
        return _health_response(
            request, "health", {"status": "healthy", "authorized": True}
        )

    @app.get("/health/rollout")
    async def health_rollout(request: Request):
        return _health_response(
            request, "health/rollout", {"ok": True, "authorized": True}
        )

    @app.exception_handler(RequestValidationError)
    async def _on_validation_error(request: Request, exc: RequestValidationError):
        # Logging is best-effort: a failure while building the snapshot must
        # not prevent the 422 response from being returned.
        try:
            hdr = request.headers
            snapshot = {
                "path": str(request.url.path),
                # Only record whether auth headers are present, never their values.
                "have_x_api_key": bool(hdr.get("x-api-key")),
                "have_x_api_keys": bool(hdr.get("x-api-keys")),
                "have_authorization": bool(hdr.get("authorization")),
                "errors": exc.errors()[:5],
            }
            print("[422] validation", snapshot, flush=True)
        except Exception:
            pass
        return JSONResponse(
            status_code=422,
            content={"status": "invalid", "detail": exc.errors()[:5]},
        )

    return app
110
+
111
+
112
if __name__ == "__main__":
    # Command-line entry point: parse options, collect .env files, then start
    # the task app server.
    parser = argparse.ArgumentParser(description="Run the mini-SWE task app locally")
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument("--port", type=int, default=8020)
    parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
    parser.add_argument(
        "--env-file",
        action="append",
        default=[],
        help="Additional .env files to load before startup",
    )
    args = parser.parse_args()

    # The default env file lives five directory levels above this file.
    # Indexing ``parents[4]`` raises IndexError when the file is installed at a
    # shallower path, so check the depth before indexing.
    ancestors = Path(__file__).resolve().parents
    env_files: list[str] = []
    if len(ancestors) > 4:
        default_env = ancestors[4] / "backend" / ".env.dev"
        if default_env.exists():
            env_files.append(str(default_env))
    env_files.extend(args.env_file or [])

    run_task_app(
        build_task_app_config,
        host=args.host,
        port=args.port,
        reload=args.reload,
        env_files=env_files,
    )
136
+
@@ -0,0 +1,173 @@
1
+ # GRPO Synth Envs Hosted Service
2
+
3
+ This service provides hosted environment and policy management for GRPO (Group Relative Policy Optimization) training with synthetic environments.
4
+
5
+ ## Architecture
6
+
7
+ The service implements a FastAPI-based HTTP API that manages:
8
+ - **Environments**: Stateful environment instances (currently Crafter)
9
+ - **Policies**: Thin policy clients that prepare inference requests
10
+ - **Rollouts**: Coordinated execution of environment-policy interaction loops
11
+ - **Snapshots**: State persistence using Modal Volumes
12
+ - **Branching**: Creating multiple copies of environments/policies for exploration
13
+
14
+ ## Key Components
15
+
16
+ ### Core Modules
17
+ - `hosted_app.py`: FastAPI app factory and configuration
18
+ - `registry.py`: In-memory registries for active instances
19
+ - `storage/volume.py`: Modal Volume operations for snapshots
20
+ - `inference/openai_client.py`: OpenAI-compatible inference client
21
+
22
+ ### API Routers
23
+ - `environment_routes.py`: Environment lifecycle endpoints
24
+ - `policy_routes.py`: Policy lifecycle endpoints
25
+ - `rollout.py`: Rollout coordinator and run management
26
+ - `branching.py`: Branching operations
27
+
28
+ ### Environment Implementations
29
+ - `envs/crafter/`: Crafter environment and policy implementations
30
+
31
+ ## API Endpoints
32
+
33
+ ### Service Discovery
34
+ - `GET /info`: Service configuration and endpoints
35
+ - `GET /health`: Health check
36
+
37
+ ### Environment Management
38
+ - `POST /env/create`: Create new environment
39
+ - `POST /env/reset`: Reset environment
40
+ - `POST /env/step`: Execute environment step
41
+ - `POST /env/snapshot`: Save environment state
42
+ - `POST /env/restore`: Restore from snapshot
43
+ - `POST /env/terminate`: Clean up environment
44
+
45
+ ### Policy Management
46
+ - `POST /policy/create`: Create new policy
47
+ - `POST /policy/step`: Generate actions (with optional inference)
48
+ - `POST /policy/snapshot`: Save policy state
49
+ - `POST /policy/restore`: Restore from snapshot
50
+ - `POST /policy/terminate`: Clean up policy
51
+
52
+ ### Coordination
53
+ - `POST /rollout`: Execute coordinated rollout
54
+ - `POST /branch`: Create environment/policy branches
55
+ - `POST /run/abort`: Abort running rollout
56
+ - `GET /run/status/{run_id}`: Check run status
57
+
58
+ ## Local Development
59
+
60
+ ```bash
61
+ # Install dependencies
62
+ pip install fastapi uvicorn httpx pydantic
63
+
64
+ # Run the service
65
+ python main.py
66
+
67
+ # Or with uvicorn directly
68
+ uvicorn main:app --reload --port 8000
69
+ ```
70
+
71
+ ## Modal Deployment
72
+
73
+ ```bash
74
+ # Deploy to Modal
75
+ modal deploy main.py
76
+
77
+ # Run once
78
+ modal run main.py
79
+ ```
80
+
81
+ ## Environment Variables
82
+
83
+ - `SERVICE_BASE_URL`: Base URL for this service (default: http://localhost:8000)
84
+ - `VLLM_BASE_URL`: Base URL for vLLM inference service (default: http://localhost:8001)
85
+ - `DEFAULT_MODEL`: Default model name for inference
86
+
87
+ ## Storage
88
+
89
+ The service uses Modal Volumes for persistent storage:
90
+ - Volume name: `synth-env-state`
91
+ - Mount path: `/data/state`
92
+ - Layout: `/data/state/runs/{rl_run_id}/{kind}/{shard}/{snapshot_id}.tar.gz`
93
+
94
+ ## Example Usage
95
+
96
+ ```python
97
+ import httpx
98
+
99
+ # Create environment
100
+ env_response = httpx.post(
101
+ "http://localhost:8000/env/create",
102
+ json={
103
+ "env_name": "crafter",
104
+ "config": {},
105
+ "seed": 42,
106
+ "rl_run_id": "test-run-1"
107
+ }
108
+ )
109
+ env_id = env_response.json()["env_id"]
110
+
111
+ # Create policy
112
+ policy_response = httpx.post(
113
+ "http://localhost:8000/policy/create",
114
+ json={
115
+ "policy_name": "crafter-react",
116
+ "config": {"inference_url": "http://vllm:8001"},
117
+ "rl_run_id": "test-run-1",
118
+ "bound_env_id": env_id
119
+ }
120
+ )
121
+ policy_id = policy_response.json()["policy_id"]
122
+
123
+ # Execute rollout
124
+ rollout_response = httpx.post(
125
+ "http://localhost:8000/rollout",
126
+ json={
127
+ "run_id": "test-run-1",
128
+ "env": {"env_id": env_id},
129
+ "policy": {"policy_id": policy_id},
130
+ "ops": ["agent", "env"] * 10,
131
+ "on_done": "reset"
132
+ }
133
+ )
134
+ trajectories = rollout_response.json()["trajectories"]
135
+ ```
136
+
137
+ ## Testing
138
+
139
+ The implementation follows the plan outlined in `plan.md` and decisions in `decisions.md`. Key test areas:
140
+ - Environment create/step/reset lifecycle
141
+ - Policy inference request building
142
+ - Snapshot/restore round trips
143
+ - Rollout coordination with abort support
144
+ - Branching operations
145
+
146
+ 4b
147
+ "aggregate": {
148
+ "completed": 20,
149
+ "total": 20,
150
+ "avg_turns": 10.0,
151
+ "avg_achievements": 1.3,
152
+ "achievements_freq": {
153
+ "collect_wood": 9,
154
+ "collect_sapling": 8,
155
+ "collect_drink": 7,
156
+ "place_plant": 2
157
+ }
158
+ }
159
+
160
+
161
+ groq qwen/qwen3-32b
162
+ ],
163
+ "aggregate": {
164
+ "completed": 20,
165
+ "total": 20,
166
+ "avg_turns": 10.0,
167
+ "avg_achievements": 1.0,
168
+ "achievements_freq": {
169
+ "collect_sapling": 7,
170
+ "collect_wood": 9,
171
+ "collect_drink": 4
172
+ }
173
+ }
@@ -0,0 +1,5 @@
1
+ """GRPO Synth Envs Hosted Service."""
2
+
3
+ from .hosted_app import TaskApp, create_app
4
+
5
+ __all__ = ["create_app", "TaskApp"]
@@ -0,0 +1,143 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+
5
+ from fastapi import APIRouter, HTTPException
6
+ from pydantic import BaseModel
7
+
8
+ from .registry import registry
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ router = APIRouter()
13
+
14
+
15
class BranchRequest(BaseModel):
    """Request body for ``POST /branch``."""

    # IDs of the environments to branch; nothing is branched when empty/None.
    env_ids: list[str] | None = None
    # IDs of the policies to branch; nothing is branched when empty/None.
    policy_ids: list[str] | None = None
    # Number of children to create for each listed parent.
    num_children: int = 1
    # Limit that ``num_children`` is checked against before any work is done.
    max_branches: int = 10
20
+
21
+
22
class BranchResponse(BaseModel):
    """Response body for ``POST /branch``."""

    # Parent environment ID -> IDs of the environments created from it.
    env_branches: dict[str, list[str]]
    # Parent policy ID -> IDs of the policies created from it.
    policy_branches: dict[str, list[str]]
25
+
26
+
27
@router.post("/branch", response_model=BranchResponse)
async def create_branches(request: BranchRequest) -> BranchResponse:
    """Create branches of environments and/or policies.

    Each listed parent is snapshotted and the snapshot restored
    ``num_children`` times; the restored instances are the children. Parents
    that are not found in the registry are logged and skipped.

    Args:
        request: Parent IDs to branch, the per-parent child count and its limit.

    Returns:
        Mappings from each branched parent ID to the IDs of its children.

    Raises:
        HTTPException: 422 when ``num_children`` exceeds ``max_branches``. An
            ``HTTPException`` raised while snapshotting or restoring is
            propagated unchanged; any other failure is reported as a 500.
    """
    if request.num_children > request.max_branches:
        raise HTTPException(
            status_code=422,
            detail=f"num_children ({request.num_children}) exceeds max_branches ({request.max_branches})",
        )

    env_branches: dict[str, list[str]] = {}
    policy_branches: dict[str, list[str]] = {}

    try:
        # Branch environments
        if request.env_ids:
            # Function-scope import, done once instead of on every loop
            # iteration (presumably local to avoid a circular import with the
            # route module -- confirm before moving it to module level).
            from .environment_routes import (
                EnvRestoreRequest,
                EnvSnapshotRequest,
                restore_environment,
                snapshot_environment,
            )

            for env_id in request.env_ids:
                env_handle = registry.get_env(env_id)
                if not env_handle:
                    logger.warning(f"Environment {env_id} not found, skipping")
                    continue

                env_child_ids: list[str] = []
                for child_idx in range(request.num_children):
                    # Snapshot the parent, then restore the snapshot as a new env.
                    snapshot_response = await snapshot_environment(
                        EnvSnapshotRequest(env_id=env_id)
                    )
                    restore_response = await restore_environment(
                        EnvRestoreRequest(snapshot_id=snapshot_response.snapshot_id)
                    )

                    child_id = restore_response.env_id
                    child_handle = registry.get_env(child_id)

                    # Offset the seed by the child index so each child gets a
                    # distinct, reproducible seed.
                    if child_handle and child_handle.seed is not None:
                        child_handle.seed = child_handle.seed + child_idx + 1
                        child_handle.env.seed = child_handle.seed

                    env_child_ids.append(child_id)

                    # Track parent relationship in snapshot metadata.
                    # NOTE(review): this stores the parent *env* ID in a field
                    # named parent_snapshot_id -- confirm that is intended.
                    snapshot_meta = registry.get_snapshot(snapshot_response.snapshot_id)
                    if snapshot_meta:
                        snapshot_meta.parent_snapshot_id = env_id

                env_branches[env_id] = env_child_ids

        # Branch policies
        if request.policy_ids:
            from .policy_routes import (
                PolicyRestoreRequest,
                PolicySnapshotRequest,
                restore_policy,
                snapshot_policy,
            )

            for policy_id in request.policy_ids:
                policy_handle = registry.get_policy(policy_id)
                if not policy_handle:
                    logger.warning(f"Policy {policy_id} not found, skipping")
                    continue

                policy_child_ids: list[str] = []
                for child_idx in range(request.num_children):
                    # Snapshot the parent, then restore the snapshot as a new policy.
                    snapshot_response = await snapshot_policy(
                        PolicySnapshotRequest(policy_id=policy_id)
                    )
                    restore_response = await restore_policy(
                        PolicyRestoreRequest(snapshot_id=snapshot_response.snapshot_id)
                    )

                    child_id = restore_response.policy_id
                    policy_child_ids.append(child_id)

                    # Copy bound environment if parent had one
                    child_handle = registry.get_policy(child_id)
                    if child_handle and policy_handle.bound_env_id:
                        if policy_handle.bound_env_id in env_branches:
                            # The bound env was branched above: pair the i-th
                            # policy child with the i-th env child.
                            child_envs = env_branches[policy_handle.bound_env_id]
                            if child_idx < len(child_envs):
                                child_handle.bound_env_id = child_envs[child_idx]
                        else:
                            # Otherwise keep same env binding
                            child_handle.bound_env_id = policy_handle.bound_env_id

                    # Track parent relationship (same field caveat as for envs:
                    # the value stored is the parent policy ID).
                    snapshot_meta = registry.get_snapshot(snapshot_response.snapshot_id)
                    if snapshot_meta:
                        snapshot_meta.parent_snapshot_id = policy_id

                policy_branches[policy_id] = policy_child_ids

        return BranchResponse(
            env_branches=env_branches,
            policy_branches=policy_branches,
        )

    except HTTPException:
        # Keep the status code chosen by the snapshot/restore handlers instead
        # of rewriting it to a generic 500.
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Failed to create branches: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e