synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (236) hide show
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +9 -9
  4. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  5. examples/multi_step/convert_traces_to_sft.py +84 -0
  6. examples/multi_step/run_sft_qwen30b.sh +45 -0
  7. examples/qwen_coder/configs/coder_lora_30b.toml +2 -1
  8. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  9. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  10. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  11. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  12. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  13. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  14. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  15. examples/qwen_vl/QUICKSTART.md +327 -0
  16. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  17. examples/qwen_vl/README.md +154 -0
  18. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  19. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  20. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  21. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  22. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  23. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  24. examples/qwen_vl/__init__.py +2 -0
  25. examples/qwen_vl/collect_data_via_cli.md +423 -0
  26. examples/qwen_vl/collect_vision_traces.py +368 -0
  27. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  28. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  29. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  30. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  31. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  32. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  33. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  34. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  35. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  36. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  37. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  38. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  39. examples/qwen_vl/run_vision_comparison.sh +62 -0
  40. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  41. examples/qwen_vl/test_image_validation.py +201 -0
  42. examples/qwen_vl/test_sft_vision_data.py +110 -0
  43. examples/rl/README.md +1 -1
  44. examples/rl/configs/eval_base_qwen.toml +17 -0
  45. examples/rl/configs/eval_rl_qwen.toml +13 -0
  46. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  47. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  48. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  49. examples/rl/run_eval.py +436 -0
  50. examples/rl/run_rl_and_save.py +111 -0
  51. examples/rl/task_app/README.md +22 -0
  52. examples/rl/task_app/math_single_step.py +990 -0
  53. examples/rl/task_app/math_task_app.py +111 -0
  54. examples/sft/README.md +5 -5
  55. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  56. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  57. examples/sft/evaluate.py +2 -4
  58. examples/sft/export_dataset.py +7 -4
  59. examples/swe/task_app/README.md +1 -1
  60. examples/swe/task_app/grpo_swe_mini.py +0 -1
  61. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  62. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  63. examples/swe/task_app/hosted/policy_routes.py +0 -2
  64. examples/swe/task_app/hosted/rollout.py +0 -8
  65. examples/task_apps/crafter/task_app/grpo_crafter.py +4 -7
  66. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +59 -1
  67. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +30 -0
  68. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +62 -31
  69. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +16 -14
  70. examples/task_apps/enron/__init__.py +1 -0
  71. examples/vlm/README.md +3 -3
  72. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  73. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  74. examples/vlm/filter_image_rows.py +1 -1
  75. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  76. examples/warming_up_to_rl/_utils.py +92 -0
  77. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  78. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  79. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  80. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  81. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  82. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  83. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  84. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  85. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  86. examples/warming_up_to_rl/readme.md +63 -132
  87. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  88. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  89. examples/warming_up_to_rl/task_app/README.md +42 -0
  90. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  91. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  111. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  112. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  113. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  114. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  115. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  116. synth_ai/__init__.py +44 -30
  117. synth_ai/_utils/__init__.py +47 -0
  118. synth_ai/_utils/base_url.py +10 -0
  119. synth_ai/_utils/http.py +10 -0
  120. synth_ai/_utils/prompts.py +10 -0
  121. synth_ai/_utils/task_app_state.py +12 -0
  122. synth_ai/_utils/user_config.py +10 -0
  123. synth_ai/api/models/supported.py +144 -7
  124. synth_ai/api/train/__init__.py +13 -1
  125. synth_ai/api/train/cli.py +30 -7
  126. synth_ai/api/train/config_finder.py +18 -11
  127. synth_ai/api/train/env_resolver.py +13 -10
  128. synth_ai/cli/__init__.py +62 -78
  129. synth_ai/cli/_modal_wrapper.py +7 -5
  130. synth_ai/cli/_typer_patch.py +0 -2
  131. synth_ai/cli/_validate_task_app.py +22 -4
  132. synth_ai/cli/legacy_root_backup.py +3 -1
  133. synth_ai/cli/lib/__init__.py +10 -0
  134. synth_ai/cli/lib/task_app_discovery.py +7 -0
  135. synth_ai/cli/lib/task_app_env.py +518 -0
  136. synth_ai/cli/recent.py +2 -1
  137. synth_ai/cli/setup.py +266 -0
  138. synth_ai/cli/status.py +1 -1
  139. synth_ai/cli/task_app_deploy.py +16 -0
  140. synth_ai/cli/task_app_list.py +25 -0
  141. synth_ai/cli/task_app_modal_serve.py +16 -0
  142. synth_ai/cli/task_app_serve.py +18 -0
  143. synth_ai/cli/task_apps.py +71 -31
  144. synth_ai/cli/traces.py +1 -1
  145. synth_ai/cli/train.py +18 -0
  146. synth_ai/cli/tui.py +7 -2
  147. synth_ai/cli/turso.py +1 -1
  148. synth_ai/cli/watch.py +1 -1
  149. synth_ai/demos/__init__.py +10 -0
  150. synth_ai/demos/core/__init__.py +28 -1
  151. synth_ai/demos/crafter/__init__.py +1 -0
  152. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  153. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  154. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  155. synth_ai/demos/demo_registry.py +176 -0
  156. synth_ai/demos/math/__init__.py +1 -0
  157. synth_ai/demos/math/_common.py +16 -0
  158. synth_ai/demos/math/app.py +38 -0
  159. synth_ai/demos/math/config.toml +76 -0
  160. synth_ai/demos/math/deploy_modal.py +54 -0
  161. synth_ai/demos/math/modal_task_app.py +702 -0
  162. synth_ai/demos/math/task_app_entry.py +51 -0
  163. synth_ai/environments/environment/core.py +7 -1
  164. synth_ai/environments/examples/bandit/engine.py +0 -1
  165. synth_ai/environments/examples/bandit/environment.py +0 -1
  166. synth_ai/environments/examples/wordle/environment.py +0 -1
  167. synth_ai/evals/base.py +16 -5
  168. synth_ai/evals/client.py +1 -1
  169. synth_ai/inference/client.py +1 -1
  170. synth_ai/judge_schemas.py +8 -8
  171. synth_ai/learning/client.py +1 -1
  172. synth_ai/learning/health.py +1 -1
  173. synth_ai/learning/jobs.py +1 -1
  174. synth_ai/learning/rl/client.py +1 -1
  175. synth_ai/learning/rl/env_keys.py +1 -1
  176. synth_ai/learning/rl/secrets.py +1 -1
  177. synth_ai/learning/sft/client.py +1 -1
  178. synth_ai/learning/sft/data.py +407 -4
  179. synth_ai/learning/validators.py +4 -1
  180. synth_ai/task/apps/__init__.py +4 -2
  181. synth_ai/task/config.py +6 -4
  182. synth_ai/task/rubrics/__init__.py +1 -2
  183. synth_ai/task/rubrics/loaders.py +14 -10
  184. synth_ai/task/rubrics.py +219 -0
  185. synth_ai/task/trace_correlation_helpers.py +24 -11
  186. synth_ai/task/tracing_utils.py +14 -3
  187. synth_ai/task/validators.py +2 -3
  188. synth_ai/tracing_v3/abstractions.py +3 -3
  189. synth_ai/tracing_v3/config.py +15 -13
  190. synth_ai/tracing_v3/constants.py +21 -0
  191. synth_ai/tracing_v3/db_config.py +3 -1
  192. synth_ai/tracing_v3/decorators.py +10 -7
  193. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  194. synth_ai/tracing_v3/session_tracer.py +7 -7
  195. synth_ai/tracing_v3/storage/base.py +29 -29
  196. synth_ai/tracing_v3/storage/config.py +3 -3
  197. synth_ai/tracing_v3/turso/daemon.py +8 -9
  198. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  199. synth_ai/tracing_v3/utils.py +2 -2
  200. synth_ai/tui/cli/query_experiments.py +4 -4
  201. synth_ai/tui/cli/query_experiments_v3.py +4 -4
  202. synth_ai/tui/dashboard.py +14 -9
  203. synth_ai/utils/__init__.py +101 -0
  204. synth_ai/utils/base_url.py +94 -0
  205. synth_ai/utils/cli.py +131 -0
  206. synth_ai/utils/env.py +287 -0
  207. synth_ai/utils/http.py +169 -0
  208. synth_ai/utils/modal.py +308 -0
  209. synth_ai/utils/process.py +212 -0
  210. synth_ai/utils/prompts.py +39 -0
  211. synth_ai/utils/sqld.py +122 -0
  212. synth_ai/utils/task_app_discovery.py +882 -0
  213. synth_ai/utils/task_app_env.py +186 -0
  214. synth_ai/utils/task_app_state.py +318 -0
  215. synth_ai/utils/user_config.py +137 -0
  216. synth_ai/v0/config/__init__.py +1 -5
  217. synth_ai/v0/config/base_url.py +1 -7
  218. synth_ai/v0/tracing/config.py +1 -1
  219. synth_ai/v0/tracing/decorators.py +1 -1
  220. synth_ai/v0/tracing/upload.py +1 -1
  221. synth_ai/v0/tracing_v1/config.py +1 -1
  222. synth_ai/v0/tracing_v1/decorators.py +1 -1
  223. synth_ai/v0/tracing_v1/upload.py +1 -1
  224. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  225. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/RECORD +229 -117
  226. synth_ai/cli/man.py +0 -106
  227. synth_ai/compound/cais.py +0 -0
  228. synth_ai/core/experiment.py +0 -13
  229. synth_ai/core/system.py +0 -15
  230. synth_ai/demo_registry.py +0 -295
  231. synth_ai/handshake.py +0 -109
  232. synth_ai/http.py +0 -26
  233. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  234. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  235. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  236. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,135 @@
1
+ """Compatibility wrapper for the GRPO Crafter task app.
2
+
3
+ This module now delegates to the TaskAppConfig defined in the colocated example at
4
+ `examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
5
+ (running the file directly or targeting `fastapi_app` from external tooling). Prefer using
6
+ `uvx synth-ai serve grpo-crafter` for local development and testing.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ from pathlib import Path
13
+
14
+ from fastapi.exceptions import RequestValidationError
15
+ from fastapi.responses import JSONResponse
16
+ from starlette.requests import Request
17
+ from synth_ai.task.apps import ModalDeploymentConfig, registry
18
+ from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
19
+ from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
20
+
21
+ from .grpo_crafter import build_config
22
+
23
+ APP_ID = "grpo-crafter"
24
+
25
+
26
def _build_base_config() -> TaskAppConfig:
    """Build the base TaskAppConfig by delegating to ``build_config``.

    Construction is deferred to call time so that importing this module
    does not trigger the (potentially heavy) config build.
    """
    base_config = build_config()
    return base_config
29
+
30
+
31
# Deployment metadata for this app. Start from the "registry unavailable"
# defaults and override them only when the registry lookup succeeds.
MODAL_DEPLOYMENT: ModalDeploymentConfig | None = None
ENV_FILES: tuple[str, ...] = ()
try:
    _REGISTERED_ENTRY = registry.get(APP_ID)
except Exception:  # pragma: no cover - registry unavailable in some contexts
    pass
else:
    MODAL_DEPLOYMENT = _REGISTERED_ENTRY.modal
    ENV_FILES = tuple(_REGISTERED_ENTRY.env_files)
39
+
40
+
41
def build_task_app_config() -> TaskAppConfig:
    """Return a fresh TaskAppConfig for this wrapper.

    The base config is built on demand and then cloned, so the caller
    receives the result of ``clone()`` rather than the base object itself.
    """
    return _build_base_config().clone()
45
+
46
+
47
def fastapi_app():
    """Return the FastAPI application for Modal or other ASGI hosts.

    The app is created from a fresh task-app config, then customised:

    * the default ``GET /health`` and ``GET /health/rollout`` routes are
      removed and replaced with variants that answer 200 even when the
      caller's API key header is not authorized ("soft" auth failure);
    * request-validation failures are logged and answered with a 422 whose
      body carries at most the first five validation errors.
    """
    # Function-scope import so the module's top-level import block is untouched.
    from fastapi.encoders import jsonable_encoder

    app = create_task_app(build_task_app_config())

    # Replace default health endpoints so we can permit soft auth failures and log 422s.
    replaced_paths = {"/health", "/health/rollout"}
    app.router.routes = [
        route
        for route in app.router.routes
        if not (
            getattr(route, "path", None) in replaced_paths
            and "GET" in (getattr(route, "methods", set()) or set())
        )
    ]

    def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
        """Print and return the first half (at least one char) of ``env_key``."""
        if not env_key:
            return None
        # NOTE(review): this writes half of the ENVIRONMENT_API_KEY to stdout and
        # the callers below echo it to *unauthorized* clients. Behaviour is kept
        # because external tooling may read the field, but consider shortening
        # or removing the disclosed prefix.
        prefix = env_key[: max(1, len(env_key) // 2)]
        print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
        return prefix

    def _health_response(request: Request, source: str, authorized_payload: dict):
        """Shared body of both health endpoints.

        Returns a 503 when no environment API key is configured, a 200 with
        ``authorized: False`` when the request's key header is not accepted,
        and ``authorized_payload`` otherwise.
        """
        env_key = normalize_environment_api_key()
        if not env_key:
            return JSONResponse(
                status_code=503,
                content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
            )
        if not is_api_key_header_authorized(request):
            prefix = _log_env_key_prefix(source, env_key)
            content = {"status": "healthy", "authorized": False}
            if prefix:
                content["expected_api_key_prefix"] = prefix
            return JSONResponse(status_code=200, content=content)
        return authorized_payload

    @app.get("/health")
    async def health(request: Request):
        return _health_response(request, "health", {"status": "healthy", "authorized": True})

    @app.get("/health/rollout")
    async def health_rollout(request: Request):
        # The authorized payload intentionally differs from /health ("ok" key).
        return _health_response(request, "health/rollout", {"ok": True, "authorized": True})

    @app.exception_handler(RequestValidationError)
    async def _on_validation_error(request: Request, exc: RequestValidationError):
        # Only the first five errors are logged and returned.
        errors = exc.errors()[:5]
        try:
            hdr = request.headers
            snapshot = {
                "path": str(request.url.path),
                "have_x_api_key": bool(hdr.get("x-api-key")),
                "have_x_api_keys": bool(hdr.get("x-api-keys")),
                "have_authorization": bool(hdr.get("authorization")),
                "errors": errors,
            }
            print("[422] validation", snapshot, flush=True)
        except Exception:
            # Logging is best-effort; it must never mask the 422 response.
            pass
        return JSONResponse(
            status_code=422,
            # Validation errors may carry values that json.dumps cannot handle;
            # encode them first so the 422 does not degrade into a 500.
            content={"status": "invalid", "detail": jsonable_encoder(errors)},
        )

    return app
121
+
122
+
123
if __name__ == "__main__":
    # Local entry point: parse the CLI flags, then hand the config factory
    # (not a built config) to run_task_app together with the server options.
    parser = argparse.ArgumentParser(description="Run the Crafter task app locally")
    for flag, options in (
        ("--host", {"default": "0.0.0.0"}),
        ("--port", {"type": int, "default": 8001}),
        ("--reload", {"action": "store_true", "help": "Enable uvicorn autoreload"}),
    ):
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    run_task_app(build_task_app_config, host=args.host, port=args.port, reload=args.reload)
@@ -0,0 +1,173 @@
1
+ # GRPO Synth Envs Hosted Service
2
+
3
+ This service provides hosted environment and policy management for GRPO (Group Relative Policy Optimization) training with synthetic environments.
4
+
5
+ ## Architecture
6
+
7
+ The service implements a FastAPI-based HTTP API that manages:
8
+ - **Environments**: Stateful environment instances (currently Crafter)
9
+ - **Policies**: Thin policy clients that prepare inference requests
10
+ - **Rollouts**: Coordinated execution of environment-policy interaction loops
11
+ - **Snapshots**: State persistence using Modal Volumes
12
+ - **Branching**: Creating multiple copies of environments/policies for exploration
13
+
14
+ ## Key Components
15
+
16
+ ### Core Modules
17
+ - `hosted_app.py`: FastAPI app factory and configuration
18
+ - `registry.py`: In-memory registries for active instances
19
+ - `storage/volume.py`: Modal Volume operations for snapshots
20
+ - `inference/openai_client.py`: OpenAI-compatible inference client
21
+
22
+ ### API Routers
23
+ - `environment_routes.py`: Environment lifecycle endpoints
24
+ - `policy_routes.py`: Policy lifecycle endpoints
25
+ - `rollout.py`: Rollout coordinator and run management
26
+ - `branching.py`: Branching operations
27
+
28
+ ### Environment Implementations
29
+ - `envs/crafter/`: Crafter environment and policy implementations
30
+
31
+ ## API Endpoints
32
+
33
+ ### Service Discovery
34
+ - `GET /info`: Service configuration and endpoints
35
+ - `GET /health`: Health check
36
+
37
+ ### Environment Management
38
+ - `POST /env/create`: Create new environment
39
+ - `POST /env/reset`: Reset environment
40
+ - `POST /env/step`: Execute environment step
41
+ - `POST /env/snapshot`: Save environment state
42
+ - `POST /env/restore`: Restore from snapshot
43
+ - `POST /env/terminate`: Clean up environment
44
+
45
+ ### Policy Management
46
+ - `POST /policy/create`: Create new policy
47
+ - `POST /policy/step`: Generate actions (with optional inference)
48
+ - `POST /policy/snapshot`: Save policy state
49
+ - `POST /policy/restore`: Restore from snapshot
50
+ - `POST /policy/terminate`: Clean up policy
51
+
52
+ ### Coordination
53
+ - `POST /rollout`: Execute coordinated rollout
54
+ - `POST /branch`: Create environment/policy branches
55
+ - `POST /run/abort`: Abort running rollout
56
+ - `GET /run/status/{run_id}`: Check run status
57
+
58
+ ## Local Development
59
+
60
+ ```bash
61
+ # Install dependencies
62
+ pip install fastapi uvicorn httpx pydantic
63
+
64
+ # Run the service
65
+ python main.py
66
+
67
+ # Or with uvicorn directly
68
+ uvicorn main:app --reload --port 8000
69
+ ```
70
+
71
+ ## Modal Deployment
72
+
73
+ ```bash
74
+ # Deploy to Modal
75
+ modal deploy main.py
76
+
77
+ # Run once
78
+ modal run main.py
79
+ ```
80
+
81
+ ## Environment Variables
82
+
83
+ - `SERVICE_BASE_URL`: Base URL for this service (default: http://localhost:8000)
84
+ - `VLLM_BASE_URL`: Base URL for vLLM inference service (default: http://localhost:8001)
85
+ - `DEFAULT_MODEL`: Default model name for inference
86
+
87
+ ## Storage
88
+
89
+ The service uses Modal Volumes for persistent storage:
90
+ - Volume name: `synth-env-state`
91
+ - Mount path: `/data/state`
92
+ - Layout: `/data/state/runs/{rl_run_id}/{kind}/{shard}/{snapshot_id}.tar.gz`
93
+
94
+ ## Example Usage
95
+
96
+ ```python
97
+ import httpx
98
+
99
+ # Create environment
100
+ env_response = httpx.post(
101
+ "http://localhost:8000/env/create",
102
+ json={
103
+ "env_name": "crafter",
104
+ "config": {},
105
+ "seed": 42,
106
+ "rl_run_id": "test-run-1"
107
+ }
108
+ )
109
+ env_id = env_response.json()["env_id"]
110
+
111
+ # Create policy
112
+ policy_response = httpx.post(
113
+ "http://localhost:8000/policy/create",
114
+ json={
115
+ "policy_name": "crafter-react",
116
+ "config": {"inference_url": "http://vllm:8001"},
117
+ "rl_run_id": "test-run-1",
118
+ "bound_env_id": env_id
119
+ }
120
+ )
121
+ policy_id = policy_response.json()["policy_id"]
122
+
123
+ # Execute rollout
124
+ rollout_response = httpx.post(
125
+ "http://localhost:8000/rollout",
126
+ json={
127
+ "run_id": "test-run-1",
128
+ "env": {"env_id": env_id},
129
+ "policy": {"policy_id": policy_id},
130
+ "ops": ["agent", "env"] * 10,
131
+ "on_done": "reset"
132
+ }
133
+ )
134
+ trajectories = rollout_response.json()["trajectories"]
135
+ ```
136
+
137
+ ## Testing
138
+
139
+ The implementation follows the plan outlined in `plan.md` and decisions in `decisions.md`. Key test areas:
140
+ - Environment create/step/reset lifecycle
141
+ - Policy inference request building
142
+ - Snapshot/restore round trips
143
+ - Rollout coordination with abort support
144
+ - Branching operations
145
+
146
+ Sample aggregate results (4b):
147
+ "aggregate": {
148
+ "completed": 20,
149
+ "total": 20,
150
+ "avg_turns": 10.0,
151
+ "avg_achievements": 1.3,
152
+ "achievements_freq": {
153
+ "collect_wood": 9,
154
+ "collect_sapling": 8,
155
+ "collect_drink": 7,
156
+ "place_plant": 2
157
+ }
158
+ }
159
+
160
+
161
+ Sample aggregate results (groq qwen/qwen3-32b):
162
+ ],
163
+ "aggregate": {
164
+ "completed": 20,
165
+ "total": 20,
166
+ "avg_turns": 10.0,
167
+ "avg_achievements": 1.0,
168
+ "achievements_freq": {
169
+ "collect_sapling": 7,
170
+ "collect_wood": 9,
171
+ "collect_drink": 4
172
+ }
173
+ }
@@ -0,0 +1,5 @@
1
+ """GRPO Synth Envs Hosted Service."""
2
+
3
+ from .hosted_app import TaskApp, create_app
4
+
5
+ __all__ = ["create_app", "TaskApp"]
@@ -0,0 +1,143 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+
5
+ from fastapi import APIRouter, HTTPException
6
+ from pydantic import BaseModel
7
+
8
+ from .registry import registry
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ router = APIRouter()
13
+
14
+
15
class BranchRequest(BaseModel):
    # Request body for POST /branch.

    # Ids of the parent environments to branch; None/empty branches no environments.
    env_ids: list[str] | None = None
    # Ids of the parent policies to branch; None/empty branches no policies.
    policy_ids: list[str] | None = None
    # Number of children created for each parent that is found.
    num_children: int = 1
    # Upper bound on num_children; exceeding it is rejected with a 422.
    max_branches: int = 10
20
+
21
+
22
class BranchResponse(BaseModel):
    # Response body for POST /branch.

    # Parent environment id -> ids of the child environments created from it.
    env_branches: dict[str, list[str]]
    # Parent policy id -> ids of the child policies created from it.
    policy_branches: dict[str, list[str]]
25
+
26
+
27
@router.post("/branch", response_model=BranchResponse)
async def create_branches(request: BranchRequest) -> BranchResponse:
    """Create branches of environments and/or policies.

    Every listed parent that exists in the registry is snapshotted and
    restored ``request.num_children`` times; parents that are not found are
    logged and skipped.

    Returns:
        A BranchResponse mapping each branched parent id to its child ids.

    Raises:
        HTTPException: 422 when ``num_children`` exceeds ``max_branches``.
            An HTTPException raised by a nested snapshot/restore handler is
            re-raised unchanged; any other failure is reported as a 500.
    """
    if request.num_children > request.max_branches:
        raise HTTPException(
            status_code=422,
            detail=f"num_children ({request.num_children}) exceeds max_branches ({request.max_branches})",
        )

    env_branches: dict[str, list[str]] = {}
    policy_branches: dict[str, list[str]] = {}

    try:
        # Branch environments
        if request.env_ids:
            # Kept as a function-scope import, as in the original code
            # (presumably to avoid a circular import -- TODO confirm), but
            # executed once instead of on every loop iteration.
            from .environment_routes import (
                EnvRestoreRequest,
                EnvSnapshotRequest,
                restore_environment,
                snapshot_environment,
            )

            for env_id in request.env_ids:
                if not registry.get_env(env_id):
                    logger.warning("Environment %s not found, skipping", env_id)
                    continue

                env_child_ids: list[str] = []
                for child_idx in range(request.num_children):
                    # Create snapshot of parent
                    snapshot_response = await snapshot_environment(
                        EnvSnapshotRequest(env_id=env_id)
                    )

                    # Restore to new environment with modified seed
                    restore_response = await restore_environment(
                        EnvRestoreRequest(snapshot_id=snapshot_response.snapshot_id)
                    )
                    child_id = restore_response.env_id
                    child_handle = registry.get_env(child_id)

                    # Give each child a distinct seed derived from the restored one
                    if child_handle and child_handle.seed is not None:
                        child_handle.seed = child_handle.seed + child_idx + 1
                        child_handle.env.seed = child_handle.seed

                    env_child_ids.append(child_id)

                    # Track parent relationship in snapshot metadata
                    snapshot_meta = registry.get_snapshot(snapshot_response.snapshot_id)
                    if snapshot_meta:
                        snapshot_meta.parent_snapshot_id = env_id

                env_branches[env_id] = env_child_ids

        # Branch policies
        if request.policy_ids:
            from .policy_routes import (
                PolicyRestoreRequest,
                PolicySnapshotRequest,
                restore_policy,
                snapshot_policy,
            )

            for policy_id in request.policy_ids:
                policy_handle = registry.get_policy(policy_id)
                if not policy_handle:
                    logger.warning("Policy %s not found, skipping", policy_id)
                    continue

                policy_child_ids: list[str] = []
                for child_idx in range(request.num_children):
                    # Create snapshot of parent
                    snapshot_response = await snapshot_policy(
                        PolicySnapshotRequest(policy_id=policy_id)
                    )

                    # Restore to new policy
                    restore_response = await restore_policy(
                        PolicyRestoreRequest(snapshot_id=snapshot_response.snapshot_id)
                    )
                    child_id = restore_response.policy_id
                    policy_child_ids.append(child_id)

                    # Copy bound environment if parent had one
                    child_handle = registry.get_policy(child_id)
                    if child_handle and policy_handle.bound_env_id:
                        if policy_handle.bound_env_id in env_branches:
                            # The bound env was branched too: pair the i-th
                            # policy child with the i-th env child when it exists.
                            child_envs = env_branches[policy_handle.bound_env_id]
                            if child_idx < len(child_envs):
                                child_handle.bound_env_id = child_envs[child_idx]
                        else:
                            # Otherwise keep same env binding
                            child_handle.bound_env_id = policy_handle.bound_env_id

                    # Track parent relationship
                    snapshot_meta = registry.get_snapshot(snapshot_response.snapshot_id)
                    if snapshot_meta:
                        snapshot_meta.parent_snapshot_id = policy_id

                policy_branches[policy_id] = policy_child_ids

        return BranchResponse(
            env_branches=env_branches,
            policy_branches=policy_branches,
        )

    except HTTPException:
        # Keep the status code chosen by a nested handler instead of
        # flattening it into a generic 500 below.
        raise
    except Exception as e:
        logger.exception("Failed to create branches: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e