synth-ai 0.2.9.dev3__py3-none-any.whl → 0.2.9.dev5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (107) hide show
  1. examples/analyze_semantic_words.sh +17 -0
  2. examples/common_old/backend.py +21 -0
  3. examples/crafter_debug_render.py +180 -0
  4. examples/evals_old/README.md +98 -0
  5. examples/evals_old/__init__.py +6 -0
  6. examples/evals_old/compare_models.py +1037 -0
  7. examples/evals_old/example_log.md +145 -0
  8. examples/evals_old/run_demo.sh +126 -0
  9. examples/evals_old/trace_analysis.py +270 -0
  10. examples/finetuning_old/_backup_synth_qwen/config.toml +29 -0
  11. examples/finetuning_old/_backup_synth_qwen/example_log.md +324 -0
  12. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +60 -0
  13. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +239 -0
  14. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +109 -0
  15. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +1924 -0
  16. examples/finetuning_old/_backup_synth_qwen/readme.md +49 -0
  17. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +114 -0
  18. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +195 -0
  19. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +118 -0
  20. examples/finetuning_old/synth_qwen_v1/README.md +68 -0
  21. examples/finetuning_old/synth_qwen_v1/filter_traces.py +60 -0
  22. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +239 -0
  23. examples/finetuning_old/synth_qwen_v1/finetune.py +46 -0
  24. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +71 -0
  25. examples/finetuning_old/synth_qwen_v1/infer.py +37 -0
  26. examples/finetuning_old/synth_qwen_v1/poll.py +44 -0
  27. examples/finetuning_old/synth_qwen_v1/prepare_data.py +35 -0
  28. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +109 -0
  29. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +1932 -0
  30. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +207 -0
  31. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +232 -0
  32. examples/finetuning_old/synth_qwen_v1/upload_data.py +34 -0
  33. examples/finetuning_old/synth_qwen_v1/util.py +147 -0
  34. examples/rl/README.md +169 -0
  35. examples/rl/configs/eval_base_qwen.toml +15 -0
  36. examples/rl/configs/eval_rl_qwen.toml +11 -0
  37. examples/rl/configs/rl_from_base_qwen.toml +35 -0
  38. examples/rl/configs/rl_from_base_qwen17.toml +74 -0
  39. examples/rl/configs/rl_from_ft_qwen.toml +35 -0
  40. examples/rl/download_dataset.py +64 -0
  41. examples/rl/run_eval.py +435 -0
  42. examples/rl/run_rl_and_save.py +94 -0
  43. examples/rl/task_app/README.md +22 -0
  44. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +8 -8
  45. examples/rl/task_app/math_task_app.py +107 -0
  46. examples/rl_old/task_app.py +962 -0
  47. examples/run_crafter_demo.sh +10 -0
  48. examples/warming_up_to_rl/analyze_trace_db.py +420 -0
  49. examples/warming_up_to_rl/configs/crafter_fft.toml +48 -0
  50. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  51. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +20 -0
  52. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +13 -0
  53. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +23 -0
  54. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +73 -0
  55. examples/warming_up_to_rl/configs/rl_from_ft.toml +56 -0
  56. examples/warming_up_to_rl/export_trace_sft.py +541 -0
  57. examples/warming_up_to_rl/groq_test.py +88 -0
  58. examples/warming_up_to_rl/manage_secrets.py +127 -0
  59. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  60. examples/warming_up_to_rl/old/notes.md +73 -0
  61. examples/warming_up_to_rl/readme.md +172 -0
  62. examples/warming_up_to_rl/run_eval.py +434 -0
  63. examples/warming_up_to_rl/run_fft_and_save.py +309 -0
  64. examples/warming_up_to_rl/run_local_rollout.py +188 -0
  65. examples/warming_up_to_rl/run_local_rollout_modal.py +160 -0
  66. examples/warming_up_to_rl/run_local_rollout_parallel.py +342 -0
  67. examples/warming_up_to_rl/run_local_rollout_traced.py +372 -0
  68. examples/warming_up_to_rl/run_rl_and_save.py +101 -0
  69. examples/warming_up_to_rl/run_rollout_remote.py +129 -0
  70. examples/warming_up_to_rl/task_app/README.md +38 -0
  71. {synth_ai/task/apps → examples/warming_up_to_rl/task_app}/grpo_crafter.py +7 -7
  72. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +165 -0
  73. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  74. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  75. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +145 -0
  76. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1271 -0
  77. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  78. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +429 -0
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +442 -0
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +96 -0
  83. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +302 -0
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +202 -0
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +512 -0
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +102 -0
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +985 -0
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +197 -0
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1749 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +217 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +160 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +146 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +61 -0
  97. synth_ai/api/train/config_finder.py +18 -18
  98. synth_ai/api/train/env_resolver.py +28 -1
  99. synth_ai/cli/task_apps.py +291 -56
  100. synth_ai/task/apps/__init__.py +54 -13
  101. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/METADATA +1 -1
  102. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/RECORD +106 -13
  103. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/top_level.txt +1 -0
  104. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  105. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/WHEEL +0 -0
  106. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/entry_points.txt +0 -0
  107. {synth_ai-0.2.9.dev3.dist-info → synth_ai-0.2.9.dev5.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,165 @@
1
+
2
+ """Compatibility wrapper for the GRPO Crafter task app.
3
+
4
+ This module now delegates to the shared TaskAppConfig defined in
5
+ the sibling `grpo_crafter` module (imported as `.grpo_crafter`). It is kept for legacy usage (running the
6
+ file directly or targeting `fastapi_app` from external tooling). Prefer using
7
+ `uvx synth-ai serve grpo-crafter` for local development and testing.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ from pathlib import Path
14
+
15
+ from fastapi.exceptions import RequestValidationError
16
+ from fastapi.responses import JSONResponse
17
+ from starlette.requests import Request
18
+
19
+ from synth_ai.task.apps import ModalDeploymentConfig, registry
20
+ from .grpo_crafter import build_config
21
+ from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
22
+ from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
23
+
24
+
25
# Identifier of this task app. It is used both for the config below and for
# the registry lookup performed later in this module, so it is defined once.
APP_ID = "grpo-crafter"


# Build the base config once at import time, then copy its fields into a
# TaskAppConfig owned by this wrapper module.
_BASE_CONFIG = build_config()
TASK_APP_CONFIG = TaskAppConfig(
    # Use the shared constant instead of repeating the literal so the app id
    # and the registry key cannot drift apart.
    app_id=APP_ID,
    name=_BASE_CONFIG.name,
    description=_BASE_CONFIG.description,
    base_task_info=_BASE_CONFIG.base_task_info,
    describe_taskset=_BASE_CONFIG.describe_taskset,
    provide_task_instances=_BASE_CONFIG.provide_task_instances,
    rollout=_BASE_CONFIG.rollout,
    dataset_registry=_BASE_CONFIG.dataset_registry,
    rubrics=_BASE_CONFIG.rubrics,
    proxy=_BASE_CONFIG.proxy,
    routers=_BASE_CONFIG.routers,
    middleware=_BASE_CONFIG.middleware,
    app_state=_BASE_CONFIG.app_state,
    require_api_key=_BASE_CONFIG.require_api_key,
    expose_debug_env=_BASE_CONFIG.expose_debug_env,
    cors_origins=_BASE_CONFIG.cors_origins,
    startup_hooks=_BASE_CONFIG.startup_hooks,
    shutdown_hooks=_BASE_CONFIG.shutdown_hooks,
)
49
+
50
# Resolve the Modal deployment settings and env files from the registry entry
# for APP_ID. When the lookup raises, fall back to "no deployment" and an
# empty tuple of env files.
MODAL_DEPLOYMENT: ModalDeploymentConfig | None
ENV_FILES: tuple[str, ...]
try:
    _REGISTERED_ENTRY = registry.get(APP_ID)
except Exception:  # pragma: no cover - registry unavailable in some contexts
    MODAL_DEPLOYMENT, ENV_FILES = None, ()
else:
    MODAL_DEPLOYMENT, ENV_FILES = (
        _REGISTERED_ENTRY.modal,
        tuple(_REGISTERED_ENTRY.env_files),
    )
58
+
59
+
60
def build_task_app_config() -> TaskAppConfig:
    """Return a clone of the module-level ``TASK_APP_CONFIG``.

    Each call invokes ``TASK_APP_CONFIG.clone()`` and hands back its result,
    so callers do not receive the module-level object itself.
    """
    fresh_config = TASK_APP_CONFIG.clone()
    return fresh_config
64
+
65
+
66
def fastapi_app():
    """Return the FastAPI application for Modal or other ASGI hosts.

    The app is created from :func:`build_task_app_config` and then adjusted:

    * Any existing ``GET /health`` and ``GET /health/rollout`` routes are
      removed and replaced with variants that
      - answer HTTP 503 when ``normalize_environment_api_key()`` returns
        nothing, and
      - answer HTTP 200 with ``"authorized": False`` (a "soft" auth failure)
        when the API-key header check fails.
    * ``RequestValidationError`` is handled explicitly: a small snapshot of
      the request (path, which auth headers are present, first five errors)
      is printed, and a 422 response carrying the first five errors is
      returned.

    Returns:
        The configured FastAPI application instance.
    """
    # Local imports: these names are not among the module's top-level imports.
    import logging

    from fastapi.encoders import jsonable_encoder

    app = create_task_app(build_task_app_config())

    # Replace default health endpoints so we can permit soft auth failures and log 422s.
    replaced_paths = {"/health", "/health/rollout"}
    app.router.routes = [
        route
        for route in app.router.routes
        if not (
            getattr(route, "path", None) in replaced_paths
            and "GET" in (getattr(route, "methods", set()) or set())
        )
    ]

    def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
        """Print and return the first half (at least one char) of ``env_key``."""
        if not env_key:
            return None
        prefix = env_key[: max(1, len(env_key) // 2)]
        print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
        return prefix

    def _health_response(source: str, request: Request, authorized_payload: dict):
        """Shared body of both health endpoints.

        ``source`` labels the log line; ``authorized_payload`` is what the
        endpoint returns when the request carries an accepted API key.
        """
        env_key = normalize_environment_api_key()
        if not env_key:
            return JSONResponse(
                status_code=503,
                content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
            )
        if is_api_key_header_authorized(request):
            return dict(authorized_payload)

        # NOTE(review): this returns half of the expected API key to a caller
        # that failed authorization. Kept for backward compatibility with
        # existing diagnostics, but it discloses secret material -- confirm
        # this is acceptable for every deployment of this app.
        prefix = _log_env_key_prefix(source, env_key)
        content = {"status": "healthy", "authorized": False}
        if prefix:
            content["expected_api_key_prefix"] = prefix
        return JSONResponse(status_code=200, content=content)

    @app.get("/health")
    async def health(request: Request):
        return _health_response(
            "health", request, {"status": "healthy", "authorized": True}
        )

    @app.get("/health/rollout")
    async def health_rollout(request: Request):
        return _health_response(
            "health/rollout", request, {"ok": True, "authorized": True}
        )

    @app.exception_handler(RequestValidationError)
    async def _on_validation_error(request: Request, exc: RequestValidationError):
        # Error entries may contain values that plain ``json.dumps`` cannot
        # serialise; encode them the way FastAPI's default handler does.
        errors = jsonable_encoder(exc.errors()[:5])
        try:
            hdr = request.headers
            snapshot = {
                "path": str(request.url.path),
                "have_x_api_key": bool(hdr.get("x-api-key")),
                "have_x_api_keys": bool(hdr.get("x-api-keys")),
                "have_authorization": bool(hdr.get("authorization")),
                "errors": errors,
            }
            print("[422] validation", snapshot, flush=True)
        except Exception:
            # Diagnostics are best-effort and must never mask the 422 itself,
            # but leave a trace instead of swallowing the failure silently.
            logging.getLogger(__name__).debug(
                "failed to log 422 validation snapshot", exc_info=True
            )
        return JSONResponse(
            status_code=422,
            content={"status": "invalid", "detail": errors},
        )

    return app
140
+
141
+
142
if __name__ == "__main__":
    # NOTE(review): this module uses a relative import (``from .grpo_crafter
    # import build_config``), so it presumably has to be launched as a module
    # inside its package (``python -m ...``) rather than as a bare file path.
    parser = argparse.ArgumentParser(description="Run the Crafter task app locally")
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument("--port", type=int, default=8001)
    parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
    parser.add_argument(
        "--env-file",
        action="append",
        default=[],
        help="Additional .env files to load before startup",
    )
    args = parser.parse_args()

    # Load <fifth ancestor directory>/backend/.env.dev first when it exists.
    # ``parents[4]`` raises IndexError if this file sits fewer than five
    # levels below the filesystem root; treat that as "no default env file".
    try:
        default_env = Path(__file__).resolve().parents[4] / "backend" / ".env.dev"
    except IndexError:
        default_env = None
    env_files = (
        [str(default_env)] if default_env is not None and default_env.exists() else []
    )
    # Files passed via --env-file come after the default one.
    env_files.extend(args.env_file or [])

    run_task_app(
        build_task_app_config,
        host=args.host,
        port=args.port,
        reload=args.reload,
        env_files=env_files,
    )
@@ -0,0 +1,173 @@
1
+ # GRPO Synth Envs Hosted Service
2
+
3
+ This service provides hosted environment and policy management for GRPO (Group Relative Policy Optimization) training with synthetic environments.
4
+
5
+ ## Architecture
6
+
7
+ The service implements a FastAPI-based HTTP API that manages:
8
+ - **Environments**: Stateful environment instances (currently Crafter)
9
+ - **Policies**: Thin policy clients that prepare inference requests
10
+ - **Rollouts**: Coordinated execution of environment-policy interaction loops
11
+ - **Snapshots**: State persistence using Modal Volumes
12
+ - **Branching**: Creating multiple copies of environments/policies for exploration
13
+
14
+ ## Key Components
15
+
16
+ ### Core Modules
17
+ - `hosted_app.py`: FastAPI app factory and configuration
18
+ - `registry.py`: In-memory registries for active instances
19
+ - `storage/volume.py`: Modal Volume operations for snapshots
20
+ - `inference/openai_client.py`: OpenAI-compatible inference client
21
+
22
+ ### API Routers
23
+ - `environment_routes.py`: Environment lifecycle endpoints
24
+ - `policy_routes.py`: Policy lifecycle endpoints
25
+ - `rollout.py`: Rollout coordinator and run management
26
+ - `branching.py`: Branching operations
27
+
28
+ ### Environment Implementations
29
+ - `envs/crafter/`: Crafter environment and policy implementations
30
+
31
+ ## API Endpoints
32
+
33
+ ### Service Discovery
34
+ - `GET /info`: Service configuration and endpoints
35
+ - `GET /health`: Health check
36
+
37
+ ### Environment Management
38
+ - `POST /env/create`: Create new environment
39
+ - `POST /env/reset`: Reset environment
40
+ - `POST /env/step`: Execute environment step
41
+ - `POST /env/snapshot`: Save environment state
42
+ - `POST /env/restore`: Restore from snapshot
43
+ - `POST /env/terminate`: Clean up environment
44
+
45
+ ### Policy Management
46
+ - `POST /policy/create`: Create new policy
47
+ - `POST /policy/step`: Generate actions (with optional inference)
48
+ - `POST /policy/snapshot`: Save policy state
49
+ - `POST /policy/restore`: Restore from snapshot
50
+ - `POST /policy/terminate`: Clean up policy
51
+
52
+ ### Coordination
53
+ - `POST /rollout`: Execute coordinated rollout
54
+ - `POST /branch`: Create environment/policy branches
55
+ - `POST /run/abort`: Abort running rollout
56
+ - `GET /run/status/{run_id}`: Check run status
57
+
58
+ ## Local Development
59
+
60
+ ```bash
61
+ # Install dependencies
62
+ pip install fastapi uvicorn httpx pydantic
63
+
64
+ # Run the service
65
+ python main.py
66
+
67
+ # Or with uvicorn directly
68
+ uvicorn main:app --reload --port 8000
69
+ ```
70
+
71
+ ## Modal Deployment
72
+
73
+ ```bash
74
+ # Deploy to Modal
75
+ modal deploy main.py
76
+
77
+ # Run once
78
+ modal run main.py
79
+ ```
80
+
81
+ ## Environment Variables
82
+
83
+ - `SERVICE_BASE_URL`: Base URL for this service (default: http://localhost:8000)
84
+ - `VLLM_BASE_URL`: Base URL for vLLM inference service (default: http://localhost:8001)
85
+ - `DEFAULT_MODEL`: Default model name for inference
86
+
87
+ ## Storage
88
+
89
+ The service uses Modal Volumes for persistent storage:
90
+ - Volume name: `synth-env-state`
91
+ - Mount path: `/data/state`
92
+ - Layout: `/data/state/runs/{rl_run_id}/{kind}/{shard}/{snapshot_id}.tar.gz`
93
+
94
+ ## Example Usage
95
+
96
+ ```python
97
+ import httpx
98
+
99
+ # Create environment
100
+ env_response = httpx.post(
101
+ "http://localhost:8000/env/create",
102
+ json={
103
+ "env_name": "crafter",
104
+ "config": {},
105
+ "seed": 42,
106
+ "rl_run_id": "test-run-1"
107
+ }
108
+ )
109
+ env_id = env_response.json()["env_id"]
110
+
111
+ # Create policy
112
+ policy_response = httpx.post(
113
+ "http://localhost:8000/policy/create",
114
+ json={
115
+ "policy_name": "crafter-react",
116
+ "config": {"inference_url": "http://vllm:8001"},
117
+ "rl_run_id": "test-run-1",
118
+ "bound_env_id": env_id
119
+ }
120
+ )
121
+ policy_id = policy_response.json()["policy_id"]
122
+
123
+ # Execute rollout
124
+ rollout_response = httpx.post(
125
+ "http://localhost:8000/rollout",
126
+ json={
127
+ "run_id": "test-run-1",
128
+ "env": {"env_id": env_id},
129
+ "policy": {"policy_id": policy_id},
130
+ "ops": ["agent", "env"] * 10,
131
+ "on_done": "reset"
132
+ }
133
+ )
134
+ trajectories = rollout_response.json()["trajectories"]
135
+ ```
136
+
137
+ ## Testing
138
+
139
+ The implementation follows the plan outlined in `plan.md` and decisions in `decisions.md`. Key test areas:
140
+ - Environment create/step/reset lifecycle
141
+ - Policy inference request building
142
+ - Snapshot/restore round trips
143
+ - Rollout coordination with abort support
144
+ - Branching operations
145
+
146
+ ### Sample evaluation results: 4b model (raw `aggregate` output)
147
+ "aggregate": {
148
+ "completed": 20,
149
+ "total": 20,
150
+ "avg_turns": 10.0,
151
+ "avg_achievements": 1.3,
152
+ "achievements_freq": {
153
+ "collect_wood": 9,
154
+ "collect_sapling": 8,
155
+ "collect_drink": 7,
156
+ "place_plant": 2
157
+ }
158
+ }
159
+
160
+
161
+ ### Sample evaluation results: groq qwen/qwen3-32b (raw `aggregate` output)
162
+ ],
163
+ "aggregate": {
164
+ "completed": 20,
165
+ "total": 20,
166
+ "avg_turns": 10.0,
167
+ "avg_achievements": 1.0,
168
+ "achievements_freq": {
169
+ "collect_sapling": 7,
170
+ "collect_wood": 9,
171
+ "collect_drink": 4
172
+ }
173
+ }
@@ -0,0 +1,5 @@
1
+ """GRPO Synth Envs Hosted Service."""
2
+
3
+ from .hosted_app import create_app, TaskApp
4
+
5
+ __all__ = ["create_app", "TaskApp"]
@@ -0,0 +1,145 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Dict, List, Optional
5
+
6
+ from fastapi import APIRouter, HTTPException
7
+ from pydantic import BaseModel
8
+
9
+ from .registry import registry
10
+ from .storage.volume import storage
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ router = APIRouter()
15
+
16
+
17
# Request body accepted by ``POST /branch``.
class BranchRequest(BaseModel):
    # IDs of the environments to branch; nothing is branched when omitted.
    env_ids: Optional[List[str]] = None
    # IDs of the policies to branch; nothing is branched when omitted.
    policy_ids: Optional[List[str]] = None
    # How many children to create for each listed parent.
    num_children: int = 1
    # Upper bound that ``num_children`` is checked against by the endpoint.
    max_branches: int = 10
22
+
23
+
24
# Response body returned by ``POST /branch``.
class BranchResponse(BaseModel):
    # Parent environment id -> ids of the children created from it.
    env_branches: Dict[str, List[str]]
    # Parent policy id -> ids of the children created from it.
    policy_branches: Dict[str, List[str]]
27
+
28
+
29
async def _branch_environment(env_id: str, num_children: int) -> List[str]:
    """Create ``num_children`` copies of environment ``env_id``; return their ids."""
    # Imported lazily, as the original code did (presumably to avoid an
    # import cycle with the route modules -- TODO confirm). Hoisted out of
    # the loop so the import statement runs once per call.
    from .environment_routes import (
        EnvRestoreRequest,
        EnvSnapshotRequest,
        restore_environment,
        snapshot_environment,
    )

    child_ids: List[str] = []
    for child_idx in range(num_children):
        # Snapshot the parent, then restore that snapshot into a new environment.
        snapshot_response = await snapshot_environment(EnvSnapshotRequest(env_id=env_id))
        restore_response = await restore_environment(
            EnvRestoreRequest(snapshot_id=snapshot_response.snapshot_id)
        )

        child_id = restore_response.env_id
        child_handle = registry.get_env(child_id)

        # Give each child its own seed, offset from the restored seed by its
        # 1-based position among the siblings.
        if child_handle and child_handle.seed is not None:
            child_handle.seed = child_handle.seed + child_idx + 1
            child_handle.env.seed = child_handle.seed

        child_ids.append(child_id)

        # Track parent relationship in snapshot metadata.
        snapshot_meta = registry.get_snapshot(snapshot_response.snapshot_id)
        if snapshot_meta:
            snapshot_meta.parent_snapshot_id = env_id

    return child_ids


async def _branch_policy(
    policy_id: str,
    policy_handle,
    num_children: int,
    env_branches: Dict[str, List[str]],
) -> List[str]:
    """Create ``num_children`` copies of policy ``policy_id``; return their ids.

    ``env_branches`` maps already-branched parent environment ids to their
    children and is used to re-bind each child policy (see below).
    """
    # Lazy import kept from the original code; hoisted out of the loop.
    from .policy_routes import (
        PolicyRestoreRequest,
        PolicySnapshotRequest,
        restore_policy,
        snapshot_policy,
    )

    parent_env_id = policy_handle.bound_env_id
    child_ids: List[str] = []
    for child_idx in range(num_children):
        # Snapshot the parent, then restore that snapshot into a new policy.
        snapshot_response = await snapshot_policy(PolicySnapshotRequest(policy_id=policy_id))
        restore_response = await restore_policy(
            PolicyRestoreRequest(snapshot_id=snapshot_response.snapshot_id)
        )

        child_id = restore_response.policy_id
        child_ids.append(child_id)

        # Copy bound environment if parent had one.
        child_handle = registry.get_policy(child_id)
        if child_handle and parent_env_id:
            if parent_env_id in env_branches:
                # The bound env was branched in this request: pair the i-th
                # policy child with the i-th env child when one exists.
                child_envs = env_branches[parent_env_id]
                if child_idx < len(child_envs):
                    child_handle.bound_env_id = child_envs[child_idx]
            else:
                # Otherwise keep same env binding.
                child_handle.bound_env_id = parent_env_id

        # Track parent relationship in snapshot metadata.
        snapshot_meta = registry.get_snapshot(snapshot_response.snapshot_id)
        if snapshot_meta:
            snapshot_meta.parent_snapshot_id = policy_id

    return child_ids


@router.post("/branch", response_model=BranchResponse)
async def create_branches(request: BranchRequest) -> BranchResponse:
    """Create branches of environments and/or policies.

    Every id in ``request.env_ids`` / ``request.policy_ids`` that is known to
    the registry is snapshotted and restored ``request.num_children`` times.
    Unknown ids are logged and skipped. Environments are processed before
    policies so that child policies can be bound to the matching child
    environment.

    Returns:
        A ``BranchResponse`` mapping each branched parent id to its child ids.

    Raises:
        HTTPException: 422 when ``num_children`` exceeds ``max_branches``;
            any ``HTTPException`` raised while snapshotting or restoring is
            passed through unchanged; any other failure becomes a 500.
    """
    if request.num_children > request.max_branches:
        raise HTTPException(
            status_code=422,
            detail=f"num_children ({request.num_children}) exceeds max_branches ({request.max_branches})",
        )

    env_branches: Dict[str, List[str]] = {}
    policy_branches: Dict[str, List[str]] = {}

    try:
        # Branch environments
        for env_id in request.env_ids or []:
            if not registry.get_env(env_id):
                logger.warning("Environment %s not found, skipping", env_id)
                continue
            env_branches[env_id] = await _branch_environment(env_id, request.num_children)

        # Branch policies
        for policy_id in request.policy_ids or []:
            policy_handle = registry.get_policy(policy_id)
            if not policy_handle:
                logger.warning("Policy %s not found, skipping", policy_id)
                continue
            policy_branches[policy_id] = await _branch_policy(
                policy_id, policy_handle, request.num_children, env_branches
            )

        return BranchResponse(
            env_branches=env_branches,
            policy_branches=policy_branches,
        )

    except HTTPException:
        # Keep the status code chosen by the snapshot/restore handlers rather
        # than rewriting every such error as a 500.
        raise
    except Exception as e:
        logger.exception("Failed to create branches: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e