PyPI - synth-ai - Versions diffs - 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl - Mend

synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (236) hide show

examples/README.md +1 -0
examples/multi_step/SFT_README.md +147 -0
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +9 -9
examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
examples/multi_step/convert_traces_to_sft.py +84 -0
examples/multi_step/run_sft_qwen30b.sh +45 -0
examples/qwen_coder/configs/coder_lora_30b.toml +2 -1
examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
examples/qwen_coder/configs/coder_lora_small.toml +2 -1
examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
examples/qwen_vl/QUICKSTART.md +327 -0
examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
examples/qwen_vl/README.md +154 -0
examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
examples/qwen_vl/RL_VISION_TESTING.md +333 -0
examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
examples/qwen_vl/SETUP_COMPLETE.md +275 -0
examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
examples/qwen_vl/__init__.py +2 -0
examples/qwen_vl/collect_data_via_cli.md +423 -0
examples/qwen_vl/collect_vision_traces.py +368 -0
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
examples/qwen_vl/configs/filter_vision_test.toml +8 -0
examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
examples/qwen_vl/run_vision_comparison.sh +62 -0
examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
examples/qwen_vl/test_image_validation.py +201 -0
examples/qwen_vl/test_sft_vision_data.py +110 -0
examples/rl/README.md +1 -1
examples/rl/configs/eval_base_qwen.toml +17 -0
examples/rl/configs/eval_rl_qwen.toml +13 -0
examples/rl/configs/rl_from_base_qwen.toml +37 -0
examples/rl/configs/rl_from_base_qwen17.toml +76 -0
examples/rl/configs/rl_from_ft_qwen.toml +37 -0
examples/rl/run_eval.py +436 -0
examples/rl/run_rl_and_save.py +111 -0
examples/rl/task_app/README.md +22 -0
examples/rl/task_app/math_single_step.py +990 -0
examples/rl/task_app/math_task_app.py +111 -0
examples/sft/README.md +5 -5
examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
examples/sft/evaluate.py +2 -4
examples/sft/export_dataset.py +7 -4
examples/swe/task_app/README.md +1 -1
examples/swe/task_app/grpo_swe_mini.py +0 -1
examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
examples/swe/task_app/hosted/policy_routes.py +0 -2
examples/swe/task_app/hosted/rollout.py +0 -8
examples/task_apps/crafter/task_app/grpo_crafter.py +4 -7
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +59 -1
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +30 -0
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +62 -31
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +16 -14
examples/task_apps/enron/__init__.py +1 -0
examples/vlm/README.md +3 -3
examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
examples/vlm/crafter_openai_vlm_agent.py +3 -5
examples/vlm/filter_image_rows.py +1 -1
examples/vlm/run_crafter_vlm_benchmark.py +2 -2
examples/warming_up_to_rl/_utils.py +92 -0
examples/warming_up_to_rl/analyze_trace_db.py +1 -1
examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
examples/warming_up_to_rl/export_trace_sft.py +174 -60
examples/warming_up_to_rl/readme.md +63 -132
examples/warming_up_to_rl/run_fft_and_save.py +1 -1
examples/warming_up_to_rl/run_rl_and_save.py +1 -1
examples/warming_up_to_rl/task_app/README.md +42 -0
examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
synth_ai/__init__.py +44 -30
synth_ai/_utils/__init__.py +47 -0
synth_ai/_utils/base_url.py +10 -0
synth_ai/_utils/http.py +10 -0
synth_ai/_utils/prompts.py +10 -0
synth_ai/_utils/task_app_state.py +12 -0
synth_ai/_utils/user_config.py +10 -0
synth_ai/api/models/supported.py +144 -7
synth_ai/api/train/__init__.py +13 -1
synth_ai/api/train/cli.py +30 -7
synth_ai/api/train/config_finder.py +18 -11
synth_ai/api/train/env_resolver.py +13 -10
synth_ai/cli/__init__.py +62 -78
synth_ai/cli/_modal_wrapper.py +7 -5
synth_ai/cli/_typer_patch.py +0 -2
synth_ai/cli/_validate_task_app.py +22 -4
synth_ai/cli/legacy_root_backup.py +3 -1
synth_ai/cli/lib/__init__.py +10 -0
synth_ai/cli/lib/task_app_discovery.py +7 -0
synth_ai/cli/lib/task_app_env.py +518 -0
synth_ai/cli/recent.py +2 -1
synth_ai/cli/setup.py +266 -0
synth_ai/cli/status.py +1 -1
synth_ai/cli/task_app_deploy.py +16 -0
synth_ai/cli/task_app_list.py +25 -0
synth_ai/cli/task_app_modal_serve.py +16 -0
synth_ai/cli/task_app_serve.py +18 -0
synth_ai/cli/task_apps.py +71 -31
synth_ai/cli/traces.py +1 -1
synth_ai/cli/train.py +18 -0
synth_ai/cli/tui.py +7 -2
synth_ai/cli/turso.py +1 -1
synth_ai/cli/watch.py +1 -1
synth_ai/demos/__init__.py +10 -0
synth_ai/demos/core/__init__.py +28 -1
synth_ai/demos/crafter/__init__.py +1 -0
synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
synth_ai/demos/demo_registry.py +176 -0
synth_ai/demos/math/__init__.py +1 -0
synth_ai/demos/math/_common.py +16 -0
synth_ai/demos/math/app.py +38 -0
synth_ai/demos/math/config.toml +76 -0
synth_ai/demos/math/deploy_modal.py +54 -0
synth_ai/demos/math/modal_task_app.py +702 -0
synth_ai/demos/math/task_app_entry.py +51 -0
synth_ai/environments/environment/core.py +7 -1
synth_ai/environments/examples/bandit/engine.py +0 -1
synth_ai/environments/examples/bandit/environment.py +0 -1
synth_ai/environments/examples/wordle/environment.py +0 -1
synth_ai/evals/base.py +16 -5
synth_ai/evals/client.py +1 -1
synth_ai/inference/client.py +1 -1
synth_ai/judge_schemas.py +8 -8
synth_ai/learning/client.py +1 -1
synth_ai/learning/health.py +1 -1
synth_ai/learning/jobs.py +1 -1
synth_ai/learning/rl/client.py +1 -1
synth_ai/learning/rl/env_keys.py +1 -1
synth_ai/learning/rl/secrets.py +1 -1
synth_ai/learning/sft/client.py +1 -1
synth_ai/learning/sft/data.py +407 -4
synth_ai/learning/validators.py +4 -1
synth_ai/task/apps/__init__.py +4 -2
synth_ai/task/config.py +6 -4
synth_ai/task/rubrics/__init__.py +1 -2
synth_ai/task/rubrics/loaders.py +14 -10
synth_ai/task/rubrics.py +219 -0
synth_ai/task/trace_correlation_helpers.py +24 -11
synth_ai/task/tracing_utils.py +14 -3
synth_ai/task/validators.py +2 -3
synth_ai/tracing_v3/abstractions.py +3 -3
synth_ai/tracing_v3/config.py +15 -13
synth_ai/tracing_v3/constants.py +21 -0
synth_ai/tracing_v3/db_config.py +3 -1
synth_ai/tracing_v3/decorators.py +10 -7
synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
synth_ai/tracing_v3/session_tracer.py +7 -7
synth_ai/tracing_v3/storage/base.py +29 -29
synth_ai/tracing_v3/storage/config.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +8 -9
synth_ai/tracing_v3/turso/native_manager.py +80 -72
synth_ai/tracing_v3/utils.py +2 -2
synth_ai/tui/cli/query_experiments.py +4 -4
synth_ai/tui/cli/query_experiments_v3.py +4 -4
synth_ai/tui/dashboard.py +14 -9
synth_ai/utils/__init__.py +101 -0
synth_ai/utils/base_url.py +94 -0
synth_ai/utils/cli.py +131 -0
synth_ai/utils/env.py +287 -0
synth_ai/utils/http.py +169 -0
synth_ai/utils/modal.py +308 -0
synth_ai/utils/process.py +212 -0
synth_ai/utils/prompts.py +39 -0
synth_ai/utils/sqld.py +122 -0
synth_ai/utils/task_app_discovery.py +882 -0
synth_ai/utils/task_app_env.py +186 -0
synth_ai/utils/task_app_state.py +318 -0
synth_ai/utils/user_config.py +137 -0
synth_ai/v0/config/__init__.py +1 -5
synth_ai/v0/config/base_url.py +1 -7
synth_ai/v0/tracing/config.py +1 -1
synth_ai/v0/tracing/decorators.py +1 -1
synth_ai/v0/tracing/upload.py +1 -1
synth_ai/v0/tracing_v1/config.py +1 -1
synth_ai/v0/tracing_v1/decorators.py +1 -1
synth_ai/v0/tracing_v1/upload.py +1 -1
{synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
{synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/RECORD +229 -117
synth_ai/cli/man.py +0 -106
synth_ai/compound/cais.py +0 -0
synth_ai/core/experiment.py +0 -13
synth_ai/core/system.py +0 -15
synth_ai/demo_registry.py +0 -295
synth_ai/handshake.py +0 -109
synth_ai/http.py +0 -26
{synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
{synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0

examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py ADDED Viewed

@@ -0,0 +1,135 @@
+"""Compatibility wrapper for the GRPO Crafter task app.
+This module now delegates to the TaskAppConfig defined in the colocated example at
+`examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
+(running the file directly or targeting `fastapi_app` from external tooling). Prefer using
+`uvx synth-ai serve grpo-crafter` for local development and testing.
+"""
+from __future__ import annotations
+import argparse
+from pathlib import Path
+from fastapi.exceptions import RequestValidationError
+from fastapi.responses import JSONResponse
+from starlette.requests import Request
+from synth_ai.task.apps import ModalDeploymentConfig, registry
+from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
+from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
+from .grpo_crafter import build_config
+APP_ID = "grpo-crafter"
+def _build_base_config() -> TaskAppConfig:
+    # Lazily construct the base config to avoid heavy work at import time
+    return build_config()
+try:
+    _REGISTERED_ENTRY = registry.get(APP_ID)  # look up this app's registry entry by its id
+except Exception:  # pragma: no cover - registry unavailable in some contexts
+    MODAL_DEPLOYMENT: ModalDeploymentConfig | None = None  # fallback when the lookup fails: no Modal config
+    ENV_FILES: tuple[str, ...] = ()  # fallback when the lookup fails: no env files
+else:
+    MODAL_DEPLOYMENT = _REGISTERED_ENTRY.modal  # reuse the registered entry's Modal deployment config
+    ENV_FILES = tuple(_REGISTERED_ENTRY.env_files)  # tuple copy of the entry's env files
+def build_task_app_config() -> TaskAppConfig:
+    """Return a fresh TaskAppConfig for this wrapper."""
+    base = _build_base_config()
+    return base.clone()
+def fastapi_app():
+    """Return the FastAPI application for Modal or other ASGI hosts."""
+    app = create_task_app(build_task_app_config())
+    # Replace default health endpoints so we can permit soft auth failures and log 422s.
+    filtered_routes = []
+    for route in app.router.routes:
+        path = getattr(route, "path", None)
+        methods = getattr(route, "methods", set()) or set()
+        if path in {"/health", "/health/rollout"} and "GET" in methods:
+            continue
+        filtered_routes.append(route)
+    app.router.routes = filtered_routes
+    def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
+        if not env_key:
+            return None
+        prefix = env_key[: max(1, len(env_key) // 2)]
+        print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
+        return prefix
+    @app.get("/health")
+    async def health(request: Request):
+        env_key = normalize_environment_api_key()
+        if not env_key:
+            return JSONResponse(
+                status_code=503,
+                content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
+            )
+        if not is_api_key_header_authorized(request):
+            prefix = _log_env_key_prefix("health", env_key)
+            content = {"status": "healthy", "authorized": False}
+            if prefix:
+                content["expected_api_key_prefix"] = prefix
+            return JSONResponse(status_code=200, content=content)
+        return {"status": "healthy", "authorized": True}
+    @app.get("/health/rollout")
+    async def health_rollout(request: Request):
+        env_key = normalize_environment_api_key()
+        if not env_key:
+            return JSONResponse(
+                status_code=503,
+                content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
+            )
+        if not is_api_key_header_authorized(request):
+            prefix = _log_env_key_prefix("health/rollout", env_key)
+            content = {"status": "healthy", "authorized": False}
+            if prefix:
+                content["expected_api_key_prefix"] = prefix
+            return JSONResponse(status_code=200, content=content)
+        return {"ok": True, "authorized": True}
+    @app.exception_handler(RequestValidationError)
+    async def _on_validation_error(request: Request, exc: RequestValidationError):
+        try:
+            hdr = request.headers
+            snapshot = {
+                "path": str(request.url.path),
+                "have_x_api_key": bool(hdr.get("x-api-key")),
+                "have_x_api_keys": bool(hdr.get("x-api-keys")),
+                "have_authorization": bool(hdr.get("authorization")),
+                "errors": exc.errors()[:5],
+            }
+            print("[422] validation", snapshot, flush=True)
+        except Exception:
+            pass
+        return JSONResponse(
+            status_code=422,
+            content={"status": "invalid", "detail": exc.errors()[:5]},
+        )
+    return app
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run the Crafter task app locally")
+    parser.add_argument("--host", default="0.0.0.0")
+    parser.add_argument("--port", type=int, default=8001)
+    parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
+    args = parser.parse_args()
+    run_task_app(
+        build_task_app_config,
+        host=args.host,
+        port=args.port,
+        reload=args.reload,
+    )

examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md ADDED Viewed

@@ -0,0 +1,173 @@
+# GRPO Synth Envs Hosted Service
+This service provides hosted environment and policy management for GRPO (Group Relative Policy Optimization) training with synthetic environments.
+## Architecture
+The service implements a FastAPI-based HTTP API that manages:
+- **Environments**: Stateful environment instances (currently Crafter)
+- **Policies**: Thin policy clients that prepare inference requests
+- **Rollouts**: Coordinated execution of environment-policy interaction loops
+- **Snapshots**: State persistence using Modal Volumes
+- **Branching**: Creating multiple copies of environments/policies for exploration
+## Key Components
+### Core Modules
+- `hosted_app.py`: FastAPI app factory and configuration
+- `registry.py`: In-memory registries for active instances
+- `storage/volume.py`: Modal Volume operations for snapshots
+- `inference/openai_client.py`: OpenAI-compatible inference client
+### API Routers
+- `environment_routes.py`: Environment lifecycle endpoints
+- `policy_routes.py`: Policy lifecycle endpoints
+- `rollout.py`: Rollout coordinator and run management
+- `branching.py`: Branching operations
+### Environment Implementations
+- `envs/crafter/`: Crafter environment and policy implementations
+## API Endpoints
+### Service Discovery
+- `GET /info`: Service configuration and endpoints
+- `GET /health`: Health check
+### Environment Management
+- `POST /env/create`: Create new environment
+- `POST /env/reset`: Reset environment
+- `POST /env/step`: Execute environment step
+- `POST /env/snapshot`: Save environment state
+- `POST /env/restore`: Restore from snapshot
+- `POST /env/terminate`: Clean up environment
+### Policy Management
+- `POST /policy/create`: Create new policy
+- `POST /policy/step`: Generate actions (with optional inference)
+- `POST /policy/snapshot`: Save policy state
+- `POST /policy/restore`: Restore from snapshot
+- `POST /policy/terminate`: Clean up policy
+### Coordination
+- `POST /rollout`: Execute coordinated rollout
+- `POST /branch`: Create environment/policy branches
+- `POST /run/abort`: Abort running rollout
+- `GET /run/status/{run_id}`: Check run status
+## Local Development
+```bash
+# Install dependencies
+pip install fastapi uvicorn httpx pydantic
+# Run the service
+python main.py
+# Or with uvicorn directly
+uvicorn main:app --reload --port 8000
+```
+## Modal Deployment
+```bash
+# Deploy to Modal
+modal deploy main.py
+# Run once
+modal run main.py
+```
+## Environment Variables
+- `SERVICE_BASE_URL`: Base URL for this service (default: http://localhost:8000)
+- `VLLM_BASE_URL`: Base URL for vLLM inference service (default: http://localhost:8001)
+- `DEFAULT_MODEL`: Default model name for inference
+## Storage
+The service uses Modal Volumes for persistent storage:
+- Volume name: `synth-env-state`
+- Mount path: `/data/state`
+- Layout: `/data/state/runs/{rl_run_id}/{kind}/{shard}/{snapshot_id}.tar.gz`
+## Example Usage
+```python
+import httpx
+# Create environment
+env_response = httpx.post(
+    "http://localhost:8000/env/create",
+    json={
+        "env_name": "crafter",
+        "config": {},
+        "seed": 42,
+        "rl_run_id": "test-run-1"
+    }
+)
+env_id = env_response.json()["env_id"]
+# Create policy
+policy_response = httpx.post(
+    "http://localhost:8000/policy/create",
+    json={
+        "policy_name": "crafter-react",
+        "config": {"inference_url": "http://vllm:8001"},
+        "rl_run_id": "test-run-1",
+        "bound_env_id": env_id
+    }
+)
+policy_id = policy_response.json()["policy_id"]
+# Execute rollout
+rollout_response = httpx.post(
+    "http://localhost:8000/rollout",
+    json={
+        "run_id": "test-run-1",
+        "env": {"env_id": env_id},
+        "policy": {"policy_id": policy_id},
+        "ops": ["agent", "env"] * 10,
+        "on_done": "reset"
+    }
+)
+trajectories = rollout_response.json()["trajectories"]
+```
+## Testing
+The implementation follows the plan outlined in `plan.md` and decisions in `decisions.md`. Key test areas:
+- Environment create/step/reset lifecycle
+- Policy inference request building
+- Snapshot/restore round trips
+- Rollout coordination with abort support
+- Branching operations
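+## Sample Evaluation Results
+Aggregate sections excerpted from two evaluation runs (20 of 20 completed in each).
+### 4b
+```json
+"aggregate": {
+  "completed": 20,
+  "total": 20,
+  "avg_turns": 10.0,
+  "avg_achievements": 1.3,
+  "achievements_freq": {
+    "collect_wood": 9,
+    "collect_sapling": 8,
+    "collect_drink": 7,
+    "place_plant": 2
+  }
+}
+```
+### groq qwen/qwen3-32b
+Excerpt; the array that precedes `aggregate` in the original output is omitted.
+```json
+"aggregate": {
+  "completed": 20,
+  "total": 20,
+  "avg_turns": 10.0,
+  "avg_achievements": 1.0,
+  "achievements_freq": {
+    "collect_sapling": 7,
+    "collect_wood": 9,
+    "collect_drink": 4
+  }
+}
+```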

examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""GRPO Synth Envs Hosted Service."""
+from .hosted_app import TaskApp, create_app
+__all__ = ["create_app", "TaskApp"]

examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py ADDED Viewed

@@ -0,0 +1,143 @@
+from __future__ import annotations
+import logging
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+from .registry import registry
+logger = logging.getLogger(__name__)
+router = APIRouter()
+class BranchRequest(BaseModel):  # request body for POST /branch
+    env_ids: list[str] | None = None  # parent environments to branch; None or empty skips env branching
+    policy_ids: list[str] | None = None  # parent policies to branch; None or empty skips policy branching
+    num_children: int = 1  # number of children created for each parent
+    max_branches: int = 10  # caller-supplied cap; num_children above it is rejected with a 422
+class BranchResponse(BaseModel):  # response body for POST /branch
+    env_branches: dict[str, list[str]]  # parent env_id -> ids of the child environments created from it
+    policy_branches: dict[str, list[str]]  # parent policy_id -> ids of the child policies created from it
+@router.post("/branch", response_model=BranchResponse)
+async def create_branches(request: BranchRequest) -> BranchResponse:
+    """Create branches of environments and/or policies."""
+    if request.num_children > request.max_branches:
+        raise HTTPException(
+            status_code=422,
+            detail=f"num_children ({request.num_children}) exceeds max_branches ({request.max_branches})",
+        )
+    env_branches = {}
+    policy_branches = {}
+    try:
+        # Branch environments
+        if request.env_ids:
+            for env_id in request.env_ids:
+                env_handle = registry.get_env(env_id)
+                if not env_handle:
+                    logger.warning(f"Environment {env_id} not found, skipping")
+                    continue
+                child_ids = []
+                for child_idx in range(request.num_children):
+                    # Create snapshot of parent
+                    from .environment_routes import (
+                        EnvSnapshotRequest,
+                        snapshot_environment,
+                    )
+                    snapshot_response = await snapshot_environment(
+                        EnvSnapshotRequest(env_id=env_id)
+                    )
+                    # Restore to new environment with modified seed
+                    from .environment_routes import (
+                        EnvRestoreRequest,
+                        restore_environment,
+                    )
+                    restore_response = await restore_environment(
+                        EnvRestoreRequest(snapshot_id=snapshot_response.snapshot_id)
+                    )
+                    child_id = restore_response.env_id
+                    child_handle = registry.get_env(child_id)
+                    # Update child seed for determinism
+                    if child_handle and child_handle.seed is not None:
+                        child_handle.seed = child_handle.seed + child_idx + 1
+                        child_handle.env.seed = child_handle.seed
+                    child_ids.append(child_id)
+                    # Track parent relationship in snapshot metadata
+                    snapshot_meta = registry.get_snapshot(snapshot_response.snapshot_id)
+                    if snapshot_meta:
+                        snapshot_meta.parent_snapshot_id = env_id
+                env_branches[env_id] = child_ids
+        # Branch policies
+        if request.policy_ids:
+            for policy_id in request.policy_ids:
+                policy_handle = registry.get_policy(policy_id)
+                if not policy_handle:
+                    logger.warning(f"Policy {policy_id} not found, skipping")
+                    continue
+                child_ids = []
+                for child_idx in range(request.num_children):
+                    # Create snapshot of parent
+                    from .policy_routes import PolicySnapshotRequest, snapshot_policy
+                    snapshot_response = await snapshot_policy(
+                        PolicySnapshotRequest(policy_id=policy_id)
+                    )
+                    # Restore to new policy
+                    from .policy_routes import PolicyRestoreRequest, restore_policy
+                    restore_response = await restore_policy(
+                        PolicyRestoreRequest(snapshot_id=snapshot_response.snapshot_id)
+                    )
+                    child_id = restore_response.policy_id
+                    child_ids.append(child_id)
+                    # Copy bound environment if parent had one
+                    child_handle = registry.get_policy(child_id)
+                    if child_handle and policy_handle.bound_env_id:
+                        # If we also branched the env, bind to corresponding child
+                        if policy_handle.bound_env_id in env_branches:
+                            child_envs = env_branches[policy_handle.bound_env_id]
+                            if child_idx < len(child_envs):
+                                child_handle.bound_env_id = child_envs[child_idx]
+                        else:
+                            # Otherwise keep same env binding
+                            child_handle.bound_env_id = policy_handle.bound_env_id
+                    # Track parent relationship
+                    snapshot_meta = registry.get_snapshot(snapshot_response.snapshot_id)
+                    if snapshot_meta:
+                        snapshot_meta.parent_snapshot_id = policy_id
+                policy_branches[policy_id] = child_ids
+        return BranchResponse(
+            env_branches=env_branches,
+            policy_branches=policy_branches,
+        )
+    except Exception as e:
+        logger.error(f"Failed to create branches: {e}")
+        raise HTTPException(status_code=500, detail=str(e)) from e

synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl