PyPI - synth-ai - Versions diffs - 0.2.13.dev1__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl - Mend

synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (226) hide show

examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml CHANGED Viewed

@@ -12,7 +12,7 @@ variety = "gspo"
 # Replace with the Modal URL printed by `uvx synth-ai modal-serve grpo-crafter`
 task_url = "https://YOUR-MODAL-TASK-APP.modal.run"
 # Point at the Synth backend (or compatible service) that exposes /api/judge/v1/*
-judge_url = "https://YOUR-BACKEND-ENDPOINT/api"
+judge_url = "https://synth-backend-dev-docker.onrender.com/api"
 [compute]
 gpu_type = "H200"
@@ -101,6 +101,9 @@ verify_every_k = 0
 [rubric]
 enabled = true
+model = "openai/gpt-oss-120b"
+api_base = "https://synth-backend-dev-docker.onrender.com/api/judge"
+api_key_env = "OPENAI_API_KEY"
 # Blend the hosted judge scores with environment returns inside the trainer.
 [rubric.weights]
 env = 0.2
@@ -110,10 +113,18 @@ outcome = 0.4
 [rubric.event]
 # Hosted judge rubric for per-decision progress scoring.
 rubric_id = "crafter/event@v1"
+criteria = [
+  { key = "progress.unique_achievements", weight = 0.9, description = "Return 1 when this decision explicitly unlocks a brand-new Crafter achievement (inventory or status text confirms it this turn). Otherwise return 0.", aggregation = "weighted_sum" },
+  { key = "process.intent_alignment", weight = 0.1, description = "Use at most 0.3 to acknowledge tightly coupled setup that finishes the last prerequisite; keep ≤0.1 when the agent only repositions or gathers without an imminent unlock.", aggregation = "weighted_sum" },
+]
 [rubric.outcome]
 # Hosted judge rubric for final trajectory scoring.
 rubric_id = "crafter/outcome@v1"
+criteria = [
+  { key = "outcome.goal_completion", weight = 0.6, description = "Full credit when the agent ends with strong survival metrics and a clear crafted milestone (e.g., iron tools, furnace).", aggregation = "weighted_sum" },
+  { key = "outcome.achievement_depth", weight = 0.4, description = "Partial credit for intermediate achievements (saplings, wood/stone tools) that set up future success.", aggregation = "weighted_sum" },
+]
 [judge]
 type = "gemini"                            # or "groq" when routing to Groq-hosted judges

examples/swe/task_app/grpo_swe_mini.py CHANGED Viewed

@@ -60,34 +60,55 @@ try:
     HAS_HOSTED = True
 except Exception:
     try:  # pragma: no cover - optional dependency path
-        from examples.warming_up_to_rl.task_app.synth_envs_hosted.branching import (  # type: ignore
-            router as branching_router,
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.branching import (  # type: ignore
+            BranchingEnvironmentConfig,
         )
-        from examples.warming_up_to_rl.task_app.synth_envs_hosted.environment_routes import (  # type: ignore # noqa: E501
-            router as environment_router,
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.environment_routes import (  # type: ignore # noqa: E501
+            CrafterEnvironmentRoutes,
         )
-        from examples.warming_up_to_rl.task_app.synth_envs_hosted.policy_routes import (  # type: ignore
-            router as policy_router,
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.policy_routes import (  # type: ignore
+            PolicyRoutes,
         )
-        from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (  # type: ignore
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (  # type: ignore
+            RolloutPayload,
+        )
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
+            EnvironmentConfig,
+        )
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
+            PolicyConfig,
+        )
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
+            RolloutRequest,
+        )
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
+            RolloutResponse,
+        )
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
+            RunSpec,
+        )
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
+            ToolUse,
+        )
+        from examples.task_apps.crafter.task_app.hosted.rollout import (  # type: ignore
             RolloutEnvSpec as LegacyRolloutEnvSpec,
         )
-        from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
+        from examples.task_apps.crafter.task_app.hosted.rollout import (
             RolloutPolicySpec as LegacyRolloutPolicySpec,
         )
-        from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
+        from examples.task_apps.crafter.task_app.hosted.rollout import (
             RolloutRecordConfig as LegacyRolloutRecordConfig,
         )
-        from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
+        from examples.task_apps.crafter.task_app.hosted.rollout import (
             RolloutRequest as LegacyRolloutRequest,
         )
-        from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
+        from examples.task_apps.crafter.task_app.hosted.rollout import (
             RolloutResponse as LegacyRolloutResponse,
         )
-        from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
+        from examples.task_apps.crafter.task_app.hosted.rollout import (
             RolloutSafetyConfig as LegacyRolloutSafetyConfig,
         )
-        from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
+        from examples.task_apps.crafter.task_app.hosted.rollout import (
             execute_rollout as legacy_execute_rollout,
         )
         HAS_HOSTED = True
@@ -264,7 +285,7 @@ def build_dataset() -> tuple[TaskDatasetRegistry, MiniSweDataset]:
 def _base_task_info(dataset: MiniSweDataset) -> TaskInfo:
     return TaskInfo(
         task={"id": "swe_mini", "name": "mini-SWE Tasks", "version": "0.1.0"},
-        environments=["swe-mini"],
+        environment="swe-mini",
         action_space={
             "type": "tool",
             "tools": ["run_command", "submit_patch"],
@@ -292,11 +313,6 @@ def _base_task_info(dataset: MiniSweDataset) -> TaskInfo:
             },
             "tool": {"name": "run_command", "parallel_tool_calls": False},
         },
-        capabilities={
-            "supports_rollout": True,
-            "supports_env_lifecycle": True,
-            "requires_api_key_header": True,
-        },
         limits={"max_ops": 2000, "max_time_s": 7200},
     )
@@ -348,18 +364,31 @@ def provide_task_instances(
     dataset: MiniSweDataset, base_info: TaskInfo, seeds: Sequence[int]
 ) -> Iterable[TaskInfo]:
     infos: list[TaskInfo] = []
+    base_observation = getattr(base_info, "observation", None)
+    if hasattr(base_observation, "model_dump"):
+        base_observation_data = base_observation.model_dump()
+    elif isinstance(base_observation, dict):
+        base_observation_data = dict(base_observation)
+    else:
+        base_observation_data = {}
     for seed in seeds:
         instance = dataset.sample_by_index(int(seed))
         infos.append(
             TaskInfo(
                 task=base_info.task,
-                environments=base_info.environments,
+                environment=base_info.environment,
                 action_space=base_info.action_space,
-                observation={**base_info.observation, "instance_id": instance["instance_id"]},
-                dataset={**base_info.dataset, "instance_id": instance["instance_id"]},
+                observation={
+                    **base_observation_data,
+                    "instance_id": instance["instance_id"],
+                },
+                dataset={
+                    **base_info.dataset.model_dump(),
+                    "instance_id": instance["instance_id"],
+                },
                 rubric=base_info.rubric,
                 inference=base_info.inference,
-                capabilities=base_info.capabilities,
                 limits=base_info.limits,
             )
         )
@@ -397,10 +426,10 @@ def build_config() -> TaskAppConfig:
             HostedTaskAppCls = HostedTaskApp
         except Exception:
             try:
-                from examples.warming_up_to_rl.task_app.synth_envs_hosted.hosted_app import (  # type: ignore
-                    TaskApp as HostedTaskApp,
+                from examples.task_apps.crafter.task_app.synth_envs_hosted.hosted_app import (  # type: ignore
+                    create_app,
                 )
-                HostedTaskAppCls = HostedTaskApp
+                HostedTaskAppCls = create_app
             except Exception as exc:  # pragma: no cover - optional dependency path
                 logger.warning("Unable to import HostedTaskApp for swe-mini: %s", exc)
         if HostedTaskAppCls is not None:

examples/swe/task_app/hosted/rollout.py CHANGED Viewed

@@ -1238,6 +1238,15 @@ async def execute_rollout(
                         )
                     # Build partial trajectory and return HTTP 200
+                    # Extract inference_url from the policy snapshot's config (best effort)
+                    inference_url = None
+                    if policy_handle is not None:
+                        try:
+                            policy_snapshot = policy_handle.snapshot()
+                            inference_url = policy_snapshot.get("config", {}).get("inference_url")
+                        except Exception:
+                            pass
                     trajectory = RolloutTrajectory(
                         env_id=env_id,
                         policy_id=policy_id,
@@ -1249,6 +1258,7 @@ async def execute_rollout(
                             "at_op": op,
                         },
                         length=len(trajectory_steps),
+                        inference_url=inference_url,  # NEW: Required for trace correlation
                         decision_samples=decision_samples if step_rewards_active else None,
                     )
                     metrics = RolloutMetrics(
@@ -1369,6 +1379,15 @@ async def execute_rollout(
                         },
                     )
                     trajectory_steps.append(term_step)
+                    # Extract inference_url from the policy snapshot's config (best effort)
+                    inference_url = None
+                    if policy_handle is not None:
+                        try:
+                            policy_snapshot = policy_handle.snapshot()
+                            inference_url = policy_snapshot.get("config", {}).get("inference_url")
+                        except Exception:
+                            pass
                     trajectory = RolloutTrajectory(
                         env_id=env_id,
                         policy_id=policy_id,
@@ -1379,6 +1398,7 @@ async def execute_rollout(
                             "at_op": op,
                         },
                         length=len(trajectory_steps),
+                        inference_url=inference_url,  # NEW: Required for trace correlation
                         decision_samples=decision_samples if step_rewards_active else None,
                     )
                     metrics = RolloutMetrics(
@@ -1460,6 +1480,15 @@ async def execute_rollout(
                     )
                     trajectory_steps.append(term_step)
                     # Build partial response
+                    # Extract inference_url from the policy snapshot's config (best effort)
+                    inference_url = None
+                    if policy_handle is not None:
+                        try:
+                            policy_snapshot = policy_handle.snapshot()
+                            inference_url = policy_snapshot.get("config", {}).get("inference_url")
+                        except Exception:
+                            pass
                     trajectory = RolloutTrajectory(
                         env_id=env_id,
                         policy_id=policy_id,
@@ -1471,6 +1500,7 @@ async def execute_rollout(
                             "at_op": op,
                         },
                         length=len(trajectory_steps),
+                        inference_url=inference_url,  # NEW: Required for trace correlation
                         decision_samples=decision_samples if step_rewards_active else None,
                     )
                     metrics = RolloutMetrics(
@@ -1688,12 +1718,22 @@ async def execute_rollout(
                     timing_final.setdefault("overhead_ms", 0.0)
         # Build trajectory
+        # Extract inference_url from the policy snapshot's config (best effort)
+        inference_url = None
+        if policy_handle is not None:
+            try:
+                policy_snapshot = policy_handle.snapshot()
+                inference_url = policy_snapshot.get("config", {}).get("inference_url")
+            except Exception:
+                pass
         trajectory = RolloutTrajectory(
             env_id=env_id,
             policy_id=policy_id,
             steps=trajectory_steps,
             final={"observation": _summarize_observation_for_storage(env_handle, current_obs)},
             length=len(trajectory_steps),
+            inference_url=inference_url,  # NEW: Required for trace correlation
             decision_samples=decision_samples if step_rewards_active else None,
         )

examples/swe/task_app/hosted/test_service.py CHANGED Viewed

@@ -1,15 +1,14 @@
 #!/usr/bin/env python3
-"""
-Simple test script for the GRPO Synth Envs Hosted Service.
-Run this after starting the service with:
-    python main.py
-"""
+"""Manual smoke script for the GRPO Synth Envs Hosted Service."""
 import asyncio
 import json
 import httpx
+import pytest
+pytestmark = pytest.mark.skip(reason="Requires running hosted service on localhost:8000")
 async def test_service():

examples/task_apps/TESTING.md ADDED Viewed

@@ -0,0 +1,275 @@
+# Task App Testing Guide
+This document describes how to run tests for the task apps in this directory.
+## Overview
+Each task app has unit and integration tests following a consistent pattern inspired by the customer environment tests in `customers/`.
+## Test Structure
+```
+examples/task_apps/<app_name>/tests/
+├── __init__.py
+├── integration/
+│   ├── __init__.py
+│   └── test_<app>_eval.py      # Server startup + eval tests
+└── unit/
+    ├── __init__.py
+    └── test_<app>_*.py          # Environment, scoring, dataset tests
+```
+## Running Tests
+### Prerequisites
+```bash
+# Install test dependencies
+uv sync --dev
+# Set required environment variables
+export GROQ_API_KEY="your-groq-key"
+export OPENAI_API_KEY="your-openai-key"  # For Sokoban
+```
+### Run All Tests for a Task App
+```bash
+# Verilog
+pytest examples/task_apps/verilog/tests/ -v
+# Enron
+pytest examples/task_apps/enron/tests/ -v
+# Sokoban
+pytest examples/task_apps/sokoban/tests/ -v
+```
+### Run Only Unit Tests (Fast)
+```bash
+# Runs quickly, no server startup required
+pytest examples/task_apps/verilog/tests/unit/ -v
+pytest examples/task_apps/enron/tests/unit/ -v
+pytest examples/task_apps/sokoban/tests/unit/ -v
+```
+### Run Only Integration Tests
+```bash
+# Slower, starts servers and runs evals
+pytest examples/task_apps/verilog/tests/integration/ -v
+pytest examples/task_apps/enron/tests/integration/ -v
+pytest examples/task_apps/sokoban/tests/integration/ -v
+```
+### Run All Task App Tests
+```bash
+# Run everything
+pytest examples/task_apps/*/tests/ -v
+# Skip slow tests
+pytest examples/task_apps/*/tests/ -v -m "not slow"
+```
+## Test Categories
+### Unit Tests
+**Purpose**: Test individual components in isolation
+- Environment initialization
+- Reward calculation
+- Tool implementations
+- State management
+**Characteristics**:
+- Fast (< 1 second each)
+- No external dependencies
+- No server startup
+- No API calls
+**Examples**:
+- `test_verilog_scoring.py`: Tests reward components (compile, simulate, submit)
+- `test_enron_environment.py`: Tests search, answer, reward calculation
+- `test_sokoban_environment.py`: Tests actions, rewards, truncation
+### Integration Tests
+**Purpose**: Test the full system end-to-end
+- Server startup
+- Health/info endpoints
+- Full evaluation runs
+- **Rollout execution** (manual and policy-driven)
+**Characteristics**:
+- Slower (30-300 seconds)
+- Requires server startup
+- May require API keys
+- Tests real workflows
+**Examples**:
+- `test_verilog_eval.py`: Starts server, runs Groq eval with Qwen3-32B
+- `test_verilog_rollout.py`: **Manual & policy rollouts via /rollout endpoint**
+- `test_enron_eval.py`: Starts server, runs Groq eval
+- `test_enron_rollout.py`: **Manual & policy rollouts, auth testing**
+- `test_sokoban_eval.py`: Starts server, tests manual rollout
+- `test_sokoban_rollout.py`: **6 rollout tests (manual, policy, difficulties, limits)**
+## What Each Test Validates
+### Verilog Tests
+**Unit Tests** (4 tests):
+- ✅ Compile success gives +0.1 reward
+- ✅ Simulation pass gives +1.0 reward
+- ✅ Submit success gives +10.0 reward
+- ✅ Submit checks last simulation output correctly
+**Integration Tests** (5 tests):
+- ✅ Server starts and responds to /health
+- ✅ /task_info returns valid Verilog task metadata
+- ✅ Full eval with Qwen3-32B completes successfully
+- ✅ **Manual rollout** with explicit write/compile/simulate/submit
+- ✅ **Policy rollout** using Groq/Qwen3-32B (verifies LLM integration)
+### Enron Tests
+**Unit Tests** (3 tests):
+- ✅ search_emails tool works correctly
+- ✅ answer_question tool calculates rewards
+- ✅ Exact answer match gives high reward (>0.9)
+- ✅ Partial answer match gives medium reward (>0.5)
+- ✅ Wrong answer gives low reward (<0.5)
+**Integration Tests** (6 tests):
+- ✅ Server starts and responds to /health
+- ✅ /task_info returns valid Enron task metadata
+- ✅ Full eval with Qwen3-32B completes successfully
+- ✅ **Manual rollout** with explicit search/read/answer actions
+- ✅ **Policy rollout** using Groq/Qwen3-32B
+- ✅ **Authentication** enforcement (rejects requests without auth header)
+### Sokoban Tests
+**Unit Tests** (3 tests):
+- ✅ Module imports work correctly
+- ✅ Reward components exist (goal achieved, step penalty)
+- ✅ Engine creation with different difficulty levels
+**Integration Tests** (9 tests):
+- ✅ Server starts and responds to /health
+- ✅ /task_info returns valid Sokoban task metadata
+- ✅ **Manual rollout** with movement actions (left/right/up/down)
+- ✅ **Policy rollout** with OpenAI GPT-5-mini (may skip if slow)
+- ✅ **All difficulty levels** (easy/medium/hard) work correctly
+- ✅ **Max steps limit** enforcement (stops at configured limit)
+- ✅ **Puzzle completion detection** (terminated=True when solved)
+- ✅ Truncation on max_steps
+- ✅ Response structure validation
+## Debugging Test Failures
+### Server Won't Start
+```bash
+# Check if port is already in use
+lsof -i :<port>
+# Check logs manually
+uv run -m synth_ai task-app serve <app_name> --port 8999
+# Check environment variables
+echo $GROQ_API_KEY
+echo $OPENAI_API_KEY
+```
+### Tests Timeout
+```bash
+# Run with more verbose output
+pytest <test_file> -v -s
+# Fail any test that runs longer than 60 seconds (requires the pytest-timeout plugin)
+pytest <test_file> -v --timeout=60
+```
+### Import Errors
+```bash
+# Ensure you're in the right directory
+cd /path/to/synth-ai
+# Reinstall dependencies
+uv sync --dev
+```
+## CI/CD Integration
+These tests can be run in CI with:
+```yaml
+# .github/workflows/test-task-apps.yml
+- name: Run task app tests
+  env:
+    GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  run: |
+    # Unit tests (fast, always run)
+    pytest examples/task_apps/*/tests/unit/ -v
+    # Integration tests (slower, only on main)
+    if [ "$GITHUB_REF" = "refs/heads/main" ]; then
+      pytest examples/task_apps/*/tests/integration/ -v --timeout=300
+    fi
+```
+## Adding Tests for New Task Apps
+When creating a new task app, follow this pattern:
+1. **Create test structure**:
+   ```bash
+   mkdir -p examples/task_apps/<new_app>/tests/{unit,integration}
+   touch examples/task_apps/<new_app>/tests/__init__.py
+   touch examples/task_apps/<new_app>/tests/unit/__init__.py
+   touch examples/task_apps/<new_app>/tests/integration/__init__.py
+   ```
+2. **Create unit tests** (`tests/unit/test_<app>_*.py`):
+   - Test environment initialization
+   - Test reward calculation
+   - Test tool implementations
+   - Test edge cases
+3. **Create integration tests** (`tests/integration/test_<app>_eval.py`):
+   - Copy from an existing integration test
+   - Update app name, port, config path
+   - Add app-specific endpoint tests
+4. **Add to CI**:
+   - Update CI config to include new tests
+   - Ensure required env vars are set
+## Test Coverage Goals
+- Unit test coverage: >80%
+- Integration test coverage: 100% of critical paths
+- All public APIs have at least one integration test
+- All reward components have unit tests
+## Common Issues
+### "Task app terminated immediately"
+- Check that the app name is correct
+- Verify the app is registered in `synth_ai/task/apps.py`
+- Check recent changes to the app code
+### "GROQ_API_KEY must be set"
+- Set the environment variable
+- Or skip the test: `pytest -k "not groq"`
+### "Config file not found"
+- Ensure eval config exists in task app directory
+- Check the path in the test matches actual location

examples/task_apps/__init__.py ADDED Viewed

File without changes

examples/task_apps/crafter/__init__.py ADDED Viewed

File without changes

examples/task_apps/crafter/task_app/__init__.py ADDED Viewed

@@ -0,0 +1,2 @@
+"""Crafter task app implementation."""
+

examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py RENAMED Viewed

@@ -68,7 +68,7 @@ def _resolve_repo_root() -> Path:
 def _resolve_task_app_root(repo_root: Path) -> Path:
     """Locate the task_app directory even when the module is copied to a temp mount."""
-    preferred = (repo_root / "examples" / "warming_up_to_rl" / "task_app").resolve()
+    preferred = (repo_root / "examples" / "task_apps" / "crafter" / "task_app").resolve()
     if preferred.is_dir():
         return preferred
@@ -81,7 +81,7 @@ def _resolve_task_app_root(repo_root: Path) -> Path:
         if (candidate / "synth_envs_hosted").is_dir():
             return candidate
-    fallback = Path("/opt/synth_ai_repo/examples/warming_up_to_rl/task_app")
+    fallback = Path("/opt/synth_ai_repo/examples/task_apps/crafter/task_app")
     if fallback.is_dir():
         return fallback.resolve()
@@ -306,13 +306,16 @@ def build_dataset() -> tuple[TaskDatasetRegistry, CrafterDataset]:
 def _base_task_info(dataset: CrafterDataset) -> TaskInfo:
     return TaskInfo(
         task={"id": "crafter_classic", "name": "Crafter Classic", "version": "1.0.0"},
-        environments=["crafter"],
+        environment="crafter",
         action_space={
             "type": "discrete",
+            "description": f"Discrete action space with {len(crafter_constants.actions)} actions including movement, crafting, and interaction",
             "size": len(crafter_constants.actions),
             "actions": list(crafter_constants.actions),
         },
         observation={
+            "type": "dict",
+            "description": "RGB frame (64x64x3) plus inventory counts, achievements, and semantic map patches",
             "summary": "RGB frame plus inventory, achievements, and semantic map patches.",
             "keys": ["image", "inventory", "achievements", "semantic_map_patch7"],
             "image_shape": [64, 64, 3],
@@ -336,11 +339,6 @@ def _base_task_info(dataset: CrafterDataset) -> TaskInfo:
             },
             "tool": {"name": "interact", "parallel_tool_calls": False},
         },
-        capabilities={
-            "supports_rollout": True,
-            "supports_env_lifecycle": True,
-            "requires_api_key_header": True,
-        },
         limits={"max_ops": 100000, "max_time_s": 3600},
     )
@@ -366,29 +364,36 @@ def provide_task_instances(
     dataset: CrafterDataset, base_info: TaskInfo, seeds: Sequence[int]
 ) -> Iterable[TaskInfo]:
     infos: list[TaskInfo] = []
+    base_observation = getattr(base_info, "observation", None)
+    if hasattr(base_observation, "model_dump"):
+        observation_template = base_observation.model_dump()
+    elif isinstance(base_observation, dict):
+        observation_template = dict(base_observation)
+    else:
+        observation_template = {}
     for seed_value in seeds:
         summary = dataset.describe_seed(seed_value)
         infos.append(
             TaskInfo(
                 task=base_info.task,
-                environments=base_info.environments,
+                environment=base_info.environment,
                 action_space=base_info.action_space,
                 observation={
-                    **base_info.observation,
+                    **observation_template,
                     "seed": seed_value,
                     "traits": summary["traits"],
                     "inventory": summary["inventory"],
                     "player_position": summary["player_position"],
                 },
                 dataset={
-                    **base_info.dataset,
+                    **base_info.dataset.model_dump(),
                     "seed": seed_value,
                     "difficulty": summary["difficulty"],
                     "config": summary["config"],
                 },
                 rubric=base_info.rubric,
                 inference=base_info.inference,
-                capabilities=base_info.capabilities,
                 limits=base_info.limits,
             )
         )
@@ -659,7 +664,7 @@ register_task_app(
                 # Mount repo root so local modules resolve when deployed on Modal
                 (str(REPO_ROOT), "/opt/synth_ai_repo"),
                 (str(REPO_ROOT / "synth_ai"), "/opt/synth_ai_repo/synth_ai"),
-                (str(TASK_APP_ROOT), "/opt/synth_ai_repo/examples/warming_up_to_rl/task_app"),
+                (str(TASK_APP_ROOT), "/opt/synth_ai_repo/examples/task_apps/crafter/task_app"),
             ),
             secret_names=("groq-api-key", "openai-api-key"),
             memory=16384,

examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py RENAMED Viewed

@@ -1,7 +1,7 @@
 """Compatibility wrapper for the GRPO Crafter task app.
 This module now delegates to the TaskAppConfig defined in the colocated example at
-`examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
+`examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
 (running the file directly or targeting `fastapi_app` from external tooling). Prefer using
 `uvx synth-ai serve grpo-crafter` for local development and testing.
 """

synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.13.dev1py3-none-any.whl → 0.2.13.dev2py3-none-any.whl