PyPI - synth-ai - Versions diffs - 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl - Mend

synth-ai 0.2.17 (py3-none-any.whl) → 0.2.19 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (169)

examples/baseline/banking77_baseline.py +204 -0
examples/baseline/crafter_baseline.py +407 -0
examples/baseline/pokemon_red_baseline.py +326 -0
examples/baseline/simple_baseline.py +56 -0
examples/baseline/warming_up_to_rl_baseline.py +239 -0
examples/blog_posts/gepa/README.md +355 -0
examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
examples/blog_posts/gepa/gepa_baseline.py +204 -0
examples/blog_posts/gepa/query_prompts_example.py +97 -0
examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
examples/blog_posts/gepa/task_apps.py +105 -0
examples/blog_posts/gepa/test_gepa_local.sh +67 -0
examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +12 -10
examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +1 -0
examples/blog_posts/pokemon_vl/extract_images.py +239 -0
examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +1 -1
examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +60 -10
examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +1 -1
examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
examples/multi_step/configs/crafter_rl_outcome.toml +1 -0
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -0
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -0
examples/rl/configs/rl_from_base_qwen17.toml +1 -0
examples/swe/task_app/hosted/inference/openai_client.py +0 -34
examples/swe/task_app/hosted/policy_routes.py +17 -0
examples/swe/task_app/hosted/rollout.py +4 -2
examples/task_apps/banking77/__init__.py +6 -0
examples/task_apps/banking77/banking77_task_app.py +841 -0
examples/task_apps/banking77/deploy_wrapper.py +46 -0
examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
examples/task_apps/crafter/task_app/grpo_crafter.py +24 -2
examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +355 -58
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +68 -7
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +78 -21
examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
examples/task_apps/gepa_benchmarks/__init__.py +7 -0
examples/task_apps/gepa_benchmarks/common.py +260 -0
examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
examples/task_apps/pokemon_red/task_app.py +254 -36
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +1 -0
examples/warming_up_to_rl/task_app/grpo_crafter.py +53 -4
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +152 -41
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +31 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +1 -0
synth_ai/api/train/builders.py +90 -1
synth_ai/api/train/cli.py +396 -21
synth_ai/api/train/config_finder.py +13 -2
synth_ai/api/train/configs/__init__.py +15 -1
synth_ai/api/train/configs/prompt_learning.py +442 -0
synth_ai/api/train/configs/rl.py +29 -0
synth_ai/api/train/task_app.py +1 -1
synth_ai/api/train/validators.py +277 -0
synth_ai/baseline/__init__.py +25 -0
synth_ai/baseline/config.py +209 -0
synth_ai/baseline/discovery.py +214 -0
synth_ai/baseline/execution.py +146 -0
synth_ai/cli/__init__.py +85 -17
synth_ai/cli/__main__.py +0 -0
synth_ai/cli/claude.py +70 -0
synth_ai/cli/codex.py +84 -0
synth_ai/cli/commands/__init__.py +1 -0
synth_ai/cli/commands/baseline/__init__.py +12 -0
synth_ai/cli/commands/baseline/core.py +637 -0
synth_ai/cli/commands/baseline/list.py +93 -0
synth_ai/cli/commands/eval/core.py +13 -10
synth_ai/cli/commands/filter/core.py +53 -17
synth_ai/cli/commands/help/core.py +0 -1
synth_ai/cli/commands/smoke/__init__.py +7 -0
synth_ai/cli/commands/smoke/core.py +1436 -0
synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
synth_ai/cli/commands/status/subcommands/usage.py +203 -0
synth_ai/cli/commands/train/judge_schemas.py +1 -0
synth_ai/cli/commands/train/judge_validation.py +1 -0
synth_ai/cli/commands/train/validation.py +0 -57
synth_ai/cli/demo.py +35 -3
synth_ai/cli/deploy/__init__.py +40 -25
synth_ai/cli/deploy.py +162 -0
synth_ai/cli/legacy_root_backup.py +14 -8
synth_ai/cli/opencode.py +107 -0
synth_ai/cli/root.py +9 -5
synth_ai/cli/task_app_deploy.py +1 -1
synth_ai/cli/task_apps.py +53 -53
synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
synth_ai/judge_schemas.py +1 -0
synth_ai/learning/__init__.py +10 -0
synth_ai/learning/prompt_learning_client.py +276 -0
synth_ai/learning/prompt_learning_types.py +184 -0
synth_ai/pricing/__init__.py +2 -0
synth_ai/pricing/model_pricing.py +57 -0
synth_ai/streaming/handlers.py +53 -4
synth_ai/streaming/streamer.py +19 -0
synth_ai/task/apps/__init__.py +1 -0
synth_ai/task/config.py +2 -0
synth_ai/task/tracing_utils.py +25 -25
synth_ai/task/validators.py +44 -8
synth_ai/task_app_cfgs.py +21 -0
synth_ai/tracing_v3/config.py +162 -19
synth_ai/tracing_v3/constants.py +1 -1
synth_ai/tracing_v3/db_config.py +24 -38
synth_ai/tracing_v3/storage/config.py +47 -13
synth_ai/tracing_v3/storage/factory.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +113 -11
synth_ai/tracing_v3/turso/native_manager.py +92 -16
synth_ai/types.py +8 -0
synth_ai/urls.py +11 -0
synth_ai/utils/__init__.py +30 -1
synth_ai/utils/agents.py +74 -0
synth_ai/utils/bin.py +39 -0
synth_ai/utils/cli.py +149 -5
synth_ai/utils/env.py +17 -17
synth_ai/utils/json.py +72 -0
synth_ai/utils/modal.py +283 -1
synth_ai/utils/paths.py +48 -0
synth_ai/utils/uvicorn.py +113 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/METADATA +102 -4
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/RECORD +162 -88
synth_ai/cli/commands/deploy/__init__.py +0 -23
synth_ai/cli/commands/deploy/core.py +0 -614
synth_ai/cli/commands/deploy/errors.py +0 -72
synth_ai/cli/commands/deploy/validation.py +0 -11
synth_ai/cli/deploy/core.py +0 -5
synth_ai/cli/deploy/errors.py +0 -23
synth_ai/cli/deploy/validation.py +0 -5
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0

examples/task_apps/pokemon_red/task_app.py (changed)

@@ -29,6 +29,8 @@ from synth_ai.task.tracing_utils import (
     tracing_env_enabled,
 )
 from synth_ai.tracing_v3.session_tracer import SessionTracer
+from synth_ai.tracing_v3.abstractions import EnvironmentEvent, TimeRecord
+from datetime import datetime, UTC
 logger = logging.getLogger(__name__)
@@ -261,9 +263,13 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
                 "role": "system",
                 "content": (
                     "You are controlling Pokémon Red, a classic Game Boy game. You can see the game screen in the images provided. "
-                    "Your goal is to make progress in the game. Use the execute_sequence tool to press buttons. "
+                    "Your goal is to make progress in the game. "
+                    "IMPORTANT: Always use the 'execute_sequence' tool to submit 5-10 actions per call. "
+                    "Do not reason about which tool to use - execute_sequence is the only tool available. "
                     "Choose appropriate button presses based on what you see in the game screen. "
-                    "Always respond with exactly one tool call in the format: <tool_call>{\"name\": \"execute_sequence\", \"arguments\": {...}}</tool_call>"
+                    "Plan 5-10 actions ahead to play efficiently. "
+                    "CRITICAL: If stuck in a text box (text_box_active=True), try pressing B button first, then try A. "
+                    "Always respond with exactly one tool call containing 5-10 actions."
                 ),
             },
             {
@@ -279,7 +285,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
                     "type": "function",
                     "function": {
                         "name": "execute_sequence",
-                        "description": "Execute multiple button presses in sequence. More efficient than separate calls. Recommended: 5-10 actions per call.",
+                        "description": "Execute multiple button presses in sequence. More efficient than separate calls. ALWAYS use this tool. Plan 5-10 actions ahead to play efficiently.",
                         "parameters": {
                             "type": "object",
                             "properties": {
@@ -302,31 +308,15 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
                                         },
                                         "required": ["button", "frames"]
                                     },
-                                    "minItems": 1,
-                                    "maxItems": 20,
-                                    "description": "Sequence of button presses to execute"
+                                    "minItems": 5,
+                                    "maxItems": 10,
+                                    "description": "Sequence of 5-10 button presses to execute. Plan ahead to navigate efficiently."
                                 }
                             },
                             "required": ["actions"],
                             "additionalProperties": False,
                         },
                     },
-                },
-                {
-                    "type": "function",
-                    "function": {
-                        "name": "press_button",
-                        "description": "Press a single Game Boy button for N frames (use execute_sequence for multiple actions)",
-                        "parameters": {
-                            "type": "object",
-                            "properties": {
-                                "button": {"type": "string", "enum": ["UP", "DOWN", "LEFT", "RIGHT", "A", "B", "START", "SELECT"]},
-                                "frames": {"type": "integer", "minimum": 1, "maximum": 120},
-                            },
-                            "required": ["button"],
-                            "additionalProperties": False,
-                        },
-                    },
                 }
             ],
             "tool_choice": {"type": "function", "function": {"name": "execute_sequence"}},
@@ -352,35 +342,154 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
             if "api.openai.com" in inference_url and not inference_url.endswith("/chat/completions"):
                 inference_url = inference_url + "/v1/chat/completions"
+        # Debug: print exact payload being sent
+        import json as _json_debug
+        print(f"\n{'='*80}")
+        print(f"[pokemon_red] INFERENCE REQUEST DEBUG")
+        print(f"{'='*80}")
+        print(f"Inference URL: {inference_url}")
+        print(f"Payload keys: {list(payload.keys())}")
+        print(f"Payload (formatted):")
+        print(_json_debug.dumps(payload, indent=2)[:2000])
+        print(f"{'='*80}\n")
         if is_external:
             # External API: use direct HTTP client with auth header
             headers = {}
+            import os
             if "api.openai.com" in inference_url:
-                import os
                 api_key = os.getenv("OPENAI_API_KEY")
                 if api_key:
                     headers["Authorization"] = f"Bearer {api_key}"
+            elif "modal.run" in inference_url or "synth" in inference_url.lower():
+                # Synth API: use SYNTH_API_KEY
+                api_key = os.getenv("SYNTH_API_KEY")
+                if api_key:
+                    headers["Authorization"] = f"Bearer {api_key}"
+                print(f"[pokemon_red] Using Synth API auth: {'Bearer ' + api_key[:10] + '...' if api_key else 'NONE'}")
+                # For 30B-A3B models, require H200 (A100 doesn't have enough memory)
+                model_id = payload.get("model", "")
+                if "30B-A3B" in model_id or "A3B" in model_id:
+                    headers["X-GPU-Preference"] = "H200"
+                    print(f"[pokemon_red] Setting X-GPU-Preference: H200 (required for A3B MoE)")
-            async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
+            async with httpx.AsyncClient(timeout=httpx.Timeout(connect=30.0, read=1800.0, write=60.0, pool=60.0)) as client:  # 30 min read timeout for cold starts
                 resp = await client.post(inference_url, json=payload, headers=headers)
         else:
             # Internal proxy: use local base_url
             async with httpx.AsyncClient(
                 base_url="http://127.0.0.1:" + str(fastapi_request.url.port or 8913),
-                timeout=httpx.Timeout(60.0)
+                timeout=httpx.Timeout(connect=30.0, read=1800.0, write=60.0, pool=60.0)  # 30 min read timeout for cold starts
             ) as client:
                 resp = await client.post(inference_url, json=payload)
         resp.raise_for_status()
         data = resp.json()
-        # Extract first tool call
+        # Record user message (system + user)
+        if tracer_instance is not None:
+            try:
+                print(f"[pokemon_red] Recording messages: tracer_instance={tracer_instance is not None}", flush=True)
+                # Record system message
+                await tracer_instance.record_message(
+                    content=messages[0].get("content", ""),
+                    message_type="system",
+                )
+                # Record user message
+                user_msg_content = messages[1].get("content", "")
+                if isinstance(user_msg_content, list):
+                    # For multimodal content, extract text summary
+                    text_parts = [item.get("text", "") for item in user_msg_content if item.get("type") == "text"]
+                    user_msg_content = " ".join(text_parts) if text_parts else str(user_msg_content)
+                await tracer_instance.record_message(
+                    content=user_msg_content,
+                    message_type="user",
+                )
+                print(f"[pokemon_red] Recorded user messages", flush=True)
+            except Exception as exc:
+                logger.debug(f"[pokemon_red] Failed to record user messages: {exc}")
+                print(f"[pokemon_red] ERROR recording user messages: {exc}", flush=True)
+        # Debug logging for tool calls
+        print(f"\n{'='*80}")
+        print(f"[pokemon_red] INFERENCE RESPONSE DEBUG")
+        print(f"{'='*80}")
+        print(f"Response status: {resp.status_code}")
+        print(f"Response keys: {list(data.keys())}")
         choices = data.get("choices") or []
+        if choices:
+            message = choices[0].get("message") or {}
+            print(f"Message keys: {list(message.keys())}")
+            print(f"Message content preview: {str(message.get('content', ''))[:200]}")
+            print(f"Tool calls: {message.get('tool_calls', [])}")
+            print(f"Full message (formatted):")
+            print(_json_debug.dumps(message, indent=2)[:1500])
+        print(f"{'='*80}\n")
+        # Record assistant message/tool calls
+        if tracer_instance is not None:
+            try:
+                message = choices[0].get("message", {}) if choices else {}
+                tool_calls = message.get("tool_calls", [])
+                content = message.get("content", "")
+                if tool_calls:
+                    # Record tool calls as assistant message
+                    import json as _json_record
+                    await tracer_instance.record_message(
+                        content=_json_record.dumps(tool_calls) if tool_calls else (content or ""),
+                        message_type="assistant",
+                        metadata={"is_tool_call": True} if tool_calls else {},
+                    )
+                elif content:
+                    # Record text content as assistant message
+                    await tracer_instance.record_message(
+                        content=content,
+                        message_type="assistant",
+                    )
+            except Exception as exc:
+                logger.debug(f"[pokemon_red] Failed to record assistant message: {exc}")
+        # Extract first tool call
         if not choices:
+            print("[pokemon_red] WARNING: No choices in inference response")
             return {}
         message = choices[0].get("message") or {}
         raw_calls = message.get("tool_calls") or []
+        # If no structured tool_calls, try parsing XML tool calls from content
+        if not raw_calls:
+            content = message.get("content", "")
+            if content and "<tool_call>" in content:
+                import re as _re
+                import json as _json_parse
+                # Parse XML tool calls: <tool_call>{...}</tool_call>
+                xml_pattern = r'<tool_call>\s*({.*?})\s*</tool_call>'
+                matches = _re.findall(xml_pattern, content, _re.DOTALL)
+                if matches:
+                    print(f"[pokemon_red] Parsed {len(matches)} XML tool call(s) from content")
+                    try:
+                        tool_data = _json_parse.loads(matches[0])
+                        tool_name = tool_data.get("name", "")
+                        args = tool_data.get("arguments", {})
+                        print(f"[pokemon_red] Parsed tool: {tool_name}, args: {str(args)[:200]}")
+                        # Handle execute_sequence tool
+                        if tool_name == "execute_sequence":
+                            return {"actions": args.get("actions", [])}
+                        # Handle press_button tool (legacy single action)
+                        if tool_name == "press_button":
+                            return {"button": args.get("button"), "frames": int(args.get("frames") or 30)}
+                    except Exception as parse_err:
+                        print(f"[pokemon_red] Error parsing XML tool call: {parse_err}")
         if not raw_calls:
+            print(f"[pokemon_red] WARNING: No tool_calls in response. Content: {message.get('content', '')[:200]}")
             return {}
         f = raw_calls[0].get("function") or {}
         tool_name = f.get("name", "")
         args = f.get("arguments")
@@ -439,6 +548,23 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
                     action_context = _build_action_context(prev_state, current_state)
                     step_reward = await reward_fn.score(current_state, action_context)
+                    # Record environment event
+                    if tracer_instance is not None:
+                        try:
+                            event = EnvironmentEvent(
+                                system_instance_id="environment:pokemon_red",
+                                time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
+                                reward=step_reward,
+                                terminated=False,
+                                truncated=False,
+                                system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
+                                system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
+                                metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
+                            )
+                            await tracer_instance.record_event(event)
+                        except Exception as exc:
+                            logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
                     sequence_reward += step_reward
                     sequence_tool_calls.append({"tool": "press_button", "args": {"button": button, "frames": frames}})
@@ -490,6 +616,23 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
                 current_state = dict(obs1) if isinstance(obs1, Mapping) else {}
                 action_context = _build_action_context(prev_state, current_state)
                 step_reward = await reward_fn.score(current_state, action_context)
+                # Record environment event
+                if tracer_instance is not None:
+                    try:
+                        event = EnvironmentEvent(
+                            system_instance_id="environment:pokemon_red",
+                            time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
+                            reward=step_reward,
+                            terminated=False,
+                            truncated=False,
+                            system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
+                            system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
+                            metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
+                        )
+                        await tracer_instance.record_event(event)
+                    except Exception as exc:
+                        logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
                 total_reward += step_reward
                 # Track reward components if non-zero
@@ -530,6 +673,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
             # Attempt policy-driven step if policy.config present
             policy_cfg = request.policy.config or {}
             if policy_cfg:
+                print(f"[pokemon_red] Calling _call_inference: tracer_instance={tracer_instance is not None}", flush=True)
                 try:
                     action = await _call_inference(policy_cfg, final_obs if isinstance(final_obs, Mapping) else {})
@@ -548,6 +692,23 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
                             action_context = _build_action_context(prev_state, current_state)
                             step_reward = await reward_fn.score(current_state, action_context)
+                            # Record environment event
+                            if tracer_instance is not None:
+                                try:
+                                    event = EnvironmentEvent(
+                                        system_instance_id="environment:pokemon_red",
+                                        time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
+                                        reward=step_reward,
+                                        terminated=False,
+                                        truncated=False,
+                                        system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
+                                        system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
+                                        metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
+                                    )
+                                    await tracer_instance.record_event(event)
+                                except Exception as exc:
+                                    logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
                             sequence_reward += step_reward
                             sequence_tool_calls.append({"tool": "press_button", "args": {"button": button, "frames": frames}})
@@ -686,23 +847,58 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
             # End session and get trace
             session_trace = await tracer_instance.end_session()
-            # Build trace payload if requested
+            # Build trace payload if requested - ALWAYS use full format when return_trace=True
+            # This ensures markov_blanket_message_history is always included
             record_config = getattr(request, 'record', None)
+            print(f"[pokemon_red] TRACE DEBUG: record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}, session_trace={session_trace is not None}", flush=True)
+            if session_trace:
+                print(f"[pokemon_red] TRACE DEBUG: IMMEDIATELY AFTER end_session: session_trace has {len(session_trace.markov_blanket_message_history)} messages, {len(session_trace.event_history)} events", flush=True)
+                print(f"[pokemon_red] TRACE DEBUG: session_trace.markov_blanket_message_history type: {type(session_trace.markov_blanket_message_history)}", flush=True)
+                if session_trace.markov_blanket_message_history:
+                    print(f"[pokemon_red] TRACE DEBUG: First message type: {type(session_trace.markov_blanket_message_history[0])}, content: {str(session_trace.markov_blanket_message_history[0].content)[:100]}", flush=True)
+                else:
+                    print(f"[pokemon_red] TRACE DEBUG: WARNING - markov_blanket_message_history is EMPTY RIGHT AFTER end_session!", flush=True)
             if record_config and getattr(record_config, 'return_trace', False) and session_trace:
-                trace_payload = {
-                    "session_id": session_trace.session_id,
-                    "created_at": session_trace.created_at.isoformat() if session_trace.created_at else None,
-                    "metadata": dict(session_trace.metadata or {}),
-                    "num_timesteps": session_trace.num_timesteps,
-                    "num_events": session_trace.num_events,
-                    "num_messages": session_trace.num_messages,
-                }
+                # Always return full trace with all messages and events (no compact format)
+                import dataclasses
+                trace_payload = session_trace.to_dict()
+                print(f"[pokemon_red] TRACE DEBUG: to_dict() returned keys: {list(trace_payload.keys())}", flush=True)
+                print(f"[pokemon_red] TRACE DEBUG: to_dict() markov_blanket_message_history length: {len(trace_payload.get('markov_blanket_message_history', []))}", flush=True)
+                # Always manually serialize messages and events to ensure they're included
+                # asdict() may not recursively serialize nested dataclasses correctly
+                from synth_ai.tracing_v3.abstractions import SessionEventMarkovBlanketMessage, BaseEvent
+                if session_trace.markov_blanket_message_history:
+                    print(f"[pokemon_red] TRACE DEBUG: Manually serializing {len(session_trace.markov_blanket_message_history)} messages", flush=True)
+                    trace_payload["markov_blanket_message_history"] = [
+                        dataclasses.asdict(msg) if isinstance(msg, SessionEventMarkovBlanketMessage) else (msg if isinstance(msg, dict) else str(msg))
+                        for msg in session_trace.markov_blanket_message_history
+                    ]
+                else:
+                    print(f"[pokemon_red] TRACE DEBUG: WARNING - session_trace.markov_blanket_message_history is EMPTY!", flush=True)
+                if session_trace.event_history:
+                    print(f"[pokemon_red] TRACE DEBUG: Manually serializing {len(session_trace.event_history)} events", flush=True)
+                    trace_payload["event_history"] = [
+                        dataclasses.asdict(evt) if isinstance(evt, BaseEvent) else (evt if isinstance(evt, dict) else str(evt))
+                        for evt in session_trace.event_history
+                    ]
+                else:
+                    print(f"[pokemon_red] TRACE DEBUG: WARNING - session_trace.event_history is EMPTY!", flush=True)
+                print(f"[pokemon_red] TRACE DEBUG: Final trace payload has {len(trace_payload.get('markov_blanket_message_history', []))} messages, {len(trace_payload.get('event_history', []))} events", flush=True)
+                print(f"[pokemon_red] TRACE DEBUG: Final trace payload keys: {list(trace_payload.keys())}", flush=True)
+            else:
+                print(f"[pokemon_red] TRACE DEBUG: SKIPPING trace payload build - record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}, session_trace={session_trace is not None}", flush=True)
         except Exception as exc:
             logger.warning(f"[pokemon_red] tracing finalization failed: {exc}")
+            print(f"[pokemon_red] TRACE DEBUG EXCEPTION: {exc}", flush=True)
+            import traceback
+            print(f"[pokemon_red] TRACE DEBUG EXCEPTION TRACEBACK: {traceback.format_exc()}", flush=True)
     # Fallback trace payload if no tracer but CLI needs it
     if trace_payload is None:
         record_config = getattr(request, 'record', None)
+        print(f"[pokemon_red] TRACE DEBUG: trace_payload is None, using fallback. record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}", flush=True)
         if record_config and getattr(record_config, 'return_trace', False):
             trace_payload = {
                 "session_id": request.run_id,
@@ -720,8 +916,22 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
                 "num_events": len(steps),
                 "num_messages": len(steps) * 2,
             }
+            print(f"[pokemon_red] TRACE DEBUG: Created fallback trace_payload with keys: {list(trace_payload.keys())}", flush=True)
+    print(f"[pokemon_red] TRACE DEBUG: About to return RolloutResponse with trace_payload={trace_payload is not None}, keys={list(trace_payload.keys()) if trace_payload else []}", flush=True)
+    if trace_payload:
+        import json as _json_final
+        markov_msgs = trace_payload.get('markov_blanket_message_history', [])
+        event_history = trace_payload.get('event_history', [])
+        print(f"[pokemon_red] TRACE DEBUG: trace_payload markov_blanket_message_history length: {len(markov_msgs)}", flush=True)
+        print(f"[pokemon_red] TRACE DEBUG: trace_payload event_history length: {len(event_history)}", flush=True)
+        if markov_msgs:
+            print(f"[pokemon_red] TRACE DEBUG: First markov message type: {type(markov_msgs[0]) if markov_msgs else None}", flush=True)
+            print(f"[pokemon_red] TRACE DEBUG: First markov message (first 500 chars): {_json_final.dumps(markov_msgs[0] if markov_msgs else {}, indent=2, default=str)[:500]}", flush=True)
+        else:
+            print(f"[pokemon_red] TRACE DEBUG: WARNING - markov_blanket_message_history is EMPTY in final trace_payload!", flush=True)
-    return RolloutResponse(
+    response = RolloutResponse(
         run_id=request.run_id,
         trajectories=[trajectory],
         branches={},
@@ -730,6 +940,14 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
         ops_executed=len(request.ops or []),
         trace=trace_payload,
     )
+    # Final check: inspect what's actually in the response
+    if response.trace:
+        import json as _json_response
+        resp_markov = response.trace.get('markov_blanket_message_history', []) if isinstance(response.trace, dict) else []
+        print(f"[pokemon_red] TRACE DEBUG: Response.trace markov_blanket_message_history length: {len(resp_markov)}", flush=True)
+    return response
 def import_datetime():

examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml CHANGED Viewed

@@ -49,6 +49,7 @@ policy_name = "crafter-react"
 max_concurrent_rollouts = 8
 batches_per_step = 2
 ops = ["agent", "env"]
+task_app_origin_rewards_only = true
 [evaluation]
 # Run baseline evaluation over the first 100 seeds every 20 training iterations

examples/warming_up_to_rl/task_app/grpo_crafter.py CHANGED Viewed

@@ -5,6 +5,7 @@ from __future__ import annotations
 import logging
 import os
 import sys
+from urllib.parse import parse_qs, urlparse
 from collections.abc import Iterable, Sequence
 from contextlib import suppress
 from dataclasses import dataclass
@@ -130,7 +131,16 @@ except Exception:  # pragma: no cover - fallback when optional deps missing
     def ensure_chat_completions_url(raw_url, mode=None):
         return raw_url
-    def extract_trace_correlation_id(_raw_url):
+    def extract_trace_correlation_id(_raw_url, mode=None):
+        if not isinstance(_raw_url, str):
+            return None
+        parsed = urlparse(_raw_url)
+        query_params = parse_qs(parsed.query or "")
+        for key in ("cid", "trace", "trace_correlation_id"):
+            values = query_params.get(key) or []
+            for value in values:
+                if isinstance(value, str) and value.strip():
+                    return value.strip()
         return None
 HAS_HOSTED = True
@@ -415,6 +425,13 @@ def provide_task_instances(
     dataset: CrafterDataset, base_info: TaskInfo, seeds: Sequence[int]
 ) -> Iterable[TaskInfo]:
     infos: list[TaskInfo] = []
+    base_observation = getattr(base_info, "observation", None)
+    if hasattr(base_observation, "model_dump"):
+        observation_template = base_observation.model_dump()
+    elif isinstance(base_observation, dict):
+        observation_template = dict(base_observation)
+    else:
+        observation_template = {}
     for seed_value in seeds:
         summary = dataset.describe_seed(seed_value)
         infos.append(
@@ -423,14 +440,14 @@ def provide_task_instances(
                 environment=base_info.environment,
                 action_space=base_info.action_space,
                 observation={
-                    **base_info.observation,
+                    **observation_template,
                     "seed": seed_value,
                     "traits": summary["traits"],
                     "inventory": summary["inventory"],
                     "player_position": summary["player_position"],
                 },
                 dataset={
-                    **base_info.dataset,
+                    **base_info.dataset.model_dump(),
                     "seed": seed_value,
                     "difficulty": summary["difficulty"],
                     "config": summary["config"],
@@ -580,7 +597,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
             policy_cfg.get("inference_url"),
         )
-    trace_correlation_id = extract_trace_correlation_id(policy_cfg.get("inference_url"))
+    trace_correlation_id = extract_trace_correlation_id(policy_cfg.get("inference_url"), mode=request.mode)
     if request.mode == RolloutMode.RL:
         assert trace_correlation_id, (
             f"FATAL: trace_correlation_id extraction failed for run_id={request.run_id}. "
@@ -698,6 +715,17 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
     # Propagate inference_url into each legacy trajectory entry for downstream tooling.
     inferred_url = policy_cfg.get("inference_url")
+    # Normalize the url before propagating into trajectories
+    try:
+        from .synth_envs_hosted.utils import (
+            ensure_chat_completions_url as _ensure_cc,
+            force_normalize_chat_completions_url as _force_cc,
+        )
+        if isinstance(inferred_url, str) and inferred_url:
+            inferred_url = _force_cc(inferred_url)
+            inferred_url = _ensure_cc(inferred_url, mode=request.mode)
+    except Exception:
+        pass
     if "trajectories" in data:
         normalized_trajs: list[dict[str, Any]] = []
@@ -711,6 +739,27 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
             traj_dict.setdefault("trace_correlation_id", final_cid)
             if isinstance(inferred_url, str) and inferred_url and not traj_dict.get("inference_url"):
                 traj_dict["inference_url"] = inferred_url
+            # Inject nested info.meta.inference_url for each step (required by RL trainer)
+            try:
+                steps = traj_dict.get("steps", [])
+                if isinstance(steps, list):
+                    for step in steps:
+                        if not isinstance(step, dict):
+                            continue
+                        info = step.get("info")
+                        if not isinstance(info, dict):
+                            info = {}
+                        meta = info.get("meta")
+                        if not isinstance(meta, dict):
+                            meta = {}
+                        if isinstance(inferred_url, str) and inferred_url and not meta.get("inference_url"):
+                            meta["inference_url"] = inferred_url
+                        info["meta"] = meta
+                        step["info"] = info
+            except Exception:
+                pass
             normalized_trajs.append(traj_dict)
         if normalized_trajs:
             data["trajectories"] = normalized_trajs

examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from __future__ import annotations
 import contextlib
+import logging
 import os
 from fastapi import FastAPI
@@ -9,6 +10,52 @@ from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 from starlette.requests import Request
+logger = logging.getLogger(__name__)
+_VERSION_LOGGED = False
+def _resolve_task_app_version() -> str:
+    env_version = os.getenv("TASK_APP_VERSION")
+    if isinstance(env_version, str) and env_version.strip():
+        return env_version.strip()
+    try:
+        import importlib.metadata as importlib_metadata
+        pkg_version = importlib_metadata.version("synth-ai")
+        if isinstance(pkg_version, str) and pkg_version.strip():
+            return pkg_version.strip()
+    except Exception:
+        pass
+    try:
+        import synth_ai
+        attr_version = getattr(synth_ai, "__version__", None)
+        if isinstance(attr_version, str) and attr_version.strip():
+            return attr_version.strip()
+    except Exception:
+        pass
+    return "unknown"
+def _log_task_app_version_once() -> None:
+    global _VERSION_LOGGED
+    if _VERSION_LOGGED:
+        return
+    version = _resolve_task_app_version()
+    build_id = os.getenv("TASK_APP_BUILD_ID")
+    if build_id:
+        logger.info("TASK_APP_VERSION: %s (build=%s)", version, build_id)
+    else:
+        logger.info("TASK_APP_VERSION: %s", version)
+    _VERSION_LOGGED = True
 class TaskApp:
     """Holds service configuration and shared state."""
@@ -56,6 +103,8 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
         allow_headers=["*"],
     )
+    _log_task_app_version_once()
     # Initialize task app configuration
     task_app = TaskApp()
     app.state.task_app = task_app

synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.17py3-none-any.whl → 0.2.19py3-none-any.whl