synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (169) hide show
  1. examples/baseline/banking77_baseline.py +204 -0
  2. examples/baseline/crafter_baseline.py +407 -0
  3. examples/baseline/pokemon_red_baseline.py +326 -0
  4. examples/baseline/simple_baseline.py +56 -0
  5. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  6. examples/blog_posts/gepa/README.md +355 -0
  7. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  9. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  10. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  13. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  15. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  16. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  18. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  19. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  20. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  21. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  22. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  23. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  24. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  25. examples/blog_posts/gepa/task_apps.py +105 -0
  26. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  27. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  28. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  29. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +12 -10
  30. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +1 -0
  31. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  32. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  33. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  34. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  35. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  36. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  37. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  38. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  39. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  40. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  41. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  42. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  43. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +1 -1
  44. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  45. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +60 -10
  46. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +1 -1
  47. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  48. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  49. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  50. examples/multi_step/configs/crafter_rl_outcome.toml +1 -0
  51. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -0
  52. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -0
  53. examples/rl/configs/rl_from_base_qwen17.toml +1 -0
  54. examples/swe/task_app/hosted/inference/openai_client.py +0 -34
  55. examples/swe/task_app/hosted/policy_routes.py +17 -0
  56. examples/swe/task_app/hosted/rollout.py +4 -2
  57. examples/task_apps/banking77/__init__.py +6 -0
  58. examples/task_apps/banking77/banking77_task_app.py +841 -0
  59. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  60. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  61. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  62. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  63. examples/task_apps/crafter/task_app/grpo_crafter.py +24 -2
  64. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  65. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +355 -58
  66. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +68 -7
  67. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +78 -21
  68. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  69. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  70. examples/task_apps/gepa_benchmarks/common.py +260 -0
  71. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  72. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  73. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  74. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  75. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  76. examples/task_apps/pokemon_red/task_app.py +254 -36
  77. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +1 -0
  78. examples/warming_up_to_rl/task_app/grpo_crafter.py +53 -4
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +152 -41
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +31 -1
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  83. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  84. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +1 -0
  85. synth_ai/api/train/builders.py +90 -1
  86. synth_ai/api/train/cli.py +396 -21
  87. synth_ai/api/train/config_finder.py +13 -2
  88. synth_ai/api/train/configs/__init__.py +15 -1
  89. synth_ai/api/train/configs/prompt_learning.py +442 -0
  90. synth_ai/api/train/configs/rl.py +29 -0
  91. synth_ai/api/train/task_app.py +1 -1
  92. synth_ai/api/train/validators.py +277 -0
  93. synth_ai/baseline/__init__.py +25 -0
  94. synth_ai/baseline/config.py +209 -0
  95. synth_ai/baseline/discovery.py +214 -0
  96. synth_ai/baseline/execution.py +146 -0
  97. synth_ai/cli/__init__.py +85 -17
  98. synth_ai/cli/__main__.py +0 -0
  99. synth_ai/cli/claude.py +70 -0
  100. synth_ai/cli/codex.py +84 -0
  101. synth_ai/cli/commands/__init__.py +1 -0
  102. synth_ai/cli/commands/baseline/__init__.py +12 -0
  103. synth_ai/cli/commands/baseline/core.py +637 -0
  104. synth_ai/cli/commands/baseline/list.py +93 -0
  105. synth_ai/cli/commands/eval/core.py +13 -10
  106. synth_ai/cli/commands/filter/core.py +53 -17
  107. synth_ai/cli/commands/help/core.py +0 -1
  108. synth_ai/cli/commands/smoke/__init__.py +7 -0
  109. synth_ai/cli/commands/smoke/core.py +1436 -0
  110. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  111. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  112. synth_ai/cli/commands/train/judge_schemas.py +1 -0
  113. synth_ai/cli/commands/train/judge_validation.py +1 -0
  114. synth_ai/cli/commands/train/validation.py +0 -57
  115. synth_ai/cli/demo.py +35 -3
  116. synth_ai/cli/deploy/__init__.py +40 -25
  117. synth_ai/cli/deploy.py +162 -0
  118. synth_ai/cli/legacy_root_backup.py +14 -8
  119. synth_ai/cli/opencode.py +107 -0
  120. synth_ai/cli/root.py +9 -5
  121. synth_ai/cli/task_app_deploy.py +1 -1
  122. synth_ai/cli/task_apps.py +53 -53
  123. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  124. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  125. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  126. synth_ai/judge_schemas.py +1 -0
  127. synth_ai/learning/__init__.py +10 -0
  128. synth_ai/learning/prompt_learning_client.py +276 -0
  129. synth_ai/learning/prompt_learning_types.py +184 -0
  130. synth_ai/pricing/__init__.py +2 -0
  131. synth_ai/pricing/model_pricing.py +57 -0
  132. synth_ai/streaming/handlers.py +53 -4
  133. synth_ai/streaming/streamer.py +19 -0
  134. synth_ai/task/apps/__init__.py +1 -0
  135. synth_ai/task/config.py +2 -0
  136. synth_ai/task/tracing_utils.py +25 -25
  137. synth_ai/task/validators.py +44 -8
  138. synth_ai/task_app_cfgs.py +21 -0
  139. synth_ai/tracing_v3/config.py +162 -19
  140. synth_ai/tracing_v3/constants.py +1 -1
  141. synth_ai/tracing_v3/db_config.py +24 -38
  142. synth_ai/tracing_v3/storage/config.py +47 -13
  143. synth_ai/tracing_v3/storage/factory.py +3 -3
  144. synth_ai/tracing_v3/turso/daemon.py +113 -11
  145. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  146. synth_ai/types.py +8 -0
  147. synth_ai/urls.py +11 -0
  148. synth_ai/utils/__init__.py +30 -1
  149. synth_ai/utils/agents.py +74 -0
  150. synth_ai/utils/bin.py +39 -0
  151. synth_ai/utils/cli.py +149 -5
  152. synth_ai/utils/env.py +17 -17
  153. synth_ai/utils/json.py +72 -0
  154. synth_ai/utils/modal.py +283 -1
  155. synth_ai/utils/paths.py +48 -0
  156. synth_ai/utils/uvicorn.py +113 -0
  157. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/METADATA +102 -4
  158. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/RECORD +162 -88
  159. synth_ai/cli/commands/deploy/__init__.py +0 -23
  160. synth_ai/cli/commands/deploy/core.py +0 -614
  161. synth_ai/cli/commands/deploy/errors.py +0 -72
  162. synth_ai/cli/commands/deploy/validation.py +0 -11
  163. synth_ai/cli/deploy/core.py +0 -5
  164. synth_ai/cli/deploy/errors.py +0 -23
  165. synth_ai/cli/deploy/validation.py +0 -5
  166. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  167. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  168. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  169. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -29,6 +29,8 @@ from synth_ai.task.tracing_utils import (
29
29
  tracing_env_enabled,
30
30
  )
31
31
  from synth_ai.tracing_v3.session_tracer import SessionTracer
32
+ from synth_ai.tracing_v3.abstractions import EnvironmentEvent, TimeRecord
33
+ from datetime import datetime, UTC
32
34
 
33
35
  logger = logging.getLogger(__name__)
34
36
 
@@ -261,9 +263,13 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
261
263
  "role": "system",
262
264
  "content": (
263
265
  "You are controlling Pokémon Red, a classic Game Boy game. You can see the game screen in the images provided. "
264
- "Your goal is to make progress in the game. Use the execute_sequence tool to press buttons. "
266
+ "Your goal is to make progress in the game. "
267
+ "IMPORTANT: Always use the 'execute_sequence' tool to submit 5-10 actions per call. "
268
+ "Do not reason about which tool to use - execute_sequence is the only tool available. "
265
269
  "Choose appropriate button presses based on what you see in the game screen. "
266
- "Always respond with exactly one tool call in the format: <tool_call>{\"name\": \"execute_sequence\", \"arguments\": {...}}</tool_call>"
270
+ "Plan 5-10 actions ahead to play efficiently. "
271
+ "CRITICAL: If stuck in a text box (text_box_active=True), try pressing B button first, then try A. "
272
+ "Always respond with exactly one tool call containing 5-10 actions."
267
273
  ),
268
274
  },
269
275
  {
@@ -279,7 +285,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
279
285
  "type": "function",
280
286
  "function": {
281
287
  "name": "execute_sequence",
282
- "description": "Execute multiple button presses in sequence. More efficient than separate calls. Recommended: 5-10 actions per call.",
288
+ "description": "Execute multiple button presses in sequence. More efficient than separate calls. ALWAYS use this tool. Plan 5-10 actions ahead to play efficiently.",
283
289
  "parameters": {
284
290
  "type": "object",
285
291
  "properties": {
@@ -302,31 +308,15 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
302
308
  },
303
309
  "required": ["button", "frames"]
304
310
  },
305
- "minItems": 1,
306
- "maxItems": 20,
307
- "description": "Sequence of button presses to execute"
311
+ "minItems": 5,
312
+ "maxItems": 10,
313
+ "description": "Sequence of 5-10 button presses to execute. Plan ahead to navigate efficiently."
308
314
  }
309
315
  },
310
316
  "required": ["actions"],
311
317
  "additionalProperties": False,
312
318
  },
313
319
  },
314
- },
315
- {
316
- "type": "function",
317
- "function": {
318
- "name": "press_button",
319
- "description": "Press a single Game Boy button for N frames (use execute_sequence for multiple actions)",
320
- "parameters": {
321
- "type": "object",
322
- "properties": {
323
- "button": {"type": "string", "enum": ["UP", "DOWN", "LEFT", "RIGHT", "A", "B", "START", "SELECT"]},
324
- "frames": {"type": "integer", "minimum": 1, "maximum": 120},
325
- },
326
- "required": ["button"],
327
- "additionalProperties": False,
328
- },
329
- },
330
320
  }
331
321
  ],
332
322
  "tool_choice": {"type": "function", "function": {"name": "execute_sequence"}},
@@ -352,35 +342,154 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
352
342
  if "api.openai.com" in inference_url and not inference_url.endswith("/chat/completions"):
353
343
  inference_url = inference_url + "/v1/chat/completions"
354
344
 
345
+ # Debug: print exact payload being sent
346
+ import json as _json_debug
347
+ print(f"\n{'='*80}")
348
+ print(f"[pokemon_red] INFERENCE REQUEST DEBUG")
349
+ print(f"{'='*80}")
350
+ print(f"Inference URL: {inference_url}")
351
+ print(f"Payload keys: {list(payload.keys())}")
352
+ print(f"Payload (formatted):")
353
+ print(_json_debug.dumps(payload, indent=2)[:2000])
354
+ print(f"{'='*80}\n")
355
+
356
+
355
357
  if is_external:
356
358
  # External API: use direct HTTP client with auth header
357
359
  headers = {}
360
+ import os
358
361
  if "api.openai.com" in inference_url:
359
- import os
360
362
  api_key = os.getenv("OPENAI_API_KEY")
361
363
  if api_key:
362
364
  headers["Authorization"] = f"Bearer {api_key}"
365
+ elif "modal.run" in inference_url or "synth" in inference_url.lower():
366
+ # Synth API: use SYNTH_API_KEY
367
+ api_key = os.getenv("SYNTH_API_KEY")
368
+ if api_key:
369
+ headers["Authorization"] = f"Bearer {api_key}"
370
+ print(f"[pokemon_red] Using Synth API auth: {'Bearer ' + api_key[:10] + '...' if api_key else 'NONE'}")
371
+ # For 30B-A3B models, require H200 (A100 doesn't have enough memory)
372
+ model_id = payload.get("model", "")
373
+ if "30B-A3B" in model_id or "A3B" in model_id:
374
+ headers["X-GPU-Preference"] = "H200"
375
+ print(f"[pokemon_red] Setting X-GPU-Preference: H200 (required for A3B MoE)")
363
376
 
364
- async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
377
+ async with httpx.AsyncClient(timeout=httpx.Timeout(connect=30.0, read=1800.0, write=60.0, pool=60.0)) as client: # 30 min read timeout for cold starts
365
378
  resp = await client.post(inference_url, json=payload, headers=headers)
366
379
  else:
367
380
  # Internal proxy: use local base_url
368
381
  async with httpx.AsyncClient(
369
382
  base_url="http://127.0.0.1:" + str(fastapi_request.url.port or 8913),
370
- timeout=httpx.Timeout(60.0)
383
+ timeout=httpx.Timeout(connect=30.0, read=1800.0, write=60.0, pool=60.0) # 30 min read timeout for cold starts
371
384
  ) as client:
372
385
  resp = await client.post(inference_url, json=payload)
373
386
 
374
387
  resp.raise_for_status()
375
388
  data = resp.json()
376
- # Extract first tool call
389
+
390
+ # Record user message (system + user)
391
+ if tracer_instance is not None:
392
+ try:
393
+ print(f"[pokemon_red] Recording messages: tracer_instance={tracer_instance is not None}", flush=True)
394
+ # Record system message
395
+ await tracer_instance.record_message(
396
+ content=messages[0].get("content", ""),
397
+ message_type="system",
398
+ )
399
+ # Record user message
400
+ user_msg_content = messages[1].get("content", "")
401
+ if isinstance(user_msg_content, list):
402
+ # For multimodal content, extract text summary
403
+ text_parts = [item.get("text", "") for item in user_msg_content if item.get("type") == "text"]
404
+ user_msg_content = " ".join(text_parts) if text_parts else str(user_msg_content)
405
+ await tracer_instance.record_message(
406
+ content=user_msg_content,
407
+ message_type="user",
408
+ )
409
+ print(f"[pokemon_red] Recorded user messages", flush=True)
410
+ except Exception as exc:
411
+ logger.debug(f"[pokemon_red] Failed to record user messages: {exc}")
412
+ print(f"[pokemon_red] ERROR recording user messages: {exc}", flush=True)
413
+
414
+ # Debug logging for tool calls
415
+ print(f"\n{'='*80}")
416
+ print(f"[pokemon_red] INFERENCE RESPONSE DEBUG")
417
+ print(f"{'='*80}")
418
+ print(f"Response status: {resp.status_code}")
419
+ print(f"Response keys: {list(data.keys())}")
377
420
  choices = data.get("choices") or []
421
+ if choices:
422
+ message = choices[0].get("message") or {}
423
+ print(f"Message keys: {list(message.keys())}")
424
+ print(f"Message content preview: {str(message.get('content', ''))[:200]}")
425
+ print(f"Tool calls: {message.get('tool_calls', [])}")
426
+ print(f"Full message (formatted):")
427
+ print(_json_debug.dumps(message, indent=2)[:1500])
428
+ print(f"{'='*80}\n")
429
+
430
+ # Record assistant message/tool calls
431
+ if tracer_instance is not None:
432
+ try:
433
+ message = choices[0].get("message", {}) if choices else {}
434
+ tool_calls = message.get("tool_calls", [])
435
+ content = message.get("content", "")
436
+
437
+ if tool_calls:
438
+ # Record tool calls as assistant message
439
+ import json as _json_record
440
+ await tracer_instance.record_message(
441
+ content=_json_record.dumps(tool_calls) if tool_calls else (content or ""),
442
+ message_type="assistant",
443
+ metadata={"is_tool_call": True} if tool_calls else {},
444
+ )
445
+ elif content:
446
+ # Record text content as assistant message
447
+ await tracer_instance.record_message(
448
+ content=content,
449
+ message_type="assistant",
450
+ )
451
+ except Exception as exc:
452
+ logger.debug(f"[pokemon_red] Failed to record assistant message: {exc}")
453
+
454
+ # Extract first tool call
378
455
  if not choices:
456
+ print("[pokemon_red] WARNING: No choices in inference response")
379
457
  return {}
380
458
  message = choices[0].get("message") or {}
381
459
  raw_calls = message.get("tool_calls") or []
460
+
461
+ # If no structured tool_calls, try parsing XML tool calls from content
462
+ if not raw_calls:
463
+ content = message.get("content", "")
464
+ if content and "<tool_call>" in content:
465
+ import re as _re
466
+ import json as _json_parse
467
+ # Parse XML tool calls: <tool_call>{...}</tool_call>
468
+ xml_pattern = r'<tool_call>\s*({.*?})\s*</tool_call>'
469
+ matches = _re.findall(xml_pattern, content, _re.DOTALL)
470
+ if matches:
471
+ print(f"[pokemon_red] Parsed {len(matches)} XML tool call(s) from content")
472
+ try:
473
+ tool_data = _json_parse.loads(matches[0])
474
+ tool_name = tool_data.get("name", "")
475
+ args = tool_data.get("arguments", {})
476
+
477
+ print(f"[pokemon_red] Parsed tool: {tool_name}, args: {str(args)[:200]}")
478
+
479
+ # Handle execute_sequence tool
480
+ if tool_name == "execute_sequence":
481
+ return {"actions": args.get("actions", [])}
482
+
483
+ # Handle press_button tool (legacy single action)
484
+ if tool_name == "press_button":
485
+ return {"button": args.get("button"), "frames": int(args.get("frames") or 30)}
486
+ except Exception as parse_err:
487
+ print(f"[pokemon_red] Error parsing XML tool call: {parse_err}")
488
+
382
489
  if not raw_calls:
490
+ print(f"[pokemon_red] WARNING: No tool_calls in response. Content: {message.get('content', '')[:200]}")
383
491
  return {}
492
+
384
493
  f = raw_calls[0].get("function") or {}
385
494
  tool_name = f.get("name", "")
386
495
  args = f.get("arguments")
@@ -439,6 +548,23 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
439
548
  action_context = _build_action_context(prev_state, current_state)
440
549
  step_reward = await reward_fn.score(current_state, action_context)
441
550
 
551
+ # Record environment event
552
+ if tracer_instance is not None:
553
+ try:
554
+ event = EnvironmentEvent(
555
+ system_instance_id="environment:pokemon_red",
556
+ time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
557
+ reward=step_reward,
558
+ terminated=False,
559
+ truncated=False,
560
+ system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
561
+ system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
562
+ metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
563
+ )
564
+ await tracer_instance.record_event(event)
565
+ except Exception as exc:
566
+ logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
567
+
442
568
  sequence_reward += step_reward
443
569
  sequence_tool_calls.append({"tool": "press_button", "args": {"button": button, "frames": frames}})
444
570
 
@@ -490,6 +616,23 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
490
616
  current_state = dict(obs1) if isinstance(obs1, Mapping) else {}
491
617
  action_context = _build_action_context(prev_state, current_state)
492
618
  step_reward = await reward_fn.score(current_state, action_context)
619
+
620
+ # Record environment event
621
+ if tracer_instance is not None:
622
+ try:
623
+ event = EnvironmentEvent(
624
+ system_instance_id="environment:pokemon_red",
625
+ time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
626
+ reward=step_reward,
627
+ terminated=False,
628
+ truncated=False,
629
+ system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
630
+ system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
631
+ metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
632
+ )
633
+ await tracer_instance.record_event(event)
634
+ except Exception as exc:
635
+ logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
493
636
  total_reward += step_reward
494
637
 
495
638
  # Track reward components if non-zero
@@ -530,6 +673,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
530
673
  # Attempt policy-driven step if policy.config present
531
674
  policy_cfg = request.policy.config or {}
532
675
  if policy_cfg:
676
+ print(f"[pokemon_red] Calling _call_inference: tracer_instance={tracer_instance is not None}", flush=True)
533
677
  try:
534
678
  action = await _call_inference(policy_cfg, final_obs if isinstance(final_obs, Mapping) else {})
535
679
 
@@ -548,6 +692,23 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
548
692
  action_context = _build_action_context(prev_state, current_state)
549
693
  step_reward = await reward_fn.score(current_state, action_context)
550
694
 
695
+ # Record environment event
696
+ if tracer_instance is not None:
697
+ try:
698
+ event = EnvironmentEvent(
699
+ system_instance_id="environment:pokemon_red",
700
+ time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
701
+ reward=step_reward,
702
+ terminated=False,
703
+ truncated=False,
704
+ system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
705
+ system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
706
+ metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
707
+ )
708
+ await tracer_instance.record_event(event)
709
+ except Exception as exc:
710
+ logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
711
+
551
712
  sequence_reward += step_reward
552
713
  sequence_tool_calls.append({"tool": "press_button", "args": {"button": button, "frames": frames}})
553
714
 
@@ -686,23 +847,58 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
686
847
  # End session and get trace
687
848
  session_trace = await tracer_instance.end_session()
688
849
 
689
- # Build trace payload if requested
850
+ # Build trace payload if requested - ALWAYS use full format when return_trace=True
851
+ # This ensures markov_blanket_message_history is always included
690
852
  record_config = getattr(request, 'record', None)
853
+ print(f"[pokemon_red] TRACE DEBUG: record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}, session_trace={session_trace is not None}", flush=True)
854
+ if session_trace:
855
+ print(f"[pokemon_red] TRACE DEBUG: IMMEDIATELY AFTER end_session: session_trace has {len(session_trace.markov_blanket_message_history)} messages, {len(session_trace.event_history)} events", flush=True)
856
+ print(f"[pokemon_red] TRACE DEBUG: session_trace.markov_blanket_message_history type: {type(session_trace.markov_blanket_message_history)}", flush=True)
857
+ if session_trace.markov_blanket_message_history:
858
+ print(f"[pokemon_red] TRACE DEBUG: First message type: {type(session_trace.markov_blanket_message_history[0])}, content: {str(session_trace.markov_blanket_message_history[0].content)[:100]}", flush=True)
859
+ else:
860
+ print(f"[pokemon_red] TRACE DEBUG: WARNING - markov_blanket_message_history is EMPTY RIGHT AFTER end_session!", flush=True)
861
+
691
862
  if record_config and getattr(record_config, 'return_trace', False) and session_trace:
692
- trace_payload = {
693
- "session_id": session_trace.session_id,
694
- "created_at": session_trace.created_at.isoformat() if session_trace.created_at else None,
695
- "metadata": dict(session_trace.metadata or {}),
696
- "num_timesteps": session_trace.num_timesteps,
697
- "num_events": session_trace.num_events,
698
- "num_messages": session_trace.num_messages,
699
- }
863
+ # Always return full trace with all messages and events (no compact format)
864
+ import dataclasses
865
+ trace_payload = session_trace.to_dict()
866
+ print(f"[pokemon_red] TRACE DEBUG: to_dict() returned keys: {list(trace_payload.keys())}", flush=True)
867
+ print(f"[pokemon_red] TRACE DEBUG: to_dict() markov_blanket_message_history length: {len(trace_payload.get('markov_blanket_message_history', []))}", flush=True)
868
+
869
+ # Always manually serialize messages and events to ensure they're included
870
+ # asdict() may not recursively serialize nested dataclasses correctly
871
+ from synth_ai.tracing_v3.abstractions import SessionEventMarkovBlanketMessage, BaseEvent
872
+ if session_trace.markov_blanket_message_history:
873
+ print(f"[pokemon_red] TRACE DEBUG: Manually serializing {len(session_trace.markov_blanket_message_history)} messages", flush=True)
874
+ trace_payload["markov_blanket_message_history"] = [
875
+ dataclasses.asdict(msg) if isinstance(msg, SessionEventMarkovBlanketMessage) else (msg if isinstance(msg, dict) else str(msg))
876
+ for msg in session_trace.markov_blanket_message_history
877
+ ]
878
+ else:
879
+ print(f"[pokemon_red] TRACE DEBUG: WARNING - session_trace.markov_blanket_message_history is EMPTY!", flush=True)
880
+ if session_trace.event_history:
881
+ print(f"[pokemon_red] TRACE DEBUG: Manually serializing {len(session_trace.event_history)} events", flush=True)
882
+ trace_payload["event_history"] = [
883
+ dataclasses.asdict(evt) if isinstance(evt, BaseEvent) else (evt if isinstance(evt, dict) else str(evt))
884
+ for evt in session_trace.event_history
885
+ ]
886
+ else:
887
+ print(f"[pokemon_red] TRACE DEBUG: WARNING - session_trace.event_history is EMPTY!", flush=True)
888
+ print(f"[pokemon_red] TRACE DEBUG: Final trace payload has {len(trace_payload.get('markov_blanket_message_history', []))} messages, {len(trace_payload.get('event_history', []))} events", flush=True)
889
+ print(f"[pokemon_red] TRACE DEBUG: Final trace payload keys: {list(trace_payload.keys())}", flush=True)
890
+ else:
891
+ print(f"[pokemon_red] TRACE DEBUG: SKIPPING trace payload build - record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}, session_trace={session_trace is not None}", flush=True)
700
892
  except Exception as exc:
701
893
  logger.warning(f"[pokemon_red] tracing finalization failed: {exc}")
894
+ print(f"[pokemon_red] TRACE DEBUG EXCEPTION: {exc}", flush=True)
895
+ import traceback
896
+ print(f"[pokemon_red] TRACE DEBUG EXCEPTION TRACEBACK: {traceback.format_exc()}", flush=True)
702
897
 
703
898
  # Fallback trace payload if no tracer but CLI needs it
704
899
  if trace_payload is None:
705
900
  record_config = getattr(request, 'record', None)
901
+ print(f"[pokemon_red] TRACE DEBUG: trace_payload is None, using fallback. record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}", flush=True)
706
902
  if record_config and getattr(record_config, 'return_trace', False):
707
903
  trace_payload = {
708
904
  "session_id": request.run_id,
@@ -720,8 +916,22 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
720
916
  "num_events": len(steps),
721
917
  "num_messages": len(steps) * 2,
722
918
  }
919
+ print(f"[pokemon_red] TRACE DEBUG: Created fallback trace_payload with keys: {list(trace_payload.keys())}", flush=True)
920
+
921
+ print(f"[pokemon_red] TRACE DEBUG: About to return RolloutResponse with trace_payload={trace_payload is not None}, keys={list(trace_payload.keys()) if trace_payload else []}", flush=True)
922
+ if trace_payload:
923
+ import json as _json_final
924
+ markov_msgs = trace_payload.get('markov_blanket_message_history', [])
925
+ event_history = trace_payload.get('event_history', [])
926
+ print(f"[pokemon_red] TRACE DEBUG: trace_payload markov_blanket_message_history length: {len(markov_msgs)}", flush=True)
927
+ print(f"[pokemon_red] TRACE DEBUG: trace_payload event_history length: {len(event_history)}", flush=True)
928
+ if markov_msgs:
929
+ print(f"[pokemon_red] TRACE DEBUG: First markov message type: {type(markov_msgs[0]) if markov_msgs else None}", flush=True)
930
+ print(f"[pokemon_red] TRACE DEBUG: First markov message (first 500 chars): {_json_final.dumps(markov_msgs[0] if markov_msgs else {}, indent=2, default=str)[:500]}", flush=True)
931
+ else:
932
+ print(f"[pokemon_red] TRACE DEBUG: WARNING - markov_blanket_message_history is EMPTY in final trace_payload!", flush=True)
723
933
 
724
- return RolloutResponse(
934
+ response = RolloutResponse(
725
935
  run_id=request.run_id,
726
936
  trajectories=[trajectory],
727
937
  branches={},
@@ -730,6 +940,14 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
730
940
  ops_executed=len(request.ops or []),
731
941
  trace=trace_payload,
732
942
  )
943
+
944
+ # Final check: inspect what's actually in the response
945
+ if response.trace:
946
+ import json as _json_response
947
+ resp_markov = response.trace.get('markov_blanket_message_history', []) if isinstance(response.trace, dict) else []
948
+ print(f"[pokemon_red] TRACE DEBUG: Response.trace markov_blanket_message_history length: {len(resp_markov)}", flush=True)
949
+
950
+ return response
733
951
 
734
952
 
735
953
  def import_datetime():
@@ -49,6 +49,7 @@ policy_name = "crafter-react"
49
49
  max_concurrent_rollouts = 8
50
50
  batches_per_step = 2
51
51
  ops = ["agent", "env"]
52
+ task_app_origin_rewards_only = true
52
53
 
53
54
  [evaluation]
54
55
  # Run baseline evaluation over the first 100 seeds every 20 training iterations
@@ -5,6 +5,7 @@ from __future__ import annotations
5
5
  import logging
6
6
  import os
7
7
  import sys
8
+ from urllib.parse import parse_qs, urlparse
8
9
  from collections.abc import Iterable, Sequence
9
10
  from contextlib import suppress
10
11
  from dataclasses import dataclass
@@ -130,7 +131,16 @@ except Exception: # pragma: no cover - fallback when optional deps missing
130
131
  def ensure_chat_completions_url(raw_url, mode=None):
131
132
  return raw_url
132
133
 
133
- def extract_trace_correlation_id(_raw_url):
134
+ def extract_trace_correlation_id(_raw_url, mode=None):
135
+ if not isinstance(_raw_url, str):
136
+ return None
137
+ parsed = urlparse(_raw_url)
138
+ query_params = parse_qs(parsed.query or "")
139
+ for key in ("cid", "trace", "trace_correlation_id"):
140
+ values = query_params.get(key) or []
141
+ for value in values:
142
+ if isinstance(value, str) and value.strip():
143
+ return value.strip()
134
144
  return None
135
145
 
136
146
  HAS_HOSTED = True
@@ -415,6 +425,13 @@ def provide_task_instances(
415
425
  dataset: CrafterDataset, base_info: TaskInfo, seeds: Sequence[int]
416
426
  ) -> Iterable[TaskInfo]:
417
427
  infos: list[TaskInfo] = []
428
+ base_observation = getattr(base_info, "observation", None)
429
+ if hasattr(base_observation, "model_dump"):
430
+ observation_template = base_observation.model_dump()
431
+ elif isinstance(base_observation, dict):
432
+ observation_template = dict(base_observation)
433
+ else:
434
+ observation_template = {}
418
435
  for seed_value in seeds:
419
436
  summary = dataset.describe_seed(seed_value)
420
437
  infos.append(
@@ -423,14 +440,14 @@ def provide_task_instances(
423
440
  environment=base_info.environment,
424
441
  action_space=base_info.action_space,
425
442
  observation={
426
- **base_info.observation,
443
+ **observation_template,
427
444
  "seed": seed_value,
428
445
  "traits": summary["traits"],
429
446
  "inventory": summary["inventory"],
430
447
  "player_position": summary["player_position"],
431
448
  },
432
449
  dataset={
433
- **base_info.dataset,
450
+ **base_info.dataset.model_dump(),
434
451
  "seed": seed_value,
435
452
  "difficulty": summary["difficulty"],
436
453
  "config": summary["config"],
@@ -580,7 +597,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
580
597
  policy_cfg.get("inference_url"),
581
598
  )
582
599
 
583
- trace_correlation_id = extract_trace_correlation_id(policy_cfg.get("inference_url"))
600
+ trace_correlation_id = extract_trace_correlation_id(policy_cfg.get("inference_url"), mode=request.mode)
584
601
  if request.mode == RolloutMode.RL:
585
602
  assert trace_correlation_id, (
586
603
  f"FATAL: trace_correlation_id extraction failed for run_id={request.run_id}. "
@@ -698,6 +715,17 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
698
715
 
699
716
  # Propagate inference_url into each legacy trajectory entry for downstream tooling.
700
717
  inferred_url = policy_cfg.get("inference_url")
718
+ # Normalize the url before propagating into trajectories
719
+ try:
720
+ from .synth_envs_hosted.utils import (
721
+ ensure_chat_completions_url as _ensure_cc,
722
+ force_normalize_chat_completions_url as _force_cc,
723
+ )
724
+ if isinstance(inferred_url, str) and inferred_url:
725
+ inferred_url = _force_cc(inferred_url)
726
+ inferred_url = _ensure_cc(inferred_url, mode=request.mode)
727
+ except Exception:
728
+ pass
701
729
 
702
730
  if "trajectories" in data:
703
731
  normalized_trajs: list[dict[str, Any]] = []
@@ -711,6 +739,27 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
711
739
  traj_dict.setdefault("trace_correlation_id", final_cid)
712
740
  if isinstance(inferred_url, str) and inferred_url and not traj_dict.get("inference_url"):
713
741
  traj_dict["inference_url"] = inferred_url
742
+
743
+ # Inject nested info.meta.inference_url for each step (required by RL trainer)
744
+ try:
745
+ steps = traj_dict.get("steps", [])
746
+ if isinstance(steps, list):
747
+ for step in steps:
748
+ if not isinstance(step, dict):
749
+ continue
750
+ info = step.get("info")
751
+ if not isinstance(info, dict):
752
+ info = {}
753
+ meta = info.get("meta")
754
+ if not isinstance(meta, dict):
755
+ meta = {}
756
+ if isinstance(inferred_url, str) and inferred_url and not meta.get("inference_url"):
757
+ meta["inference_url"] = inferred_url
758
+ info["meta"] = meta
759
+ step["info"] = info
760
+ except Exception:
761
+ pass
762
+
714
763
  normalized_trajs.append(traj_dict)
715
764
  if normalized_trajs:
716
765
  data["trajectories"] = normalized_trajs
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import contextlib
4
+ import logging
4
5
  import os
5
6
 
6
7
  from fastapi import FastAPI
@@ -9,6 +10,52 @@ from fastapi.responses import JSONResponse
9
10
  from pydantic import BaseModel
10
11
  from starlette.requests import Request
11
12
 
13
+ logger = logging.getLogger(__name__)
14
+
15
+ _VERSION_LOGGED = False
16
+
17
+
18
+ def _resolve_task_app_version() -> str:
19
+ env_version = os.getenv("TASK_APP_VERSION")
20
+ if isinstance(env_version, str) and env_version.strip():
21
+ return env_version.strip()
22
+
23
+ try:
24
+ import importlib.metadata as importlib_metadata
25
+
26
+ pkg_version = importlib_metadata.version("synth-ai")
27
+ if isinstance(pkg_version, str) and pkg_version.strip():
28
+ return pkg_version.strip()
29
+ except Exception:
30
+ pass
31
+
32
+ try:
33
+ import synth_ai
34
+
35
+ attr_version = getattr(synth_ai, "__version__", None)
36
+ if isinstance(attr_version, str) and attr_version.strip():
37
+ return attr_version.strip()
38
+ except Exception:
39
+ pass
40
+
41
+ return "unknown"
42
+
43
+
44
+ def _log_task_app_version_once() -> None:
45
+ global _VERSION_LOGGED
46
+ if _VERSION_LOGGED:
47
+ return
48
+
49
+ version = _resolve_task_app_version()
50
+ build_id = os.getenv("TASK_APP_BUILD_ID")
51
+
52
+ if build_id:
53
+ logger.info("TASK_APP_VERSION: %s (build=%s)", version, build_id)
54
+ else:
55
+ logger.info("TASK_APP_VERSION: %s", version)
56
+
57
+ _VERSION_LOGGED = True
58
+
12
59
 
13
60
  class TaskApp:
14
61
  """Holds service configuration and shared state."""
@@ -56,6 +103,8 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
56
103
  allow_headers=["*"],
57
104
  )
58
105
 
106
+ _log_task_app_version_once()
107
+
59
108
  # Initialize task app configuration
60
109
  task_app = TaskApp()
61
110
  app.state.task_app = task_app