synth-ai 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (192)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/blog_posts/pokemon_vl/README.md +98 -0
  3. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  5. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  6. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  7. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  8. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  9. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  12. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  13. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  15. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  16. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  17. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  18. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  20. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  21. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  22. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  23. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  24. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  25. examples/qwen_vl/README.md +10 -12
  26. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  27. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  28. examples/qwen_vl/collect_data_via_cli.md +76 -84
  29. examples/qwen_vl/collect_vision_traces.py +4 -4
  30. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  31. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  32. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  33. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  34. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  35. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  36. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  37. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  38. examples/qwen_vl/run_vision_comparison.sh +6 -7
  39. examples/rl/README.md +5 -5
  40. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  41. examples/rl/configs/rl_from_base_qwen17.toml +5 -2
  42. examples/rl/task_app/README.md +1 -2
  43. examples/rl/task_app/math_single_step.py +2 -2
  44. examples/run_crafter_demo.sh +2 -2
  45. examples/sft/README.md +1 -1
  46. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  47. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  48. examples/swe/task_app/README.md +32 -2
  49. examples/swe/task_app/grpo_swe_mini.py +4 -0
  50. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  51. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  52. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  53. examples/swe/task_app/morph_backend.py +178 -0
  54. examples/task_apps/crafter/task_app/README.md +1 -1
  55. examples/task_apps/crafter/task_app/grpo_crafter.py +66 -3
  56. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  57. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  58. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  59. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +17 -49
  60. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +13 -5
  61. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +15 -1
  62. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  63. examples/task_apps/math/README.md +1 -2
  64. examples/task_apps/pokemon_red/README.md +3 -4
  65. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  66. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  67. examples/task_apps/pokemon_red/task_app.py +36 -5
  68. examples/task_apps/sokoban/README.md +2 -3
  69. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  70. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  71. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  72. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  73. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  74. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -2
  75. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  76. examples/warming_up_to_rl/task_app/README.md +1 -1
  77. examples/warming_up_to_rl/task_app/grpo_crafter.py +134 -3
  78. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +4 -4
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +6 -3
  83. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  84. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  85. synth_ai/api/train/builders.py +9 -3
  86. synth_ai/api/train/cli.py +125 -10
  87. synth_ai/api/train/configs/__init__.py +8 -1
  88. synth_ai/api/train/configs/rl.py +32 -7
  89. synth_ai/api/train/configs/sft.py +6 -2
  90. synth_ai/api/train/configs/shared.py +59 -2
  91. synth_ai/auth/credentials.py +119 -0
  92. synth_ai/cli/__init__.py +12 -4
  93. synth_ai/cli/commands/__init__.py +17 -0
  94. synth_ai/cli/commands/demo/__init__.py +6 -0
  95. synth_ai/cli/commands/demo/core.py +163 -0
  96. synth_ai/cli/commands/deploy/__init__.py +23 -0
  97. synth_ai/cli/commands/deploy/core.py +614 -0
  98. synth_ai/cli/commands/deploy/errors.py +72 -0
  99. synth_ai/cli/commands/deploy/validation.py +11 -0
  100. synth_ai/cli/commands/eval/__init__.py +19 -0
  101. synth_ai/cli/commands/eval/core.py +1109 -0
  102. synth_ai/cli/commands/eval/errors.py +81 -0
  103. synth_ai/cli/commands/eval/validation.py +133 -0
  104. synth_ai/cli/commands/filter/__init__.py +12 -0
  105. synth_ai/cli/commands/filter/core.py +388 -0
  106. synth_ai/cli/commands/filter/errors.py +55 -0
  107. synth_ai/cli/commands/filter/validation.py +77 -0
  108. synth_ai/cli/commands/help/__init__.py +177 -0
  109. synth_ai/cli/commands/help/core.py +73 -0
  110. synth_ai/cli/commands/status/__init__.py +64 -0
  111. synth_ai/cli/commands/status/client.py +192 -0
  112. synth_ai/cli/commands/status/config.py +92 -0
  113. synth_ai/cli/commands/status/errors.py +20 -0
  114. synth_ai/cli/commands/status/formatters.py +164 -0
  115. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  116. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  117. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  118. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  119. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  120. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  121. synth_ai/cli/commands/status/utils.py +114 -0
  122. synth_ai/cli/commands/train/__init__.py +53 -0
  123. synth_ai/cli/commands/train/core.py +21 -0
  124. synth_ai/cli/commands/train/errors.py +117 -0
  125. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  126. synth_ai/cli/commands/train/judge_validation.py +304 -0
  127. synth_ai/cli/commands/train/validation.py +443 -0
  128. synth_ai/cli/demo.py +2 -162
  129. synth_ai/cli/deploy/__init__.py +28 -0
  130. synth_ai/cli/deploy/core.py +5 -0
  131. synth_ai/cli/deploy/errors.py +23 -0
  132. synth_ai/cli/deploy/validation.py +5 -0
  133. synth_ai/cli/eval/__init__.py +36 -0
  134. synth_ai/cli/eval/core.py +5 -0
  135. synth_ai/cli/eval/errors.py +31 -0
  136. synth_ai/cli/eval/validation.py +5 -0
  137. synth_ai/cli/filter/__init__.py +28 -0
  138. synth_ai/cli/filter/core.py +5 -0
  139. synth_ai/cli/filter/errors.py +23 -0
  140. synth_ai/cli/filter/validation.py +5 -0
  141. synth_ai/cli/modal_serve/__init__.py +12 -0
  142. synth_ai/cli/modal_serve/core.py +14 -0
  143. synth_ai/cli/modal_serve/errors.py +8 -0
  144. synth_ai/cli/modal_serve/validation.py +11 -0
  145. synth_ai/cli/serve/__init__.py +12 -0
  146. synth_ai/cli/serve/core.py +14 -0
  147. synth_ai/cli/serve/errors.py +8 -0
  148. synth_ai/cli/serve/validation.py +11 -0
  149. synth_ai/cli/setup.py +20 -265
  150. synth_ai/cli/status.py +7 -126
  151. synth_ai/cli/task_app_deploy.py +1 -10
  152. synth_ai/cli/task_app_modal_serve.py +4 -9
  153. synth_ai/cli/task_app_serve.py +4 -11
  154. synth_ai/cli/task_apps.py +58 -1487
  155. synth_ai/cli/train/__init__.py +12 -0
  156. synth_ai/cli/train/core.py +21 -0
  157. synth_ai/cli/train/errors.py +8 -0
  158. synth_ai/cli/train/validation.py +24 -0
  159. synth_ai/cli/train.py +1 -14
  160. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  161. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  162. synth_ai/environments/examples/red/engine.py +33 -12
  163. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  164. synth_ai/environments/examples/red/environment.py +26 -0
  165. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  166. synth_ai/http.py +12 -0
  167. synth_ai/judge_schemas.py +10 -11
  168. synth_ai/learning/rl/client.py +3 -1
  169. synth_ai/streaming/__init__.py +29 -0
  170. synth_ai/streaming/config.py +94 -0
  171. synth_ai/streaming/handlers.py +469 -0
  172. synth_ai/streaming/streamer.py +301 -0
  173. synth_ai/streaming/types.py +95 -0
  174. synth_ai/task/validators.py +2 -2
  175. synth_ai/tracing_v3/migration_helper.py +1 -2
  176. synth_ai/utils/env.py +25 -18
  177. synth_ai/utils/http.py +4 -1
  178. synth_ai/utils/modal.py +2 -2
  179. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/METADATA +8 -3
  180. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/RECORD +184 -109
  181. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  182. synth_ai/cli/tui.py +0 -62
  183. synth_ai/tui/__init__.py +0 -5
  184. synth_ai/tui/__main__.py +0 -13
  185. synth_ai/tui/cli/__init__.py +0 -1
  186. synth_ai/tui/cli/query_experiments.py +0 -164
  187. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  188. synth_ai/tui/dashboard.py +0 -911
  189. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  190. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  191. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  192. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,178 @@
1
+ """Utility classes for running swe-mini environments on Morph Cloud."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import os
7
+ import shlex
8
+ import time
9
+ from dataclasses import dataclass, field
10
+ from typing import Any, Dict
11
+
12
+ _IMPORT_ERROR: Exception | None = None
13
+
14
+ try: # pragma: no cover - optional dependency
15
+ from morphcloud.api import MorphCloudClient
16
+ except Exception as exc: # pragma: no cover - optional dependency
17
+ MorphCloudClient = None # type: ignore[assignment]
18
+ _IMPORT_ERROR = exc
19
+
20
+
21
+ def _quote_env_var(key: str, value: str) -> str:
22
+ """Return a safe shell export statement."""
23
+ return f"export {key}={shlex.quote(value)}"
24
+
25
+
26
+ def _now() -> float:
27
+ return time.time()
28
+
29
+
30
@dataclass
class MorphSandboxBackend:
    """Thin wrapper around Morph Cloud instances for command execution.

    The API mirrors the subset consumed by :class:`MiniSweEnvironmentWrapper`:
    we expose an ``execute`` method that matches the mini-swe environment shape.
    """

    # Snapshot to boot from; falls back to env vars, then a freshly created snapshot.
    snapshot_id: str | None = None
    # Base image used only when no snapshot id is available anywhere.
    image_id: str | None = None
    # Remote working directory; every command runs from here.
    cwd: str = "/workspace"
    # Extra environment variables exported before each command.
    env: Dict[str, str] | None = None
    # Metadata attached to started instances.
    metadata: Dict[str, str] | None = None
    vcpus: int = 4
    memory_mb: int = 8192
    disk_mb: int = 65536
    # Seconds to wait for the instance to report ready.
    startup_timeout: int = 600

    _client: MorphCloudClient = field(init=False)
    _instance: Any = field(init=False, default=None)
    # Payload of the most recent execute() call, for debugging/inspection.
    _last_exec: Dict[str, Any] = field(init=False, default_factory=dict)
    _started_at: float | None = field(init=False, default=None)

    def __post_init__(self) -> None:
        """Validate dependencies/credentials and normalise configuration.

        Raises:
            RuntimeError: if ``morphcloud`` is not installed or
                ``MORPH_API_KEY`` is not set in the environment.
        """
        if MorphCloudClient is None:  # pragma: no cover - optional dependency
            raise RuntimeError(
                "morphcloud package is required for Morph environments. "
                "Install with `pip install morphcloud`."
            ) from _IMPORT_ERROR

        api_key = os.getenv("MORPH_API_KEY", "")
        if not api_key:
            raise RuntimeError("Set MORPH_API_KEY before using the Morph backend.")

        # Normalise metadata/env early to avoid shared references.
        self.metadata = {str(k): str(v) for k, v in (self.metadata or {}).items()}
        self.env = {str(k): str(v) for k, v in (self.env or {}).items()}
        self.cwd = self.cwd or "/workspace"

        self._client = MorphCloudClient()

    # Public API -----------------------------------------------------------------

    def execute(self, command: str, timeout: int | None = None) -> Dict[str, Any]:
        """Execute ``command`` inside the Morph instance.

        Args:
            command: Shell command text; a blank command is replaced by ``true``.
            timeout: Optional wall-clock limit in seconds, enforced with the
                remote ``timeout(1)`` utility.

        Returns:
            Dict with ``output`` (stdout), ``stderr``, ``returncode`` and
            ``duration`` (seconds) — the shape the mini-swe wrapper consumes.
        """
        if not command.strip():
            command = "true"

        instance = self._ensure_instance()

        script_parts = [
            f"export {key}={shlex.quote(value)}" for key, value in self.env.items()
        ]
        if self.cwd:
            script_parts.append(f"cd {shlex.quote(self.cwd)}")
        script_parts.append(command)
        script = " && ".join(script_parts)

        # Build the final remote command exactly once. The previous version
        # wrapped the timeout variant in a second `bash -lc`, producing
        # `bash -lc "timeout Ns bash -lc '...'"` — a redundant nested login
        # shell plus an extra layer of quoting.
        if timeout:
            shell_cmd = f"timeout {int(timeout)}s bash -lc {shlex.quote(script)}"
        else:
            shell_cmd = f"bash -lc {shlex.quote(script)}"

        started = time.time()
        result = instance.exec(shell_cmd)
        duration = time.time() - started

        payload = {
            "output": (result.stdout or ""),
            "stderr": (result.stderr or ""),
            # NOTE(review): assumes the SDK result exposes `exit_code`;
            # falls back to None otherwise.
            "returncode": getattr(result, "exit_code", None),
            "duration": duration,
        }
        self._last_exec = payload
        return payload

    def close(self) -> None:
        """Stop the Morph instance if one is running (best effort)."""
        instance = getattr(self, "_instance", None)
        if not instance:
            return
        try:
            instance.stop()
        except Exception:  # pragma: no cover - best-effort shutdown
            pass
        finally:
            self._instance = None

    # Internal helpers -----------------------------------------------------------

    def _ensure_instance(self):
        """Return the running instance, starting one (and possibly creating a
        snapshot first) on demand."""
        instance = getattr(self, "_instance", None)
        if instance is not None:
            return instance

        snapshot_id = (
            self.snapshot_id
            or os.getenv("SWE_MINI_MORPH_SNAPSHOT_ID")
            or os.getenv("MORPH_SNAPSHOT_ID")
        )
        metadata = dict(self.metadata)

        if snapshot_id:
            instance = self._client.instances.start(
                snapshot_id=snapshot_id, metadata=metadata or None
            )
        else:
            image_id = (
                self.image_id
                or os.getenv("SWE_MINI_MORPH_IMAGE_ID")
                or os.getenv("MORPH_IMAGE_ID")
                or "morphvm-minimal"
            )
            snapshot = self._client.snapshots.create(
                image_id=image_id,
                vcpus=self.vcpus,
                memory=self.memory_mb,
                disk_size=self.disk_mb,
            )
            instance = self._client.instances.start(
                snapshot_id=snapshot.id, metadata=metadata or None
            )
            # Remember the snapshot so later restarts reuse it.
            self.snapshot_id = snapshot.id

        self._instance = instance
        self._started_at = time.time()
        self._wait_until_ready(instance)
        self._ensure_cwd(instance)
        return instance

    def _wait_until_ready(self, instance) -> None:
        """Poll until the instance is ready or ``startup_timeout`` elapses.

        Raises:
            TimeoutError: when readiness is not reached within the deadline.
        """
        deadline = time.time() + float(self.startup_timeout)
        while True:
            try:
                instance.wait_until_ready()
                break
            except Exception as exc:  # pragma: no cover - SDK may raise while polling
                if time.time() > deadline:
                    raise TimeoutError(
                        f"Morph instance did not become ready within {self.startup_timeout}s"
                    ) from exc
                time.sleep(5.0)

    def _ensure_cwd(self, instance) -> None:
        """Create the remote working directory, surfacing a friendly error."""
        if not self.cwd:
            return
        try:
            instance.exec(f"bash -lc {shlex.quote(f'mkdir -p {self.cwd}')}")
        except Exception as exc:  # pragma: no cover - surface friendly error
            raise RuntimeError(
                f"Failed to create remote workspace {self.cwd!r}: {exc}"
            ) from exc

    def __del__(self) -> None:  # pragma: no cover - defensive cleanup
        with contextlib.suppress(Exception):
            self.close()
@@ -6,7 +6,7 @@ underlying FastAPI plumbing.
6
6
 
7
7
  ## Local development
8
8
  ```bash
9
- uvx synth-ai serve grpo-crafter --port 8001
9
+ uvx synth-ai deploy --runtime uvicorn grpo-crafter --port 8001
10
10
  # Optional extras:
11
11
  # --env-file path/to/.env # load additional environment variables
12
12
  # --reload # enable uvicorn auto-reload
@@ -9,9 +9,13 @@ import sys
9
9
  from collections.abc import Iterable, Sequence
10
10
  from contextlib import suppress
11
11
  from dataclasses import dataclass
12
+ from datetime import UTC, datetime
12
13
  from pathlib import Path
13
14
  from typing import Any
14
15
 
16
+ from fastapi import HTTPException
17
+ from pydantic import BaseModel
18
+
15
19
  from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
16
20
  from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
17
21
  from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
@@ -657,6 +661,14 @@ def _resolve_trace_correlation_id(policy_cfg: dict[str, Any], mode: Any = None)
657
661
  async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
658
662
  request = _coerce_math_to_crafter(request)
659
663
 
664
+ record_cfg = request.record.model_copy(
665
+ update={
666
+ "return_trace": True,
667
+ "trace_format": "structured",
668
+ }
669
+ )
670
+ request = request.model_copy(update={"record": record_cfg})
671
+
660
672
  policy_cfg = dict(request.policy.config or {})
661
673
  logger.info(
662
674
  "ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
@@ -800,11 +812,38 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
800
812
  trace_correlation_id,
801
813
  )
802
814
  data = legacy_response.model_dump()
815
+ logger.debug(
816
+ "ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
817
+ sorted(data.keys()),
818
+ bool(data.get("trace")),
819
+ )
803
820
  metrics = data.get("metrics", {}) or {}
804
821
  metrics.setdefault("outcome_score", None)
805
822
  metrics.setdefault("events_score", None)
806
823
  metrics.setdefault("details", {})
807
824
  data["metrics"] = metrics
825
+
826
+ if data.get("trace") is None:
827
+ legacy_trace = getattr(legacy_response, "trace", None)
828
+ if legacy_trace is not None:
829
+ data["trace"] = legacy_trace
830
+ else:
831
+ tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
832
+ if callable(tracer_factory):
833
+ tracer = tracer_factory()
834
+ logger.debug("ROLLOUT_EXEC: trace backfill factory=%s", type(tracer))
835
+ if isinstance(tracer, SessionTracer):
836
+ try:
837
+ await tracer.initialize()
838
+ if tracer.db is not None:
839
+ trace_row = await tracer.db.get_session_trace(request.run_id)
840
+ if trace_row is not None:
841
+ data["trace"] = trace_row
842
+ except Exception as exc:
843
+ logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
844
+ finally:
845
+ with suppress(Exception):
846
+ await tracer.close()
808
847
 
809
848
  # Add trace_correlation_id at TOP-LEVEL (REQUIRED for RL training pipeline)
810
849
  # Use fallback if somehow missing
@@ -820,12 +859,30 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
820
859
  if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
821
860
  existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
822
861
  data["pipeline_metadata"] = existing_meta
823
-
862
+
824
863
  # Add trace_correlation_id to each trajectory (required for RL training pipeline)
825
864
  if "trajectories" in data:
865
+ normalized_trajs: list[dict[str, Any]] = []
826
866
  for traj in data.get("trajectories", []):
827
- if isinstance(traj, dict):
828
- traj["trace_correlation_id"] = final_cid
867
+ if isinstance(traj, BaseModel):
868
+ traj_dict = traj.model_dump()
869
+ elif isinstance(traj, dict):
870
+ traj_dict = dict(traj)
871
+ else:
872
+ continue
873
+ traj_dict["trace_correlation_id"] = final_cid
874
+ if not traj_dict.get("inference_url"):
875
+ inferred_url = policy_cfg.get("inference_url")
876
+ if inferred_url:
877
+ traj_dict["inference_url"] = inferred_url
878
+ normalized_trajs.append(traj_dict)
879
+ if normalized_trajs:
880
+ data["trajectories"] = normalized_trajs
881
+ logger.info(
882
+ "ROLLOUT_EXEC: normalized trajectory sample run_id=%s inference_url=%s",
883
+ request.run_id,
884
+ normalized_trajs[0].get("inference_url") if normalized_trajs else None,
885
+ )
829
886
  logger.info(
830
887
  "ROLLOUT_EXEC: final pipeline metadata run_id=%s metadata=%s",
831
888
  request.run_id,
@@ -844,6 +901,12 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
844
901
  request.run_id,
845
902
  existing_meta,
846
903
  )
904
+
905
+ if data.get("trace") is None:
906
+ raise HTTPException(
907
+ status_code=500,
908
+ detail="trace_payload_missing: task app did not emit a SessionTrace",
909
+ )
847
910
 
848
911
  # ASSERTION: Verify trace_correlation_id is present in response at all required levels
849
912
  assert "trace_correlation_id" in data, (
@@ -3,7 +3,7 @@
3
3
  This module now delegates to the TaskAppConfig defined in the colocated example at
4
4
  `examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
5
5
  (running the file directly or targeting `fastapi_app` from external tooling). Prefer using
6
- `uvx synth-ai serve grpo-crafter` for local development and testing.
6
+ `uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
7
7
  """
8
8
 
9
9
  from __future__ import annotations
@@ -197,6 +197,8 @@ class CrafterPolicy(Policy):
197
197
  if self.use_tools:
198
198
  payload["tools"] = TOOLS_SCHEMA
199
199
  payload["tool_choice"] = "required"
200
+ payload["function_call"] = {"name": "interact_many"}
201
+ payload["parallel_tool_calls"] = False
200
202
  # Ensure the inference server injects family-specific stop sequences
201
203
  # to terminate immediately after the first tool call for compliance.
202
204
  payload["stop_after_tool_calls"] = 1
@@ -207,13 +209,7 @@ class CrafterPolicy(Policy):
207
209
  response: dict[str, Any],
208
210
  use_tools: bool = True,
209
211
  ) -> list[dict[str, Any]]:
210
- """Turn an inference response into environment tool calls.
211
-
212
- - If tools were used, expect tool_calls-compatible output and forward as-is
213
- in our simple JSON format: {"tool_name": str, "arguments": {...}}.
214
- - If no tools, parse plain-text actions using CrafterReActAgent parser and
215
- wrap them into a single interact_many tool call.
216
- """
212
+ """Turn an inference response into environment tool calls."""
217
213
  # First check if we got actual tool calls
218
214
  choices = response.get("choices", [])
219
215
  tool_calls: list[dict[str, Any]] = []
@@ -272,24 +268,6 @@ class CrafterPolicy(Policy):
272
268
  normalized.append(tc)
273
269
  return normalized
274
270
 
275
- # Otherwise, parse plain text content for actions
276
- text = ""
277
- for choice in choices:
278
- msg = choice.get("message", {})
279
- content = msg.get("content", "")
280
- if content:
281
- text = content
282
- break
283
-
284
- if text:
285
- # Try to parse actions from the text
286
- from .shared import parse_actions
287
-
288
- actions = parse_actions(text)
289
- if actions:
290
- # Wrap actions in interact_many tool call
291
- return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
292
-
293
271
  # No actions found
294
272
  return []
295
273
 
@@ -542,7 +520,7 @@ class CrafterPolicy(Policy):
542
520
  "claude-3", # All Claude 3 models support vision
543
521
  "gemini", # Gemini models
544
522
  "qwen-vl", # Qwen Vision-Language models
545
- "qwen2-vl", # Qwen2 VL
523
+ "qwen3-vl", # Qwen3 VL
546
524
  "pixtral", # Mistral's vision model
547
525
  "llava", # LLaVA models
548
526
  "phi-3-vision", # Microsoft Phi-3 Vision
@@ -45,8 +45,7 @@ class CrafterReActAgent:
45
45
  "Action policy:\n"
46
46
  "- Always return a single tool call: interact_many({actions: [...]})\n"
47
47
  "- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
48
- "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
49
- "- Do not spam the same exact sequence twice in a row—explore in varied directions.\n\n"
48
+ "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n\n"
50
49
  "Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
51
50
  "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
52
51
  "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
@@ -50,20 +50,19 @@ class OpenAIClient:
50
50
  # Make a copy to avoid modifying the original
51
51
  fixed_request = request.copy()
52
52
 
53
- # Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI, Groq);
54
- # strip fields those endpoints don't accept
53
+ # Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI).
54
+ # Groq shares the API surface but we keep tool enforcement fields intact.
55
55
  is_openai = False
56
+ is_groq = False
56
57
  try:
57
58
  if isinstance(target_url, str):
58
59
  low = target_url.lower()
59
- is_openai = (
60
- ("openai.com" in low)
61
- or ("azure" in low and ".openai." in low)
62
- or ("groq.com" in low)
63
- or ("/openai" in low)
64
- or ("/proxy/groq" in low)
65
- or ("/proxy/openai" in low)
66
- )
60
+ if "groq.com" in low or "/proxy/groq" in low:
61
+ is_groq = True
62
+ elif ("openai.com" in low) or ("azure" in low and ".openai." in low) or (
63
+ "/proxy/openai" in low
64
+ ):
65
+ is_openai = True
67
66
  except Exception:
68
67
  is_openai = False
69
68
 
@@ -259,13 +258,13 @@ class OpenAIClient:
259
258
  content_len = len(str(content)) if content else 0
260
259
  logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content_type={type(content).__name__}, len={content_len}")
261
260
 
262
- # Final hard-guard for OpenAI: ensure unsupported field is not present
261
+ # Final hard-guard for OpenAI/Groq: drop unsupported field
263
262
  try:
264
- if "openai" in url.lower() and "stop_after_tool_calls" in processed_request:
263
+ low_url = url.lower()
264
+ if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
265
265
  processed_request.pop("stop_after_tool_calls", None)
266
- logger.info("Removed stop_after_tool_calls for OpenAI request")
266
+ logger.info("Removed stop_after_tool_calls for %s request", "Groq/OpenAI")
267
267
  # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
268
- low_url = url.lower()
269
268
  if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
270
269
  processed_request, dict
271
270
  ):
@@ -546,47 +545,16 @@ class OpenAIClient:
546
545
  error_block.get("code") or error_block.get("type") or ""
547
546
  ).lower()
548
547
  if error_code in {"tool_use_failed", "tool_call_failed"}:
549
- logger.warning(
548
+ logger.error(
550
549
  {
551
550
  "tool_use_failed": True,
552
551
  "target": (base_url or self.base_url),
553
552
  "message": error_block.get("message") if isinstance(error_block, dict) else None,
554
553
  }
555
554
  )
556
- fallback_actions = ["move_right", "move_up", "do"]
557
- fallback_response = {
558
- "id": f"fallback-{int(time.time() * 1000)}",
559
- "object": "chat.completion",
560
- "created": int(time.time()),
561
- "model": processed_request.get("model"),
562
- "choices": [
563
- {
564
- "index": 0,
565
- "message": {
566
- "role": "assistant",
567
- "content": "",
568
- "tool_calls": [
569
- {
570
- "id": f"call_fallback_{int(time.time() * 1000)}",
571
- "type": "function",
572
- "function": {
573
- "name": "interact_many",
574
- "arguments": json.dumps(
575
- {"actions": fallback_actions}
576
- ),
577
- },
578
- }
579
- ],
580
- },
581
- "finish_reason": "tool_calls",
582
- }
583
- ],
584
- }
585
- if isinstance(response_data.get("usage"), dict):
586
- fallback_response["usage"] = response_data["usage"]
587
- if isinstance(error_block, dict):
588
- fallback_response["error"] = error_block
589
- return fallback_response
555
+ raise RuntimeError(
556
+ f"Inference 400 response (tool call failed): {error_block.get('message') if isinstance(error_block, dict) else 'Tool call failed'}"
557
+ ) from e
590
558
  # This is a different type of 400 error, don't retry
591
559
  try:
592
560
  redacted_headers = {}
@@ -462,6 +462,8 @@ async def step_policy(
462
462
  )
463
463
 
464
464
  # Emit full system/user prompts for observability (no secrets included)
465
+ system_prompt_records: list[dict[str, Any]] = []
466
+ user_prompt_records: list[dict[str, Any]] = []
465
467
  try:
466
468
 
467
469
  def _as_text(content: object) -> str:
@@ -481,8 +483,6 @@ async def step_policy(
481
483
  return "".join(parts)
482
484
  return str(content)
483
485
 
484
- system_prompt_records: list[dict[str, Any]] = []
485
- user_prompt_records: list[dict[str, Any]] = []
486
486
  for message in msgs:
487
487
  role = message.get("role")
488
488
  raw_content = message.get("content")
@@ -525,6 +525,11 @@ async def step_policy(
525
525
 
526
526
  if tracing_context is not None:
527
527
  try:
528
+ logger.info(
529
+ "[TRACE_DEBUG] record_policy_prompts sys=%s user=%s",
530
+ len(system_prompt_records),
531
+ len(user_prompt_records),
532
+ )
528
533
  await tracing_context.record_policy_prompts(
529
534
  system_prompt_records, user_prompt_records
530
535
  )
@@ -780,9 +785,10 @@ async def step_policy(
780
785
  "sokoban-react",
781
786
  "crafter-react",
782
787
  ) and getattr(policy, "use_tools", True):
783
- req_tools = meta["inference_request"]["tools"]
784
- req_tool_choice = meta["inference_request"]["tool_choice"]
785
- req_stop_after = meta["inference_request"]["stop_after_tool_calls"]
788
+ inf_req = meta.get("inference_request", {})
789
+ req_tools = inf_req.get("tools")
790
+ req_tool_choice = inf_req.get("tool_choice")
791
+ req_stop_after = inf_req.get("stop_after_tool_calls")
786
792
  logger.info(
787
793
  f"TOOLCALL_CONFIG: policy={policy_name} tools_present={bool(req_tools)} tool_choice={req_tool_choice} stop_after={req_stop_after}"
788
794
  )
@@ -791,6 +797,8 @@ async def step_policy(
791
797
  status_code=500,
792
798
  detail=f"TOOLCALL_ASSERTION_FAIL: Missing tools or tool_choice!=required for policy {policy_name}",
793
799
  )
800
+ if req_stop_after is None:
801
+ inf_req["stop_after_tool_calls"] = 1
794
802
 
795
803
  # Call inference service with retries for Flash cold-start (503)
796
804
  import time as _t
@@ -491,6 +491,11 @@ class RolloutTracingContext:
491
491
  getattr(request.record, "trace_format", "compact") or "compact"
492
492
  ).lower()
493
493
  self.return_trace = bool(getattr(request.record, "return_trace", False))
494
+ logger.warning(
495
+ "[TRACE_DEBUG] RolloutTracingContext init: trace_format=%s return_trace=%s",
496
+ self.trace_format,
497
+ self.return_trace,
498
+ )
494
499
  self.sft_output_dir = getattr(fastapi_request.app.state, "sft_output_dir", None)
495
500
  self.session_trace = None
496
501
  self.metadata_updates: dict[str, Any] = {}
@@ -590,7 +595,7 @@ class RolloutTracingContext:
590
595
  # Debug: Check message count
591
596
  if self.tracer and self.tracer._current_trace:
592
597
  msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
593
- logger.info(f"[TRACE_DEBUG] After record_policy_prompts: {msg_count} messages in trace")
598
+ logger.warning("[TRACE_DEBUG] After record_policy_prompts: %s messages", msg_count)
594
599
 
595
600
  def _content_to_text(self, content: Any) -> str:
596
601
  if isinstance(content, str):
@@ -669,6 +674,11 @@ class RolloutTracingContext:
669
674
  message_type="assistant", # Map to standard assistant message type
670
675
  metadata={**self._message_metadata(), "is_tool_call": True},
671
676
  )
677
+ if self.tracer._current_trace:
678
+ logger.warning(
679
+ "[TRACE_DEBUG] After tool invocation: messages=%s",
680
+ len(self.tracer._current_trace.markov_blanket_message_history),
681
+ )
672
682
  except Exception as exc:
673
683
  logger.debug("TRACING_TOOL_MSG_FAIL: %s", exc)
674
684
 
@@ -991,6 +1001,10 @@ class RolloutTracingContext:
991
1001
  if self.trace_format in ("full", "structured"):
992
1002
  payload = session_trace.to_dict()
993
1003
  payload.setdefault("metadata", {}).update(self.metadata_updates)
1004
+ logger.warning(
1005
+ "[TRACE_DEBUG] build_trace_payload returning structured trace with messages=%s",
1006
+ len(payload.get("markov_blanket_message_history") or []),
1007
+ )
994
1008
  return payload
995
1009
 
996
1010
  # For "compact" format, return only summary stats
@@ -2,7 +2,7 @@
2
2
 
3
3
  This mirrors the structure of the Crafter task app wrapper while delegating
4
4
  all configuration to the colocated `grpo_enron.py` module. Normal usage should
5
- prefer invoking `uvx synth-ai serve grpo-enron`, but this module remains for
5
+ prefer invoking `uvx synth-ai deploy --runtime uvicorn grpo-enron`, but this module remains for
6
6
  direct execution or importing the FastAPI app object.
7
7
  """
8
8
 
@@ -3,7 +3,7 @@
3
3
  This directory hosts the legacy entrypoint for the math single-step task app. Prefer starting the app via:
4
4
 
5
5
  ```bash
6
- uvx synth-ai serve math-single-step --env-file examples/rl/.env --port 8101
6
+ uvx synth-ai deploy --runtime uvicorn math-single-step --env-file examples/rl/.env --port 8101
7
7
  ```
8
8
 
9
9
  If you need to run it directly (e.g., for Modal `modal deploy` compatibility), use:
@@ -19,4 +19,3 @@ Environment variables:
19
19
  - `MATH_DATASET_DEFAULT_SPLIT`, `MATH_DATASET_VALIDATION_SPLIT`, `MATH_DATASET_TEST_SPLIT`
20
20
 
21
21
  The task app enforces a single `math_submit` tool call per episode, enabling RL to reward correct final answers and penalise missing or malformed submissions.
22
-
@@ -17,7 +17,7 @@ A reinforcement learning environment for Pokémon Red using PyBoy emulation with
17
17
 
18
18
  ```bash
19
19
  # From synth-ai root
20
- uv run -m synth_ai task-app serve pokemon_red --port 8913
20
+ uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8913
21
21
  ```
22
22
 
23
23
  ### 2. Run a Random Rollout
@@ -232,7 +232,7 @@ uv add pyboy
232
232
  lsof -ti :8913 | xargs -r kill -9
233
233
 
234
234
  # Or use a different port
235
- uv run -m synth_ai task-app serve pokemon_red --port 8914
235
+ uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8914
236
236
  ```
237
237
 
238
238
  ## Examples
@@ -249,7 +249,7 @@ cd /Users/joshpurtell/Documents/GitHub/synth-ai
249
249
  echo "OPENAI_API_KEY=sk-..." >> .env
250
250
 
251
251
  # 2. Start the task app server (in background)
252
- nohup sh -c 'printf "n\n" | uv run -m synth_ai task-app serve pokemon_red --port 8913 --no-reload' > nohup_pokemon.log 2>&1 &
252
+ nohup sh -c 'printf "n\n" | uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8913 --no-reload' > nohup_pokemon.log 2>&1 &
253
253
 
254
254
  # Wait for startup
255
255
  sleep 8
@@ -354,4 +354,3 @@ TOTAL REWARD: 705 points
354
354
  - **PyBoy**: Game Boy emulator - https://github.com/Baekalfen/PyBoy
355
355
  - **Pokémon Red Disassembly**: RAM map reference - https://github.com/pret/pokered
356
356
  - **Datacrystal.org**: Memory address documentation
357
-
@@ -1,11 +1,12 @@
1
- # Evaluation config for Pokemon Red with image-only input
1
+ # Evaluation config for Pokemon Red with image-only input and NEW REWARD SYSTEM
2
2
  # This config uses GPT-4o mini with only image data (no text observations)
3
+ # Uses the comprehensive reward system with deterministic progress milestones
3
4
 
4
5
  [eval]
5
6
  app_id = "pokemon_red"
6
7
  model = "gpt-4o-mini-2024-07-18"
7
- seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
8
- max_turns = 10
8
+ seeds = [0, 1, 2, 3, 4] # Test with fewer seeds for quick results
9
+ max_turns = 20 # Allow more turns to see progress
9
10
  concurrency = 1 # Keep low initially to avoid issues
10
11
  env_name = "pokemon_red"
11
12
  policy_name = "pokemon_red_policy"
@@ -13,7 +14,7 @@ trace_format = "full"
13
14
  return_trace = true
14
15
 
15
16
  [eval.env_config]
16
- max_steps_per_episode = 10
17
+ max_steps_per_episode = 20
17
18
 
18
19
  [eval.policy_config]
19
20
  provider = "openai"
@@ -24,6 +25,6 @@ top_p = 0.95
24
25
  max_tokens = 512
25
26
  use_vision = true
26
27
  image_only_mode = true
27
- max_llm_calls = 10
28
+ max_llm_calls = 20
28
29
 
29
30