synth-ai 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/analyze_semantic_words.sh +2 -2
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
- examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -3
- examples/qwen_vl/README.md +10 -12
- examples/qwen_vl/SETUP_COMPLETE.md +7 -8
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
- examples/qwen_vl/collect_data_via_cli.md +76 -84
- examples/qwen_vl/collect_vision_traces.py +4 -4
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
- examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
- examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
- examples/qwen_vl/run_vision_comparison.sh +6 -7
- examples/rl/README.md +5 -5
- examples/rl/configs/rl_from_base_qwen.toml +26 -1
- examples/rl/configs/rl_from_base_qwen17.toml +5 -2
- examples/rl/task_app/README.md +1 -2
- examples/rl/task_app/math_single_step.py +2 -2
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +1 -1
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
- examples/swe/task_app/README.md +32 -2
- examples/swe/task_app/grpo_swe_mini.py +4 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
- examples/swe/task_app/hosted/inference/openai_client.py +4 -4
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +66 -3
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +17 -49
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +13 -5
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +15 -1
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +36 -5
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -2
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +1 -1
- examples/warming_up_to_rl/task_app/grpo_crafter.py +134 -3
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +4 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +6 -3
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
- synth_ai/api/train/builders.py +9 -3
- synth_ai/api/train/cli.py +125 -10
- synth_ai/api/train/configs/__init__.py +8 -1
- synth_ai/api/train/configs/rl.py +32 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/auth/credentials.py +119 -0
- synth_ai/cli/__init__.py +12 -4
- synth_ai/cli/commands/__init__.py +17 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/deploy/__init__.py +23 -0
- synth_ai/cli/commands/deploy/core.py +614 -0
- synth_ai/cli/commands/deploy/errors.py +72 -0
- synth_ai/cli/commands/deploy/validation.py +11 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1109 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +388 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +73 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +199 -0
- synth_ai/cli/commands/train/judge_validation.py +304 -0
- synth_ai/cli/commands/train/validation.py +443 -0
- synth_ai/cli/demo.py +2 -162
- synth_ai/cli/deploy/__init__.py +28 -0
- synth_ai/cli/deploy/core.py +5 -0
- synth_ai/cli/deploy/errors.py +23 -0
- synth_ai/cli/deploy/validation.py +5 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +20 -265
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +1 -10
- synth_ai/cli/task_app_modal_serve.py +4 -9
- synth_ai/cli/task_app_serve.py +4 -11
- synth_ai/cli/task_apps.py +58 -1487
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +1 -14
- synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/http.py +12 -0
- synth_ai/judge_schemas.py +10 -11
- synth_ai/learning/rl/client.py +3 -1
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +469 -0
- synth_ai/streaming/streamer.py +301 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/validators.py +2 -2
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/utils/env.py +25 -18
- synth_ai/utils/http.py +4 -1
- synth_ai/utils/modal.py +2 -2
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/METADATA +8 -3
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/RECORD +184 -109
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
- synth_ai/cli/tui.py +0 -62
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -911
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""Utility classes for running swe-mini environments on Morph Cloud."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import contextlib
|
|
6
|
+
import os
|
|
7
|
+
import shlex
|
|
8
|
+
import time
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Any, Dict
|
|
11
|
+
|
|
12
|
+
_IMPORT_ERROR: Exception | None = None
|
|
13
|
+
|
|
14
|
+
try: # pragma: no cover - optional dependency
|
|
15
|
+
from morphcloud.api import MorphCloudClient
|
|
16
|
+
except Exception as exc: # pragma: no cover - optional dependency
|
|
17
|
+
MorphCloudClient = None # type: ignore[assignment]
|
|
18
|
+
_IMPORT_ERROR = exc
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _quote_env_var(key: str, value: str) -> str:
    """Build an ``export KEY=VALUE`` shell statement.

    Only ``value`` is shell-quoted (via :func:`shlex.quote`); ``key`` is
    inserted verbatim, so callers must supply a valid shell variable name.
    """
    quoted_value = shlex.quote(value)
    return "export {}={}".format(key, quoted_value)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _now() -> float:
    """Return the current time as reported by :func:`time.time`."""
    current = time.time()
    return current
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class MorphSandboxBackend:
    """Thin wrapper around Morph Cloud instances for command execution.

    The API mirrors the subset consumed by :class:`MiniSweEnvironmentWrapper`:
    we expose an ``execute`` method that matches the mini-swe environment shape.

    The remote instance is started lazily on the first ``execute`` call and
    stopped by ``close`` (also attempted from ``__del__``).
    """

    # Snapshot to boot from; falls back to SWE_MINI_MORPH_SNAPSHOT_ID /
    # MORPH_SNAPSHOT_ID, and finally to a snapshot created from ``image_id``.
    snapshot_id: str | None = None
    # Image used to create a snapshot when no snapshot id is available.
    image_id: str | None = None
    # Remote working directory; created on startup and ``cd``-ed into per command.
    cwd: str = "/workspace"
    # Environment variables exported before every command.
    env: Dict[str, str] | None = None
    # Metadata passed to ``instances.start``.
    metadata: Dict[str, str] | None = None
    # Resources requested when a snapshot has to be created.
    vcpus: int = 4
    memory_mb: int = 8192
    disk_mb: int = 65536
    # Seconds to keep retrying ``wait_until_ready`` before giving up.
    startup_timeout: int = 600

    _client: MorphCloudClient = field(init=False)
    _instance: Any = field(init=False, default=None)
    _last_exec: Dict[str, Any] = field(init=False, default_factory=dict)
    _started_at: float | None = field(init=False, default=None)

    def __post_init__(self) -> None:
        """Validate prerequisites, normalise fields and create the client.

        Raises:
            RuntimeError: if the ``morphcloud`` package could not be imported
                or ``MORPH_API_KEY`` is not set.
        """
        if MorphCloudClient is None:  # pragma: no cover - optional dependency
            raise RuntimeError(
                "morphcloud package is required for Morph environments. "
                "Install with `pip install morphcloud`."
            ) from _IMPORT_ERROR

        api_key = os.getenv("MORPH_API_KEY", "")
        if not api_key:
            raise RuntimeError("Set MORPH_API_KEY before using the Morph backend.")

        # Normalise metadata/env early to avoid shared references.
        self.metadata = {str(k): str(v) for k, v in (self.metadata or {}).items()}
        self.env = {str(k): str(v) for k, v in (self.env or {}).items()}
        self.cwd = self.cwd or "/workspace"

        self._client = MorphCloudClient()

    # Public API -----------------------------------------------------------------

    def execute(self, command: str, timeout: int | None = None) -> Dict[str, Any]:
        """Execute ``command`` inside the Morph instance.

        Args:
            command: Shell command to run; a blank command is replaced by ``true``.
            timeout: Optional limit in seconds, enforced remotely with the
                ``timeout`` utility. A falsy value (``None`` or ``0``) disables it.

        Returns:
            A dict with ``output`` (stdout), ``stderr``, ``returncode`` and
            ``duration`` (seconds measured locally around the exec call).
        """
        if not command.strip():
            command = "true"

        instance = self._ensure_instance()

        # Exports, then ``cd``, then the command, chained with ``&&`` so a
        # failing step prevents the later ones from running.
        script_parts = [_quote_env_var(key, value) for key, value in self.env.items()]
        if self.cwd:
            script_parts.append(f"cd {shlex.quote(self.cwd)}")
        script_parts.append(command)

        script = " && ".join(script_parts)
        if timeout:
            # ``timeout`` needs an executable to run, so the script (which uses
            # shell builtins) is wrapped in its own ``bash -lc``.
            wrapped = f"timeout {int(timeout)}s bash -lc {shlex.quote(script)}"
        else:
            wrapped = script

        shell_cmd = f"bash -lc {shlex.quote(wrapped)}"
        started = _now()
        result = instance.exec(shell_cmd)
        duration = _now() - started

        payload = {
            "output": (result.stdout or ""),
            "stderr": (result.stderr or ""),
            "returncode": getattr(result, "exit_code", None),
            "duration": duration,
        }
        self._last_exec = payload
        return payload

    def close(self) -> None:
        """Stops the Morph instance if one is running."""
        instance = getattr(self, "_instance", None)
        if not instance:
            return
        try:
            instance.stop()
        except Exception:  # pragma: no cover - best-effort shutdown
            pass
        finally:
            self._instance = None

    # Internal helpers -----------------------------------------------------------

    def _ensure_instance(self):
        """Return the running instance, starting one on first use.

        Uses an existing snapshot id (field or environment) when available;
        otherwise creates a snapshot from the configured image and resources
        and remembers its id on ``self.snapshot_id``.
        """
        instance = getattr(self, "_instance", None)
        if instance is not None:
            return instance

        snapshot_id = (
            self.snapshot_id
            or os.getenv("SWE_MINI_MORPH_SNAPSHOT_ID")
            or os.getenv("MORPH_SNAPSHOT_ID")
        )
        metadata = dict(self.metadata)

        if snapshot_id:
            instance = self._client.instances.start(snapshot_id=snapshot_id, metadata=metadata or None)
        else:
            image_id = (
                self.image_id
                or os.getenv("SWE_MINI_MORPH_IMAGE_ID")
                or os.getenv("MORPH_IMAGE_ID")
                or "morphvm-minimal"
            )
            snapshot = self._client.snapshots.create(
                image_id=image_id,
                vcpus=self.vcpus,
                memory=self.memory_mb,
                disk_size=self.disk_mb,
            )
            instance = self._client.instances.start(snapshot_id=snapshot.id, metadata=metadata or None)
            self.snapshot_id = snapshot.id

        # Record the instance before waiting so ``close`` can stop it even if
        # the readiness wait or workspace creation below raises.
        self._instance = instance
        self._started_at = _now()
        self._wait_until_ready(instance)
        self._ensure_cwd(instance)
        return instance

    def _wait_until_ready(self, instance) -> None:
        """Block until ``instance.wait_until_ready()`` succeeds.

        Any exception from the SDK call is retried every 5 seconds until
        ``startup_timeout`` seconds have elapsed.

        Raises:
            TimeoutError: if the deadline passes, chained to the last error.
        """
        deadline = _now() + float(self.startup_timeout)
        while True:
            try:
                instance.wait_until_ready()
                break
            except Exception as exc:  # pragma: no cover - SDK may raise while polling
                if _now() > deadline:
                    raise TimeoutError(f"Morph instance did not become ready within {self.startup_timeout}s") from exc
                time.sleep(5.0)

    def _ensure_cwd(self, instance) -> None:
        """Create the remote working directory if one is configured.

        Raises:
            RuntimeError: if the exec call itself raises.
        """
        if not self.cwd:
            return
        # Quote the path exactly as ``execute`` does for ``cd``; unquoted, a
        # path with spaces or shell metacharacters would create the wrong
        # directories (or run arbitrary shell) and every later ``cd`` would fail.
        mkdir_cmd = f"mkdir -p {shlex.quote(self.cwd)}"
        try:
            instance.exec(f"bash -lc {shlex.quote(mkdir_cmd)}")
        except Exception as exc:  # pragma: no cover - surface friendly error
            raise RuntimeError(f"Failed to create remote workspace {self.cwd!r}: {exc}") from exc

    def __del__(self) -> None:  # pragma: no cover - defensive cleanup
        """Best-effort stop of the instance when the backend is collected."""
        with contextlib.suppress(Exception):
            self.close()
|
|
@@ -6,7 +6,7 @@ underlying FastAPI plumbing.
|
|
|
6
6
|
|
|
7
7
|
## Local development
|
|
8
8
|
```bash
|
|
9
|
-
uvx synth-ai
|
|
9
|
+
uvx synth-ai deploy --runtime uvicorn grpo-crafter --port 8001
|
|
10
10
|
# Optional extras:
|
|
11
11
|
# --env-file path/to/.env # load additional environment variables
|
|
12
12
|
# --reload # enable uvicorn auto-reload
|
|
@@ -9,9 +9,13 @@ import sys
|
|
|
9
9
|
from collections.abc import Iterable, Sequence
|
|
10
10
|
from contextlib import suppress
|
|
11
11
|
from dataclasses import dataclass
|
|
12
|
+
from datetime import UTC, datetime
|
|
12
13
|
from pathlib import Path
|
|
13
14
|
from typing import Any
|
|
14
15
|
|
|
16
|
+
from fastapi import HTTPException
|
|
17
|
+
from pydantic import BaseModel
|
|
18
|
+
|
|
15
19
|
from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
|
|
16
20
|
from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
|
|
17
21
|
from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
|
|
@@ -657,6 +661,14 @@ def _resolve_trace_correlation_id(policy_cfg: dict[str, Any], mode: Any = None)
|
|
|
657
661
|
async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
|
|
658
662
|
request = _coerce_math_to_crafter(request)
|
|
659
663
|
|
|
664
|
+
record_cfg = request.record.model_copy(
|
|
665
|
+
update={
|
|
666
|
+
"return_trace": True,
|
|
667
|
+
"trace_format": "structured",
|
|
668
|
+
}
|
|
669
|
+
)
|
|
670
|
+
request = request.model_copy(update={"record": record_cfg})
|
|
671
|
+
|
|
660
672
|
policy_cfg = dict(request.policy.config or {})
|
|
661
673
|
logger.info(
|
|
662
674
|
"ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
|
|
@@ -800,11 +812,38 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
|
|
|
800
812
|
trace_correlation_id,
|
|
801
813
|
)
|
|
802
814
|
data = legacy_response.model_dump()
|
|
815
|
+
logger.debug(
|
|
816
|
+
"ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
|
|
817
|
+
sorted(data.keys()),
|
|
818
|
+
bool(data.get("trace")),
|
|
819
|
+
)
|
|
803
820
|
metrics = data.get("metrics", {}) or {}
|
|
804
821
|
metrics.setdefault("outcome_score", None)
|
|
805
822
|
metrics.setdefault("events_score", None)
|
|
806
823
|
metrics.setdefault("details", {})
|
|
807
824
|
data["metrics"] = metrics
|
|
825
|
+
|
|
826
|
+
if data.get("trace") is None:
|
|
827
|
+
legacy_trace = getattr(legacy_response, "trace", None)
|
|
828
|
+
if legacy_trace is not None:
|
|
829
|
+
data["trace"] = legacy_trace
|
|
830
|
+
else:
|
|
831
|
+
tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
|
|
832
|
+
if callable(tracer_factory):
|
|
833
|
+
tracer = tracer_factory()
|
|
834
|
+
logger.debug("ROLLOUT_EXEC: trace backfill factory=%s", type(tracer))
|
|
835
|
+
if isinstance(tracer, SessionTracer):
|
|
836
|
+
try:
|
|
837
|
+
await tracer.initialize()
|
|
838
|
+
if tracer.db is not None:
|
|
839
|
+
trace_row = await tracer.db.get_session_trace(request.run_id)
|
|
840
|
+
if trace_row is not None:
|
|
841
|
+
data["trace"] = trace_row
|
|
842
|
+
except Exception as exc:
|
|
843
|
+
logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
|
|
844
|
+
finally:
|
|
845
|
+
with suppress(Exception):
|
|
846
|
+
await tracer.close()
|
|
808
847
|
|
|
809
848
|
# Add trace_correlation_id at TOP-LEVEL (REQUIRED for RL training pipeline)
|
|
810
849
|
# Use fallback if somehow missing
|
|
@@ -820,12 +859,30 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
|
|
|
820
859
|
if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
|
|
821
860
|
existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
|
|
822
861
|
data["pipeline_metadata"] = existing_meta
|
|
823
|
-
|
|
862
|
+
|
|
824
863
|
# Add trace_correlation_id to each trajectory (required for RL training pipeline)
|
|
825
864
|
if "trajectories" in data:
|
|
865
|
+
normalized_trajs: list[dict[str, Any]] = []
|
|
826
866
|
for traj in data.get("trajectories", []):
|
|
827
|
-
if isinstance(traj,
|
|
828
|
-
|
|
867
|
+
if isinstance(traj, BaseModel):
|
|
868
|
+
traj_dict = traj.model_dump()
|
|
869
|
+
elif isinstance(traj, dict):
|
|
870
|
+
traj_dict = dict(traj)
|
|
871
|
+
else:
|
|
872
|
+
continue
|
|
873
|
+
traj_dict["trace_correlation_id"] = final_cid
|
|
874
|
+
if not traj_dict.get("inference_url"):
|
|
875
|
+
inferred_url = policy_cfg.get("inference_url")
|
|
876
|
+
if inferred_url:
|
|
877
|
+
traj_dict["inference_url"] = inferred_url
|
|
878
|
+
normalized_trajs.append(traj_dict)
|
|
879
|
+
if normalized_trajs:
|
|
880
|
+
data["trajectories"] = normalized_trajs
|
|
881
|
+
logger.info(
|
|
882
|
+
"ROLLOUT_EXEC: normalized trajectory sample run_id=%s inference_url=%s",
|
|
883
|
+
request.run_id,
|
|
884
|
+
normalized_trajs[0].get("inference_url") if normalized_trajs else None,
|
|
885
|
+
)
|
|
829
886
|
logger.info(
|
|
830
887
|
"ROLLOUT_EXEC: final pipeline metadata run_id=%s metadata=%s",
|
|
831
888
|
request.run_id,
|
|
@@ -844,6 +901,12 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
|
|
|
844
901
|
request.run_id,
|
|
845
902
|
existing_meta,
|
|
846
903
|
)
|
|
904
|
+
|
|
905
|
+
if data.get("trace") is None:
|
|
906
|
+
raise HTTPException(
|
|
907
|
+
status_code=500,
|
|
908
|
+
detail="trace_payload_missing: task app did not emit a SessionTrace",
|
|
909
|
+
)
|
|
847
910
|
|
|
848
911
|
# ASSERTION: Verify trace_correlation_id is present in response at all required levels
|
|
849
912
|
assert "trace_correlation_id" in data, (
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
This module now delegates to the TaskAppConfig defined in the colocated example at
|
|
4
4
|
`examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
|
|
5
5
|
(running the file directly or targeting `fastapi_app` from external tooling). Prefer using
|
|
6
|
-
`uvx synth-ai
|
|
6
|
+
`uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
from __future__ import annotations
|
|
@@ -197,6 +197,8 @@ class CrafterPolicy(Policy):
|
|
|
197
197
|
if self.use_tools:
|
|
198
198
|
payload["tools"] = TOOLS_SCHEMA
|
|
199
199
|
payload["tool_choice"] = "required"
|
|
200
|
+
payload["function_call"] = {"name": "interact_many"}
|
|
201
|
+
payload["parallel_tool_calls"] = False
|
|
200
202
|
# Ensure the inference server injects family-specific stop sequences
|
|
201
203
|
# to terminate immediately after the first tool call for compliance.
|
|
202
204
|
payload["stop_after_tool_calls"] = 1
|
|
@@ -207,13 +209,7 @@ class CrafterPolicy(Policy):
|
|
|
207
209
|
response: dict[str, Any],
|
|
208
210
|
use_tools: bool = True,
|
|
209
211
|
) -> list[dict[str, Any]]:
|
|
210
|
-
"""Turn an inference response into environment tool calls.
|
|
211
|
-
|
|
212
|
-
- If tools were used, expect tool_calls-compatible output and forward as-is
|
|
213
|
-
in our simple JSON format: {"tool_name": str, "arguments": {...}}.
|
|
214
|
-
- If no tools, parse plain-text actions using CrafterReActAgent parser and
|
|
215
|
-
wrap them into a single interact_many tool call.
|
|
216
|
-
"""
|
|
212
|
+
"""Turn an inference response into environment tool calls."""
|
|
217
213
|
# First check if we got actual tool calls
|
|
218
214
|
choices = response.get("choices", [])
|
|
219
215
|
tool_calls: list[dict[str, Any]] = []
|
|
@@ -272,24 +268,6 @@ class CrafterPolicy(Policy):
|
|
|
272
268
|
normalized.append(tc)
|
|
273
269
|
return normalized
|
|
274
270
|
|
|
275
|
-
# Otherwise, parse plain text content for actions
|
|
276
|
-
text = ""
|
|
277
|
-
for choice in choices:
|
|
278
|
-
msg = choice.get("message", {})
|
|
279
|
-
content = msg.get("content", "")
|
|
280
|
-
if content:
|
|
281
|
-
text = content
|
|
282
|
-
break
|
|
283
|
-
|
|
284
|
-
if text:
|
|
285
|
-
# Try to parse actions from the text
|
|
286
|
-
from .shared import parse_actions
|
|
287
|
-
|
|
288
|
-
actions = parse_actions(text)
|
|
289
|
-
if actions:
|
|
290
|
-
# Wrap actions in interact_many tool call
|
|
291
|
-
return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
|
|
292
|
-
|
|
293
271
|
# No actions found
|
|
294
272
|
return []
|
|
295
273
|
|
|
@@ -542,7 +520,7 @@ class CrafterPolicy(Policy):
|
|
|
542
520
|
"claude-3", # All Claude 3 models support vision
|
|
543
521
|
"gemini", # Gemini models
|
|
544
522
|
"qwen-vl", # Qwen Vision-Language models
|
|
545
|
-
"
|
|
523
|
+
"qwen3-vl", # Qwen3 VL
|
|
546
524
|
"pixtral", # Mistral's vision model
|
|
547
525
|
"llava", # LLaVA models
|
|
548
526
|
"phi-3-vision", # Microsoft Phi-3 Vision
|
|
@@ -45,8 +45,7 @@ class CrafterReActAgent:
|
|
|
45
45
|
"Action policy:\n"
|
|
46
46
|
"- Always return a single tool call: interact_many({actions: [...]})\n"
|
|
47
47
|
"- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
|
|
48
|
-
"- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
|
|
49
|
-
"- Do not spam the same exact sequence twice in a row—explore in varied directions.\n\n"
|
|
48
|
+
"- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n\n"
|
|
50
49
|
"Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
|
|
51
50
|
"place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
|
|
52
51
|
"make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
|
|
@@ -50,20 +50,19 @@ class OpenAIClient:
|
|
|
50
50
|
# Make a copy to avoid modifying the original
|
|
51
51
|
fixed_request = request.copy()
|
|
52
52
|
|
|
53
|
-
# Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI
|
|
54
|
-
#
|
|
53
|
+
# Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI).
|
|
54
|
+
# Groq shares the API surface but we keep tool enforcement fields intact.
|
|
55
55
|
is_openai = False
|
|
56
|
+
is_groq = False
|
|
56
57
|
try:
|
|
57
58
|
if isinstance(target_url, str):
|
|
58
59
|
low = target_url.lower()
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
or ("/proxy/openai" in low)
|
|
66
|
-
)
|
|
60
|
+
if "groq.com" in low or "/proxy/groq" in low:
|
|
61
|
+
is_groq = True
|
|
62
|
+
elif ("openai.com" in low) or ("azure" in low and ".openai." in low) or (
|
|
63
|
+
"/proxy/openai" in low
|
|
64
|
+
):
|
|
65
|
+
is_openai = True
|
|
67
66
|
except Exception:
|
|
68
67
|
is_openai = False
|
|
69
68
|
|
|
@@ -259,13 +258,13 @@ class OpenAIClient:
|
|
|
259
258
|
content_len = len(str(content)) if content else 0
|
|
260
259
|
logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content_type={type(content).__name__}, len={content_len}")
|
|
261
260
|
|
|
262
|
-
# Final hard-guard for OpenAI:
|
|
261
|
+
# Final hard-guard for OpenAI/Groq: drop unsupported field
|
|
263
262
|
try:
|
|
264
|
-
|
|
263
|
+
low_url = url.lower()
|
|
264
|
+
if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
|
|
265
265
|
processed_request.pop("stop_after_tool_calls", None)
|
|
266
|
-
logger.info("Removed stop_after_tool_calls for
|
|
266
|
+
logger.info("Removed stop_after_tool_calls for %s request", "Groq/OpenAI")
|
|
267
267
|
# Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
|
|
268
|
-
low_url = url.lower()
|
|
269
268
|
if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
|
|
270
269
|
processed_request, dict
|
|
271
270
|
):
|
|
@@ -546,47 +545,16 @@ class OpenAIClient:
|
|
|
546
545
|
error_block.get("code") or error_block.get("type") or ""
|
|
547
546
|
).lower()
|
|
548
547
|
if error_code in {"tool_use_failed", "tool_call_failed"}:
|
|
549
|
-
logger.
|
|
548
|
+
logger.error(
|
|
550
549
|
{
|
|
551
550
|
"tool_use_failed": True,
|
|
552
551
|
"target": (base_url or self.base_url),
|
|
553
552
|
"message": error_block.get("message") if isinstance(error_block, dict) else None,
|
|
554
553
|
}
|
|
555
554
|
)
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
"object": "chat.completion",
|
|
560
|
-
"created": int(time.time()),
|
|
561
|
-
"model": processed_request.get("model"),
|
|
562
|
-
"choices": [
|
|
563
|
-
{
|
|
564
|
-
"index": 0,
|
|
565
|
-
"message": {
|
|
566
|
-
"role": "assistant",
|
|
567
|
-
"content": "",
|
|
568
|
-
"tool_calls": [
|
|
569
|
-
{
|
|
570
|
-
"id": f"call_fallback_{int(time.time() * 1000)}",
|
|
571
|
-
"type": "function",
|
|
572
|
-
"function": {
|
|
573
|
-
"name": "interact_many",
|
|
574
|
-
"arguments": json.dumps(
|
|
575
|
-
{"actions": fallback_actions}
|
|
576
|
-
),
|
|
577
|
-
},
|
|
578
|
-
}
|
|
579
|
-
],
|
|
580
|
-
},
|
|
581
|
-
"finish_reason": "tool_calls",
|
|
582
|
-
}
|
|
583
|
-
],
|
|
584
|
-
}
|
|
585
|
-
if isinstance(response_data.get("usage"), dict):
|
|
586
|
-
fallback_response["usage"] = response_data["usage"]
|
|
587
|
-
if isinstance(error_block, dict):
|
|
588
|
-
fallback_response["error"] = error_block
|
|
589
|
-
return fallback_response
|
|
555
|
+
raise RuntimeError(
|
|
556
|
+
f"Inference 400 response (tool call failed): {error_block.get('message') if isinstance(error_block, dict) else 'Tool call failed'}"
|
|
557
|
+
) from e
|
|
590
558
|
# This is a different type of 400 error, don't retry
|
|
591
559
|
try:
|
|
592
560
|
redacted_headers = {}
|
|
@@ -462,6 +462,8 @@ async def step_policy(
|
|
|
462
462
|
)
|
|
463
463
|
|
|
464
464
|
# Emit full system/user prompts for observability (no secrets included)
|
|
465
|
+
system_prompt_records: list[dict[str, Any]] = []
|
|
466
|
+
user_prompt_records: list[dict[str, Any]] = []
|
|
465
467
|
try:
|
|
466
468
|
|
|
467
469
|
def _as_text(content: object) -> str:
|
|
@@ -481,8 +483,6 @@ async def step_policy(
|
|
|
481
483
|
return "".join(parts)
|
|
482
484
|
return str(content)
|
|
483
485
|
|
|
484
|
-
system_prompt_records: list[dict[str, Any]] = []
|
|
485
|
-
user_prompt_records: list[dict[str, Any]] = []
|
|
486
486
|
for message in msgs:
|
|
487
487
|
role = message.get("role")
|
|
488
488
|
raw_content = message.get("content")
|
|
@@ -525,6 +525,11 @@ async def step_policy(
|
|
|
525
525
|
|
|
526
526
|
if tracing_context is not None:
|
|
527
527
|
try:
|
|
528
|
+
logger.info(
|
|
529
|
+
"[TRACE_DEBUG] record_policy_prompts sys=%s user=%s",
|
|
530
|
+
len(system_prompt_records),
|
|
531
|
+
len(user_prompt_records),
|
|
532
|
+
)
|
|
528
533
|
await tracing_context.record_policy_prompts(
|
|
529
534
|
system_prompt_records, user_prompt_records
|
|
530
535
|
)
|
|
@@ -780,9 +785,10 @@ async def step_policy(
|
|
|
780
785
|
"sokoban-react",
|
|
781
786
|
"crafter-react",
|
|
782
787
|
) and getattr(policy, "use_tools", True):
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
788
|
+
inf_req = meta.get("inference_request", {})
|
|
789
|
+
req_tools = inf_req.get("tools")
|
|
790
|
+
req_tool_choice = inf_req.get("tool_choice")
|
|
791
|
+
req_stop_after = inf_req.get("stop_after_tool_calls")
|
|
786
792
|
logger.info(
|
|
787
793
|
f"TOOLCALL_CONFIG: policy={policy_name} tools_present={bool(req_tools)} tool_choice={req_tool_choice} stop_after={req_stop_after}"
|
|
788
794
|
)
|
|
@@ -791,6 +797,8 @@ async def step_policy(
|
|
|
791
797
|
status_code=500,
|
|
792
798
|
detail=f"TOOLCALL_ASSERTION_FAIL: Missing tools or tool_choice!=required for policy {policy_name}",
|
|
793
799
|
)
|
|
800
|
+
if req_stop_after is None:
|
|
801
|
+
inf_req["stop_after_tool_calls"] = 1
|
|
794
802
|
|
|
795
803
|
# Call inference service with retries for Flash cold-start (503)
|
|
796
804
|
import time as _t
|
|
@@ -491,6 +491,11 @@ class RolloutTracingContext:
|
|
|
491
491
|
getattr(request.record, "trace_format", "compact") or "compact"
|
|
492
492
|
).lower()
|
|
493
493
|
self.return_trace = bool(getattr(request.record, "return_trace", False))
|
|
494
|
+
logger.warning(
|
|
495
|
+
"[TRACE_DEBUG] RolloutTracingContext init: trace_format=%s return_trace=%s",
|
|
496
|
+
self.trace_format,
|
|
497
|
+
self.return_trace,
|
|
498
|
+
)
|
|
494
499
|
self.sft_output_dir = getattr(fastapi_request.app.state, "sft_output_dir", None)
|
|
495
500
|
self.session_trace = None
|
|
496
501
|
self.metadata_updates: dict[str, Any] = {}
|
|
@@ -590,7 +595,7 @@ class RolloutTracingContext:
|
|
|
590
595
|
# Debug: Check message count
|
|
591
596
|
if self.tracer and self.tracer._current_trace:
|
|
592
597
|
msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
|
|
593
|
-
logger.
|
|
598
|
+
logger.warning("[TRACE_DEBUG] After record_policy_prompts: %s messages", msg_count)
|
|
594
599
|
|
|
595
600
|
def _content_to_text(self, content: Any) -> str:
|
|
596
601
|
if isinstance(content, str):
|
|
@@ -669,6 +674,11 @@ class RolloutTracingContext:
|
|
|
669
674
|
message_type="assistant", # Map to standard assistant message type
|
|
670
675
|
metadata={**self._message_metadata(), "is_tool_call": True},
|
|
671
676
|
)
|
|
677
|
+
if self.tracer._current_trace:
|
|
678
|
+
logger.warning(
|
|
679
|
+
"[TRACE_DEBUG] After tool invocation: messages=%s",
|
|
680
|
+
len(self.tracer._current_trace.markov_blanket_message_history),
|
|
681
|
+
)
|
|
672
682
|
except Exception as exc:
|
|
673
683
|
logger.debug("TRACING_TOOL_MSG_FAIL: %s", exc)
|
|
674
684
|
|
|
@@ -991,6 +1001,10 @@ class RolloutTracingContext:
|
|
|
991
1001
|
if self.trace_format in ("full", "structured"):
|
|
992
1002
|
payload = session_trace.to_dict()
|
|
993
1003
|
payload.setdefault("metadata", {}).update(self.metadata_updates)
|
|
1004
|
+
logger.warning(
|
|
1005
|
+
"[TRACE_DEBUG] build_trace_payload returning structured trace with messages=%s",
|
|
1006
|
+
len(payload.get("markov_blanket_message_history") or []),
|
|
1007
|
+
)
|
|
994
1008
|
return payload
|
|
995
1009
|
|
|
996
1010
|
# For "compact" format, return only summary stats
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
This mirrors the structure of the Crafter task app wrapper while delegating
|
|
4
4
|
all configuration to the colocated `grpo_enron.py` module. Normal usage should
|
|
5
|
-
prefer invoking `uvx synth-ai
|
|
5
|
+
prefer invoking `uvx synth-ai deploy --runtime uvicorn grpo-enron`, but this module remains for
|
|
6
6
|
direct execution or importing the FastAPI app object.
|
|
7
7
|
"""
|
|
8
8
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
This directory hosts the legacy entrypoint for the math single-step task app. Prefer starting the app via:
|
|
4
4
|
|
|
5
5
|
```bash
|
|
6
|
-
uvx synth-ai
|
|
6
|
+
uvx synth-ai deploy --runtime uvicorn math-single-step --env-file examples/rl/.env --port 8101
|
|
7
7
|
```
|
|
8
8
|
|
|
9
9
|
If you need to run it directly (e.g., for Modal `modal deploy` compatibility), use:
|
|
@@ -19,4 +19,3 @@ Environment variables:
|
|
|
19
19
|
- `MATH_DATASET_DEFAULT_SPLIT`, `MATH_DATASET_VALIDATION_SPLIT`, `MATH_DATASET_TEST_SPLIT`
|
|
20
20
|
|
|
21
21
|
The task app enforces a single `math_submit` tool call per episode, enabling RL to reward correct final answers and penalise missing or malformed submissions.
|
|
22
|
-
|
|
@@ -17,7 +17,7 @@ A reinforcement learning environment for Pokémon Red using PyBoy emulation with
|
|
|
17
17
|
|
|
18
18
|
```bash
|
|
19
19
|
# From synth-ai root
|
|
20
|
-
uv run -m synth_ai task-app
|
|
20
|
+
uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8913
|
|
21
21
|
```
|
|
22
22
|
|
|
23
23
|
### 2. Run a Random Rollout
|
|
@@ -232,7 +232,7 @@ uv add pyboy
|
|
|
232
232
|
lsof -ti :8913 | xargs -r kill -9
|
|
233
233
|
|
|
234
234
|
# Or use a different port
|
|
235
|
-
uv run -m synth_ai task-app
|
|
235
|
+
uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8914
|
|
236
236
|
```
|
|
237
237
|
|
|
238
238
|
## Examples
|
|
@@ -249,7 +249,7 @@ cd /Users/joshpurtell/Documents/GitHub/synth-ai
|
|
|
249
249
|
echo "OPENAI_API_KEY=sk-..." >> .env
|
|
250
250
|
|
|
251
251
|
# 2. Start the task app server (in background)
|
|
252
|
-
nohup sh -c 'printf "n\n" | uv run -m synth_ai task-app
|
|
252
|
+
nohup sh -c 'printf "n\n" | uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8913 --no-reload' > nohup_pokemon.log 2>&1 &
|
|
253
253
|
|
|
254
254
|
# Wait for startup
|
|
255
255
|
sleep 8
|
|
@@ -354,4 +354,3 @@ TOTAL REWARD: 705 points
|
|
|
354
354
|
- **PyBoy**: Game Boy emulator - https://github.com/Baekalfen/PyBoy
|
|
355
355
|
- **Pokémon Red Disassembly**: RAM map reference - https://github.com/pret/pokered
|
|
356
356
|
- **Datacrystal.org**: Memory address documentation
|
|
357
|
-
|
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
# Evaluation config for Pokemon Red with image-only input
|
|
1
|
+
# Evaluation config for Pokemon Red with image-only input and NEW REWARD SYSTEM
|
|
2
2
|
# This config uses GPT-4o mini with only image data (no text observations)
|
|
3
|
+
# Uses the comprehensive reward system with deterministic progress milestones
|
|
3
4
|
|
|
4
5
|
[eval]
|
|
5
6
|
app_id = "pokemon_red"
|
|
6
7
|
model = "gpt-4o-mini-2024-07-18"
|
|
7
|
-
seeds = [0, 1, 2, 3, 4
|
|
8
|
-
max_turns =
|
|
8
|
+
seeds = [0, 1, 2, 3, 4] # Test with fewer seeds for quick results
|
|
9
|
+
max_turns = 20 # Allow more turns to see progress
|
|
9
10
|
concurrency = 1 # Keep low initially to avoid issues
|
|
10
11
|
env_name = "pokemon_red"
|
|
11
12
|
policy_name = "pokemon_red_policy"
|
|
@@ -13,7 +14,7 @@ trace_format = "full"
|
|
|
13
14
|
return_trace = true
|
|
14
15
|
|
|
15
16
|
[eval.env_config]
|
|
16
|
-
max_steps_per_episode =
|
|
17
|
+
max_steps_per_episode = 20
|
|
17
18
|
|
|
18
19
|
[eval.policy_config]
|
|
19
20
|
provider = "openai"
|
|
@@ -24,6 +25,6 @@ top_p = 0.95
|
|
|
24
25
|
max_tokens = 512
|
|
25
26
|
use_vision = true
|
|
26
27
|
image_only_mode = true
|
|
27
|
-
max_llm_calls =
|
|
28
|
+
max_llm_calls = 20
|
|
28
29
|
|
|
29
30
|
|