synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/baseline/banking77_baseline.py +204 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +12 -10
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +1 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +1 -1
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +60 -10
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +1 -1
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -0
- examples/rl/configs/rl_from_base_qwen17.toml +1 -0
- examples/swe/task_app/hosted/inference/openai_client.py +0 -34
- examples/swe/task_app/hosted/policy_routes.py +17 -0
- examples/swe/task_app/hosted/rollout.py +4 -2
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +841 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +24 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +355 -58
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +68 -7
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +78 -21
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
- examples/task_apps/pokemon_red/task_app.py +254 -36
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +1 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +53 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +152 -41
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +31 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +1 -0
- synth_ai/api/train/builders.py +90 -1
- synth_ai/api/train/cli.py +396 -21
- synth_ai/api/train/config_finder.py +13 -2
- synth_ai/api/train/configs/__init__.py +15 -1
- synth_ai/api/train/configs/prompt_learning.py +442 -0
- synth_ai/api/train/configs/rl.py +29 -0
- synth_ai/api/train/task_app.py +1 -1
- synth_ai/api/train/validators.py +277 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cli/__init__.py +85 -17
- synth_ai/cli/__main__.py +0 -0
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +84 -0
- synth_ai/cli/commands/__init__.py +1 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/eval/core.py +13 -10
- synth_ai/cli/commands/filter/core.py +53 -17
- synth_ai/cli/commands/help/core.py +0 -1
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1436 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/train/judge_schemas.py +1 -0
- synth_ai/cli/commands/train/judge_validation.py +1 -0
- synth_ai/cli/commands/train/validation.py +0 -57
- synth_ai/cli/demo.py +35 -3
- synth_ai/cli/deploy/__init__.py +40 -25
- synth_ai/cli/deploy.py +162 -0
- synth_ai/cli/legacy_root_backup.py +14 -8
- synth_ai/cli/opencode.py +107 -0
- synth_ai/cli/root.py +9 -5
- synth_ai/cli/task_app_deploy.py +1 -1
- synth_ai/cli/task_apps.py +53 -53
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/judge_schemas.py +1 -0
- synth_ai/learning/__init__.py +10 -0
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +184 -0
- synth_ai/pricing/__init__.py +2 -0
- synth_ai/pricing/model_pricing.py +57 -0
- synth_ai/streaming/handlers.py +53 -4
- synth_ai/streaming/streamer.py +19 -0
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +2 -0
- synth_ai/task/tracing_utils.py +25 -25
- synth_ai/task/validators.py +44 -8
- synth_ai/task_app_cfgs.py +21 -0
- synth_ai/tracing_v3/config.py +162 -19
- synth_ai/tracing_v3/constants.py +1 -1
- synth_ai/tracing_v3/db_config.py +24 -38
- synth_ai/tracing_v3/storage/config.py +47 -13
- synth_ai/tracing_v3/storage/factory.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +113 -11
- synth_ai/tracing_v3/turso/native_manager.py +92 -16
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +30 -1
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/cli.py +149 -5
- synth_ai/utils/env.py +17 -17
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/modal.py +283 -1
- synth_ai/utils/paths.py +48 -0
- synth_ai/utils/uvicorn.py +113 -0
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/METADATA +102 -4
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/RECORD +162 -88
- synth_ai/cli/commands/deploy/__init__.py +0 -23
- synth_ai/cli/commands/deploy/core.py +0 -614
- synth_ai/cli/commands/deploy/errors.py +0 -72
- synth_ai/cli/commands/deploy/validation.py +0 -11
- synth_ai/cli/deploy/core.py +0 -5
- synth_ai/cli/deploy/errors.py +0 -23
- synth_ai/cli/deploy/validation.py +0 -5
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
synth_ai/cli/opencode.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
from synth_ai.types import MODEL_NAMES, ModelName
|
|
6
|
+
from synth_ai.urls import BACKEND_URL_SYNTH_RESEARCH_BASE
|
|
7
|
+
from synth_ai.utils import (
|
|
8
|
+
create_and_write_json,
|
|
9
|
+
find_bin_path,
|
|
10
|
+
install_bin,
|
|
11
|
+
load_json_to_dict,
|
|
12
|
+
resolve_env_var,
|
|
13
|
+
verify_bin,
|
|
14
|
+
write_agents_md,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
CONFIG_PATH = Path.home() / ".config" / "opencode" / "opencode.json"
|
|
18
|
+
AUTH_PATH = Path.home() / ".local" / "share" / "opencode" / "auth.json"
|
|
19
|
+
SYNTH_PROVIDER_ID = "synth"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@click.command("opencode")
|
|
23
|
+
@click.option(
|
|
24
|
+
"--model",
|
|
25
|
+
"model_name",
|
|
26
|
+
type=str,
|
|
27
|
+
default=None
|
|
28
|
+
)
|
|
29
|
+
@click.option(
|
|
30
|
+
"--force",
|
|
31
|
+
is_flag=True,
|
|
32
|
+
help="Prompt for API keys even if cached values exist."
|
|
33
|
+
)
|
|
34
|
+
@click.option(
|
|
35
|
+
"--url",
|
|
36
|
+
"override_url",
|
|
37
|
+
type=str,
|
|
38
|
+
default=None,
|
|
39
|
+
)
|
|
40
|
+
def opencode_cmd(
|
|
41
|
+
model_name: ModelName | None = None,
|
|
42
|
+
force: bool = False,
|
|
43
|
+
override_url: str | None = None
|
|
44
|
+
) -> None:
|
|
45
|
+
|
|
46
|
+
while True:
|
|
47
|
+
bin_path = find_bin_path("opencode")
|
|
48
|
+
if bin_path:
|
|
49
|
+
break
|
|
50
|
+
if not install_bin(
|
|
51
|
+
"OpenCode",
|
|
52
|
+
[
|
|
53
|
+
"brew install opencode",
|
|
54
|
+
"bun add -g opencode-ai",
|
|
55
|
+
"curl -fsSL https://opencode.ai/install | bash",
|
|
56
|
+
"npm i -g opencode-ai",
|
|
57
|
+
"paru -S opencode"
|
|
58
|
+
]
|
|
59
|
+
):
|
|
60
|
+
print("Failed to find your installed OpenCode")
|
|
61
|
+
print("Please install from: https://opencode.ai")
|
|
62
|
+
return
|
|
63
|
+
print(f"Using OpenCode at {bin_path}")
|
|
64
|
+
|
|
65
|
+
if not verify_bin(bin_path):
|
|
66
|
+
print("Failed to verify OpenCode is runnable")
|
|
67
|
+
return
|
|
68
|
+
|
|
69
|
+
write_agents_md()
|
|
70
|
+
|
|
71
|
+
if model_name is not None:
|
|
72
|
+
if model_name not in MODEL_NAMES:
|
|
73
|
+
raise ValueError(
|
|
74
|
+
f"model_name={model_name} is invalid. Valid values for model_name: {MODEL_NAMES}"
|
|
75
|
+
)
|
|
76
|
+
synth_api_key = resolve_env_var("SYNTH_API_KEY", override_process_env=force)
|
|
77
|
+
data = load_json_to_dict(AUTH_PATH)
|
|
78
|
+
good_entry = {
|
|
79
|
+
"type": "api",
|
|
80
|
+
"key": synth_api_key,
|
|
81
|
+
}
|
|
82
|
+
if data.get(SYNTH_PROVIDER_ID) != good_entry:
|
|
83
|
+
data[SYNTH_PROVIDER_ID] = good_entry
|
|
84
|
+
create_and_write_json(AUTH_PATH, data)
|
|
85
|
+
config = load_json_to_dict(CONFIG_PATH)
|
|
86
|
+
config.setdefault("$schema", "https://opencode.ai/config.json")
|
|
87
|
+
if override_url:
|
|
88
|
+
url = override_url
|
|
89
|
+
print("Using override URL:", url)
|
|
90
|
+
else:
|
|
91
|
+
url = BACKEND_URL_SYNTH_RESEARCH_BASE
|
|
92
|
+
provider_section = config.setdefault("provider", {})
|
|
93
|
+
synth_provider = provider_section.setdefault(SYNTH_PROVIDER_ID, {})
|
|
94
|
+
synth_provider["npm"] = "@ai-sdk/openai-compatible"
|
|
95
|
+
synth_provider.setdefault("name", "Synth")
|
|
96
|
+
models = synth_provider.setdefault("models", {})
|
|
97
|
+
models.setdefault(model_name, {})
|
|
98
|
+
options = synth_provider.setdefault("options", {})
|
|
99
|
+
options["baseURL"] = url
|
|
100
|
+
full_model_name = f"{SYNTH_PROVIDER_ID}/{model_name}"
|
|
101
|
+
config["model"] = full_model_name
|
|
102
|
+
create_and_write_json(CONFIG_PATH, config)
|
|
103
|
+
|
|
104
|
+
try:
|
|
105
|
+
subprocess.run([str(bin_path)], check=True)
|
|
106
|
+
except subprocess.CalledProcessError:
|
|
107
|
+
print("Failed to launch OpenCode")
|
synth_ai/cli/root.py
CHANGED
|
@@ -158,7 +158,7 @@ def cli():
|
|
|
158
158
|
|
|
159
159
|
@cli.command()
|
|
160
160
|
@click.option("--db-file", default="traces/v3/synth_ai.db", help="Database file path")
|
|
161
|
-
@click.option("--sqld-port", default=8080, type=int, help="Port for sqld HTTP
|
|
161
|
+
@click.option("--sqld-port", default=8080, type=int, help="Port for sqld Hrana WebSocket interface (HTTP API will be port+1)")
|
|
162
162
|
@click.option("--env-port", default=8901, type=int, help="Port for environment service")
|
|
163
163
|
@click.option("--no-sqld", is_flag=True, help="Skip starting sqld daemon")
|
|
164
164
|
@click.option("--no-env", is_flag=True, help="Skip starting environment service")
|
|
@@ -204,21 +204,25 @@ def serve_deprecated(
|
|
|
204
204
|
|
|
205
205
|
if not no_sqld:
|
|
206
206
|
try:
|
|
207
|
+
hrana_port = sqld_port
|
|
208
|
+
http_port = sqld_port + 1
|
|
207
209
|
result = subprocess.run(
|
|
208
|
-
["pgrep", "-f", f"sqld
|
|
210
|
+
["pgrep", "-f", f"sqld.*(--hrana-listen-addr.*:{hrana_port}|--http-listen-addr.*:{http_port})"],
|
|
209
211
|
capture_output=True,
|
|
210
212
|
text=True,
|
|
211
213
|
)
|
|
212
214
|
if result.returncode != 0:
|
|
213
215
|
sqld_bin = find_sqld_binary() or install_sqld()
|
|
214
|
-
click.echo(f"🗄️ Starting sqld (local only) on port {
|
|
216
|
+
click.echo(f"🗄️ Starting sqld (local only) on hrana port {hrana_port}, HTTP API port {http_port}")
|
|
215
217
|
proc = subprocess.Popen(
|
|
216
218
|
[
|
|
217
219
|
sqld_bin,
|
|
218
220
|
"--db-path",
|
|
219
221
|
db_file,
|
|
222
|
+
"--hrana-listen-addr",
|
|
223
|
+
f"127.0.0.1:{hrana_port}",
|
|
220
224
|
"--http-listen-addr",
|
|
221
|
-
f"127.0.0.1:{
|
|
225
|
+
f"127.0.0.1:{http_port}",
|
|
222
226
|
],
|
|
223
227
|
stdout=open("sqld.log", "w"), # noqa: SIM115
|
|
224
228
|
stderr=subprocess.STDOUT,
|
|
@@ -274,7 +278,7 @@ def serve_deprecated(
|
|
|
274
278
|
click.echo(f" Working directory: {os.getcwd()}")
|
|
275
279
|
click.echo("")
|
|
276
280
|
click.echo("🔄 Starting services...")
|
|
277
|
-
click.echo(f" - sqld daemon: http://127.0.0.1:{sqld_port}")
|
|
281
|
+
click.echo(f" - sqld daemon: libsql://127.0.0.1:{sqld_port} (HTTP API: http://127.0.0.1:{sqld_port + 1})")
|
|
278
282
|
click.echo(f" - Environment service: http://127.0.0.1:{env_port}")
|
|
279
283
|
click.echo("")
|
|
280
284
|
click.echo("💡 Tips:")
|
synth_ai/cli/task_app_deploy.py
CHANGED
synth_ai/cli/task_apps.py
CHANGED
|
@@ -32,7 +32,6 @@ except Exception: # pragma: no cover - fallback
|
|
|
32
32
|
|
|
33
33
|
import click
|
|
34
34
|
from click.exceptions import Abort
|
|
35
|
-
from synth_ai.cli.commands import deploy as _deploy_commands
|
|
36
35
|
from synth_ai.cli.commands.eval import core as eval_core
|
|
37
36
|
from synth_ai.cli.commands.filter import core as filter_core
|
|
38
37
|
|
|
@@ -269,20 +268,25 @@ def _markov_message_from_dict(payload: dict[str, Any]) -> SessionEventMarkovBlan
|
|
|
269
268
|
json_payload=content_payload.get("json_payload"),
|
|
270
269
|
)
|
|
271
270
|
raw_type = (payload.get("message_type") or "").lower()
|
|
272
|
-
|
|
271
|
+
original_type = payload.get("message_type") or raw_type
|
|
272
|
+
|
|
273
|
+
if raw_type in ("observation", "policy_system_prompt"):
|
|
273
274
|
normalized_type = "system"
|
|
274
|
-
elif raw_type
|
|
275
|
+
elif raw_type in ("action", "policy_tool_call"):
|
|
275
276
|
normalized_type = "assistant"
|
|
276
277
|
elif raw_type in {"user", "assistant", "system", "tool_use", "tool_result"}:
|
|
277
278
|
normalized_type = raw_type
|
|
278
279
|
else:
|
|
279
280
|
normalized_type = "system"
|
|
280
281
|
|
|
282
|
+
metadata = dict(payload.get("metadata") or {})
|
|
283
|
+
metadata["original_message_type"] = original_type
|
|
284
|
+
|
|
281
285
|
return SessionEventMarkovBlanketMessage(
|
|
282
286
|
content=content,
|
|
283
287
|
message_type=normalized_type,
|
|
284
288
|
time_record=_time_record_from_dict(payload.get("time_record")),
|
|
285
|
-
metadata=
|
|
289
|
+
metadata=metadata,
|
|
286
290
|
)
|
|
287
291
|
|
|
288
292
|
|
|
@@ -354,12 +358,8 @@ async def _store_trace(
|
|
|
354
358
|
_logger.info(f"[STORE_TRACE_DEBUG] Called with tracer={tracer is not None}, trace_namespace={trace_namespace is not None}")
|
|
355
359
|
|
|
356
360
|
if tracer is None or not isinstance(trace_namespace, dict):
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
f"Got tracer_present={tracer is not None}, payload_type={type(trace_namespace)}"
|
|
360
|
-
)
|
|
361
|
-
_logger.error("[STORE_TRACE_DEBUG] %s", message)
|
|
362
|
-
raise ValueError(message)
|
|
361
|
+
_logger.warning(f"[STORE_TRACE_DEBUG] Early return: tracer={tracer is not None}, trace_namespace type={type(trace_namespace)}")
|
|
362
|
+
return
|
|
363
363
|
|
|
364
364
|
_logger.info(f"[STORE_TRACE_DEBUG] trace_namespace keys: {list(trace_namespace.keys())}")
|
|
365
365
|
|
|
@@ -373,13 +373,8 @@ async def _store_trace(
|
|
|
373
373
|
session_payload = trace_namespace
|
|
374
374
|
_logger.info("[STORE_TRACE_DEBUG] Using trace_namespace directly as session_payload (no session_trace key)")
|
|
375
375
|
else:
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
"session fields (session_id, markov_blanket_message_history). "
|
|
379
|
-
f"Payload keys: {list(trace_namespace.keys())}"
|
|
380
|
-
)
|
|
381
|
-
_logger.error("[STORE_TRACE_DEBUG] %s", message)
|
|
382
|
-
raise ValueError(message)
|
|
376
|
+
_logger.warning(f"[STORE_TRACE_DEBUG] No session_trace found or wrong type: {type(session_payload)}")
|
|
377
|
+
return
|
|
383
378
|
|
|
384
379
|
_logger.info(f"[STORE_TRACE_DEBUG] session_payload keys: {list(session_payload.keys())}")
|
|
385
380
|
msg_count = len(session_payload.get("markov_blanket_message_history", []))
|
|
@@ -387,26 +382,8 @@ async def _store_trace(
|
|
|
387
382
|
|
|
388
383
|
trace_obj = _session_trace_from_dict(session_payload)
|
|
389
384
|
if trace_obj is None:
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
raise ValueError(message)
|
|
393
|
-
|
|
394
|
-
if not trace_obj.markov_blanket_message_history:
|
|
395
|
-
message = (
|
|
396
|
-
"Session trace is missing markov_blanket_message_history; "
|
|
397
|
-
"eval output must include all prompts/tool calls. "
|
|
398
|
-
f"session_id={trace_obj.session_id}"
|
|
399
|
-
)
|
|
400
|
-
_logger.error("[STORE_TRACE_DEBUG] %s", message)
|
|
401
|
-
raise ValueError(message)
|
|
402
|
-
|
|
403
|
-
if not trace_obj.event_history:
|
|
404
|
-
message = (
|
|
405
|
-
"Session trace is missing event_history; rollout should emit environment/LLM events. "
|
|
406
|
-
f"session_id={trace_obj.session_id}"
|
|
407
|
-
)
|
|
408
|
-
_logger.error("[STORE_TRACE_DEBUG] %s", message)
|
|
409
|
-
raise ValueError(message)
|
|
385
|
+
_logger.warning("[STORE_TRACE_DEBUG] _session_trace_from_dict returned None")
|
|
386
|
+
return
|
|
410
387
|
|
|
411
388
|
_logger.info(f"[STORE_TRACE_DEBUG] Created SessionTrace object with {len(trace_obj.markov_blanket_message_history)} messages")
|
|
412
389
|
|
|
@@ -2454,16 +2431,7 @@ def serve_command(
|
|
|
2454
2431
|
trace_dir: str | None,
|
|
2455
2432
|
trace_db: str | None,
|
|
2456
2433
|
) -> None:
|
|
2457
|
-
|
|
2458
|
-
app_id,
|
|
2459
|
-
host,
|
|
2460
|
-
port,
|
|
2461
|
-
env_file,
|
|
2462
|
-
reload_flag,
|
|
2463
|
-
force,
|
|
2464
|
-
trace_dir,
|
|
2465
|
-
trace_db,
|
|
2466
|
-
)
|
|
2434
|
+
return None
|
|
2467
2435
|
|
|
2468
2436
|
|
|
2469
2437
|
@task_app_group.command("info")
|
|
@@ -2575,20 +2543,52 @@ def serve_task_group(
|
|
|
2575
2543
|
trace_dir: str | None,
|
|
2576
2544
|
trace_db: str | None,
|
|
2577
2545
|
) -> None:
|
|
2578
|
-
|
|
2579
|
-
|
|
2546
|
+
"""Serve a TaskAppConfig-based task app using uvicorn."""
|
|
2547
|
+
import contextlib
|
|
2548
|
+
|
|
2549
|
+
if not host:
|
|
2550
|
+
host = "0.0.0.0"
|
|
2551
|
+
|
|
2552
|
+
if port is None:
|
|
2553
|
+
port = 8001
|
|
2554
|
+
|
|
2555
|
+
# Auto-enable tracing by default
|
|
2556
|
+
try:
|
|
2557
|
+
auto_trace = os.getenv("SYNTH_AUTO_TRACE", "1")
|
|
2558
|
+
auto_trace_enabled = auto_trace not in {"0", "false", "False", ""}
|
|
2559
|
+
except Exception:
|
|
2560
|
+
auto_trace_enabled = True
|
|
2561
|
+
|
|
2562
|
+
if auto_trace_enabled:
|
|
2563
|
+
demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
|
|
2564
|
+
if trace_dir is None:
|
|
2565
|
+
default_trace_dir = (demo_base / "traces" / "v3").resolve()
|
|
2566
|
+
with contextlib.suppress(Exception):
|
|
2567
|
+
default_trace_dir.mkdir(parents=True, exist_ok=True)
|
|
2568
|
+
trace_dir = str(default_trace_dir)
|
|
2569
|
+
click.echo(f"[trace] Using trace directory: {trace_dir}")
|
|
2570
|
+
if trace_dir and trace_db is None:
|
|
2571
|
+
default_trace_db = (Path(trace_dir) / "synth_ai.db").resolve()
|
|
2572
|
+
with contextlib.suppress(Exception):
|
|
2573
|
+
default_trace_db.parent.mkdir(parents=True, exist_ok=True)
|
|
2574
|
+
trace_db = str(default_trace_db)
|
|
2575
|
+
click.echo(f"[trace] Using trace DB: {trace_db}")
|
|
2576
|
+
|
|
2577
|
+
# Select and serve the app
|
|
2578
|
+
choice = _select_app_choice(app_id, purpose="serve")
|
|
2579
|
+
entry = choice.ensure_entry()
|
|
2580
|
+
_serve_entry(
|
|
2581
|
+
entry,
|
|
2580
2582
|
host,
|
|
2581
2583
|
port,
|
|
2582
2584
|
env_file,
|
|
2583
2585
|
reload_flag,
|
|
2584
2586
|
force,
|
|
2585
|
-
trace_dir,
|
|
2586
|
-
trace_db,
|
|
2587
|
+
trace_dir=trace_dir,
|
|
2588
|
+
trace_db=trace_db,
|
|
2587
2589
|
)
|
|
2588
2590
|
|
|
2589
2591
|
|
|
2590
|
-
_deploy_commands.register_task_app_commands(task_app_group)
|
|
2591
|
-
|
|
2592
2592
|
|
|
2593
2593
|
def _determine_env_files(
|
|
2594
2594
|
entry: TaskAppEntryType, user_env_files: Sequence[str], *, original_path: Path | None = None
|
|
@@ -6,16 +6,18 @@ the hash-based set-iteration nondeterminism that caused the drift.
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import collections
|
|
9
|
+
import os
|
|
9
10
|
|
|
10
11
|
import crafter
|
|
11
12
|
|
|
12
|
-
|
|
13
|
+
# Patch messages permanently disabled to reduce noise
|
|
14
|
+
# print("[PATCH] Attempting to apply Crafter deterministic patch...")
|
|
13
15
|
|
|
14
16
|
# -----------------------------------------------------------------------------
|
|
15
17
|
# 1. Make per–chunk object order stable
|
|
16
18
|
# -----------------------------------------------------------------------------
|
|
17
19
|
if not hasattr(crafter.Env, "_orig_balance_object"):
|
|
18
|
-
print("[PATCH] Patching crafter.Env._balance_object...")
|
|
20
|
+
# print("[PATCH] Patching crafter.Env._balance_object...")
|
|
19
21
|
crafter.Env._orig_balance_object = crafter.Env._balance_object
|
|
20
22
|
|
|
21
23
|
def _balance_object_det(self, chunk, objs, *args, **kwargs):
|
|
@@ -25,9 +27,10 @@ if not hasattr(crafter.Env, "_orig_balance_object"):
|
|
|
25
27
|
return crafter.Env._orig_balance_object(self, chunk, objs, *args, **kwargs)
|
|
26
28
|
|
|
27
29
|
crafter.Env._balance_object = _balance_object_det
|
|
28
|
-
print("[PATCH] crafter.Env._balance_object patched.")
|
|
30
|
+
# print("[PATCH] crafter.Env._balance_object patched.")
|
|
29
31
|
else:
|
|
30
|
-
|
|
32
|
+
pass
|
|
33
|
+
# print("[PATCH] crafter.Env._balance_object already patched or _orig_balance_object exists.")
|
|
31
34
|
|
|
32
35
|
# -----------------------------------------------------------------------------
|
|
33
36
|
# 2. Make *chunk* iteration order stable
|
|
@@ -4,6 +4,7 @@ This version handles player references for Zombie and Skeleton objects.
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
import collections
|
|
7
|
+
import os
|
|
7
8
|
import pickle
|
|
8
9
|
from typing import Any, Dict, Optional, Set
|
|
9
10
|
|
|
@@ -11,11 +12,12 @@ import crafter
|
|
|
11
12
|
import numpy as np
|
|
12
13
|
from crafter import objects
|
|
13
14
|
|
|
14
|
-
|
|
15
|
+
# Patch messages permanently disabled
|
|
16
|
+
# print("[PATCH] Attempting to apply Crafter serialization patch v3...")
|
|
15
17
|
|
|
16
18
|
# Check if already patched
|
|
17
19
|
if not hasattr(crafter.Env, "save"):
|
|
18
|
-
print("[PATCH] Adding enhanced save/load methods to crafter.Env...")
|
|
20
|
+
# print("[PATCH] Adding enhanced save/load methods to crafter.Env...")
|
|
19
21
|
|
|
20
22
|
def _save(self) -> Dict[str, Any]:
|
|
21
23
|
"""Save complete environment state including all details."""
|
|
@@ -260,8 +262,10 @@ if not hasattr(crafter.Env, "save"):
|
|
|
260
262
|
crafter.Env.save = _save
|
|
261
263
|
crafter.Env.load = _load
|
|
262
264
|
|
|
263
|
-
|
|
265
|
+
pass
|
|
266
|
+
# print("[PATCH] crafter.Env.save() and load() methods added (v3).")
|
|
264
267
|
else:
|
|
265
|
-
|
|
268
|
+
pass
|
|
269
|
+
# print("[PATCH] crafter.Env already has save/load methods.")
|
|
266
270
|
|
|
267
|
-
print("[PATCH] Crafter serialization patch v3 complete.")
|
|
271
|
+
# print("[PATCH] Crafter serialization patch v3 complete.")
|
|
@@ -9,7 +9,8 @@ from typing import Any, Dict, Optional
|
|
|
9
9
|
|
|
10
10
|
import crafter
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
# Patch messages permanently disabled
|
|
13
|
+
# print("[PATCH] Attempting to apply simplified Crafter world configuration patch...")
|
|
13
14
|
|
|
14
15
|
# World configuration presets
|
|
15
16
|
WORLD_CONFIGS = {
|
|
@@ -279,8 +280,8 @@ def patched_env_init(
|
|
|
279
280
|
|
|
280
281
|
crafter.Env.__init__ = patched_env_init
|
|
281
282
|
|
|
282
|
-
print("[PATCH] Simplified Crafter world configuration patch complete.")
|
|
283
|
-
print("[PATCH] Available configs: easy, normal, hard, peaceful")
|
|
283
|
+
# print("[PATCH] Simplified Crafter world configuration patch complete.")
|
|
284
|
+
# print("[PATCH] Available configs: easy, normal, hard, peaceful")
|
|
284
285
|
|
|
285
286
|
# Example custom config
|
|
286
287
|
EXAMPLE_CUSTOM_CONFIG = {
|
synth_ai/judge_schemas.py
CHANGED
|
@@ -124,3 +124,4 @@ class JudgeScoreRequest(BaseModel):
|
|
|
124
124
|
trace: JudgeTracePayload = Field(..., description="Trajectory trace to evaluate")
|
|
125
125
|
options: JudgeOptions = Field(default_factory=lambda: JudgeOptions(), description="Judge options")
|
|
126
126
|
rubric: Optional[dict[str, Any]] = Field(None, description="Optional explicit rubric criteria")
|
|
127
|
+
|
synth_ai/learning/__init__.py
CHANGED
|
@@ -3,6 +3,12 @@ from synth_ai.task import task_app_health, validate_task_app_url
|
|
|
3
3
|
from .client import LearningClient
|
|
4
4
|
from .health import backend_health, balance_autumn_normalized, pricing_preflight
|
|
5
5
|
from .jobs import JobHandle, JobsApiResolver
|
|
6
|
+
from .prompt_learning_client import (
|
|
7
|
+
PromptLearningClient,
|
|
8
|
+
get_prompt_text,
|
|
9
|
+
get_prompts,
|
|
10
|
+
get_scoring_summary,
|
|
11
|
+
)
|
|
6
12
|
from .rl import (
|
|
7
13
|
MAX_ENVIRONMENT_API_KEY_BYTES,
|
|
8
14
|
RlClient,
|
|
@@ -32,6 +38,10 @@ __all__ = [
|
|
|
32
38
|
"FtClient",
|
|
33
39
|
"SFTJobConfig",
|
|
34
40
|
"prepare_sft_job_payload",
|
|
41
|
+
"PromptLearningClient",
|
|
42
|
+
"get_prompts",
|
|
43
|
+
"get_prompt_text",
|
|
44
|
+
"get_scoring_summary",
|
|
35
45
|
"RolloutEnvSpec",
|
|
36
46
|
"RolloutPolicySpec",
|
|
37
47
|
"RolloutRecordConfig",
|