synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/baseline/banking77_baseline.py +204 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +12 -10
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +1 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +1 -1
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +60 -10
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +1 -1
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -0
- examples/rl/configs/rl_from_base_qwen17.toml +1 -0
- examples/swe/task_app/hosted/inference/openai_client.py +0 -34
- examples/swe/task_app/hosted/policy_routes.py +17 -0
- examples/swe/task_app/hosted/rollout.py +4 -2
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +841 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +24 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +355 -58
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +68 -7
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +78 -21
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
- examples/task_apps/pokemon_red/task_app.py +254 -36
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +1 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +53 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +152 -41
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +31 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +1 -0
- synth_ai/api/train/builders.py +90 -1
- synth_ai/api/train/cli.py +396 -21
- synth_ai/api/train/config_finder.py +13 -2
- synth_ai/api/train/configs/__init__.py +15 -1
- synth_ai/api/train/configs/prompt_learning.py +442 -0
- synth_ai/api/train/configs/rl.py +29 -0
- synth_ai/api/train/task_app.py +1 -1
- synth_ai/api/train/validators.py +277 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cli/__init__.py +85 -17
- synth_ai/cli/__main__.py +0 -0
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +84 -0
- synth_ai/cli/commands/__init__.py +1 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/eval/core.py +13 -10
- synth_ai/cli/commands/filter/core.py +53 -17
- synth_ai/cli/commands/help/core.py +0 -1
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1436 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/train/judge_schemas.py +1 -0
- synth_ai/cli/commands/train/judge_validation.py +1 -0
- synth_ai/cli/commands/train/validation.py +0 -57
- synth_ai/cli/demo.py +35 -3
- synth_ai/cli/deploy/__init__.py +40 -25
- synth_ai/cli/deploy.py +162 -0
- synth_ai/cli/legacy_root_backup.py +14 -8
- synth_ai/cli/opencode.py +107 -0
- synth_ai/cli/root.py +9 -5
- synth_ai/cli/task_app_deploy.py +1 -1
- synth_ai/cli/task_apps.py +53 -53
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/judge_schemas.py +1 -0
- synth_ai/learning/__init__.py +10 -0
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +184 -0
- synth_ai/pricing/__init__.py +2 -0
- synth_ai/pricing/model_pricing.py +57 -0
- synth_ai/streaming/handlers.py +53 -4
- synth_ai/streaming/streamer.py +19 -0
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +2 -0
- synth_ai/task/tracing_utils.py +25 -25
- synth_ai/task/validators.py +44 -8
- synth_ai/task_app_cfgs.py +21 -0
- synth_ai/tracing_v3/config.py +162 -19
- synth_ai/tracing_v3/constants.py +1 -1
- synth_ai/tracing_v3/db_config.py +24 -38
- synth_ai/tracing_v3/storage/config.py +47 -13
- synth_ai/tracing_v3/storage/factory.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +113 -11
- synth_ai/tracing_v3/turso/native_manager.py +92 -16
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +30 -1
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/cli.py +149 -5
- synth_ai/utils/env.py +17 -17
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/modal.py +283 -1
- synth_ai/utils/paths.py +48 -0
- synth_ai/utils/uvicorn.py +113 -0
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/METADATA +102 -4
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/RECORD +162 -88
- synth_ai/cli/commands/deploy/__init__.py +0 -23
- synth_ai/cli/commands/deploy/core.py +0 -614
- synth_ai/cli/commands/deploy/errors.py +0 -72
- synth_ai/cli/commands/deploy/validation.py +0 -11
- synth_ai/cli/deploy/core.py +0 -5
- synth_ai/cli/deploy/errors.py +0 -23
- synth_ai/cli/deploy/validation.py +0 -5
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""List command for baseline discovery."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
from synth_ai.baseline.config import BaselineConfig
|
|
10
|
+
from synth_ai.baseline.discovery import (
|
|
11
|
+
BaselineChoice,
|
|
12
|
+
discover_baseline_files,
|
|
13
|
+
load_baseline_config_from_file,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@click.command("list")
@click.option(
    "--tag",
    multiple=True,
    help="Filter baselines by tag (can be specified multiple times)",
)
@click.option(
    "--metadata",
    type=str,
    help="Filter by metadata key-value pair (format: key=value)",
)
@click.option(
    "--verbose",
    is_flag=True,
    help="Show detailed information about each baseline",
)
def list_command(tag: tuple[str, ...], metadata: Optional[str], verbose: bool) -> None:
    """Print the baselines discovered under the current working directory.

    Discovery starts from ``Path.cwd()``. Every discovered file is loaded into
    a config; files that fail to load are skipped (with a warning on stderr
    only when ``verbose`` is set). The loaded configs are then narrowed by the
    optional tag and metadata filters before being printed.

    Args:
        tag: Zero or more tag names; a baseline is kept when its config
            matches at least one of them (names are lower-cased first).
        metadata: Optional ``key=value`` filter; both sides are stripped of
            surrounding whitespace before matching.
        verbose: Also print load warnings, the file path, and metadata.

    Raises:
        click.ClickException: If ``metadata`` is given without an ``=``.
    """
    discovered = discover_baseline_files([Path.cwd()])
    if not discovered:
        click.echo("No baseline files found.", err=True)
        click.echo("Create baseline files in examples/baseline/ or */*_baseline.py")
        return

    # Pair each discovered file with its loaded config; unloadable files are
    # dropped so one broken baseline does not hide the rest of the listing.
    loaded: list[tuple[BaselineChoice, BaselineConfig]] = []
    for candidate in discovered:
        try:
            candidate_config = load_baseline_config_from_file(
                candidate.baseline_id, candidate.path
            )
        except Exception as load_error:
            if verbose:
                click.echo(
                    f"Warning: Could not load {candidate.baseline_id}: {load_error}",
                    err=True,
                )
        else:
            loaded.append((candidate, candidate_config))

    selected = loaded

    # Tag filter: keep a baseline when any requested tag matches.
    if tag:
        wanted_tags = {name.lower() for name in tag}
        selected = [
            pair
            for pair in selected
            if any(pair[1].matches_tag(wanted) for wanted in wanted_tags)
        ]

    # Metadata filter: a single key=value pair, split on the first "=".
    if metadata:
        meta_key, separator, meta_value = metadata.partition("=")
        if not separator:
            raise click.ClickException("--metadata must be in format key=value")
        meta_key = meta_key.strip()
        meta_value = meta_value.strip()
        selected = [
            pair for pair in selected if pair[1].matches_metadata(meta_key, meta_value)
        ]

    if not selected:
        click.echo("No baselines match the specified filters.")
        return

    click.echo(f"Found {len(selected)} baseline(s):\n")

    for candidate, cfg in selected:
        click.echo(f" {cfg.baseline_id}")
        click.echo(f" Name: {cfg.name}")
        if cfg.description:
            click.echo(f" Description: {cfg.description}")
        if cfg.tags:
            click.echo(f" Tags: {', '.join(cfg.tags)}")
        click.echo(f" Splits: {', '.join(cfg.splits.keys())}")
        if verbose:
            click.echo(f" Path: {candidate.path}")
            # NOTE(review): indentation was lost in the rendered diff; the
            # metadata line is assumed to be verbose-only — confirm.
            if cfg.metadata:
                click.echo(f" Metadata: {cfg.metadata}")
        # Blank separator line between baselines.
        click.echo()
|
|
93
|
+
|
|
@@ -17,6 +17,7 @@ from typing import TYPE_CHECKING, Any, cast
|
|
|
17
17
|
|
|
18
18
|
import click
|
|
19
19
|
from synth_ai.task.config import EvalConfig
|
|
20
|
+
from synth_ai.tracing_v3.session_tracer import SessionTracer
|
|
20
21
|
from synth_ai.utils.task_app_discovery import discover_eval_config_paths
|
|
21
22
|
|
|
22
23
|
from .errors import (
|
|
@@ -199,8 +200,9 @@ def _eval_command_impl(
|
|
|
199
200
|
if cfg:
|
|
200
201
|
try:
|
|
201
202
|
normalized_cfg = validate_eval_options(cfg)
|
|
202
|
-
|
|
203
|
-
|
|
203
|
+
normalized_cfg_dict = dict(normalized_cfg)
|
|
204
|
+
eval_cfg = EvalConfig.from_dict(normalized_cfg_dict)
|
|
205
|
+
cfg = normalized_cfg_dict
|
|
204
206
|
click.echo(f"✓ Config validated: {len(eval_cfg.seeds)} seeds, model={eval_cfg.model}")
|
|
205
207
|
except (ValueError, TypeError) as validation_error:
|
|
206
208
|
raise InvalidEvalConfigError(detail=str(validation_error)) from validation_error
|
|
@@ -261,11 +263,9 @@ def _eval_command_impl(
|
|
|
261
263
|
trace_path = Path(trace_db).expanduser()
|
|
262
264
|
trace_path.parent.mkdir(parents=True, exist_ok=True)
|
|
263
265
|
trace_db_url = f"sqlite+aiosqlite:///{trace_path}"
|
|
264
|
-
trace_tracer =
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
else None
|
|
268
|
-
)
|
|
266
|
+
trace_tracer: SessionTracer | None = None
|
|
267
|
+
if trace_db_url and session_tracer_cls is not None:
|
|
268
|
+
trace_tracer = cast(SessionTracer, session_tracer_cls(db_url=trace_db_url, auto_save=True))
|
|
269
269
|
|
|
270
270
|
# Determine selection params (CLI takes precedence; TOML only fills unset model/seeds/env)
|
|
271
271
|
if cfg.get("model") and not model:
|
|
@@ -723,14 +723,17 @@ def _eval_command_impl(
|
|
|
723
723
|
"mode": "eval", # RolloutMode.EVAL: use inference URLs as-is, no transformations
|
|
724
724
|
}
|
|
725
725
|
if env_name:
|
|
726
|
-
body
|
|
727
|
-
|
|
726
|
+
env_section = body.get("env")
|
|
727
|
+
if isinstance(env_section, dict):
|
|
728
|
+
env_section["env_name"] = env_name
|
|
729
|
+
else:
|
|
730
|
+
body["env"] = {"env_name": env_name}
|
|
731
|
+
|
|
728
732
|
# Debug: print the body being sent
|
|
729
733
|
if seed_val == 0:
|
|
730
734
|
click.echo(f"[DEBUG] rollout body env: {body['env']}")
|
|
731
735
|
click.echo(f"[DEBUG] rollout body policy: {body['policy']}")
|
|
732
736
|
click.echo(f"[DEBUG] rollout body mode: {body.get('mode', 'NOT SET')}")
|
|
733
|
-
click.echo(f"[DEBUG] rollout record payload: {body.get('record')}")
|
|
734
737
|
rollout_elapsed: float | None = None
|
|
735
738
|
rollout_start = time.perf_counter()
|
|
736
739
|
try:
|
|
@@ -139,12 +139,23 @@ def _select_messages(message_rows: Sequence[dict[str, Any]]) -> list[dict[str, A
|
|
|
139
139
|
if msg_type not in {"user", "policy_user_prompt"}:
|
|
140
140
|
continue
|
|
141
141
|
|
|
142
|
+
# Look backwards for system prompt
|
|
143
|
+
system_msg = None
|
|
144
|
+
for prev in range(index - 1, -1, -1):
|
|
145
|
+
prev_type = message_rows[prev].get("message_type")
|
|
146
|
+
if prev_type == "policy_system_prompt":
|
|
147
|
+
system_msg = message_rows[prev]
|
|
148
|
+
break
|
|
149
|
+
|
|
142
150
|
assistant_msg = None
|
|
151
|
+
tool_call_msg = None
|
|
143
152
|
for follow in range(index + 1, len(message_rows)):
|
|
144
153
|
next_type = message_rows[follow].get("message_type")
|
|
145
|
-
if next_type
|
|
146
|
-
|
|
147
|
-
|
|
154
|
+
if next_type == "assistant":
|
|
155
|
+
assistant_msg = message_rows[follow]
|
|
156
|
+
break
|
|
157
|
+
elif next_type == "policy_tool_call":
|
|
158
|
+
tool_call_msg = message_rows[follow]
|
|
148
159
|
break
|
|
149
160
|
|
|
150
161
|
try:
|
|
@@ -157,8 +168,34 @@ def _select_messages(message_rows: Sequence[dict[str, Any]]) -> list[dict[str, A
|
|
|
157
168
|
if not user_text:
|
|
158
169
|
continue
|
|
159
170
|
|
|
171
|
+
messages = []
|
|
172
|
+
|
|
173
|
+
# Add system prompt if found
|
|
174
|
+
if system_msg is not None:
|
|
175
|
+
try:
|
|
176
|
+
system_content_raw = system_msg.get("content")
|
|
177
|
+
system_content = json.loads(system_content_raw) if isinstance(system_content_raw, str) else system_content_raw
|
|
178
|
+
system_content = _extract_content(system_content)
|
|
179
|
+
system_text = _extract_text(system_content)
|
|
180
|
+
if system_text:
|
|
181
|
+
messages.append({"role": "system", "content": system_text})
|
|
182
|
+
except Exception:
|
|
183
|
+
pass
|
|
184
|
+
|
|
185
|
+
# Add user message
|
|
186
|
+
user_payload = user_content if isinstance(user_content, list) else user_text
|
|
187
|
+
messages.append({"role": "user", "content": user_payload})
|
|
188
|
+
|
|
189
|
+
# Add assistant/tool call response
|
|
160
190
|
assistant_content = None
|
|
161
|
-
if
|
|
191
|
+
if tool_call_msg is not None:
|
|
192
|
+
raw = tool_call_msg.get("content")
|
|
193
|
+
try:
|
|
194
|
+
assistant_content = json.loads(raw) if isinstance(raw, str) else raw
|
|
195
|
+
except Exception:
|
|
196
|
+
assistant_content = raw
|
|
197
|
+
assistant_content = _extract_content(assistant_content)
|
|
198
|
+
elif assistant_msg is not None:
|
|
162
199
|
raw = assistant_msg.get("content")
|
|
163
200
|
try:
|
|
164
201
|
assistant_content = json.loads(raw) if isinstance(raw, str) else raw
|
|
@@ -166,22 +203,14 @@ def _select_messages(message_rows: Sequence[dict[str, Any]]) -> list[dict[str, A
|
|
|
166
203
|
assistant_content = raw
|
|
167
204
|
assistant_content = _extract_content(assistant_content)
|
|
168
205
|
|
|
169
|
-
assistant_text = _extract_text(assistant_content) if assistant_content is not None else ""
|
|
170
|
-
user_payload = user_content if isinstance(user_content, list) else user_text
|
|
171
206
|
assistant_payload = (
|
|
172
207
|
assistant_content
|
|
173
208
|
if isinstance(assistant_content, list)
|
|
174
|
-
else (
|
|
209
|
+
else (_extract_text(assistant_content) if assistant_content is not None else "[no response recorded]")
|
|
175
210
|
)
|
|
211
|
+
messages.append({"role": "assistant", "content": assistant_payload})
|
|
176
212
|
|
|
177
|
-
records.append(
|
|
178
|
-
{
|
|
179
|
-
"messages": [
|
|
180
|
-
{"role": "user", "content": user_payload},
|
|
181
|
-
{"role": "assistant", "content": assistant_payload},
|
|
182
|
-
]
|
|
183
|
-
}
|
|
184
|
-
)
|
|
213
|
+
records.append({"messages": messages})
|
|
185
214
|
return records
|
|
186
215
|
|
|
187
216
|
|
|
@@ -219,7 +248,9 @@ def filter_command(config_path: str) -> None:
|
|
|
219
248
|
async def _run() -> None:
|
|
220
249
|
tracer = SessionTracer(db_url=db_url, auto_save=False)
|
|
221
250
|
await tracer.initialize()
|
|
222
|
-
|
|
251
|
+
|
|
252
|
+
if tracer.db is None:
|
|
253
|
+
raise FilterCliError("Database not initialized")
|
|
223
254
|
|
|
224
255
|
df = await tracer.db.query_traces(
|
|
225
256
|
"SELECT session_id, created_at, metadata FROM session_traces ORDER BY created_at"
|
|
@@ -261,6 +292,8 @@ def filter_command(config_path: str) -> None:
|
|
|
261
292
|
total_reward = None
|
|
262
293
|
achievements_count = None
|
|
263
294
|
if min_official is not None or max_official is not None:
|
|
295
|
+
if tracer.db is None:
|
|
296
|
+
raise FilterCliError("Database not initialized")
|
|
264
297
|
reward_rows = await tracer.db.query_traces(
|
|
265
298
|
"SELECT total_reward, achievements_count FROM outcome_rewards WHERE session_id = :session_id",
|
|
266
299
|
{"session_id": session_id},
|
|
@@ -296,6 +329,8 @@ def filter_command(config_path: str) -> None:
|
|
|
296
329
|
messages_query = (
|
|
297
330
|
"\n SELECT message_type, content, timestamp \n FROM messages \n WHERE session_id = :session_id\n ORDER BY timestamp ASC, id ASC\n "
|
|
298
331
|
)
|
|
332
|
+
if tracer.db is None:
|
|
333
|
+
raise FilterCliError("Database not initialized")
|
|
299
334
|
msg_df = await tracer.db.query_traces(messages_query, {"session_id": session_id})
|
|
300
335
|
message_rows = (
|
|
301
336
|
msg_df.to_dict("records") if hasattr(msg_df, "to_dict") else []
|
|
@@ -353,7 +388,8 @@ def filter_command(config_path: str) -> None:
|
|
|
353
388
|
handle.write("\n")
|
|
354
389
|
|
|
355
390
|
click.echo(f"Wrote {len(accepted)} examples -> {output_path}")
|
|
356
|
-
|
|
391
|
+
if tracer.db is not None:
|
|
392
|
+
await tracer.db.close()
|
|
357
393
|
|
|
358
394
|
try:
|
|
359
395
|
asyncio.run(_run())
|