PyPI - synth-ai - Versions diffs - 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl - Mend

synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (169)

examples/baseline/banking77_baseline.py +204 -0
examples/baseline/crafter_baseline.py +407 -0
examples/baseline/pokemon_red_baseline.py +326 -0
examples/baseline/simple_baseline.py +56 -0
examples/baseline/warming_up_to_rl_baseline.py +239 -0
examples/blog_posts/gepa/README.md +355 -0
examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
examples/blog_posts/gepa/gepa_baseline.py +204 -0
examples/blog_posts/gepa/query_prompts_example.py +97 -0
examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
examples/blog_posts/gepa/task_apps.py +105 -0
examples/blog_posts/gepa/test_gepa_local.sh +67 -0
examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +12 -10
examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +1 -0
examples/blog_posts/pokemon_vl/extract_images.py +239 -0
examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +1 -1
examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +60 -10
examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +1 -1
examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
examples/multi_step/configs/crafter_rl_outcome.toml +1 -0
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -0
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -0
examples/rl/configs/rl_from_base_qwen17.toml +1 -0
examples/swe/task_app/hosted/inference/openai_client.py +0 -34
examples/swe/task_app/hosted/policy_routes.py +17 -0
examples/swe/task_app/hosted/rollout.py +4 -2
examples/task_apps/banking77/__init__.py +6 -0
examples/task_apps/banking77/banking77_task_app.py +841 -0
examples/task_apps/banking77/deploy_wrapper.py +46 -0
examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
examples/task_apps/crafter/task_app/grpo_crafter.py +24 -2
examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +355 -58
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +68 -7
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +78 -21
examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
examples/task_apps/gepa_benchmarks/__init__.py +7 -0
examples/task_apps/gepa_benchmarks/common.py +260 -0
examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
examples/task_apps/pokemon_red/task_app.py +254 -36
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +1 -0
examples/warming_up_to_rl/task_app/grpo_crafter.py +53 -4
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +152 -41
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +31 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +1 -0
synth_ai/api/train/builders.py +90 -1
synth_ai/api/train/cli.py +396 -21
synth_ai/api/train/config_finder.py +13 -2
synth_ai/api/train/configs/__init__.py +15 -1
synth_ai/api/train/configs/prompt_learning.py +442 -0
synth_ai/api/train/configs/rl.py +29 -0
synth_ai/api/train/task_app.py +1 -1
synth_ai/api/train/validators.py +277 -0
synth_ai/baseline/__init__.py +25 -0
synth_ai/baseline/config.py +209 -0
synth_ai/baseline/discovery.py +214 -0
synth_ai/baseline/execution.py +146 -0
synth_ai/cli/__init__.py +85 -17
synth_ai/cli/__main__.py +0 -0
synth_ai/cli/claude.py +70 -0
synth_ai/cli/codex.py +84 -0
synth_ai/cli/commands/__init__.py +1 -0
synth_ai/cli/commands/baseline/__init__.py +12 -0
synth_ai/cli/commands/baseline/core.py +637 -0
synth_ai/cli/commands/baseline/list.py +93 -0
synth_ai/cli/commands/eval/core.py +13 -10
synth_ai/cli/commands/filter/core.py +53 -17
synth_ai/cli/commands/help/core.py +0 -1
synth_ai/cli/commands/smoke/__init__.py +7 -0
synth_ai/cli/commands/smoke/core.py +1436 -0
synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
synth_ai/cli/commands/status/subcommands/usage.py +203 -0
synth_ai/cli/commands/train/judge_schemas.py +1 -0
synth_ai/cli/commands/train/judge_validation.py +1 -0
synth_ai/cli/commands/train/validation.py +0 -57
synth_ai/cli/demo.py +35 -3
synth_ai/cli/deploy/__init__.py +40 -25
synth_ai/cli/deploy.py +162 -0
synth_ai/cli/legacy_root_backup.py +14 -8
synth_ai/cli/opencode.py +107 -0
synth_ai/cli/root.py +9 -5
synth_ai/cli/task_app_deploy.py +1 -1
synth_ai/cli/task_apps.py +53 -53
synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
synth_ai/judge_schemas.py +1 -0
synth_ai/learning/__init__.py +10 -0
synth_ai/learning/prompt_learning_client.py +276 -0
synth_ai/learning/prompt_learning_types.py +184 -0
synth_ai/pricing/__init__.py +2 -0
synth_ai/pricing/model_pricing.py +57 -0
synth_ai/streaming/handlers.py +53 -4
synth_ai/streaming/streamer.py +19 -0
synth_ai/task/apps/__init__.py +1 -0
synth_ai/task/config.py +2 -0
synth_ai/task/tracing_utils.py +25 -25
synth_ai/task/validators.py +44 -8
synth_ai/task_app_cfgs.py +21 -0
synth_ai/tracing_v3/config.py +162 -19
synth_ai/tracing_v3/constants.py +1 -1
synth_ai/tracing_v3/db_config.py +24 -38
synth_ai/tracing_v3/storage/config.py +47 -13
synth_ai/tracing_v3/storage/factory.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +113 -11
synth_ai/tracing_v3/turso/native_manager.py +92 -16
synth_ai/types.py +8 -0
synth_ai/urls.py +11 -0
synth_ai/utils/__init__.py +30 -1
synth_ai/utils/agents.py +74 -0
synth_ai/utils/bin.py +39 -0
synth_ai/utils/cli.py +149 -5
synth_ai/utils/env.py +17 -17
synth_ai/utils/json.py +72 -0
synth_ai/utils/modal.py +283 -1
synth_ai/utils/paths.py +48 -0
synth_ai/utils/uvicorn.py +113 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/METADATA +102 -4
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/RECORD +162 -88
synth_ai/cli/commands/deploy/__init__.py +0 -23
synth_ai/cli/commands/deploy/core.py +0 -614
synth_ai/cli/commands/deploy/errors.py +0 -72
synth_ai/cli/commands/deploy/validation.py +0 -11
synth_ai/cli/deploy/core.py +0 -5
synth_ai/cli/deploy/errors.py +0 -23
synth_ai/cli/deploy/validation.py +0 -5
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0

synth_ai/cli/commands/baseline/list.py ADDED Viewed

@@ -0,0 +1,93 @@
+"""List command for baseline discovery."""
+from __future__ import annotations
+from pathlib import Path
+from typing import Optional
+import click
+from synth_ai.baseline.config import BaselineConfig
+from synth_ai.baseline.discovery import (
+    BaselineChoice,
+    discover_baseline_files,
+    load_baseline_config_from_file,
+)
+@click.command("list")
+@click.option(
+    "--tag",
+    multiple=True,
+    help="Filter baselines by tag (can be specified multiple times)",
+)
+@click.option(
+    "--metadata",
+    type=str,
+    help="Filter by metadata key-value pair (format: key=value)",
+)
+@click.option(
+    "--verbose",
+    is_flag=True,
+    help="Show detailed information about each baseline",
+)
+def list_command(tag: tuple[str, ...], metadata: Optional[str], verbose: bool) -> None:
+    """List all available baseline files."""
+    search_roots = [Path.cwd()]
+    choices = discover_baseline_files(search_roots)
+    if not choices:
+        click.echo("No baseline files found.", err=True)
+        click.echo("Create baseline files in examples/baseline/ or */*_baseline.py")
+        return
+    # Load configs for filtering
+    configs: list[tuple[BaselineChoice, BaselineConfig]] = []
+    for choice in choices:
+        try:
+            config = load_baseline_config_from_file(choice.baseline_id, choice.path)
+            configs.append((choice, config))
+        except Exception as e:
+            if verbose:
+                click.echo(f"Warning: Could not load {choice.baseline_id}: {e}", err=True)
+            continue
+    # Apply filters
+    filtered_configs = configs
+    if tag:
+        tag_set = {t.lower() for t in tag}
+        filtered_configs = [
+            (c, config) for c, config in filtered_configs
+            if any(config.matches_tag(t) for t in tag_set)
+        ]
+    if metadata:
+        if "=" not in metadata:
+            raise click.ClickException("--metadata must be in format key=value")
+        key, value = metadata.split("=", 1)
+        filtered_configs = [
+            (c, config) for c, config in filtered_configs
+            if config.matches_metadata(key.strip(), value.strip())
+        ]
+    if not filtered_configs:
+        click.echo("No baselines match the specified filters.")
+        return
+    # Display results
+    click.echo(f"Found {len(filtered_configs)} baseline(s):\n")
+    for choice, config in filtered_configs:
+        click.echo(f"  {config.baseline_id}")
+        click.echo(f"    Name: {config.name}")
+        if config.description:
+            click.echo(f"    Description: {config.description}")
+        if config.tags:
+            click.echo(f"    Tags: {', '.join(config.tags)}")
+        click.echo(f"    Splits: {', '.join(config.splits.keys())}")
+        if verbose:
+            click.echo(f"    Path: {choice.path}")
+            if config.metadata:
+                click.echo(f"    Metadata: {config.metadata}")
+        click.echo()

synth_ai/cli/commands/eval/core.py CHANGED Viewed

@@ -17,6 +17,7 @@ from typing import TYPE_CHECKING, Any, cast
 import click
 from synth_ai.task.config import EvalConfig
+from synth_ai.tracing_v3.session_tracer import SessionTracer
 from synth_ai.utils.task_app_discovery import discover_eval_config_paths
 from .errors import (
@@ -199,8 +200,9 @@ def _eval_command_impl(
     if cfg:
         try:
             normalized_cfg = validate_eval_options(cfg)
-            eval_cfg = EvalConfig.from_dict(dict(normalized_cfg))
-            cfg = dict(normalized_cfg)
+            normalized_cfg_dict = dict(normalized_cfg)
+            eval_cfg = EvalConfig.from_dict(normalized_cfg_dict)
+            cfg = normalized_cfg_dict
             click.echo(f"✓ Config validated: {len(eval_cfg.seeds)} seeds, model={eval_cfg.model}")
         except (ValueError, TypeError) as validation_error:
             raise InvalidEvalConfigError(detail=str(validation_error)) from validation_error
@@ -261,11 +263,9 @@ def _eval_command_impl(
             trace_path = Path(trace_db).expanduser()
             trace_path.parent.mkdir(parents=True, exist_ok=True)
             trace_db_url = f"sqlite+aiosqlite:///{trace_path}"
-    trace_tracer = (
-        session_tracer_cls(db_url=trace_db_url, auto_save=True)
-        if trace_db_url and session_tracer_cls is not None
-        else None
-    )
+    trace_tracer: SessionTracer | None = None
+    if trace_db_url and session_tracer_cls is not None:
+        trace_tracer = cast(SessionTracer, session_tracer_cls(db_url=trace_db_url, auto_save=True))
     # Determine selection params (CLI takes precedence; TOML only fills unset model/seeds/env)
     if cfg.get("model") and not model:
@@ -723,14 +723,17 @@ def _eval_command_impl(
                     "mode": "eval",  # RolloutMode.EVAL: use inference URLs as-is, no transformations
                 }
                 if env_name:
-                    body["env"]["env_name"] = env_name  # type: ignore[assignment]
+                    env_section = body.get("env")
+                    if isinstance(env_section, dict):
+                        env_section["env_name"] = env_name
+                    else:
+                        body["env"] = {"env_name": env_name}
                 # Debug: print the body being sent
                 if seed_val == 0:
                     click.echo(f"[DEBUG] rollout body env: {body['env']}")
                     click.echo(f"[DEBUG] rollout body policy: {body['policy']}")
                     click.echo(f"[DEBUG] rollout body mode: {body.get('mode', 'NOT SET')}")
-                    click.echo(f"[DEBUG] rollout record payload: {body.get('record')}")
                 rollout_elapsed: float | None = None
                 rollout_start = time.perf_counter()
                 try:

synth_ai/cli/commands/filter/core.py CHANGED Viewed

@@ -139,12 +139,23 @@ def _select_messages(message_rows: Sequence[dict[str, Any]]) -> list[dict[str, A
         if msg_type not in {"user", "policy_user_prompt"}:
             continue
+        # Look backwards for system prompt
+        system_msg = None
+        for prev in range(index - 1, -1, -1):
+            prev_type = message_rows[prev].get("message_type")
+            if prev_type == "policy_system_prompt":
+                system_msg = message_rows[prev]
+                break
         assistant_msg = None
+        tool_call_msg = None
         for follow in range(index + 1, len(message_rows)):
             next_type = message_rows[follow].get("message_type")
-            if next_type in {"assistant", "policy_system_prompt"}:
-                if next_type == "assistant":
-                    assistant_msg = message_rows[follow]
+            if next_type == "assistant":
+                assistant_msg = message_rows[follow]
+                break
+            elif next_type == "policy_tool_call":
+                tool_call_msg = message_rows[follow]
                 break
         try:
@@ -157,8 +168,34 @@ def _select_messages(message_rows: Sequence[dict[str, Any]]) -> list[dict[str, A
         if not user_text:
             continue
+        messages = []
+        # Add system prompt if found
+        if system_msg is not None:
+            try:
+                system_content_raw = system_msg.get("content")
+                system_content = json.loads(system_content_raw) if isinstance(system_content_raw, str) else system_content_raw
+                system_content = _extract_content(system_content)
+                system_text = _extract_text(system_content)
+                if system_text:
+                    messages.append({"role": "system", "content": system_text})
+            except Exception:
+                pass
+        # Add user message
+        user_payload = user_content if isinstance(user_content, list) else user_text
+        messages.append({"role": "user", "content": user_payload})
+        # Add assistant/tool call response
         assistant_content = None
-        if assistant_msg is not None:
+        if tool_call_msg is not None:
+            raw = tool_call_msg.get("content")
+            try:
+                assistant_content = json.loads(raw) if isinstance(raw, str) else raw
+            except Exception:
+                assistant_content = raw
+            assistant_content = _extract_content(assistant_content)
+        elif assistant_msg is not None:
             raw = assistant_msg.get("content")
             try:
                 assistant_content = json.loads(raw) if isinstance(raw, str) else raw
@@ -166,22 +203,14 @@ def _select_messages(message_rows: Sequence[dict[str, Any]]) -> list[dict[str, A
                 assistant_content = raw
             assistant_content = _extract_content(assistant_content)
-        assistant_text = _extract_text(assistant_content) if assistant_content is not None else ""
-        user_payload = user_content if isinstance(user_content, list) else user_text
         assistant_payload = (
             assistant_content
             if isinstance(assistant_content, list)
-            else (assistant_text or "[no response recorded]")
+            else (_extract_text(assistant_content) if assistant_content is not None else "[no response recorded]")
         )
+        messages.append({"role": "assistant", "content": assistant_payload})
-        records.append(
-            {
-                "messages": [
-                    {"role": "user", "content": user_payload},
-                    {"role": "assistant", "content": assistant_payload},
-                ]
-            }
-        )
+        records.append({"messages": messages})
     return records
@@ -219,7 +248,9 @@ def filter_command(config_path: str) -> None:
     async def _run() -> None:
         tracer = SessionTracer(db_url=db_url, auto_save=False)
         await tracer.initialize()
-        assert tracer.db is not None, "Database should be initialized"
+        if tracer.db is None:
+            raise FilterCliError("Database not initialized")
         df = await tracer.db.query_traces(
             "SELECT session_id, created_at, metadata FROM session_traces ORDER BY created_at"
@@ -261,6 +292,8 @@ def filter_command(config_path: str) -> None:
             total_reward = None
             achievements_count = None
             if min_official is not None or max_official is not None:
+                if tracer.db is None:
+                    raise FilterCliError("Database not initialized")
                 reward_rows = await tracer.db.query_traces(
                     "SELECT total_reward, achievements_count FROM outcome_rewards WHERE session_id = :session_id",
                     {"session_id": session_id},
@@ -296,6 +329,8 @@ def filter_command(config_path: str) -> None:
             messages_query = (
                 "\n            SELECT message_type, content, timestamp \n            FROM messages \n            WHERE session_id = :session_id\n            ORDER BY timestamp ASC, id ASC\n        "
             )
+            if tracer.db is None:
+                raise FilterCliError("Database not initialized")
             msg_df = await tracer.db.query_traces(messages_query, {"session_id": session_id})
             message_rows = (
                 msg_df.to_dict("records") if hasattr(msg_df, "to_dict") else []
@@ -353,7 +388,8 @@ def filter_command(config_path: str) -> None:
                 handle.write("\n")
         click.echo(f"Wrote {len(accepted)} examples -> {output_path}")
-        await tracer.db.close()
+        if tracer.db is not None:
+            await tracer.db.close()
     try:
         asyncio.run(_run())

synth_ai/cli/commands/help/core.py CHANGED Viewed

@@ -70,4 +70,3 @@ def register(group: click.Group) -> None:
 __all__ = ["help_command", "get_command", "register"]
-

synth_ai/cli/commands/smoke/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+from __future__ import annotations
+from .core import command, register
+__all__ = ["command", "register"]

synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl