PyPI - synth-ai - Versions diffs - 0.2.4.dev5__py3-none-any.whl → 0.2.4.dev7__py3-none-any.whl - Mend

synth-ai 0.2.4.dev5__py3-none-any.whl → 0.2.4.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (229) hide show

synth_ai/__init__.py +18 -9
synth_ai/cli/__init__.py +10 -5
synth_ai/cli/balance.py +22 -17
synth_ai/cli/calc.py +2 -3
synth_ai/cli/demo.py +3 -5
synth_ai/cli/legacy_root_backup.py +58 -32
synth_ai/cli/man.py +22 -19
synth_ai/cli/recent.py +9 -8
synth_ai/cli/root.py +58 -13
synth_ai/cli/status.py +13 -6
synth_ai/cli/traces.py +45 -21
synth_ai/cli/watch.py +40 -37
synth_ai/config/base_url.py +1 -3
synth_ai/core/experiment.py +1 -2
synth_ai/environments/__init__.py +2 -6
synth_ai/environments/environment/artifacts/base.py +3 -1
synth_ai/environments/environment/db/sqlite.py +1 -1
synth_ai/environments/environment/registry.py +19 -20
synth_ai/environments/environment/resources/sqlite.py +2 -3
synth_ai/environments/environment/rewards/core.py +3 -2
synth_ai/environments/environment/tools/__init__.py +6 -4
synth_ai/environments/examples/crafter_classic/__init__.py +1 -1
synth_ai/environments/examples/crafter_classic/engine.py +21 -17
synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +1 -0
synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +2 -1
synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +2 -1
synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +3 -2
synth_ai/environments/examples/crafter_classic/environment.py +16 -15
synth_ai/environments/examples/crafter_classic/taskset.py +2 -2
synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +2 -3
synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +2 -1
synth_ai/environments/examples/crafter_custom/crafter/__init__.py +2 -2
synth_ai/environments/examples/crafter_custom/crafter/config.py +2 -2
synth_ai/environments/examples/crafter_custom/crafter/env.py +1 -5
synth_ai/environments/examples/crafter_custom/crafter/objects.py +1 -2
synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +1 -2
synth_ai/environments/examples/crafter_custom/dataset_builder.py +5 -5
synth_ai/environments/examples/crafter_custom/environment.py +13 -13
synth_ai/environments/examples/crafter_custom/run_dataset.py +5 -5
synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +2 -2
synth_ai/environments/examples/enron/art_helpers/local_email_db.py +5 -4
synth_ai/environments/examples/enron/art_helpers/types_enron.py +2 -1
synth_ai/environments/examples/enron/engine.py +18 -14
synth_ai/environments/examples/enron/environment.py +12 -11
synth_ai/environments/examples/enron/taskset.py +7 -7
synth_ai/environments/examples/minigrid/__init__.py +6 -6
synth_ai/environments/examples/minigrid/engine.py +6 -6
synth_ai/environments/examples/minigrid/environment.py +6 -6
synth_ai/environments/examples/minigrid/puzzle_loader.py +3 -2
synth_ai/environments/examples/minigrid/taskset.py +13 -13
synth_ai/environments/examples/nethack/achievements.py +1 -1
synth_ai/environments/examples/nethack/engine.py +8 -7
synth_ai/environments/examples/nethack/environment.py +10 -9
synth_ai/environments/examples/nethack/helpers/__init__.py +8 -9
synth_ai/environments/examples/nethack/helpers/action_mapping.py +1 -1
synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +2 -1
synth_ai/environments/examples/nethack/helpers/observation_utils.py +1 -1
synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +3 -4
synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +6 -5
synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +5 -5
synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +7 -6
synth_ai/environments/examples/nethack/taskset.py +5 -5
synth_ai/environments/examples/red/engine.py +9 -8
synth_ai/environments/examples/red/engine_helpers/reward_components.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +7 -7
synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +3 -2
synth_ai/environments/examples/red/engine_helpers/state_extraction.py +2 -1
synth_ai/environments/examples/red/environment.py +18 -15
synth_ai/environments/examples/red/taskset.py +5 -3
synth_ai/environments/examples/sokoban/engine.py +16 -13
synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +3 -2
synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +2 -1
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +1 -1
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +7 -5
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +1 -1
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +2 -1
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +5 -4
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +3 -2
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +2 -1
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +5 -4
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +1 -1
synth_ai/environments/examples/sokoban/environment.py +15 -14
synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +5 -3
synth_ai/environments/examples/sokoban/puzzle_loader.py +3 -2
synth_ai/environments/examples/sokoban/taskset.py +13 -10
synth_ai/environments/examples/tictactoe/engine.py +6 -6
synth_ai/environments/examples/tictactoe/environment.py +8 -7
synth_ai/environments/examples/tictactoe/taskset.py +6 -5
synth_ai/environments/examples/verilog/engine.py +4 -3
synth_ai/environments/examples/verilog/environment.py +11 -10
synth_ai/environments/examples/verilog/taskset.py +14 -12
synth_ai/environments/examples/wordle/__init__.py +29 -0
synth_ai/environments/examples/wordle/engine.py +398 -0
synth_ai/environments/examples/wordle/environment.py +159 -0
synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +75 -0
synth_ai/environments/examples/wordle/taskset.py +230 -0
synth_ai/environments/reproducibility/core.py +1 -1
synth_ai/environments/reproducibility/tree.py +21 -21
synth_ai/environments/service/app.py +11 -2
synth_ai/environments/service/core_routes.py +137 -105
synth_ai/environments/service/external_registry.py +1 -2
synth_ai/environments/service/registry.py +1 -1
synth_ai/environments/stateful/core.py +1 -2
synth_ai/environments/stateful/engine.py +1 -1
synth_ai/environments/tasks/api.py +4 -4
synth_ai/environments/tasks/core.py +14 -12
synth_ai/environments/tasks/filters.py +6 -4
synth_ai/environments/tasks/utils.py +13 -11
synth_ai/evals/base.py +2 -3
synth_ai/experimental/synth_oss.py +4 -4
synth_ai/learning/gateway.py +1 -3
synth_ai/learning/prompts/banking77_injection_eval.py +168 -0
synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +213 -0
synth_ai/learning/prompts/mipro.py +282 -1
synth_ai/learning/prompts/random_search.py +246 -0
synth_ai/learning/prompts/run_mipro_banking77.py +172 -0
synth_ai/learning/prompts/run_random_search_banking77.py +324 -0
synth_ai/lm/__init__.py +5 -5
synth_ai/lm/caching/ephemeral.py +9 -9
synth_ai/lm/caching/handler.py +20 -20
synth_ai/lm/caching/persistent.py +10 -10
synth_ai/lm/config.py +3 -3
synth_ai/lm/constants.py +7 -7
synth_ai/lm/core/all.py +17 -3
synth_ai/lm/core/exceptions.py +0 -2
synth_ai/lm/core/main.py +26 -41
synth_ai/lm/core/main_v3.py +20 -10
synth_ai/lm/core/vendor_clients.py +18 -17
synth_ai/lm/injection.py +80 -0
synth_ai/lm/overrides.py +206 -0
synth_ai/lm/provider_support/__init__.py +1 -1
synth_ai/lm/provider_support/anthropic.py +51 -24
synth_ai/lm/provider_support/openai.py +51 -22
synth_ai/lm/structured_outputs/handler.py +34 -32
synth_ai/lm/structured_outputs/inject.py +24 -27
synth_ai/lm/structured_outputs/rehabilitate.py +19 -15
synth_ai/lm/tools/base.py +17 -16
synth_ai/lm/unified_interface.py +17 -18
synth_ai/lm/vendors/base.py +20 -18
synth_ai/lm/vendors/core/anthropic_api.py +50 -25
synth_ai/lm/vendors/core/gemini_api.py +31 -36
synth_ai/lm/vendors/core/mistral_api.py +19 -19
synth_ai/lm/vendors/core/openai_api.py +11 -10
synth_ai/lm/vendors/openai_standard.py +144 -88
synth_ai/lm/vendors/openai_standard_responses.py +74 -61
synth_ai/lm/vendors/retries.py +9 -1
synth_ai/lm/vendors/supported/custom_endpoint.py +26 -26
synth_ai/lm/vendors/supported/deepseek.py +10 -10
synth_ai/lm/vendors/supported/grok.py +8 -8
synth_ai/lm/vendors/supported/ollama.py +2 -1
synth_ai/lm/vendors/supported/openrouter.py +11 -9
synth_ai/lm/vendors/synth_client.py +69 -63
synth_ai/lm/warmup.py +8 -7
synth_ai/tracing/__init__.py +22 -10
synth_ai/tracing_v1/__init__.py +22 -20
synth_ai/tracing_v3/__init__.py +7 -7
synth_ai/tracing_v3/abstractions.py +56 -52
synth_ai/tracing_v3/config.py +4 -2
synth_ai/tracing_v3/db_config.py +6 -8
synth_ai/tracing_v3/decorators.py +29 -30
synth_ai/tracing_v3/examples/basic_usage.py +12 -12
synth_ai/tracing_v3/hooks.py +21 -21
synth_ai/tracing_v3/llm_call_record_helpers.py +85 -98
synth_ai/tracing_v3/lm_call_record_abstractions.py +2 -4
synth_ai/tracing_v3/migration_helper.py +3 -5
synth_ai/tracing_v3/replica_sync.py +30 -32
synth_ai/tracing_v3/session_tracer.py +35 -29
synth_ai/tracing_v3/storage/__init__.py +1 -1
synth_ai/tracing_v3/storage/base.py +8 -7
synth_ai/tracing_v3/storage/config.py +4 -4
synth_ai/tracing_v3/storage/factory.py +4 -4
synth_ai/tracing_v3/storage/utils.py +9 -9
synth_ai/tracing_v3/turso/__init__.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +9 -9
synth_ai/tracing_v3/turso/manager.py +60 -48
synth_ai/tracing_v3/turso/models.py +24 -19
synth_ai/tracing_v3/utils.py +5 -5
synth_ai/tui/__main__.py +1 -1
synth_ai/tui/cli/query_experiments.py +2 -3
synth_ai/tui/cli/query_experiments_v3.py +2 -3
synth_ai/tui/dashboard.py +97 -86
synth_ai/v0/tracing/abstractions.py +28 -28
synth_ai/v0/tracing/base_client.py +9 -9
synth_ai/v0/tracing/client_manager.py +7 -7
synth_ai/v0/tracing/config.py +7 -7
synth_ai/v0/tracing/context.py +6 -6
synth_ai/v0/tracing/decorators.py +6 -5
synth_ai/v0/tracing/events/manage.py +1 -1
synth_ai/v0/tracing/events/store.py +5 -4
synth_ai/v0/tracing/immediate_client.py +4 -5
synth_ai/v0/tracing/local.py +3 -3
synth_ai/v0/tracing/log_client_base.py +4 -5
synth_ai/v0/tracing/retry_queue.py +5 -6
synth_ai/v0/tracing/trackers.py +25 -25
synth_ai/v0/tracing/upload.py +6 -0
synth_ai/v0/tracing_v1/__init__.py +1 -1
synth_ai/v0/tracing_v1/abstractions.py +28 -28
synth_ai/v0/tracing_v1/base_client.py +9 -9
synth_ai/v0/tracing_v1/client_manager.py +7 -7
synth_ai/v0/tracing_v1/config.py +7 -7
synth_ai/v0/tracing_v1/context.py +6 -6
synth_ai/v0/tracing_v1/decorators.py +7 -6
synth_ai/v0/tracing_v1/events/manage.py +1 -1
synth_ai/v0/tracing_v1/events/store.py +5 -4
synth_ai/v0/tracing_v1/immediate_client.py +4 -5
synth_ai/v0/tracing_v1/local.py +3 -3
synth_ai/v0/tracing_v1/log_client_base.py +4 -5
synth_ai/v0/tracing_v1/retry_queue.py +5 -6
synth_ai/v0/tracing_v1/trackers.py +25 -25
synth_ai/v0/tracing_v1/upload.py +25 -24
synth_ai/zyk/__init__.py +1 -0
{synth_ai-0.2.4.dev5.dist-info → synth_ai-0.2.4.dev7.dist-info}/METADATA +2 -11
synth_ai-0.2.4.dev7.dist-info/RECORD +299 -0
synth_ai-0.2.4.dev5.dist-info/RECORD +0 -287
{synth_ai-0.2.4.dev5.dist-info → synth_ai-0.2.4.dev7.dist-info}/WHEEL +0 -0
{synth_ai-0.2.4.dev5.dist-info → synth_ai-0.2.4.dev7.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.4.dev5.dist-info → synth_ai-0.2.4.dev7.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.4.dev5.dist-info → synth_ai-0.2.4.dev7.dist-info}/top_level.txt +0 -0

synth_ai/learning/prompts/run_random_search_banking77.py ADDED Viewed

@@ -0,0 +1,324 @@
+"""
+Example: Random Search optimizer on Banking77 using Groq gpt-oss-20b.
+Requires:
+- .env with GROQ_API_KEY
+- datasets (`uv add datasets` if needed)
+Run:
+- uv run -q python -m synth_ai.learning.prompts.run_random_search_banking77
+"""
+from __future__ import annotations
+import asyncio
+import json
+import os
+import random
+import time
+from collections.abc import Sequence
+from dataclasses import dataclass, replace
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+from datasets import load_dataset
+from dotenv import load_dotenv
+from synth_ai.learning.prompts.random_search import random_search_compile
+from synth_ai.lm.core.main_v3 import LM, build_messages
+from tqdm import tqdm
+def choose_label(pred: str, label_names: list[str]) -> str:
+    norm = (pred or "").strip().lower()
+    d = {ln.lower(): ln for ln in label_names}
+    if norm in d:
+        return d[norm]
+    def score(cand: str) -> int:
+        c = cand.lower()
+        return sum(1 for w in c.split() if w in norm)
+    return max(label_names, key=score)
+def accuracy(pred: str, gold: str, labels: list[str]) -> float:
+    return 1.0 if choose_label(pred, labels) == gold else 0.0
+@dataclass
+class StudentProgram:
+    lm: LM
+    label_names: list[str]
+    instruction: str
+    demos: list[tuple[str, str]]
+    def reset_copy(self):
+        return replace(self, instruction=self.instruction, demos=list(self.demos))
+    def deepcopy(self):
+        return replace(self, instruction=str(self.instruction), demos=list(self.demos))
+    def with_demos(self, demos: list[tuple[str, str]]):
+        return replace(self, demos=list(demos))
+    def run(self, x: str) -> str:
+        # Build a prompt with optional demos
+        examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in self.demos)
+        sys = self.instruction or "You are an intent classifier for Banking77."
+        user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
+        messages = build_messages(sys, user, images_bytes=None, model_name=self.lm.model)
+        # Call LM synchronously via asyncio
+        async def _call():
+            resp = await self.lm.respond_async(messages=messages)
+            return (resp.raw_response or "").strip()
+        return asyncio.run(_call())
+    async def _apredict(self, x: str):
+        examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in self.demos)
+        sys = self.instruction or "You are an intent classifier for Banking77."
+        user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
+        messages = build_messages(sys, user, images_bytes=None, model_name=self.lm.model)
+        resp = await self.lm.respond_async(messages=messages)
+        return (resp.raw_response or "").strip(), (resp.usage or {})
+def main():
+    load_dotenv()
+    random.seed(0)
+    model = os.getenv("MODEL", "openai/gpt-oss-20b")
+    vendor = os.getenv("VENDOR", "groq")
+    lm = LM(model=model, vendor=vendor, temperature=0.0)
+    print("Loading Banking77 dataset (train/dev split of test for demo)...")
+    ds = load_dataset("banking77")
+    label_names: list[str] = ds["test"].features["label"].names  # type: ignore
+    # Create small train/val from the test split for speed
+    all_items = [(r["text"], label_names[int(r["label"])]) for r in ds["test"]]
+    random.shuffle(all_items)
+    trainset: Sequence[tuple[str, str]] = all_items[:40]
+    valset: Sequence[tuple[str, str]] = all_items[40:60]  # 20 examples
+    student = StudentProgram(
+        lm=lm,
+        label_names=label_names,
+        instruction="You are an intent classifier for the Banking77 dataset. Return exactly one label.",
+        demos=[],
+    )
+    def metric(yhat: str, y: str) -> float:
+        return accuracy(yhat, y, label_names)
+    total_candidates = 3 + 3  # zero-shot, labeled few-shot, bootstrapped + 3 random seeds
+    print(
+        f"Running Random Search optimizer ({total_candidates} candidates, parallel eval of 20 questions)..."
+    )
+    def eval_parallel(program: StudentProgram, dataset: Sequence[tuple[str, str]], metric_fn):
+        async def _run():
+            xs = [x for x, _ in dataset]
+            ys = [y for _, y in dataset]
+            preds: list[Optional[str]] = [None] * len(xs)
+            sem = asyncio.Semaphore(int(os.getenv("CONCURRENCY", "5")))
+            async def worker(i: int, x: str, y: str):
+                import time
+                t_start = time.monotonic()
+                try:
+                    async with sem:
+                        pred, usage = await asyncio.wait_for(
+                            program._apredict(x),
+                            timeout=float(os.getenv("TIMEOUT_S", "45")),
+                        )
+                        t_end = time.monotonic()
+                        return i, y, pred, t_start, t_end, usage or {}
+                except asyncio.CancelledError:
+                    # Respect cancellation but return a placeholder record so scheduler can proceed
+                    t_end = time.monotonic()
+                    return i, y, "", t_start, t_end, {}
+                except Exception:
+                    t_end = time.monotonic()
+                    return i, y, "", t_start, t_end, {}
+            tasks = [asyncio.create_task(worker(i, x, y)) for i, (x, y) in enumerate(zip(xs, ys, strict=False))]
+            correct_sum = 0.0
+            processed = 0
+            import statistics
+            import time
+            durations: list[float] = []
+            in_tok_sum = 0
+            out_tok_sum = 0
+            in_tok_count = 0
+            out_tok_count = 0
+            details: list[dict[str, Any]] = []
+            t_batch_start = time.monotonic()
+            deadline = float(os.getenv("BATCH_DEADLINE_S", "20"))
+            with tqdm(total=len(tasks), desc="Rollouts", leave=False) as pbar:
+                pending = set(tasks)
+                # Process completions until all done or deadline reached
+                while pending:
+                    elapsed = time.monotonic() - t_batch_start
+                    remaining = max(0.0, deadline - elapsed)
+                    if remaining <= 0.0:
+                        # Cancel any remaining
+                        for t in pending:
+                            t.cancel()
+                        done, _ = await asyncio.wait(pending, return_when=asyncio.ALL_COMPLETED)
+                        # Record canceled as zeros
+                        for task in done:
+                            try:
+                                i, y_true, pred, t_start, t_end, usage = task.result()
+                            except Exception:
+                                # Unknown index: we can't recover; skip as it's canceled before start
+                                continue
+                            # Already processed ones shouldn't be in pending; skip
+                        break
+                    # Wait for at least one completion within remaining time (polling granularity <= 1s)
+                    timeout = min(1.0, remaining)
+                    done, pending = await asyncio.wait(
+                        pending, timeout=timeout, return_when=asyncio.FIRST_COMPLETED
+                    )
+                    import contextlib
+                    for task in done:
+                        try:
+                            i, y_true, pred, t_start, t_end, usage = task.result()
+                        except BaseException:
+                            # Treat as failure/cancelled
+                            continue
+                        durations.append(max(0.0, t_end - t_start))
+                        preds[i] = pred
+                        processed += 1
+                        with contextlib.suppress(Exception):
+                            correct_sum += float(metric_fn(pred, y_true))
+                        with contextlib.suppress(Exception):
+                            pt = usage.get("prompt_tokens") or usage.get("input_tokens")
+                            ct = usage.get("completion_tokens") or usage.get("output_tokens")
+                            if isinstance(pt, (int, float)):
+                                in_tok_sum += int(pt)
+                                in_tok_count += 1
+                            if isinstance(ct, (int, float)):
+                                out_tok_sum += int(ct)
+                                out_tok_count += 1
+                        details.append(
+                            {
+                                "index": i,
+                                "seconds": max(0.0, t_end - t_start),
+                                "score": float(metric_fn(pred, y_true)),
+                                "usage": {
+                                    "prompt_tokens": usage.get("prompt_tokens")
+                                    or usage.get("input_tokens"),
+                                    "completion_tokens": usage.get("completion_tokens")
+                                    or usage.get("output_tokens"),
+                                },
+                            }
+                        )
+                        pbar.update(1)
+                        med = statistics.median(durations) if durations else 0.0
+                        mx = max(durations) if durations else 0.0
+                        avg_in = (in_tok_sum / in_tok_count) if in_tok_count else 0.0
+                        avg_out = (out_tok_sum / out_tok_count) if out_tok_count else 0.0
+                        pbar.set_postfix(
+                            {
+                                "acc": f"{(correct_sum / processed):.2f}",
+                                "done": f"{processed}/{len(tasks)}",
+                                "med_s": f"{med:.1f}",
+                                "max_s": f"{mx:.1f}",
+                                "tin": f"{avg_in:.1f}",
+                                "tout": f"{avg_out:.1f}",
+                            }
+                        )
+            # Compute score only from completed/successful rollouts (drop timeouts/cancelled)
+            subs = [float(d.get("score", 0.0)) for d in details]
+            result = SimpleNamespace(score=(sum(subs) / max(1, len(subs))), subscores=subs)
+            result.details = details
+            result.mean_in = (in_tok_sum / in_tok_count) if in_tok_count else 0.0
+            result.mean_out = (out_tok_sum / out_tok_count) if out_tok_count else 0.0
+            return result
+        return asyncio.run(_run())
+    pbar = tqdm(total=total_candidates, desc="Candidates")
+    candidate_eval_details: dict[int, Any] = {}
+    def on_cand(idx: int, score: float, res, intervention):
+        pbar.update(1)
+        pbar.set_postfix({"score": f"{score:.2f}"})
+        # store per-instance details (for apples-to-apples)
+        import contextlib
+        with contextlib.suppress(Exception):
+            candidate_eval_details[idx] = {
+                "score": score,
+                "mean_in": getattr(res, "mean_in", None),
+                "mean_out": getattr(res, "mean_out", None),
+                "instances": getattr(res, "details", None),
+            }
+        # visible summary line per candidate
+        kind = (
+            intervention.get("kind", "candidate") if isinstance(intervention, dict) else "candidate"
+        )
+        label = intervention.get("label") if isinstance(intervention, dict) else None
+        seed = intervention.get("seed") if isinstance(intervention, dict) else None
+        processed = len(getattr(res, "details", []) or [])
+        from tqdm import tqdm as _tqdm
+        _tqdm.write(
+            f"Candidate {idx}/{total_candidates} [{kind}{'' if label is None else f', label={label}'}{'' if seed is None else f', seed={seed}'}]: "
+            f"score={score:.2f} | mean tin/tout={getattr(res, 'mean_in', 0):.1f}/{getattr(res, 'mean_out', 0):.1f} | N={processed}"
+        )
+    best, records = random_search_compile(
+        student=student,
+        trainset=trainset,
+        valset=valset,
+        metric=metric,
+        evaluate_fn=eval_parallel,
+        max_bootstrapped_demos=0,
+        max_labeled_demos=4,
+        max_rounds=2,
+        num_candidate_programs=3,
+        on_candidate_evaluated=on_cand,
+    )
+    pbar.close()
+    # Evaluate best on holdout (valset) with parallel rollouts
+    print("Evaluating best program on val (parallel rollouts)...")
+    best_res = eval_parallel(best, valset, metric)
+    correct = int(round(best_res.score * max(1, len(best_res.subscores))))
+    print(
+        "Best program accuracy on val: "
+        f"{correct}/{len(valset)} ({best_res.score:.2%}) "
+        f"| mean tokens in/out: {getattr(best_res, 'mean_in', 0):.1f}/{getattr(best_res, 'mean_out', 0):.1f}"
+    )
+    # Save per-candidate scores and interventions
+    out = {
+        "context": {
+            "model": model,
+            "vendor": vendor,
+            "train_size": len(trainset),
+            "val_size": len(valset),
+        },
+        "candidates": records,
+        "candidate_eval_details": candidate_eval_details,
+        "best_eval_details": {
+            "score": best_res.score,
+            "mean_in": getattr(best_res, "mean_in", None),
+            "mean_out": getattr(best_res, "mean_out", None),
+            "instances": getattr(best_res, "details", None),
+        },
+    }
+    out_dir = Path(__file__).parent
+    fname = str(out_dir / f"random_search_banking77_{int(time.time())}.json")
+    with open(fname, "w") as f:
+        json.dump(out, f, indent=2)
+    print(f"Saved candidate records to {fname}")
+if __name__ == "__main__":
+    main()

synth_ai/lm/__init__.py CHANGED Viewed

@@ -4,24 +4,24 @@ Synth AI Language Model Interface.
 Provides a unified interface for multiple LLM providers including OpenAI and Synth.
 """
-from .config import SynthConfig, OpenAIConfig
-from .warmup import warmup_synth_model, get_warmup_status
+from .config import OpenAIConfig, SynthConfig
+from .core.main_v3 import LM
 from .unified_interface import (
-    UnifiedLMProvider,
     OpenAIProvider,
     SynthProvider,
     UnifiedLMClient,
+    UnifiedLMProvider,
     create_provider,
 )
 from .vendors.synth_client import (
     AsyncSynthClient,
     SyncSynthClient,
     create_async_client,
-    create_sync_client,
     create_chat_completion_async,
     create_chat_completion_sync,
+    create_sync_client,
 )
-from .core.main_v3 import LM
+from .warmup import get_warmup_status, warmup_synth_model
 __all__ = [
     # Configuration

synth_ai/lm/caching/ephemeral.py CHANGED Viewed

@@ -7,7 +7,6 @@ of the application run, useful for avoiding redundant API calls within a session
 import os
 from dataclasses import dataclass
-from typing import Optional, Union
 from diskcache import Cache
 from pydantic import BaseModel
@@ -20,24 +19,25 @@ from synth_ai.lm.vendors.base import BaseLMResponse
 class EphemeralCache:
     """
     Ephemeral cache implementation using diskcache.
     This cache stores LM responses temporarily on disk with a size limit.
     The cache is cleared when the application restarts.
     """
     def __init__(self, fast_cache_dir: str = ".cache/ephemeral_cache"):
         os.makedirs(fast_cache_dir, exist_ok=True)
         self.fast_cache = Cache(fast_cache_dir, size_limit=DISKCACHE_SIZE_LIMIT)
     def hit_cache(
-        self, key: str, response_model: Optional[BaseModel] = None
-    ) -> Optional[BaseLMResponse]:
+        self, key: str, response_model: BaseModel | None = None
+    ) -> BaseLMResponse | None:
         """
         Check if a response exists in cache for the given key.
         Args:
             key: Cache key to look up
             response_model: Optional Pydantic model to reconstruct structured output
         Returns:
             BaseLMResponse if found in cache, None otherwise
         """
@@ -65,14 +65,14 @@ class EphemeralCache:
             tool_calls=tool_calls,
         )
-    def add_to_cache(self, key: str, response: Union[BaseLMResponse, str]) -> None:
+    def add_to_cache(self, key: str, response: BaseLMResponse | str) -> None:
         """
         Add a response to the cache.
         Args:
             key: Cache key to store under
             response: Either a BaseLMResponse object or raw string response
         Raises:
             ValueError: If response type is not supported
         """

synth_ai/lm/caching/handler.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import hashlib
-from typing import Any, Dict, List, Optional, Type
+from typing import Any
 from pydantic import BaseModel
@@ -17,11 +17,11 @@ logger = logging.getLogger(__name__)
 def map_params_to_key(
-    messages: List[Dict],
+    messages: list[dict],
     model: str,
     temperature: float,
-    response_model: Optional[Type[BaseModel]],
-    tools: Optional[List[BaseTool]] = None,
+    response_model: type[BaseModel] | None,
+    tools: list[BaseTool] | None = None,
     reasoning_effort: str = "low",
 ) -> str:
     if any(m is None for m in messages):
@@ -76,37 +76,37 @@ class CacheHandler:
         self.use_persistent_store = use_persistent_store
         self.use_ephemeral_store = use_ephemeral_store
-    def _validate_messages(self, messages: List[Dict[str, Any]]) -> None:
+    def _validate_messages(self, messages: list[dict[str, Any]]) -> None:
         """Validate that messages are in the correct format."""
-        assert all([type(msg["content"]) == str for msg in messages]), (
+        assert all(isinstance(msg["content"], str) for msg in messages), (
             "All message contents must be strings"
         )
     def hit_managed_cache(
         self,
         model: str,
-        messages: List[Dict[str, Any]],
-        lm_config: Dict[str, Any],
-        tools: Optional[List[BaseTool]] = None,
-    ) -> Optional[BaseLMResponse]:
+        messages: list[dict[str, Any]],
+        lm_config: dict[str, Any],
+        tools: list[BaseTool] | None = None,
+    ) -> BaseLMResponse | None:
         """Hit the cache with the given key."""
         self._validate_messages(messages)
-        assert type(lm_config) == dict, "lm_config must be a dictionary"
+        assert isinstance(lm_config, dict), "lm_config must be a dictionary"
         key = map_params_to_key(
             messages,
             model,
             lm_config.get("temperature", 0.0),
-            lm_config.get("response_model", None),
+            lm_config.get("response_model"),
             tools,
             lm_config.get("reasoning_effort", "low"),
         )
         if self.use_persistent_store:
             return persistent_cache.hit_cache(
-                key=key, response_model=lm_config.get("response_model", None)
+                key=key, response_model=lm_config.get("response_model")
             )
         elif self.use_ephemeral_store:
             return ephemeral_cache.hit_cache(
-                key=key, response_model=lm_config.get("response_model", None)
+                key=key, response_model=lm_config.get("response_model")
             )
         else:
             return None
@@ -114,20 +114,20 @@ class CacheHandler:
     def add_to_managed_cache(
         self,
         model: str,
-        messages: List[Dict[str, Any]],
-        lm_config: Dict[str, Any],
+        messages: list[dict[str, Any]],
+        lm_config: dict[str, Any],
         output: BaseLMResponse,
-        tools: Optional[List[BaseTool]] = None,
+        tools: list[BaseTool] | None = None,
     ) -> None:
         """Add the given output to the cache."""
         self._validate_messages(messages)
-        assert type(output) == BaseLMResponse, "output must be a BaseLMResponse"
-        assert type(lm_config) == dict, "lm_config must be a dictionary"
+        assert isinstance(output, BaseLMResponse), "output must be a BaseLMResponse"
+        assert isinstance(lm_config, dict), "lm_config must be a dictionary"
         key = map_params_to_key(
             messages,
             model,
             lm_config.get("temperature", 0.0),
-            lm_config.get("response_model", None),
+            lm_config.get("response_model"),
             tools,
             lm_config.get("reasoning_effort", "low"),
         )

synth_ai/lm/caching/persistent.py CHANGED Viewed

@@ -9,7 +9,6 @@ import json
 import os
 import sqlite3
 from dataclasses import dataclass
-from typing import Optional, Type, Union
 from pydantic import BaseModel
@@ -20,10 +19,11 @@ from synth_ai.lm.vendors.base import BaseLMResponse
 class PersistentCache:
     """
     Persistent cache implementation using SQLite.
     This cache stores LM responses in a SQLite database that persists
     across application restarts.
     """
     def __init__(self, db_path: str = ".cache/persistent_cache.db"):
         os.makedirs(os.path.dirname(db_path), exist_ok=True)
         self.conn = sqlite3.connect(db_path)
@@ -33,15 +33,15 @@ class PersistentCache:
         self.conn.commit()
     def hit_cache(
-        self, key: str, response_model: Optional[Type[BaseModel]] = None
-    ) -> Optional[BaseLMResponse]:
+        self, key: str, response_model: type[BaseModel] | None = None
+    ) -> BaseLMResponse | None:
         """
         Check if a response exists in cache for the given key.
         Args:
             key: Cache key to look up
             response_model: Optional Pydantic model class to reconstruct structured output
         Returns:
             BaseLMResponse if found in cache, None otherwise
         """
@@ -72,17 +72,17 @@ class PersistentCache:
             tool_calls=tool_calls,
         )
-    def add_to_cache(self, key: str, response: Union[BaseLMResponse, str]) -> None:
+    def add_to_cache(self, key: str, response: BaseLMResponse | str) -> None:
         """
         Add a response to the cache.
         Args:
             key: Cache key to store under
             response: Either a BaseLMResponse object or raw string response
         Raises:
             ValueError: If response type is not supported
         Note:
             Uses INSERT OR REPLACE to update existing cache entries.
         """

synth_ai/lm/config.py CHANGED Viewed

@@ -4,8 +4,8 @@ Loads sensitive configuration from environment variables.
 """
 import os
-from typing import Optional
 from dataclasses import dataclass
 from dotenv import load_dotenv
 # Load environment variables from .env file
@@ -15,10 +15,10 @@ load_dotenv()
 def should_use_cache() -> bool:
     """
     Check if caching should be enabled based on environment variable.
     Returns:
         bool: True if caching is enabled (default), False if explicitly disabled.
     Note:
         Caching is controlled by the USE_ZYK_CACHE environment variable.
         Set to "false", "0", or "no" to disable caching.

synth_ai/lm/constants.py CHANGED Viewed

@@ -13,20 +13,20 @@ GEMINI_REASONING_MODELS = ["gemini-2.5-flash", "gemini-2.5-pro"]
 # Gemini models that support thinking
 GEMINI_REASONING_MODELS = ["gemini-2.5-flash", "gemini-2.5-pro"]
 GEMINI_THINKING_BUDGETS = {
-    "high": 10000,    # High thinking budget for complex reasoning
-    "medium": 5000,   # Medium thinking budget for standard reasoning
-    "low": 2500,      # Low thinking budget for simple reasoning
+    "high": 10000,  # High thinking budget for complex reasoning
+    "medium": 5000,  # Medium thinking budget for standard reasoning
+    "low": 2500,  # Low thinking budget for simple reasoning
 }
 # Anthropic Sonnet 3.7 budgets
 SONNET_37_BUDGETS = {
-    "high": 8192,     # High budget for complex tasks
-    "medium": 4096,   # Medium budget for standard tasks
-    "low": 2048,      # Low budget for simple tasks
+    "high": 8192,  # High budget for complex tasks
+    "medium": 4096,  # Medium budget for standard tasks
+    "low": 2048,  # Low budget for simple tasks
 }
 # Combined list of all reasoning models
 REASONING_MODELS = OPENAI_REASONING_MODELS + CLAUDE_REASONING_MODELS + GEMINI_REASONING_MODELS
 # Special base temperatures for reasoning models (all set to 1.0)
-SPECIAL_BASE_TEMPS = {model: 1 for model in REASONING_MODELS}
+SPECIAL_BASE_TEMPS = dict.fromkeys(REASONING_MODELS, 1)

synth_ai/lm/core/all.py CHANGED Viewed

@@ -4,12 +4,12 @@ from synth_ai.lm.vendors.core.openai_api import (
     OpenAIPrivate,
     OpenAIStructuredOutputClient,
 )
+from synth_ai.lm.vendors.supported.custom_endpoint import CustomEndpointAPI
 from synth_ai.lm.vendors.supported.deepseek import DeepSeekAPI
-from synth_ai.lm.vendors.supported.together import TogetherAPI
-from synth_ai.lm.vendors.supported.groq import GroqAPI
 from synth_ai.lm.vendors.supported.grok import GrokAPI
-from synth_ai.lm.vendors.supported.custom_endpoint import CustomEndpointAPI
+from synth_ai.lm.vendors.supported.groq import GroqAPI
 from synth_ai.lm.vendors.supported.openrouter import OpenRouterAPI
+from synth_ai.lm.vendors.supported.together import TogetherAPI
 class OpenAIClient(OpenAIPrivate):
@@ -57,3 +57,17 @@ class CustomEndpointClient(CustomEndpointAPI):
 class OpenRouterClient(OpenRouterAPI):
     def __init__(self):
         super().__init__()
+__all__ = [
+    "OpenAIClient",
+    "AnthropicClient",
+    "GeminiClient",
+    "DeepSeekClient",
+    "TogetherClient",
+    "GroqClient",
+    "GrokClient",
+    "CustomEndpointClient",
+    "OpenRouterClient",
+    "OpenAIStructuredOutputClient",
+]

synth_ai/lm/core/exceptions.py CHANGED Viewed

@@ -1,5 +1,3 @@
-from abc import ABC, abstractmethod
-from typing import Any, Callable, Dict, List, Literal, Optional, Union
 class StructuredOutputCoercionFailureException(Exception):

synth-ai 0.2.4.dev5__py3-none-any.whl → 0.2.4.dev7__py3-none-any.whl

synth-ai 0.2.4.dev5__py3-none-any.whl → 0.2.4.dev7__py3-none-any.whl