synth-ai 0.2.4.dev6__py3-none-any.whl → 0.2.4.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229)
  1. synth_ai/__init__.py +18 -9
  2. synth_ai/cli/__init__.py +10 -5
  3. synth_ai/cli/balance.py +22 -17
  4. synth_ai/cli/calc.py +2 -3
  5. synth_ai/cli/demo.py +3 -5
  6. synth_ai/cli/legacy_root_backup.py +58 -32
  7. synth_ai/cli/man.py +22 -19
  8. synth_ai/cli/recent.py +9 -8
  9. synth_ai/cli/root.py +58 -13
  10. synth_ai/cli/status.py +13 -6
  11. synth_ai/cli/traces.py +45 -21
  12. synth_ai/cli/watch.py +40 -37
  13. synth_ai/config/base_url.py +1 -3
  14. synth_ai/core/experiment.py +1 -2
  15. synth_ai/environments/__init__.py +2 -6
  16. synth_ai/environments/environment/artifacts/base.py +3 -1
  17. synth_ai/environments/environment/db/sqlite.py +1 -1
  18. synth_ai/environments/environment/registry.py +19 -20
  19. synth_ai/environments/environment/resources/sqlite.py +2 -3
  20. synth_ai/environments/environment/rewards/core.py +3 -2
  21. synth_ai/environments/environment/tools/__init__.py +6 -4
  22. synth_ai/environments/examples/crafter_classic/__init__.py +1 -1
  23. synth_ai/environments/examples/crafter_classic/engine.py +13 -13
  24. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +1 -0
  25. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +2 -1
  26. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +2 -1
  27. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +3 -2
  28. synth_ai/environments/examples/crafter_classic/environment.py +16 -15
  29. synth_ai/environments/examples/crafter_classic/taskset.py +2 -2
  30. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +2 -3
  31. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +2 -1
  32. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +2 -2
  33. synth_ai/environments/examples/crafter_custom/crafter/config.py +2 -2
  34. synth_ai/environments/examples/crafter_custom/crafter/env.py +1 -5
  35. synth_ai/environments/examples/crafter_custom/crafter/objects.py +1 -2
  36. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +1 -2
  37. synth_ai/environments/examples/crafter_custom/dataset_builder.py +5 -5
  38. synth_ai/environments/examples/crafter_custom/environment.py +13 -13
  39. synth_ai/environments/examples/crafter_custom/run_dataset.py +5 -5
  40. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +2 -2
  41. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +5 -4
  42. synth_ai/environments/examples/enron/art_helpers/types_enron.py +2 -1
  43. synth_ai/environments/examples/enron/engine.py +18 -14
  44. synth_ai/environments/examples/enron/environment.py +12 -11
  45. synth_ai/environments/examples/enron/taskset.py +7 -7
  46. synth_ai/environments/examples/minigrid/__init__.py +6 -6
  47. synth_ai/environments/examples/minigrid/engine.py +6 -6
  48. synth_ai/environments/examples/minigrid/environment.py +6 -6
  49. synth_ai/environments/examples/minigrid/puzzle_loader.py +3 -2
  50. synth_ai/environments/examples/minigrid/taskset.py +13 -13
  51. synth_ai/environments/examples/nethack/achievements.py +1 -1
  52. synth_ai/environments/examples/nethack/engine.py +8 -7
  53. synth_ai/environments/examples/nethack/environment.py +10 -9
  54. synth_ai/environments/examples/nethack/helpers/__init__.py +8 -9
  55. synth_ai/environments/examples/nethack/helpers/action_mapping.py +1 -1
  56. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +2 -1
  57. synth_ai/environments/examples/nethack/helpers/observation_utils.py +1 -1
  58. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +3 -4
  59. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +6 -5
  60. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +5 -5
  61. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +7 -6
  62. synth_ai/environments/examples/nethack/taskset.py +5 -5
  63. synth_ai/environments/examples/red/engine.py +9 -8
  64. synth_ai/environments/examples/red/engine_helpers/reward_components.py +2 -1
  65. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +7 -7
  66. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +2 -1
  67. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +2 -1
  68. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +2 -1
  69. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +2 -1
  70. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +2 -1
  71. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +2 -1
  72. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +2 -1
  73. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +2 -1
  74. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +2 -1
  75. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +2 -1
  76. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +2 -1
  77. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +3 -2
  78. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +2 -1
  79. synth_ai/environments/examples/red/environment.py +18 -15
  80. synth_ai/environments/examples/red/taskset.py +5 -3
  81. synth_ai/environments/examples/sokoban/engine.py +16 -13
  82. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +3 -2
  83. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +2 -1
  84. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +1 -1
  85. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +7 -5
  86. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +1 -1
  87. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +2 -1
  88. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +5 -4
  89. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +3 -2
  90. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +2 -1
  91. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +5 -4
  92. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +1 -1
  93. synth_ai/environments/examples/sokoban/environment.py +15 -14
  94. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +5 -3
  95. synth_ai/environments/examples/sokoban/puzzle_loader.py +3 -2
  96. synth_ai/environments/examples/sokoban/taskset.py +13 -10
  97. synth_ai/environments/examples/tictactoe/engine.py +6 -6
  98. synth_ai/environments/examples/tictactoe/environment.py +8 -7
  99. synth_ai/environments/examples/tictactoe/taskset.py +6 -5
  100. synth_ai/environments/examples/verilog/engine.py +4 -3
  101. synth_ai/environments/examples/verilog/environment.py +11 -10
  102. synth_ai/environments/examples/verilog/taskset.py +14 -12
  103. synth_ai/environments/examples/wordle/__init__.py +5 -5
  104. synth_ai/environments/examples/wordle/engine.py +32 -25
  105. synth_ai/environments/examples/wordle/environment.py +21 -16
  106. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +6 -6
  107. synth_ai/environments/examples/wordle/taskset.py +20 -12
  108. synth_ai/environments/reproducibility/core.py +1 -1
  109. synth_ai/environments/reproducibility/tree.py +21 -21
  110. synth_ai/environments/service/app.py +3 -2
  111. synth_ai/environments/service/core_routes.py +104 -110
  112. synth_ai/environments/service/external_registry.py +1 -2
  113. synth_ai/environments/service/registry.py +1 -1
  114. synth_ai/environments/stateful/core.py +1 -2
  115. synth_ai/environments/stateful/engine.py +1 -1
  116. synth_ai/environments/tasks/api.py +4 -4
  117. synth_ai/environments/tasks/core.py +14 -12
  118. synth_ai/environments/tasks/filters.py +6 -4
  119. synth_ai/environments/tasks/utils.py +13 -11
  120. synth_ai/evals/base.py +2 -3
  121. synth_ai/experimental/synth_oss.py +4 -4
  122. synth_ai/learning/gateway.py +1 -3
  123. synth_ai/learning/prompts/banking77_injection_eval.py +15 -10
  124. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +26 -14
  125. synth_ai/learning/prompts/mipro.py +61 -52
  126. synth_ai/learning/prompts/random_search.py +42 -43
  127. synth_ai/learning/prompts/run_mipro_banking77.py +32 -20
  128. synth_ai/learning/prompts/run_random_search_banking77.py +71 -52
  129. synth_ai/lm/__init__.py +5 -5
  130. synth_ai/lm/caching/ephemeral.py +9 -9
  131. synth_ai/lm/caching/handler.py +20 -20
  132. synth_ai/lm/caching/persistent.py +10 -10
  133. synth_ai/lm/config.py +3 -3
  134. synth_ai/lm/constants.py +7 -7
  135. synth_ai/lm/core/all.py +17 -3
  136. synth_ai/lm/core/exceptions.py +0 -2
  137. synth_ai/lm/core/main.py +26 -41
  138. synth_ai/lm/core/main_v3.py +20 -10
  139. synth_ai/lm/core/vendor_clients.py +18 -17
  140. synth_ai/lm/injection.py +7 -8
  141. synth_ai/lm/overrides.py +21 -19
  142. synth_ai/lm/provider_support/__init__.py +1 -1
  143. synth_ai/lm/provider_support/anthropic.py +15 -15
  144. synth_ai/lm/provider_support/openai.py +23 -21
  145. synth_ai/lm/structured_outputs/handler.py +34 -32
  146. synth_ai/lm/structured_outputs/inject.py +24 -27
  147. synth_ai/lm/structured_outputs/rehabilitate.py +19 -15
  148. synth_ai/lm/tools/base.py +17 -16
  149. synth_ai/lm/unified_interface.py +17 -18
  150. synth_ai/lm/vendors/base.py +20 -18
  151. synth_ai/lm/vendors/core/anthropic_api.py +36 -27
  152. synth_ai/lm/vendors/core/gemini_api.py +31 -36
  153. synth_ai/lm/vendors/core/mistral_api.py +19 -19
  154. synth_ai/lm/vendors/core/openai_api.py +11 -10
  155. synth_ai/lm/vendors/openai_standard.py +113 -87
  156. synth_ai/lm/vendors/openai_standard_responses.py +74 -61
  157. synth_ai/lm/vendors/retries.py +9 -1
  158. synth_ai/lm/vendors/supported/custom_endpoint.py +26 -26
  159. synth_ai/lm/vendors/supported/deepseek.py +10 -10
  160. synth_ai/lm/vendors/supported/grok.py +8 -8
  161. synth_ai/lm/vendors/supported/ollama.py +2 -1
  162. synth_ai/lm/vendors/supported/openrouter.py +11 -9
  163. synth_ai/lm/vendors/synth_client.py +69 -63
  164. synth_ai/lm/warmup.py +8 -7
  165. synth_ai/tracing/__init__.py +22 -10
  166. synth_ai/tracing_v1/__init__.py +22 -20
  167. synth_ai/tracing_v3/__init__.py +7 -7
  168. synth_ai/tracing_v3/abstractions.py +56 -52
  169. synth_ai/tracing_v3/config.py +4 -2
  170. synth_ai/tracing_v3/db_config.py +6 -8
  171. synth_ai/tracing_v3/decorators.py +29 -30
  172. synth_ai/tracing_v3/examples/basic_usage.py +12 -12
  173. synth_ai/tracing_v3/hooks.py +21 -21
  174. synth_ai/tracing_v3/llm_call_record_helpers.py +85 -98
  175. synth_ai/tracing_v3/lm_call_record_abstractions.py +2 -4
  176. synth_ai/tracing_v3/migration_helper.py +3 -5
  177. synth_ai/tracing_v3/replica_sync.py +30 -32
  178. synth_ai/tracing_v3/session_tracer.py +35 -29
  179. synth_ai/tracing_v3/storage/__init__.py +1 -1
  180. synth_ai/tracing_v3/storage/base.py +8 -7
  181. synth_ai/tracing_v3/storage/config.py +4 -4
  182. synth_ai/tracing_v3/storage/factory.py +4 -4
  183. synth_ai/tracing_v3/storage/utils.py +9 -9
  184. synth_ai/tracing_v3/turso/__init__.py +3 -3
  185. synth_ai/tracing_v3/turso/daemon.py +9 -9
  186. synth_ai/tracing_v3/turso/manager.py +60 -48
  187. synth_ai/tracing_v3/turso/models.py +24 -19
  188. synth_ai/tracing_v3/utils.py +5 -5
  189. synth_ai/tui/__main__.py +1 -1
  190. synth_ai/tui/cli/query_experiments.py +2 -3
  191. synth_ai/tui/cli/query_experiments_v3.py +2 -3
  192. synth_ai/tui/dashboard.py +97 -86
  193. synth_ai/v0/tracing/abstractions.py +28 -28
  194. synth_ai/v0/tracing/base_client.py +9 -9
  195. synth_ai/v0/tracing/client_manager.py +7 -7
  196. synth_ai/v0/tracing/config.py +7 -7
  197. synth_ai/v0/tracing/context.py +6 -6
  198. synth_ai/v0/tracing/decorators.py +6 -5
  199. synth_ai/v0/tracing/events/manage.py +1 -1
  200. synth_ai/v0/tracing/events/store.py +5 -4
  201. synth_ai/v0/tracing/immediate_client.py +4 -5
  202. synth_ai/v0/tracing/local.py +3 -3
  203. synth_ai/v0/tracing/log_client_base.py +4 -5
  204. synth_ai/v0/tracing/retry_queue.py +5 -6
  205. synth_ai/v0/tracing/trackers.py +25 -25
  206. synth_ai/v0/tracing/upload.py +6 -0
  207. synth_ai/v0/tracing_v1/__init__.py +1 -1
  208. synth_ai/v0/tracing_v1/abstractions.py +28 -28
  209. synth_ai/v0/tracing_v1/base_client.py +9 -9
  210. synth_ai/v0/tracing_v1/client_manager.py +7 -7
  211. synth_ai/v0/tracing_v1/config.py +7 -7
  212. synth_ai/v0/tracing_v1/context.py +6 -6
  213. synth_ai/v0/tracing_v1/decorators.py +7 -6
  214. synth_ai/v0/tracing_v1/events/manage.py +1 -1
  215. synth_ai/v0/tracing_v1/events/store.py +5 -4
  216. synth_ai/v0/tracing_v1/immediate_client.py +4 -5
  217. synth_ai/v0/tracing_v1/local.py +3 -3
  218. synth_ai/v0/tracing_v1/log_client_base.py +4 -5
  219. synth_ai/v0/tracing_v1/retry_queue.py +5 -6
  220. synth_ai/v0/tracing_v1/trackers.py +25 -25
  221. synth_ai/v0/tracing_v1/upload.py +25 -24
  222. synth_ai/zyk/__init__.py +1 -0
  223. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/METADATA +1 -11
  224. synth_ai-0.2.4.dev7.dist-info/RECORD +299 -0
  225. synth_ai-0.2.4.dev6.dist-info/RECORD +0 -299
  226. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/WHEEL +0 -0
  227. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/entry_points.txt +0 -0
  228. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/licenses/LICENSE +0 -0
  229. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/top_level.txt +0 -0
synth_ai/learning/prompts/random_search.py CHANGED
@@ -8,10 +8,11 @@ metric, and this module will explore baselines and bootstrapped few-shot variant
 
 from __future__ import annotations
 
+import contextlib
 import random
+from collections.abc import Callable, Sequence
 from dataclasses import dataclass
-from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
-
+from typing import Any
 
 # ---------------------------
 # Protocol-like expectations (duck-typed)
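Note: the recurring change across these hunks is the move from the typing.List / Dict / Tuple / Optional aliases to built-in generics (PEP 585) and "X | None" unions (PEP 604), with abstract containers imported from collections.abc. A minimal sketch of the style on Python 3.10+, using illustrative names that are not taken from the package:

    from collections.abc import Callable, Sequence

    def evaluate_all(
        items: Sequence[tuple[str, str]], metric: Callable[[str, str], float]
    ) -> list[float]:
        # built-in generics (list, tuple) replace typing.List / typing.Tuple
        return [metric(pred, gold) for pred, gold in items]

    def pick(threshold: float | None = None) -> float:
        # "float | None" replaces typing.Optional[float]
        return threshold if threshold is not None else 0.0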
@@ -25,7 +26,7 @@ class _ProgramLike:
     def deepcopy(self):  # deep copy
         return self
 
-    def with_demos(self, demos: List[Tuple[Any, Any]]):
+    def with_demos(self, demos: list[tuple[Any, Any]]):
         return self
 
     def run(self, x: Any) -> Any:
@@ -40,10 +41,12 @@ class _ProgramLike:
 @dataclass
 class EvalResult:
     score: float
-    subscores: List[float]
+    subscores: list[float]
 
 
-def evaluate(program: _ProgramLike, dataset: Sequence[Tuple[Any, Any]], metric: Callable[[Any, Any], float]) -> EvalResult:
+def evaluate(
+    program: _ProgramLike, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
+) -> EvalResult:
     subs = []
     for x, y in dataset:
         subs.append(metric(program.run(x), y))
@@ -54,7 +57,9 @@ class LabeledFewShot:
     def __init__(self, k: int):
         self.k = k
 
-    def compile(self, student: _ProgramLike, trainset: Sequence[Tuple[Any, Any]], sample: bool = True) -> _ProgramLike:
+    def compile(
+        self, student: _ProgramLike, trainset: Sequence[tuple[Any, Any]], sample: bool = True
+    ) -> _ProgramLike:
         p = getattr(student, "deepcopy", student.reset_copy)()
         demos = list(trainset)
         if sample:
@@ -68,10 +73,10 @@ class BootstrapFewShot:
         self,
         *,
         metric: Callable[[Any, Any], float],
-        metric_threshold: Optional[float] = None,
+        metric_threshold: float | None = None,
         max_bootstrapped_demos: int = 8,
         max_labeled_demos: int = 0,
-        teacher_settings: Optional[Dict[str, Any]] = None,
+        teacher_settings: dict[str, Any] | None = None,
         max_rounds: int = 1,
     ):
         self.metric = metric
@@ -84,18 +89,18 @@ class BootstrapFewShot:
     def compile(
         self,
         student: _ProgramLike,
-        teacher: Optional[_ProgramLike],
-        trainset: Sequence[Tuple[Any, Any]],
+        teacher: _ProgramLike | None,
+        trainset: Sequence[tuple[Any, Any]],
     ) -> _ProgramLike:
         p = getattr(student, "deepcopy", student.reset_copy)()
         rng = random.Random()
         # If bootstrapped demos disabled, return labeled-only few-shot quickly
         if self.max_bootstrapped_demos <= 0:
-            demos: List[Tuple[Any, Any]] = []
+            demos: list[tuple[Any, Any]] = []
             if self.max_labeled_demos > 0:
                 demos += rng.sample(list(trainset), k=min(self.max_labeled_demos, len(trainset)))
             return p.with_demos(demos)
-        boot: List[Tuple[Any, Any]] = []
+        boot: list[tuple[Any, Any]] = []
         # Bootstrap demos by self consistency
         for _ in range(self.max_rounds):
             rng.shuffle(trainset := list(trainset))
@@ -127,33 +132,29 @@ class BootstrapFewShot:
 @dataclass
 class Candidate:
     score: float
-    subscores: List[float]
+    subscores: list[float]
     seed: int
     program: _ProgramLike
 
 
 def random_search_compile(
     student: _ProgramLike,
-    trainset: Sequence[Tuple[Any, Any]],
-    valset: Sequence[Tuple[Any, Any]],
+    trainset: Sequence[tuple[Any, Any]],
+    valset: Sequence[tuple[Any, Any]],
     metric: Callable[[Any, Any], float],
     *,
     max_bootstrapped_demos: int = 8,
     max_labeled_demos: int = 4,
     max_rounds: int = 2,
     num_candidate_programs: int = 16,
-    stop_at_score: Optional[float] = None,
-    evaluate_fn: Optional[Callable[[
-        _ProgramLike,
-        Sequence[Tuple[Any, Any]],
-        Callable[[Any, Any], float]
-    ], EvalResult]] = None,
-    on_candidate_evaluated: Optional[Callable[[int, float, EvalResult, Dict[str, Any]], None]] = None,
-) -> Tuple[_ProgramLike, List[Dict[str, Any]]]:
-    best_program: Optional[_ProgramLike] = None
+    stop_at_score: float | None = None,
+    evaluate_fn: Callable[[_ProgramLike, Sequence[tuple[Any, Any]], Callable[[Any, Any], float]], EvalResult] | None = None,
+    on_candidate_evaluated: Callable[[int, float, EvalResult, dict[str, Any]], None] | None = None,
+) -> tuple[_ProgramLike, list[dict[str, Any]]]:
+    best_program: _ProgramLike | None = None
     best_score = float("-inf")
-    candidates: List[Candidate] = []
-    records: List[Dict[str, Any]] = []
+    candidates: list[Candidate] = []
+    records: list[dict[str, Any]] = []
 
     seeds = list(range(num_candidate_programs))
     seeds = [-3, -2, -1] + seeds  # zero-shot, labeled few-shot, bootstrapped few-shot
@@ -174,7 +175,9 @@ def random_search_compile(
         if max_bootstrapped_demos <= 0:
             size = 0
         else:
-            size = max_bootstrapped_demos if seed == -1 else rng.randint(1, max_bootstrapped_demos)
+            size = (
+                max_bootstrapped_demos if seed == -1 else rng.randint(1, max_bootstrapped_demos)
+            )
         program = BootstrapFewShot(
             metric=metric,
             metric_threshold=None,
@@ -184,14 +187,18 @@ def random_search_compile(
             max_rounds=max_rounds,
         ).compile(student, teacher=None, trainset=train_copy)
 
-        res = (evaluate_fn(program, valset, metric) if evaluate_fn else evaluate(program, valset, metric))
+        res = (
+            evaluate_fn(program, valset, metric)
+            if evaluate_fn
+            else evaluate(program, valset, metric)
+        )
         cand = Candidate(score=res.score, subscores=res.subscores, seed=seed, program=program)
         candidates.append(cand)
         # Record an intervention summary for reproducibility
-        intervention: Dict[str, Any] = {"seed": seed}
+        intervention: dict[str, Any] = {"seed": seed}
         if hasattr(program, "demos"):
             try:
-                intervention["demos"] = getattr(program, "demos")  # type: ignore
+                intervention["demos"] = program.demos  # type: ignore
             except Exception:
                 intervention["demos"] = None
         # Type of candidate
@@ -203,12 +210,6 @@ def random_search_compile(
             intervention["label"] = f"labeled-{max_labeled_demos}"
         else:
             intervention["kind"] = "bootstrapped_few_shot"
-            bs = 0
-            try:
-                # try to infer from program demos length if present
-                bs = len(intervention.get("demos") or [])
-            except Exception:
-                bs = 0
             intervention["label"] = f"boot-b{max_bootstrapped_demos}-l{max_labeled_demos}"
         record_obj = {
             "score": cand.score,
@@ -224,18 +225,16 @@ def random_search_compile(
             break
 
         if on_candidate_evaluated is not None:
-            try:
+            with contextlib.suppress(Exception):
                 on_candidate_evaluated(idx + 1, res.score, res, intervention)
-            except Exception:
-                pass
 
     # Attach candidates for inspection
     if hasattr(best_program, "candidate_programs"):
         # If user object supports attribute assignment
-        try:
-            best_program.candidate_programs = sorted(candidates, key=lambda c: c.score, reverse=True)  # type: ignore[attr-defined]
-        except Exception:
-            pass
+        with contextlib.suppress(Exception):
+            best_program.candidate_programs = sorted(
+                candidates, key=lambda c: c.score, reverse=True
+            )  # type: ignore[attr-defined]
 
     return (best_program or getattr(student, "deepcopy", student)(), records)
 
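Note: several hunks in this diff replace try / except Exception / pass blocks with contextlib.suppress. A minimal sketch of the equivalence, with an illustrative callback name that is not from the package:

    import contextlib

    def notify(callback, *args):
        # Behaves like: try: callback(*args) / except Exception: pass
        with contextlib.suppress(Exception):
            callback(*args)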
synth_ai/learning/prompts/run_mipro_banking77.py CHANGED
@@ -12,39 +12,41 @@ Run:
 from __future__ import annotations
 
 import asyncio
+import json
 import os
 import random
-from dataclasses import dataclass, replace
-from typing import Any, Dict, List, Sequence, Tuple
+import time
+from collections.abc import Sequence
+from pathlib import Path
+from typing import Any
 
-from dotenv import load_dotenv
 from datasets import load_dataset
-
+from dotenv import load_dotenv
+from synth_ai.learning.prompts.mipro import ProgramAdapter, evaluate_program, mipro_v2_compile
 from synth_ai.lm.core.main_v3 import LM, build_messages
-import json
-import time
-from pathlib import Path
-from synth_ai.learning.prompts.mipro import ProgramAdapter, mipro_v2_compile, evaluate_program
 
 
-def choose_label(pred: str, label_names: List[str]) -> str:
+def choose_label(pred: str, label_names: list[str]) -> str:
     norm = (pred or "").strip().lower()
     d = {ln.lower(): ln for ln in label_names}
     if norm in d:
         return d[norm]
+
     def score(cand: str) -> int:
         c = cand.lower()
         return sum(1 for w in c.split() if w in norm)
+
     return max(label_names, key=score)
 
 
-def accuracy(pred: str, gold: str, labels: List[str]) -> float:
+def accuracy(pred: str, gold: str, labels: list[str]) -> float:
     return 1.0 if choose_label(pred, labels) == gold else 0.0
 
 
 class NaivePromptModel:
     """Toy prompt model that returns simple instruction variants."""
-    def generate_instructions(self, ctx: Dict[str, Any], k: int = 8) -> List[str]:
+
+    def generate_instructions(self, ctx: dict[str, Any], k: int = 8) -> list[str]:
         base = "Classify the Banking77 intent and return exactly one label."
         variants = [
             base,
@@ -60,28 +62,33 @@ class NaivePromptModel:
         return variants[:k]
 
 
-def build_run_fn(lm: LM, label_names: List[str]):
+def build_run_fn(lm: LM, label_names: list[str]):
     def run_fn(x: str, _model: Any | None = None) -> str:
         # Use instructions and demos from adapter state (set by set_instructions/set_demos)
         # The adapter passes state via closure; we rebuild messages here
-        instructions = state_ref.get("instructions", {}).get("main", "You are an intent classifier for Banking77.")
+        instructions = state_ref.get("instructions", {}).get(
+            "main", "You are an intent classifier for Banking77."
+        )
         examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in state_ref.get("demos", []))
         sys = instructions
         user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
         messages = build_messages(sys, user, images_bytes=None, model_name=lm.model)
+
         async def _call():
             resp = await lm.respond_async(messages=messages)
             return (resp.raw_response or "").strip()
+
         return asyncio.run(_call())
+
     return run_fn
 
 
-def set_instructions(new_instr: Dict[str, str], state: Dict[str, Any]) -> Dict[str, Any]:
+def set_instructions(new_instr: dict[str, str], state: dict[str, Any]) -> dict[str, Any]:
     state["instructions"] = {**state.get("instructions", {}), **new_instr}
     return state
 
 
-def set_demos(demos: List[Tuple[str, str]], state: Dict[str, Any]) -> Dict[str, Any]:
+def set_demos(demos: list[tuple[str, str]], state: dict[str, Any]) -> dict[str, Any]:
     state["demos"] = list(demos)
     return state
 
@@ -96,15 +103,18 @@ def main():
 
     print("Loading Banking77 dataset (train/dev split of test for demo)...")
     ds = load_dataset("banking77")
-    label_names: List[str] = ds["test"].features["label"].names  # type: ignore
+    label_names: list[str] = ds["test"].features["label"].names  # type: ignore
 
     all_items = [(r["text"], label_names[int(r["label"])]) for r in ds["test"]]
    random.shuffle(all_items)
-    trainset: Sequence[Tuple[str, str]] = all_items[:80]
-    valset: Sequence[Tuple[str, str]] = all_items[80:160]
+    trainset: Sequence[tuple[str, str]] = all_items[:80]
+    valset: Sequence[tuple[str, str]] = all_items[80:160]
 
     global state_ref
-    state_ref = {"instructions": {"main": "You are an intent classifier for Banking77."}, "demos": []}
+    state_ref = {
+        "instructions": {"main": "You are an intent classifier for Banking77."},
+        "demos": [],
+    }
     adapter = ProgramAdapter(
         run_fn=build_run_fn(lm, label_names),
         state=state_ref,
@@ -138,7 +148,9 @@ def main():
     )
 
     res = evaluate_program(best, valset, metric)
-    print(f"Best program accuracy on val: {res.score:.2%} ({sum(res.subscores)}/{len(res.subscores)})")
+    print(
+        f"Best program accuracy on val: {res.score:.2%} ({sum(res.subscores)}/{len(res.subscores)})"
+    )
 
     out = {
         "context": {
synth_ai/learning/prompts/run_random_search_banking77.py CHANGED
@@ -12,44 +12,46 @@ Run:
 from __future__ import annotations
 
 import asyncio
+import json
 import os
 import random
+import time
+from collections.abc import Sequence
 from dataclasses import dataclass, replace
+from pathlib import Path
 from types import SimpleNamespace
-from tqdm import tqdm
-from typing import Any, Dict, List, Sequence, Tuple
+from typing import Any
 
-from dotenv import load_dotenv
 from datasets import load_dataset
-
-from synth_ai.lm.core.main_v3 import LM, build_messages
-import json
-import time
-from pathlib import Path
+from dotenv import load_dotenv
 from synth_ai.learning.prompts.random_search import random_search_compile
+from synth_ai.lm.core.main_v3 import LM, build_messages
+from tqdm import tqdm
 
 
-def choose_label(pred: str, label_names: List[str]) -> str:
+def choose_label(pred: str, label_names: list[str]) -> str:
     norm = (pred or "").strip().lower()
     d = {ln.lower(): ln for ln in label_names}
     if norm in d:
         return d[norm]
+
     def score(cand: str) -> int:
         c = cand.lower()
         return sum(1 for w in c.split() if w in norm)
+
     return max(label_names, key=score)
 
 
-def accuracy(pred: str, gold: str, labels: List[str]) -> float:
+def accuracy(pred: str, gold: str, labels: list[str]) -> float:
     return 1.0 if choose_label(pred, labels) == gold else 0.0
 
 
 @dataclass
 class StudentProgram:
     lm: LM
-    label_names: List[str]
+    label_names: list[str]
     instruction: str
-    demos: List[Tuple[str, str]]
+    demos: list[tuple[str, str]]
 
     def reset_copy(self):
         return replace(self, instruction=self.instruction, demos=list(self.demos))
@@ -57,7 +59,7 @@ class StudentProgram:
     def deepcopy(self):
         return replace(self, instruction=str(self.instruction), demos=list(self.demos))
 
-    def with_demos(self, demos: List[Tuple[str, str]]):
+    def with_demos(self, demos: list[tuple[str, str]]):
         return replace(self, demos=list(demos))
 
     def run(self, x: str) -> str:
@@ -66,10 +68,12 @@ class StudentProgram:
         sys = self.instruction or "You are an intent classifier for Banking77."
         user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
         messages = build_messages(sys, user, images_bytes=None, model_name=self.lm.model)
+
         # Call LM synchronously via asyncio
         async def _call():
             resp = await self.lm.respond_async(messages=messages)
             return (resp.raw_response or "").strip()
+
         return asyncio.run(_call())
 
     async def _apredict(self, x: str):
@@ -91,13 +95,13 @@ def main():
 
     print("Loading Banking77 dataset (train/dev split of test for demo)...")
     ds = load_dataset("banking77")
-    label_names: List[str] = ds["test"].features["label"].names  # type: ignore
+    label_names: list[str] = ds["test"].features["label"].names  # type: ignore
 
     # Create small train/val from the test split for speed
     all_items = [(r["text"], label_names[int(r["label"])]) for r in ds["test"]]
     random.shuffle(all_items)
-    trainset: Sequence[Tuple[str, str]] = all_items[:40]
-    valset: Sequence[Tuple[str, str]] = all_items[40:60]  # 20 examples
+    trainset: Sequence[tuple[str, str]] = all_items[:40]
+    valset: Sequence[tuple[str, str]] = all_items[40:60]  # 20 examples
 
     student = StudentProgram(
         lm=lm,
@@ -110,17 +114,20 @@ def main():
         return accuracy(yhat, y, label_names)
 
     total_candidates = 3 + 3  # zero-shot, labeled few-shot, bootstrapped + 3 random seeds
-    print(f"Running Random Search optimizer ({total_candidates} candidates, parallel eval of 20 questions)...")
+    print(
+        f"Running Random Search optimizer ({total_candidates} candidates, parallel eval of 20 questions)..."
+    )
 
-    def eval_parallel(program: StudentProgram, dataset: Sequence[Tuple[str, str]], metric_fn):
+    def eval_parallel(program: StudentProgram, dataset: Sequence[tuple[str, str]], metric_fn):
         async def _run():
             xs = [x for x, _ in dataset]
             ys = [y for _, y in dataset]
-            preds: List[Optional[str]] = [None] * len(xs)
+            preds: list[Optional[str]] = [None] * len(xs)
             sem = asyncio.Semaphore(int(os.getenv("CONCURRENCY", "5")))
 
             async def worker(i: int, x: str, y: str):
                 import time
+
                 t_start = time.monotonic()
                 try:
                     async with sem:
@@ -138,16 +145,18 @@ def main():
                     t_end = time.monotonic()
                    return i, y, "", t_start, t_end, {}
 
-            tasks = [asyncio.create_task(worker(i, x, y)) for i, (x, y) in enumerate(zip(xs, ys))]
+            tasks = [asyncio.create_task(worker(i, x, y)) for i, (x, y) in enumerate(zip(xs, ys, strict=False))]
             correct_sum = 0.0
             processed = 0
-            import time, statistics
-            durations: List[float] = []
+            import statistics
+            import time
+
+            durations: list[float] = []
             in_tok_sum = 0
             out_tok_sum = 0
             in_tok_count = 0
             out_tok_count = 0
-            details: List[Dict[str, Any]] = []
+            details: list[dict[str, Any]] = []
             t_batch_start = time.monotonic()
             deadline = float(os.getenv("BATCH_DEADLINE_S", "20"))
             with tqdm(total=len(tasks), desc="Rollouts", leave=False) as pbar:
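Note: the zip(xs, ys, strict=False) change in the hunk above uses the strict keyword added to zip in Python 3.10. strict=False keeps the historical behaviour of stopping at the shortest iterable, while strict=True would raise ValueError on a length mismatch. A minimal sketch with illustrative values:

    xs = ["a", "b", "c"]
    ys = [1, 2]
    pairs = list(zip(xs, ys, strict=False))  # [("a", 1), ("b", 2)]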
@@ -172,7 +181,10 @@ def main():
                        break
                    # Wait for at least one completion within remaining time (polling granularity <= 1s)
                    timeout = min(1.0, remaining)
-                    done, pending = await asyncio.wait(pending, timeout=timeout, return_when=asyncio.FIRST_COMPLETED)
+                    done, pending = await asyncio.wait(
+                        pending, timeout=timeout, return_when=asyncio.FIRST_COMPLETED
+                    )
+                    import contextlib
                    for task in done:
                        try:
                            i, y_true, pred, t_start, t_end, usage = task.result()
@@ -182,11 +194,9 @@ def main():
                            durations.append(max(0.0, t_end - t_start))
                            preds[i] = pred
                            processed += 1
-                            try:
+                            with contextlib.suppress(Exception):
                                correct_sum += float(metric_fn(pred, y_true))
-                            except Exception:
-                                pass
-                            try:
+                            with contextlib.suppress(Exception):
                                pt = usage.get("prompt_tokens") or usage.get("input_tokens")
                                ct = usage.get("completion_tokens") or usage.get("output_tokens")
                                if isinstance(pt, (int, float)):
@@ -195,30 +205,34 @@ def main():
                                if isinstance(ct, (int, float)):
                                    out_tok_sum += int(ct)
                                    out_tok_count += 1
-                            except Exception:
-                                pass
-                            details.append({
-                                "index": i,
-                                "seconds": max(0.0, t_end - t_start),
-                                "score": float(metric_fn(pred, y_true)),
-                                "usage": {
-                                    "prompt_tokens": usage.get("prompt_tokens") or usage.get("input_tokens"),
-                                    "completion_tokens": usage.get("completion_tokens") or usage.get("output_tokens"),
-                                },
-                            })
+                            details.append(
+                                {
+                                    "index": i,
+                                    "seconds": max(0.0, t_end - t_start),
+                                    "score": float(metric_fn(pred, y_true)),
+                                    "usage": {
+                                        "prompt_tokens": usage.get("prompt_tokens")
+                                        or usage.get("input_tokens"),
+                                        "completion_tokens": usage.get("completion_tokens")
+                                        or usage.get("output_tokens"),
+                                    },
+                                }
+                            )
                            pbar.update(1)
                    med = statistics.median(durations) if durations else 0.0
                    mx = max(durations) if durations else 0.0
                    avg_in = (in_tok_sum / in_tok_count) if in_tok_count else 0.0
                    avg_out = (out_tok_sum / out_tok_count) if out_tok_count else 0.0
-                    pbar.set_postfix({
-                        "acc": f"{(correct_sum/processed):.2f}",
-                        "done": f"{processed}/{len(tasks)}",
-                        "med_s": f"{med:.1f}",
-                        "max_s": f"{mx:.1f}",
-                        "tin": f"{avg_in:.1f}",
-                        "tout": f"{avg_out:.1f}",
-                    })
+                    pbar.set_postfix(
+                        {
+                            "acc": f"{(correct_sum / processed):.2f}",
+                            "done": f"{processed}/{len(tasks)}",
+                            "med_s": f"{med:.1f}",
+                            "max_s": f"{mx:.1f}",
+                            "tin": f"{avg_in:.1f}",
+                            "tout": f"{avg_out:.1f}",
+                        }
+                    )
            # Compute score only from completed/successful rollouts (drop timeouts/cancelled)
            subs = [float(d.get("score", 0.0)) for d in details]
            result = SimpleNamespace(score=(sum(subs) / max(1, len(subs))), subscores=subs)
@@ -226,28 +240,33 @@ def main():
            result.mean_in = (in_tok_sum / in_tok_count) if in_tok_count else 0.0
            result.mean_out = (out_tok_sum / out_tok_count) if out_tok_count else 0.0
            return result
+
        return asyncio.run(_run())
+
    pbar = tqdm(total=total_candidates, desc="Candidates")
-    candidate_eval_details: Dict[int, Any] = {}
+    candidate_eval_details: dict[int, Any] = {}
+
    def on_cand(idx: int, score: float, res, intervention):
        pbar.update(1)
        pbar.set_postfix({"score": f"{score:.2f}"})
        # store per-instance details (for apples-to-apples)
-        try:
+        import contextlib
+        with contextlib.suppress(Exception):
            candidate_eval_details[idx] = {
                "score": score,
                "mean_in": getattr(res, "mean_in", None),
                "mean_out": getattr(res, "mean_out", None),
                "instances": getattr(res, "details", None),
            }
-        except Exception:
-            pass
        # visible summary line per candidate
-        kind = intervention.get("kind", "candidate") if isinstance(intervention, dict) else "candidate"
+        kind = (
+            intervention.get("kind", "candidate") if isinstance(intervention, dict) else "candidate"
+        )
        label = intervention.get("label") if isinstance(intervention, dict) else None
        seed = intervention.get("seed") if isinstance(intervention, dict) else None
        processed = len(getattr(res, "details", []) or [])
        from tqdm import tqdm as _tqdm
+
        _tqdm.write(
            f"Candidate {idx}/{total_candidates} [{kind}{'' if label is None else f', label={label}'}{'' if seed is None else f', seed={seed}'}]: "
            f"score={score:.2f} | mean tin/tout={getattr(res, 'mean_in', 0):.1f}/{getattr(res, 'mean_out', 0):.1f} | N={processed}"
synth_ai/lm/__init__.py CHANGED
@@ -4,24 +4,24 @@ Synth AI Language Model Interface.
 Provides a unified interface for multiple LLM providers including OpenAI and Synth.
 """
 
-from .config import SynthConfig, OpenAIConfig
-from .warmup import warmup_synth_model, get_warmup_status
+from .config import OpenAIConfig, SynthConfig
+from .core.main_v3 import LM
 from .unified_interface import (
-    UnifiedLMProvider,
     OpenAIProvider,
     SynthProvider,
     UnifiedLMClient,
+    UnifiedLMProvider,
     create_provider,
 )
 from .vendors.synth_client import (
     AsyncSynthClient,
     SyncSynthClient,
     create_async_client,
-    create_sync_client,
     create_chat_completion_async,
     create_chat_completion_sync,
+    create_sync_client,
 )
-from .core.main_v3 import LM
+from .warmup import get_warmup_status, warmup_synth_model
 
 __all__ = [
     # Configuration
synth_ai/lm/caching/ephemeral.py CHANGED
@@ -7,7 +7,6 @@ of the application run, useful for avoiding redundant API calls within a session
 
 import os
 from dataclasses import dataclass
-from typing import Optional, Union
 
 from diskcache import Cache
 from pydantic import BaseModel
@@ -20,24 +19,25 @@ from synth_ai.lm.vendors.base import BaseLMResponse
 class EphemeralCache:
     """
     Ephemeral cache implementation using diskcache.
-    
+
     This cache stores LM responses temporarily on disk with a size limit.
     The cache is cleared when the application restarts.
     """
+
     def __init__(self, fast_cache_dir: str = ".cache/ephemeral_cache"):
         os.makedirs(fast_cache_dir, exist_ok=True)
         self.fast_cache = Cache(fast_cache_dir, size_limit=DISKCACHE_SIZE_LIMIT)
 
     def hit_cache(
-        self, key: str, response_model: Optional[BaseModel] = None
-    ) -> Optional[BaseLMResponse]:
+        self, key: str, response_model: BaseModel | None = None
+    ) -> BaseLMResponse | None:
         """
         Check if a response exists in cache for the given key.
-        
+
         Args:
             key: Cache key to look up
             response_model: Optional Pydantic model to reconstruct structured output
-        
+
         Returns:
             BaseLMResponse if found in cache, None otherwise
         """
@@ -65,14 +65,14 @@ class EphemeralCache:
             tool_calls=tool_calls,
         )
 
-    def add_to_cache(self, key: str, response: Union[BaseLMResponse, str]) -> None:
+    def add_to_cache(self, key: str, response: BaseLMResponse | str) -> None:
         """
         Add a response to the cache.
-        
+
         Args:
             key: Cache key to store under
             response: Either a BaseLMResponse object or raw string response
-        
+
         Raises:
             ValueError: If response type is not supported
         """