PyPI - synth-ai - Versions diffs - 0.2.4.dev6__py3-none-any.whl → 0.2.4.dev7__py3-none-any.whl - Mend

synth-ai 0.2.4.dev6__py3-none-any.whl → 0.2.4.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (229) hide show

synth_ai/__init__.py +18 -9
synth_ai/cli/__init__.py +10 -5
synth_ai/cli/balance.py +22 -17
synth_ai/cli/calc.py +2 -3
synth_ai/cli/demo.py +3 -5
synth_ai/cli/legacy_root_backup.py +58 -32
synth_ai/cli/man.py +22 -19
synth_ai/cli/recent.py +9 -8
synth_ai/cli/root.py +58 -13
synth_ai/cli/status.py +13 -6
synth_ai/cli/traces.py +45 -21
synth_ai/cli/watch.py +40 -37
synth_ai/config/base_url.py +1 -3
synth_ai/core/experiment.py +1 -2
synth_ai/environments/__init__.py +2 -6
synth_ai/environments/environment/artifacts/base.py +3 -1
synth_ai/environments/environment/db/sqlite.py +1 -1
synth_ai/environments/environment/registry.py +19 -20
synth_ai/environments/environment/resources/sqlite.py +2 -3
synth_ai/environments/environment/rewards/core.py +3 -2
synth_ai/environments/environment/tools/__init__.py +6 -4
synth_ai/environments/examples/crafter_classic/__init__.py +1 -1
synth_ai/environments/examples/crafter_classic/engine.py +13 -13
synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +1 -0
synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +2 -1
synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +2 -1
synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +3 -2
synth_ai/environments/examples/crafter_classic/environment.py +16 -15
synth_ai/environments/examples/crafter_classic/taskset.py +2 -2
synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +2 -3
synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +2 -1
synth_ai/environments/examples/crafter_custom/crafter/__init__.py +2 -2
synth_ai/environments/examples/crafter_custom/crafter/config.py +2 -2
synth_ai/environments/examples/crafter_custom/crafter/env.py +1 -5
synth_ai/environments/examples/crafter_custom/crafter/objects.py +1 -2
synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +1 -2
synth_ai/environments/examples/crafter_custom/dataset_builder.py +5 -5
synth_ai/environments/examples/crafter_custom/environment.py +13 -13
synth_ai/environments/examples/crafter_custom/run_dataset.py +5 -5
synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +2 -2
synth_ai/environments/examples/enron/art_helpers/local_email_db.py +5 -4
synth_ai/environments/examples/enron/art_helpers/types_enron.py +2 -1
synth_ai/environments/examples/enron/engine.py +18 -14
synth_ai/environments/examples/enron/environment.py +12 -11
synth_ai/environments/examples/enron/taskset.py +7 -7
synth_ai/environments/examples/minigrid/__init__.py +6 -6
synth_ai/environments/examples/minigrid/engine.py +6 -6
synth_ai/environments/examples/minigrid/environment.py +6 -6
synth_ai/environments/examples/minigrid/puzzle_loader.py +3 -2
synth_ai/environments/examples/minigrid/taskset.py +13 -13
synth_ai/environments/examples/nethack/achievements.py +1 -1
synth_ai/environments/examples/nethack/engine.py +8 -7
synth_ai/environments/examples/nethack/environment.py +10 -9
synth_ai/environments/examples/nethack/helpers/__init__.py +8 -9
synth_ai/environments/examples/nethack/helpers/action_mapping.py +1 -1
synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +2 -1
synth_ai/environments/examples/nethack/helpers/observation_utils.py +1 -1
synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +3 -4
synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +6 -5
synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +5 -5
synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +7 -6
synth_ai/environments/examples/nethack/taskset.py +5 -5
synth_ai/environments/examples/red/engine.py +9 -8
synth_ai/environments/examples/red/engine_helpers/reward_components.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +7 -7
synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +2 -1
synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +3 -2
synth_ai/environments/examples/red/engine_helpers/state_extraction.py +2 -1
synth_ai/environments/examples/red/environment.py +18 -15
synth_ai/environments/examples/red/taskset.py +5 -3
synth_ai/environments/examples/sokoban/engine.py +16 -13
synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +3 -2
synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +2 -1
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +1 -1
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +7 -5
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +1 -1
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +2 -1
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +5 -4
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +3 -2
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +2 -1
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +5 -4
synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +1 -1
synth_ai/environments/examples/sokoban/environment.py +15 -14
synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +5 -3
synth_ai/environments/examples/sokoban/puzzle_loader.py +3 -2
synth_ai/environments/examples/sokoban/taskset.py +13 -10
synth_ai/environments/examples/tictactoe/engine.py +6 -6
synth_ai/environments/examples/tictactoe/environment.py +8 -7
synth_ai/environments/examples/tictactoe/taskset.py +6 -5
synth_ai/environments/examples/verilog/engine.py +4 -3
synth_ai/environments/examples/verilog/environment.py +11 -10
synth_ai/environments/examples/verilog/taskset.py +14 -12
synth_ai/environments/examples/wordle/__init__.py +5 -5
synth_ai/environments/examples/wordle/engine.py +32 -25
synth_ai/environments/examples/wordle/environment.py +21 -16
synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +6 -6
synth_ai/environments/examples/wordle/taskset.py +20 -12
synth_ai/environments/reproducibility/core.py +1 -1
synth_ai/environments/reproducibility/tree.py +21 -21
synth_ai/environments/service/app.py +3 -2
synth_ai/environments/service/core_routes.py +104 -110
synth_ai/environments/service/external_registry.py +1 -2
synth_ai/environments/service/registry.py +1 -1
synth_ai/environments/stateful/core.py +1 -2
synth_ai/environments/stateful/engine.py +1 -1
synth_ai/environments/tasks/api.py +4 -4
synth_ai/environments/tasks/core.py +14 -12
synth_ai/environments/tasks/filters.py +6 -4
synth_ai/environments/tasks/utils.py +13 -11
synth_ai/evals/base.py +2 -3
synth_ai/experimental/synth_oss.py +4 -4
synth_ai/learning/gateway.py +1 -3
synth_ai/learning/prompts/banking77_injection_eval.py +15 -10
synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +26 -14
synth_ai/learning/prompts/mipro.py +61 -52
synth_ai/learning/prompts/random_search.py +42 -43
synth_ai/learning/prompts/run_mipro_banking77.py +32 -20
synth_ai/learning/prompts/run_random_search_banking77.py +71 -52
synth_ai/lm/__init__.py +5 -5
synth_ai/lm/caching/ephemeral.py +9 -9
synth_ai/lm/caching/handler.py +20 -20
synth_ai/lm/caching/persistent.py +10 -10
synth_ai/lm/config.py +3 -3
synth_ai/lm/constants.py +7 -7
synth_ai/lm/core/all.py +17 -3
synth_ai/lm/core/exceptions.py +0 -2
synth_ai/lm/core/main.py +26 -41
synth_ai/lm/core/main_v3.py +20 -10
synth_ai/lm/core/vendor_clients.py +18 -17
synth_ai/lm/injection.py +7 -8
synth_ai/lm/overrides.py +21 -19
synth_ai/lm/provider_support/__init__.py +1 -1
synth_ai/lm/provider_support/anthropic.py +15 -15
synth_ai/lm/provider_support/openai.py +23 -21
synth_ai/lm/structured_outputs/handler.py +34 -32
synth_ai/lm/structured_outputs/inject.py +24 -27
synth_ai/lm/structured_outputs/rehabilitate.py +19 -15
synth_ai/lm/tools/base.py +17 -16
synth_ai/lm/unified_interface.py +17 -18
synth_ai/lm/vendors/base.py +20 -18
synth_ai/lm/vendors/core/anthropic_api.py +36 -27
synth_ai/lm/vendors/core/gemini_api.py +31 -36
synth_ai/lm/vendors/core/mistral_api.py +19 -19
synth_ai/lm/vendors/core/openai_api.py +11 -10
synth_ai/lm/vendors/openai_standard.py +113 -87
synth_ai/lm/vendors/openai_standard_responses.py +74 -61
synth_ai/lm/vendors/retries.py +9 -1
synth_ai/lm/vendors/supported/custom_endpoint.py +26 -26
synth_ai/lm/vendors/supported/deepseek.py +10 -10
synth_ai/lm/vendors/supported/grok.py +8 -8
synth_ai/lm/vendors/supported/ollama.py +2 -1
synth_ai/lm/vendors/supported/openrouter.py +11 -9
synth_ai/lm/vendors/synth_client.py +69 -63
synth_ai/lm/warmup.py +8 -7
synth_ai/tracing/__init__.py +22 -10
synth_ai/tracing_v1/__init__.py +22 -20
synth_ai/tracing_v3/__init__.py +7 -7
synth_ai/tracing_v3/abstractions.py +56 -52
synth_ai/tracing_v3/config.py +4 -2
synth_ai/tracing_v3/db_config.py +6 -8
synth_ai/tracing_v3/decorators.py +29 -30
synth_ai/tracing_v3/examples/basic_usage.py +12 -12
synth_ai/tracing_v3/hooks.py +21 -21
synth_ai/tracing_v3/llm_call_record_helpers.py +85 -98
synth_ai/tracing_v3/lm_call_record_abstractions.py +2 -4
synth_ai/tracing_v3/migration_helper.py +3 -5
synth_ai/tracing_v3/replica_sync.py +30 -32
synth_ai/tracing_v3/session_tracer.py +35 -29
synth_ai/tracing_v3/storage/__init__.py +1 -1
synth_ai/tracing_v3/storage/base.py +8 -7
synth_ai/tracing_v3/storage/config.py +4 -4
synth_ai/tracing_v3/storage/factory.py +4 -4
synth_ai/tracing_v3/storage/utils.py +9 -9
synth_ai/tracing_v3/turso/__init__.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +9 -9
synth_ai/tracing_v3/turso/manager.py +60 -48
synth_ai/tracing_v3/turso/models.py +24 -19
synth_ai/tracing_v3/utils.py +5 -5
synth_ai/tui/__main__.py +1 -1
synth_ai/tui/cli/query_experiments.py +2 -3
synth_ai/tui/cli/query_experiments_v3.py +2 -3
synth_ai/tui/dashboard.py +97 -86
synth_ai/v0/tracing/abstractions.py +28 -28
synth_ai/v0/tracing/base_client.py +9 -9
synth_ai/v0/tracing/client_manager.py +7 -7
synth_ai/v0/tracing/config.py +7 -7
synth_ai/v0/tracing/context.py +6 -6
synth_ai/v0/tracing/decorators.py +6 -5
synth_ai/v0/tracing/events/manage.py +1 -1
synth_ai/v0/tracing/events/store.py +5 -4
synth_ai/v0/tracing/immediate_client.py +4 -5
synth_ai/v0/tracing/local.py +3 -3
synth_ai/v0/tracing/log_client_base.py +4 -5
synth_ai/v0/tracing/retry_queue.py +5 -6
synth_ai/v0/tracing/trackers.py +25 -25
synth_ai/v0/tracing/upload.py +6 -0
synth_ai/v0/tracing_v1/__init__.py +1 -1
synth_ai/v0/tracing_v1/abstractions.py +28 -28
synth_ai/v0/tracing_v1/base_client.py +9 -9
synth_ai/v0/tracing_v1/client_manager.py +7 -7
synth_ai/v0/tracing_v1/config.py +7 -7
synth_ai/v0/tracing_v1/context.py +6 -6
synth_ai/v0/tracing_v1/decorators.py +7 -6
synth_ai/v0/tracing_v1/events/manage.py +1 -1
synth_ai/v0/tracing_v1/events/store.py +5 -4
synth_ai/v0/tracing_v1/immediate_client.py +4 -5
synth_ai/v0/tracing_v1/local.py +3 -3
synth_ai/v0/tracing_v1/log_client_base.py +4 -5
synth_ai/v0/tracing_v1/retry_queue.py +5 -6
synth_ai/v0/tracing_v1/trackers.py +25 -25
synth_ai/v0/tracing_v1/upload.py +25 -24
synth_ai/zyk/__init__.py +1 -0
{synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/METADATA +1 -11
synth_ai-0.2.4.dev7.dist-info/RECORD +299 -0
synth_ai-0.2.4.dev6.dist-info/RECORD +0 -299
{synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/WHEEL +0 -0
{synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/top_level.txt +0 -0

synth_ai/environments/tasks/filters.py CHANGED Viewed

@@ -1,6 +1,8 @@
-from typing import Any, Collection, Optional
+from collections.abc import Collection
 from dataclasses import dataclass
-from synth_ai.environments.tasks.core import TaskInstanceMetadataFilter, TaskInstance
+from typing import Any
+from synth_ai.environments.tasks.core import TaskInstance, TaskInstanceMetadataFilter
 @dataclass
@@ -18,8 +20,8 @@ class ValueFilter(TaskInstanceMetadataFilter):
 @dataclass
 class RangeFilter(TaskInstanceMetadataFilter):
     key: str
-    min_val: Optional[float] = None
-    max_val: Optional[float] = None
+    min_val: float | None = None
+    max_val: float | None = None
     def __call__(self, instance: TaskInstance) -> bool:
         instance_value = getattr(instance.metadata, self.key, None)

synth_ai/environments/tasks/utils.py CHANGED Viewed

@@ -2,17 +2,19 @@
 Utility functions and generic filters for taskset creation.
 """
-from typing import Any, Collection, Optional, List, Set
+from collections.abc import Collection
+from typing import Any
 from uuid import UUID, uuid4
 from synth_ai.environments.tasks.core import (
-    TaskInstanceMetadataFilter,
-    TaskInstanceSet,
     SplitInfo,
     TaskInstance,
+    TaskInstanceMetadataFilter,
+    TaskInstanceSet,
 )
-def parse_or_new_uuid(raw_id: Optional[str]) -> UUID:
+def parse_or_new_uuid(raw_id: str | None) -> UUID:
     """
     Parse a raw ID string into a UUID, or generate a new one if invalid or missing.
     """
@@ -43,8 +45,8 @@ class RangeFilter(TaskInstanceMetadataFilter):
     def __init__(
         self,
         key: str,
-        min_value: Optional[float] = None,
-        max_value: Optional[float] = None,
+        min_value: float | None = None,
+        max_value: float | None = None,
     ):
         self.key = key
         self.min_value = min_value
@@ -62,15 +64,15 @@ class RangeFilter(TaskInstanceMetadataFilter):
 def make_taskset(
     name: str,
     description: str,
-    instances: List[TaskInstance],
-    val_filter: Optional[TaskInstanceMetadataFilter] = None,
-    test_filter: Optional[TaskInstanceMetadataFilter] = None,
+    instances: list[TaskInstance],
+    val_filter: TaskInstanceMetadataFilter | None = None,
+    test_filter: TaskInstanceMetadataFilter | None = None,
 ) -> TaskInstanceSet:
     """
     Assemble a TaskInstanceSet by applying optional validation and test filters.
     """
-    val_ids: Set[Any] = set()
-    test_ids: Set[Any] = set()
+    val_ids: set[Any] = set()
+    test_ids: set[Any] = set()
     if val_filter:
         val_ids = {inst.id for inst in instances if val_filter(inst)}
     if test_filter:

synth_ai/evals/base.py CHANGED Viewed

@@ -1,9 +1,8 @@
-from typing import List
 class Judgement:
     def __init__(
-        self, criteria: str, score: float, reasoning: str = "", evidence: List[str] = None
+        self, criteria: str, score: float, reasoning: str = "", evidence: list[str] = None
     ):
         self.criteria = criteria
         self.score = score
@@ -12,5 +11,5 @@ class Judgement:
 class BaseEval:
-    async def run(self, data: any) -> List[Judgement]:
+    async def run(self, data: any) -> list[Judgement]:
         pass

synth_ai/experimental/synth_oss.py CHANGED Viewed

@@ -1,5 +1,5 @@
-"""
+# ruff: noqa
+'''
 Synth OSS Integration Module
 This module provides integration with Synth's open-source inference and training APIs
@@ -336,7 +336,7 @@ Implementation sketch (backend == "synth")
 The method is a *no-op* for the default (OpenAI) backend so existing code keeps
 working.
-"""
+'''
 """
@@ -443,4 +443,4 @@ async def warmup(
 So: **the existing endpoint does not yet support GPU selection; we need to add
 the small change above on the `learning_v2` side and then LM.warmup can request
 specific GPUs.**
-"""
+"""

synth_ai/learning/gateway.py CHANGED Viewed

@@ -1,4 +1,2 @@
 class OfflineGateway:
-    pass
+    pass

synth_ai/learning/prompts/banking77_injection_eval.py CHANGED Viewed

@@ -18,16 +18,15 @@ from __future__ import annotations
 import asyncio
 import os
 import random
-from typing import List, Dict, Any, Tuple
+from typing import Any
-from dotenv import load_dotenv
 from datasets import load_dataset
+from dotenv import load_dotenv
 from synth_ai.lm.core.main_v3 import LM, build_messages
 from synth_ai.lm.overrides import LMOverridesContext
-async def classify_one(lm: LM, text: str, label_names: List[str]) -> str:
+async def classify_one(lm: LM, text: str, label_names: list[str]) -> str:
     labels_joined = ", ".join(label_names)
     system_message = (
         "You are an intent classifier for the Banking77 dataset. "
@@ -41,7 +40,7 @@ async def classify_one(lm: LM, text: str, label_names: List[str]) -> str:
     return (resp.raw_response or "").strip()
-def choose_label(pred: str, label_names: List[str]) -> str:
+def choose_label(pred: str, label_names: list[str]) -> str:
     norm_pred = pred.strip().lower()
     label_lookup = {ln.lower(): ln for ln in label_names}
     mapped = label_lookup.get(norm_pred)
@@ -56,12 +55,18 @@ def choose_label(pred: str, label_names: List[str]) -> str:
     return max(label_names, key=score)
-async def eval_context(lm: LM, items: List[Tuple[str, str]], label_names: List[str], ctx_name: str, specs: List[Dict[str, Any]]) -> Tuple[str, int, int]:
+async def eval_context(
+    lm: LM,
+    items: list[tuple[str, str]],
+    label_names: list[str],
+    ctx_name: str,
+    specs: list[dict[str, Any]],
+) -> tuple[str, int, int]:
     correct = 0
     with LMOverridesContext(specs):
         tasks = [classify_one(lm, text, label_names) for text, _ in items]
         results = await asyncio.gather(*tasks, return_exceptions=True)
-    for (text, gold), pred in zip(items, results):
+    for (text, gold), pred in zip(items, results, strict=False):
         if isinstance(pred, Exception):
             # Treat exceptions as incorrect
             continue
@@ -81,7 +86,7 @@ async def main() -> None:
     print("Loading Banking77 dataset (split='test')...")
     ds = load_dataset("banking77", split="test")
-    label_names: List[str] = ds.features["label"].names  # type: ignore
+    label_names: list[str] = ds.features["label"].names  # type: ignore
     idxs = random.sample(range(len(ds)), k=min(n, len(ds)))
     items = [
@@ -90,7 +95,7 @@ async def main() -> None:
     ]
     # Define a few override contexts to compare
-    contexts: List[Dict[str, Any]] = [
+    contexts: list[dict[str, Any]] = [
         {
             "name": "baseline (no overrides)",
             "overrides": [],
@@ -145,7 +150,7 @@ async def main() -> None:
     print(f"\nEvaluating {len(contexts)} contexts on {len(items)} Banking77 samples (async)...")
     # Evaluate each context sequentially but batched (each context classifies in parallel)
-    results: List[Tuple[str, int, int]] = []
+    results: list[tuple[str, int, int]] = []
     for ctx in contexts:
         name = ctx["name"]
         specs = ctx["overrides"]

synth_ai/learning/prompts/hello_world_in_context_injection_ex.py CHANGED Viewed

@@ -27,18 +27,17 @@ from __future__ import annotations
 import asyncio
 import os
 import random
-from typing import Any, Dict, List, Optional
 from datasets import load_dataset
 # Use the v3 LM class present in this repo
 from synth_ai.lm.core.main_v3 import LM, build_messages
-from synth_ai.tracing_v3.session_tracer import SessionTracer
-from synth_ai.tracing_v3.abstractions import LMCAISEvent
 # Use Overrides context to demonstrate matching by content
 from synth_ai.lm.overrides import LMOverridesContext
+from synth_ai.tracing_v3.abstractions import LMCAISEvent
+from synth_ai.tracing_v3.session_tracer import SessionTracer
 INJECTION_RULES = [
     {"find": "accnt", "replace": "account"},
     {"find": "atm", "replace": "ATM"},
@@ -46,7 +45,7 @@ INJECTION_RULES = [
 ]
-async def classify_sample(lm: LM, text: str, label_names: List[str]) -> str:
+async def classify_sample(lm: LM, text: str, label_names: list[str]) -> str:
     """Classify one Banking77 utterance and return the predicted label name."""
     labels_joined = ", ".join(label_names)
     system_message = (
@@ -77,7 +76,7 @@ async def main() -> None:
     # Columns: {"text": str, "label": int}; label names at ds.features["label"].names
     print("Loading Banking77 dataset (split='test')...")
     ds = load_dataset("banking77", split="test")
-    label_names: List[str] = ds.features["label"].names  # type: ignore
+    label_names: list[str] = ds.features["label"].names  # type: ignore
     # Sample a few items for a quick demo
     n = int(os.getenv("N_SAMPLES", "8"))
@@ -116,7 +115,9 @@ async def main() -> None:
             is_correct = pred_label == gold_label
             correct += int(is_correct)
-            print(f"[{i}] text={text!r}\n    gold={gold_label}\n    pred={pred} -> mapped={pred_label} {'✅' if is_correct else '❌'}")
+            print(
+                f"[{i}] text={text!r}\n    gold={gold_label}\n    pred={pred} -> mapped={pred_label} {'✅' if is_correct else '❌'}"
+            )
     if idxs:
         acc = correct / len(idxs)
@@ -137,7 +138,11 @@ async def main() -> None:
     with LMOverridesContext([{"match": {"contains": "atm"}, "injection_rules": INJECTION_RULES}]):
         _ = await classify_sample(lm_traced, test_text, label_names)
     # inspect trace
-    events = [e for e in (tracer.current_session.event_history if tracer.current_session else []) if isinstance(e, LMCAISEvent)]
+    events = [
+        e
+        for e in (tracer.current_session.event_history if tracer.current_session else [])
+        if isinstance(e, LMCAISEvent)
+    ]
     assert events, "No LMCAISEvent recorded by SessionTracer"
     cr = events[-1].call_records[0]
     traced_user = ""
@@ -145,7 +150,7 @@ async def main() -> None:
         if m.role == "user":
             for part in m.parts:
                 if getattr(part, "type", None) == "text":
-                    traced_user += (part.text or "")
+                    traced_user += part.text or ""
     assert "ATM" in traced_user, f"Expected substitution in traced prompt; got: {traced_user!r}"
     print("LM path trace verified: substitution present in traced prompt.")
     await tracer.end_timestep()
@@ -155,7 +160,7 @@ async def main() -> None:
     try:
         import synth_ai.lm.provider_support.openai as _synth_openai_patch  # noqa: F401
         from openai import AsyncOpenAI
-        from datasets import load_dataset as _ld  # ensure datasets present
         base_url = os.getenv("OPENAI_BASE_URL", "https://api.groq.com/openai/v1")
         api_key = os.getenv("OPENAI_API_KEY") or os.getenv("GROQ_API_KEY") or ""
         client = AsyncOpenAI(base_url=base_url, api_key=api_key)
@@ -163,8 +168,12 @@ async def main() -> None:
             {"role": "system", "content": "Echo user label."},
             {"role": "user", "content": f"Please classify: {test_text}"},
         ]
-        with LMOverridesContext([{"match": {"contains": "atm"}, "injection_rules": INJECTION_RULES}]):
-            resp = await client.chat.completions.create(model=model, messages=messages, temperature=0)
+        with LMOverridesContext(
+            [{"match": {"contains": "atm"}, "injection_rules": INJECTION_RULES}]
+        ):
+            _ = await client.chat.completions.create(
+                model=model, messages=messages, temperature=0
+            )
         # Not all models echo input; instead, verify that our injected expectation matches
         expected_user = messages[1]["content"].replace("atm", "ATM")
         if messages[1]["content"] == expected_user:
@@ -176,13 +185,16 @@ async def main() -> None:
     # 3) Anthropic wrapper path (AsyncClient): ensure apply_injection is active
     try:
-        import synth_ai.lm.provider_support.anthropic as _synth_anthropic_patch  # noqa: F401
         import anthropic
+        import synth_ai.lm.provider_support.anthropic as _synth_anthropic_patch  # noqa: F401
         a_model = os.getenv("ANTHROPIC_MODEL", "claude-3-5-haiku-20241022")
         a_key = os.getenv("ANTHROPIC_API_KEY")
         if a_key:
             a_client = anthropic.AsyncClient(api_key=a_key)
-            with LMOverridesContext([{"match": {"contains": "atm"}, "injection_rules": INJECTION_RULES}]):
+            with LMOverridesContext(
+                [{"match": {"contains": "atm"}, "injection_rules": INJECTION_RULES}]
+            ):
                 _ = await a_client.messages.create(
                     model=a_model,
                     system="Echo user label.",

synth_ai/learning/prompts/mipro.py CHANGED Viewed

@@ -20,9 +20,9 @@ Notes
 from __future__ import annotations
 import random
+from collections.abc import Callable, Sequence
 from dataclasses import dataclass, replace
-from typing import Any, Callable, Dict, Iterable, List, Optional, Protocol, Sequence, Tuple
+from typing import Any, Protocol
 # ---------------------------
 # Program adapter and protocols
@@ -36,16 +36,16 @@ class PredictProgram(Protocol):
     by wrapping it with `ProgramAdapter` below.
     """
-    def deepcopy(self) -> "PredictProgram": ...
+    def deepcopy(self) -> PredictProgram: ...
-    def run(self, x: Any, *, model: Optional[Any] = None) -> Any: ...
+    def run(self, x: Any, *, model: Any | None = None) -> Any: ...
-    def with_instructions(self, instructions: Dict[str, str]) -> "PredictProgram": ...
+    def with_instructions(self, instructions: dict[str, str]) -> PredictProgram: ...
-    def with_demos(self, demos: List[Tuple[Any, Any]]) -> "PredictProgram": ...
+    def with_demos(self, demos: list[tuple[Any, Any]]) -> PredictProgram: ...
     @property
-    def predictors(self) -> List[str]: ...
+    def predictors(self) -> list[str]: ...
 @dataclass
@@ -59,28 +59,28 @@ class ProgramAdapter:
     - set_demos: Callable to update demos (global or per predictor)
     """
-    run_fn: Callable[[Any, Optional[Any]], Any]
-    state: Dict[str, Any]
-    _predictors: List[str]
-    set_instructions: Callable[[Dict[str, str], Dict[str, Any]], Dict[str, Any]]
-    set_demos: Callable[[List[Tuple[Any, Any]], Dict[str, Any]], Dict[str, Any]]
+    run_fn: Callable[[Any, Any | None], Any]
+    state: dict[str, Any]
+    _predictors: list[str]
+    set_instructions: Callable[[dict[str, str], dict[str, Any]], dict[str, Any]]
+    set_demos: Callable[[list[tuple[Any, Any]], dict[str, Any]], dict[str, Any]]
-    def deepcopy(self) -> "ProgramAdapter":
+    def deepcopy(self) -> ProgramAdapter:
         return replace(self, state={**self.state})
-    def run(self, x: Any, *, model: Optional[Any] = None) -> Any:
+    def run(self, x: Any, *, model: Any | None = None) -> Any:
         return self.run_fn(x, model)
-    def with_instructions(self, instructions: Dict[str, str]) -> "ProgramAdapter":
+    def with_instructions(self, instructions: dict[str, str]) -> ProgramAdapter:
         new_state = self.set_instructions(instructions, {**self.state})
         return replace(self, state=new_state)
-    def with_demos(self, demos: List[Tuple[Any, Any]]) -> "ProgramAdapter":
+    def with_demos(self, demos: list[tuple[Any, Any]]) -> ProgramAdapter:
         new_state = self.set_demos(demos, {**self.state})
         return replace(self, state=new_state)
     @property
-    def predictors(self) -> List[str]:
+    def predictors(self) -> list[str]:
         return list(self._predictors)
@@ -89,9 +89,11 @@ class ProgramAdapter:
 # ---------------------------
-def summarize_dataset(trainset: Sequence[Tuple[Any, Any]], max_items: int = 50) -> str:
+def summarize_dataset(trainset: Sequence[tuple[Any, Any]], max_items: int = 50) -> str:
     n = len(trainset)
-    ex = ", ".join(repr(trainset[i][0])[:40] for i in range(0, min(max_items, n), max(1, n // max_items or 1)))
+    ex = ", ".join(
+        repr(trainset[i][0])[:40] for i in range(0, min(max_items, n), max(1, n // max_items or 1))
+    )
     return f"Dataset size: {n}. Example inputs: {ex}"
@@ -109,7 +111,7 @@ def random_tip(rng: random.Random) -> str:
     return rng.choice(tips)
-def choose(items: Sequence[Any], rng: Optional[random.Random] = None) -> Any:
+def choose(items: Sequence[Any], rng: random.Random | None = None) -> Any:
     r = rng or random
     return r.choice(items)
@@ -122,10 +124,12 @@ def choose(items: Sequence[Any], rng: Optional[random.Random] = None) -> Any:
 @dataclass
 class EvalResult:
     score: float
-    subscores: List[float]
+    subscores: list[float]
-def evaluate_program(program: PredictProgram, dataset: Sequence[Tuple[Any, Any]], metric: Callable[[Any, Any], float]) -> EvalResult:
+def evaluate_program(
+    program: PredictProgram, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
+) -> EvalResult:
     subs = []
     for x, y in dataset:
         yhat = program.run(x)
@@ -140,8 +144,8 @@ def evaluate_program(program: PredictProgram, dataset: Sequence[Tuple[Any, Any]]
 def mipro_v2_compile(
     student: PredictProgram,
-    trainset: Sequence[Tuple[Any, Any]],
-    valset: Sequence[Tuple[Any, Any]],
+    trainset: Sequence[tuple[Any, Any]],
+    valset: Sequence[tuple[Any, Any]],
     metric: Callable[[Any, Any], float],
     *,
     prompt_model: Any,
@@ -159,7 +163,7 @@ def mipro_v2_compile(
     data_aware: bool = True,
     tip_aware: bool = True,
     fewshot_aware: bool = True,
-) -> Tuple[PredictProgram, List[Dict[str, Any]]]:
+) -> tuple[PredictProgram, list[dict[str, Any]]]:
     """MIPROv2-style optimizer.
     Arguments mirror the DSPy pseudocode but remain provider-agnostic. The
@@ -171,9 +175,9 @@ def mipro_v2_compile(
     program = student.deepcopy()
     # Step 1: bootstrap few-shot example candidates
-    demo_candidates: List[Dict[str, Any]] = []
+    demo_candidates: list[dict[str, Any]] = []
     for _ in range(num_candidates):
-        boot: List[Tuple[Any, Any]] = []
+        boot: list[tuple[Any, Any]] = []
         # collect bootstrapped, self-consistent demos
         while len(boot) < max_bootstrapped_demos:
             x, y = rng.choice(trainset)
@@ -184,9 +188,9 @@ def mipro_v2_compile(
         demo_candidates.append({"boot": boot, "labeled": labeled})
     # Step 2: propose instruction candidates per predictor
-    instr_candidates: Dict[str, List[str]] = {}
-    for pred in (program.predictors or ["predictor"]):
-        ctx: Dict[str, Any] = {}
+    instr_candidates: dict[str, list[str]] = {}
+    for pred in program.predictors or ["predictor"]:
+        ctx: dict[str, Any] = {}
         if data_aware:
             ctx["dataset_summary"] = summarize_dataset(trainset)
         if program_aware:
@@ -199,12 +203,12 @@ def mipro_v2_compile(
         instr_candidates[pred] = list(cand)
     # Step 3: Bayesian-optimization-like search (random proposer placeholder)
-    history: List[Tuple[Dict[str, Any], float]] = []
-    records: List[Dict[str, Any]] = []
+    history: list[tuple[dict[str, Any], float]] = []
+    records: list[dict[str, Any]] = []
     best_score = -1.0
-    best_cfg: Optional[Dict[str, Any]] = None
+    best_cfg: dict[str, Any] | None = None
-    def propose(history_: List[Tuple[Dict[str, Any], float]]) -> Dict[str, Any]:
+    def propose(history_: list[tuple[dict[str, Any], float]]) -> dict[str, Any]:
         # Placeholder: randomly sample from the cartesian product
         instructions = {pred: choose(instr_candidates[pred], rng) for pred in instr_candidates}
         demos = choose(demo_candidates, rng) if demo_candidates else None
@@ -227,15 +231,17 @@ def mipro_v2_compile(
         batch_res = evaluate_program(program_t, batch, metric)
         s_t = batch_res.score
         history.append((theta, s_t))
-        records.append({
-            "trial": t,
-            "evaluation": "batch" if minibatch else "full",
-            "score": s_t,
-            "intervention": {
-                "instructions": theta.get("instructions"),
-                "demo_set": theta.get("demo_set"),
-            },
-        })
+        records.append(
+            {
+                "trial": t,
+                "evaluation": "batch" if minibatch else "full",
+                "score": s_t,
+                "intervention": {
+                    "instructions": theta.get("instructions"),
+                    "demo_set": theta.get("demo_set"),
+                },
+            }
+        )
         if (not minibatch) or (t % max(1, minibatch_full_eval_steps) == 0):
             full_res = evaluate_program(program_t, valset, metric)
@@ -243,15 +249,17 @@ def mipro_v2_compile(
             if s_full > best_score:
                 best_score = s_full
                 best_cfg = theta
-            records.append({
-                "trial": t,
-                "evaluation": "full",
-                "score": s_full,
-                "intervention": {
-                    "instructions": theta.get("instructions"),
-                    "demo_set": theta.get("demo_set"),
-                },
-            })
+            records.append(
+                {
+                    "trial": t,
+                    "evaluation": "full",
+                    "score": s_full,
+                    "intervention": {
+                        "instructions": theta.get("instructions"),
+                        "demo_set": theta.get("demo_set"),
+                    },
+                }
+            )
     if best_cfg is None:
         return program, records
@@ -275,6 +283,7 @@ __all__ = [
 class ExampleTwoStepDag:
     pass
 """
 A -> B
 """

synth-ai 0.2.4.dev6__py3-none-any.whl → 0.2.4.dev7__py3-none-any.whl

synth-ai 0.2.4.dev6__py3-none-any.whl → 0.2.4.dev7__py3-none-any.whl