synth-ai 0.2.4.dev6__py3-none-any.whl → 0.2.4.dev7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +18 -9
- synth_ai/cli/__init__.py +10 -5
- synth_ai/cli/balance.py +22 -17
- synth_ai/cli/calc.py +2 -3
- synth_ai/cli/demo.py +3 -5
- synth_ai/cli/legacy_root_backup.py +58 -32
- synth_ai/cli/man.py +22 -19
- synth_ai/cli/recent.py +9 -8
- synth_ai/cli/root.py +58 -13
- synth_ai/cli/status.py +13 -6
- synth_ai/cli/traces.py +45 -21
- synth_ai/cli/watch.py +40 -37
- synth_ai/config/base_url.py +1 -3
- synth_ai/core/experiment.py +1 -2
- synth_ai/environments/__init__.py +2 -6
- synth_ai/environments/environment/artifacts/base.py +3 -1
- synth_ai/environments/environment/db/sqlite.py +1 -1
- synth_ai/environments/environment/registry.py +19 -20
- synth_ai/environments/environment/resources/sqlite.py +2 -3
- synth_ai/environments/environment/rewards/core.py +3 -2
- synth_ai/environments/environment/tools/__init__.py +6 -4
- synth_ai/environments/examples/crafter_classic/__init__.py +1 -1
- synth_ai/environments/examples/crafter_classic/engine.py +13 -13
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +1 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +2 -1
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +2 -1
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +3 -2
- synth_ai/environments/examples/crafter_classic/environment.py +16 -15
- synth_ai/environments/examples/crafter_classic/taskset.py +2 -2
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +2 -3
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +2 -1
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +2 -2
- synth_ai/environments/examples/crafter_custom/crafter/config.py +2 -2
- synth_ai/environments/examples/crafter_custom/crafter/env.py +1 -5
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +1 -2
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +1 -2
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +5 -5
- synth_ai/environments/examples/crafter_custom/environment.py +13 -13
- synth_ai/environments/examples/crafter_custom/run_dataset.py +5 -5
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +2 -2
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +5 -4
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +2 -1
- synth_ai/environments/examples/enron/engine.py +18 -14
- synth_ai/environments/examples/enron/environment.py +12 -11
- synth_ai/environments/examples/enron/taskset.py +7 -7
- synth_ai/environments/examples/minigrid/__init__.py +6 -6
- synth_ai/environments/examples/minigrid/engine.py +6 -6
- synth_ai/environments/examples/minigrid/environment.py +6 -6
- synth_ai/environments/examples/minigrid/puzzle_loader.py +3 -2
- synth_ai/environments/examples/minigrid/taskset.py +13 -13
- synth_ai/environments/examples/nethack/achievements.py +1 -1
- synth_ai/environments/examples/nethack/engine.py +8 -7
- synth_ai/environments/examples/nethack/environment.py +10 -9
- synth_ai/environments/examples/nethack/helpers/__init__.py +8 -9
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +1 -1
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +2 -1
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +1 -1
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +3 -4
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +6 -5
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +5 -5
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +7 -6
- synth_ai/environments/examples/nethack/taskset.py +5 -5
- synth_ai/environments/examples/red/engine.py +9 -8
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +7 -7
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +3 -2
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +2 -1
- synth_ai/environments/examples/red/environment.py +18 -15
- synth_ai/environments/examples/red/taskset.py +5 -3
- synth_ai/environments/examples/sokoban/engine.py +16 -13
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +3 -2
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +2 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +1 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +7 -5
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +1 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +2 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +5 -4
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +3 -2
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +2 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +5 -4
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +1 -1
- synth_ai/environments/examples/sokoban/environment.py +15 -14
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +5 -3
- synth_ai/environments/examples/sokoban/puzzle_loader.py +3 -2
- synth_ai/environments/examples/sokoban/taskset.py +13 -10
- synth_ai/environments/examples/tictactoe/engine.py +6 -6
- synth_ai/environments/examples/tictactoe/environment.py +8 -7
- synth_ai/environments/examples/tictactoe/taskset.py +6 -5
- synth_ai/environments/examples/verilog/engine.py +4 -3
- synth_ai/environments/examples/verilog/environment.py +11 -10
- synth_ai/environments/examples/verilog/taskset.py +14 -12
- synth_ai/environments/examples/wordle/__init__.py +5 -5
- synth_ai/environments/examples/wordle/engine.py +32 -25
- synth_ai/environments/examples/wordle/environment.py +21 -16
- synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +6 -6
- synth_ai/environments/examples/wordle/taskset.py +20 -12
- synth_ai/environments/reproducibility/core.py +1 -1
- synth_ai/environments/reproducibility/tree.py +21 -21
- synth_ai/environments/service/app.py +3 -2
- synth_ai/environments/service/core_routes.py +104 -110
- synth_ai/environments/service/external_registry.py +1 -2
- synth_ai/environments/service/registry.py +1 -1
- synth_ai/environments/stateful/core.py +1 -2
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/api.py +4 -4
- synth_ai/environments/tasks/core.py +14 -12
- synth_ai/environments/tasks/filters.py +6 -4
- synth_ai/environments/tasks/utils.py +13 -11
- synth_ai/evals/base.py +2 -3
- synth_ai/experimental/synth_oss.py +4 -4
- synth_ai/learning/gateway.py +1 -3
- synth_ai/learning/prompts/banking77_injection_eval.py +15 -10
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +26 -14
- synth_ai/learning/prompts/mipro.py +61 -52
- synth_ai/learning/prompts/random_search.py +42 -43
- synth_ai/learning/prompts/run_mipro_banking77.py +32 -20
- synth_ai/learning/prompts/run_random_search_banking77.py +71 -52
- synth_ai/lm/__init__.py +5 -5
- synth_ai/lm/caching/ephemeral.py +9 -9
- synth_ai/lm/caching/handler.py +20 -20
- synth_ai/lm/caching/persistent.py +10 -10
- synth_ai/lm/config.py +3 -3
- synth_ai/lm/constants.py +7 -7
- synth_ai/lm/core/all.py +17 -3
- synth_ai/lm/core/exceptions.py +0 -2
- synth_ai/lm/core/main.py +26 -41
- synth_ai/lm/core/main_v3.py +20 -10
- synth_ai/lm/core/vendor_clients.py +18 -17
- synth_ai/lm/injection.py +7 -8
- synth_ai/lm/overrides.py +21 -19
- synth_ai/lm/provider_support/__init__.py +1 -1
- synth_ai/lm/provider_support/anthropic.py +15 -15
- synth_ai/lm/provider_support/openai.py +23 -21
- synth_ai/lm/structured_outputs/handler.py +34 -32
- synth_ai/lm/structured_outputs/inject.py +24 -27
- synth_ai/lm/structured_outputs/rehabilitate.py +19 -15
- synth_ai/lm/tools/base.py +17 -16
- synth_ai/lm/unified_interface.py +17 -18
- synth_ai/lm/vendors/base.py +20 -18
- synth_ai/lm/vendors/core/anthropic_api.py +36 -27
- synth_ai/lm/vendors/core/gemini_api.py +31 -36
- synth_ai/lm/vendors/core/mistral_api.py +19 -19
- synth_ai/lm/vendors/core/openai_api.py +11 -10
- synth_ai/lm/vendors/openai_standard.py +113 -87
- synth_ai/lm/vendors/openai_standard_responses.py +74 -61
- synth_ai/lm/vendors/retries.py +9 -1
- synth_ai/lm/vendors/supported/custom_endpoint.py +26 -26
- synth_ai/lm/vendors/supported/deepseek.py +10 -10
- synth_ai/lm/vendors/supported/grok.py +8 -8
- synth_ai/lm/vendors/supported/ollama.py +2 -1
- synth_ai/lm/vendors/supported/openrouter.py +11 -9
- synth_ai/lm/vendors/synth_client.py +69 -63
- synth_ai/lm/warmup.py +8 -7
- synth_ai/tracing/__init__.py +22 -10
- synth_ai/tracing_v1/__init__.py +22 -20
- synth_ai/tracing_v3/__init__.py +7 -7
- synth_ai/tracing_v3/abstractions.py +56 -52
- synth_ai/tracing_v3/config.py +4 -2
- synth_ai/tracing_v3/db_config.py +6 -8
- synth_ai/tracing_v3/decorators.py +29 -30
- synth_ai/tracing_v3/examples/basic_usage.py +12 -12
- synth_ai/tracing_v3/hooks.py +21 -21
- synth_ai/tracing_v3/llm_call_record_helpers.py +85 -98
- synth_ai/tracing_v3/lm_call_record_abstractions.py +2 -4
- synth_ai/tracing_v3/migration_helper.py +3 -5
- synth_ai/tracing_v3/replica_sync.py +30 -32
- synth_ai/tracing_v3/session_tracer.py +35 -29
- synth_ai/tracing_v3/storage/__init__.py +1 -1
- synth_ai/tracing_v3/storage/base.py +8 -7
- synth_ai/tracing_v3/storage/config.py +4 -4
- synth_ai/tracing_v3/storage/factory.py +4 -4
- synth_ai/tracing_v3/storage/utils.py +9 -9
- synth_ai/tracing_v3/turso/__init__.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +9 -9
- synth_ai/tracing_v3/turso/manager.py +60 -48
- synth_ai/tracing_v3/turso/models.py +24 -19
- synth_ai/tracing_v3/utils.py +5 -5
- synth_ai/tui/__main__.py +1 -1
- synth_ai/tui/cli/query_experiments.py +2 -3
- synth_ai/tui/cli/query_experiments_v3.py +2 -3
- synth_ai/tui/dashboard.py +97 -86
- synth_ai/v0/tracing/abstractions.py +28 -28
- synth_ai/v0/tracing/base_client.py +9 -9
- synth_ai/v0/tracing/client_manager.py +7 -7
- synth_ai/v0/tracing/config.py +7 -7
- synth_ai/v0/tracing/context.py +6 -6
- synth_ai/v0/tracing/decorators.py +6 -5
- synth_ai/v0/tracing/events/manage.py +1 -1
- synth_ai/v0/tracing/events/store.py +5 -4
- synth_ai/v0/tracing/immediate_client.py +4 -5
- synth_ai/v0/tracing/local.py +3 -3
- synth_ai/v0/tracing/log_client_base.py +4 -5
- synth_ai/v0/tracing/retry_queue.py +5 -6
- synth_ai/v0/tracing/trackers.py +25 -25
- synth_ai/v0/tracing/upload.py +6 -0
- synth_ai/v0/tracing_v1/__init__.py +1 -1
- synth_ai/v0/tracing_v1/abstractions.py +28 -28
- synth_ai/v0/tracing_v1/base_client.py +9 -9
- synth_ai/v0/tracing_v1/client_manager.py +7 -7
- synth_ai/v0/tracing_v1/config.py +7 -7
- synth_ai/v0/tracing_v1/context.py +6 -6
- synth_ai/v0/tracing_v1/decorators.py +7 -6
- synth_ai/v0/tracing_v1/events/manage.py +1 -1
- synth_ai/v0/tracing_v1/events/store.py +5 -4
- synth_ai/v0/tracing_v1/immediate_client.py +4 -5
- synth_ai/v0/tracing_v1/local.py +3 -3
- synth_ai/v0/tracing_v1/log_client_base.py +4 -5
- synth_ai/v0/tracing_v1/retry_queue.py +5 -6
- synth_ai/v0/tracing_v1/trackers.py +25 -25
- synth_ai/v0/tracing_v1/upload.py +25 -24
- synth_ai/zyk/__init__.py +1 -0
- {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/METADATA +1 -11
- synth_ai-0.2.4.dev7.dist-info/RECORD +299 -0
- synth_ai-0.2.4.dev6.dist-info/RECORD +0 -299
- {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,8 @@
|
|
1
|
-
from
|
1
|
+
from collections.abc import Collection
|
2
2
|
from dataclasses import dataclass
|
3
|
-
from
|
3
|
+
from typing import Any
|
4
|
+
|
5
|
+
from synth_ai.environments.tasks.core import TaskInstance, TaskInstanceMetadataFilter
|
4
6
|
|
5
7
|
|
6
8
|
@dataclass
|
@@ -18,8 +20,8 @@ class ValueFilter(TaskInstanceMetadataFilter):
|
|
18
20
|
@dataclass
|
19
21
|
class RangeFilter(TaskInstanceMetadataFilter):
|
20
22
|
key: str
|
21
|
-
min_val:
|
22
|
-
max_val:
|
23
|
+
min_val: float | None = None
|
24
|
+
max_val: float | None = None
|
23
25
|
|
24
26
|
def __call__(self, instance: TaskInstance) -> bool:
|
25
27
|
instance_value = getattr(instance.metadata, self.key, None)
|
@@ -2,17 +2,19 @@
|
|
2
2
|
Utility functions and generic filters for taskset creation.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from
|
5
|
+
from collections.abc import Collection
|
6
|
+
from typing import Any
|
6
7
|
from uuid import UUID, uuid4
|
8
|
+
|
7
9
|
from synth_ai.environments.tasks.core import (
|
8
|
-
TaskInstanceMetadataFilter,
|
9
|
-
TaskInstanceSet,
|
10
10
|
SplitInfo,
|
11
11
|
TaskInstance,
|
12
|
+
TaskInstanceMetadataFilter,
|
13
|
+
TaskInstanceSet,
|
12
14
|
)
|
13
15
|
|
14
16
|
|
15
|
-
def parse_or_new_uuid(raw_id:
|
17
|
+
def parse_or_new_uuid(raw_id: str | None) -> UUID:
|
16
18
|
"""
|
17
19
|
Parse a raw ID string into a UUID, or generate a new one if invalid or missing.
|
18
20
|
"""
|
@@ -43,8 +45,8 @@ class RangeFilter(TaskInstanceMetadataFilter):
|
|
43
45
|
def __init__(
|
44
46
|
self,
|
45
47
|
key: str,
|
46
|
-
min_value:
|
47
|
-
max_value:
|
48
|
+
min_value: float | None = None,
|
49
|
+
max_value: float | None = None,
|
48
50
|
):
|
49
51
|
self.key = key
|
50
52
|
self.min_value = min_value
|
@@ -62,15 +64,15 @@ class RangeFilter(TaskInstanceMetadataFilter):
|
|
62
64
|
def make_taskset(
|
63
65
|
name: str,
|
64
66
|
description: str,
|
65
|
-
instances:
|
66
|
-
val_filter:
|
67
|
-
test_filter:
|
67
|
+
instances: list[TaskInstance],
|
68
|
+
val_filter: TaskInstanceMetadataFilter | None = None,
|
69
|
+
test_filter: TaskInstanceMetadataFilter | None = None,
|
68
70
|
) -> TaskInstanceSet:
|
69
71
|
"""
|
70
72
|
Assemble a TaskInstanceSet by applying optional validation and test filters.
|
71
73
|
"""
|
72
|
-
val_ids:
|
73
|
-
test_ids:
|
74
|
+
val_ids: set[Any] = set()
|
75
|
+
test_ids: set[Any] = set()
|
74
76
|
if val_filter:
|
75
77
|
val_ids = {inst.id for inst in instances if val_filter(inst)}
|
76
78
|
if test_filter:
|
synth_ai/evals/base.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1
|
-
from typing import List
|
2
1
|
|
3
2
|
|
4
3
|
class Judgement:
|
5
4
|
def __init__(
|
6
|
-
self, criteria: str, score: float, reasoning: str = "", evidence:
|
5
|
+
self, criteria: str, score: float, reasoning: str = "", evidence: list[str] = None
|
7
6
|
):
|
8
7
|
self.criteria = criteria
|
9
8
|
self.score = score
|
@@ -12,5 +11,5 @@ class Judgement:
|
|
12
11
|
|
13
12
|
|
14
13
|
class BaseEval:
|
15
|
-
async def run(self, data: any) ->
|
14
|
+
async def run(self, data: any) -> list[Judgement]:
|
16
15
|
pass
|
@@ -1,5 +1,5 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# ruff: noqa
|
2
|
+
'''
|
3
3
|
Synth OSS Integration Module
|
4
4
|
|
5
5
|
This module provides integration with Synth's open-source inference and training APIs
|
@@ -336,7 +336,7 @@ Implementation sketch (backend == "synth")
|
|
336
336
|
The method is a *no-op* for the default (OpenAI) backend so existing code keeps
|
337
337
|
working.
|
338
338
|
|
339
|
-
|
339
|
+
'''
|
340
340
|
|
341
341
|
|
342
342
|
"""
|
@@ -443,4 +443,4 @@ async def warmup(
|
|
443
443
|
So: **the existing endpoint does not yet support GPU selection; we need to add
|
444
444
|
the small change above on the `learning_v2` side and then LM.warmup can request
|
445
445
|
specific GPUs.**
|
446
|
-
"""
|
446
|
+
"""
|
synth_ai/learning/gateway.py
CHANGED
@@ -18,16 +18,15 @@ from __future__ import annotations
|
|
18
18
|
import asyncio
|
19
19
|
import os
|
20
20
|
import random
|
21
|
-
from typing import
|
21
|
+
from typing import Any
|
22
22
|
|
23
|
-
from dotenv import load_dotenv
|
24
23
|
from datasets import load_dataset
|
25
|
-
|
24
|
+
from dotenv import load_dotenv
|
26
25
|
from synth_ai.lm.core.main_v3 import LM, build_messages
|
27
26
|
from synth_ai.lm.overrides import LMOverridesContext
|
28
27
|
|
29
28
|
|
30
|
-
async def classify_one(lm: LM, text: str, label_names:
|
29
|
+
async def classify_one(lm: LM, text: str, label_names: list[str]) -> str:
|
31
30
|
labels_joined = ", ".join(label_names)
|
32
31
|
system_message = (
|
33
32
|
"You are an intent classifier for the Banking77 dataset. "
|
@@ -41,7 +40,7 @@ async def classify_one(lm: LM, text: str, label_names: List[str]) -> str:
|
|
41
40
|
return (resp.raw_response or "").strip()
|
42
41
|
|
43
42
|
|
44
|
-
def choose_label(pred: str, label_names:
|
43
|
+
def choose_label(pred: str, label_names: list[str]) -> str:
|
45
44
|
norm_pred = pred.strip().lower()
|
46
45
|
label_lookup = {ln.lower(): ln for ln in label_names}
|
47
46
|
mapped = label_lookup.get(norm_pred)
|
@@ -56,12 +55,18 @@ def choose_label(pred: str, label_names: List[str]) -> str:
|
|
56
55
|
return max(label_names, key=score)
|
57
56
|
|
58
57
|
|
59
|
-
async def eval_context(
|
58
|
+
async def eval_context(
|
59
|
+
lm: LM,
|
60
|
+
items: list[tuple[str, str]],
|
61
|
+
label_names: list[str],
|
62
|
+
ctx_name: str,
|
63
|
+
specs: list[dict[str, Any]],
|
64
|
+
) -> tuple[str, int, int]:
|
60
65
|
correct = 0
|
61
66
|
with LMOverridesContext(specs):
|
62
67
|
tasks = [classify_one(lm, text, label_names) for text, _ in items]
|
63
68
|
results = await asyncio.gather(*tasks, return_exceptions=True)
|
64
|
-
for (text, gold), pred in zip(items, results):
|
69
|
+
for (text, gold), pred in zip(items, results, strict=False):
|
65
70
|
if isinstance(pred, Exception):
|
66
71
|
# Treat exceptions as incorrect
|
67
72
|
continue
|
@@ -81,7 +86,7 @@ async def main() -> None:
|
|
81
86
|
|
82
87
|
print("Loading Banking77 dataset (split='test')...")
|
83
88
|
ds = load_dataset("banking77", split="test")
|
84
|
-
label_names:
|
89
|
+
label_names: list[str] = ds.features["label"].names # type: ignore
|
85
90
|
|
86
91
|
idxs = random.sample(range(len(ds)), k=min(n, len(ds)))
|
87
92
|
items = [
|
@@ -90,7 +95,7 @@ async def main() -> None:
|
|
90
95
|
]
|
91
96
|
|
92
97
|
# Define a few override contexts to compare
|
93
|
-
contexts:
|
98
|
+
contexts: list[dict[str, Any]] = [
|
94
99
|
{
|
95
100
|
"name": "baseline (no overrides)",
|
96
101
|
"overrides": [],
|
@@ -145,7 +150,7 @@ async def main() -> None:
|
|
145
150
|
print(f"\nEvaluating {len(contexts)} contexts on {len(items)} Banking77 samples (async)...")
|
146
151
|
|
147
152
|
# Evaluate each context sequentially but batched (each context classifies in parallel)
|
148
|
-
results:
|
153
|
+
results: list[tuple[str, int, int]] = []
|
149
154
|
for ctx in contexts:
|
150
155
|
name = ctx["name"]
|
151
156
|
specs = ctx["overrides"]
|
@@ -27,18 +27,17 @@ from __future__ import annotations
|
|
27
27
|
import asyncio
|
28
28
|
import os
|
29
29
|
import random
|
30
|
-
from typing import Any, Dict, List, Optional
|
31
30
|
|
32
31
|
from datasets import load_dataset
|
33
32
|
|
34
33
|
# Use the v3 LM class present in this repo
|
35
34
|
from synth_ai.lm.core.main_v3 import LM, build_messages
|
36
|
-
from synth_ai.tracing_v3.session_tracer import SessionTracer
|
37
|
-
from synth_ai.tracing_v3.abstractions import LMCAISEvent
|
38
|
-
|
39
35
|
|
40
36
|
# Use Overrides context to demonstrate matching by content
|
41
37
|
from synth_ai.lm.overrides import LMOverridesContext
|
38
|
+
from synth_ai.tracing_v3.abstractions import LMCAISEvent
|
39
|
+
from synth_ai.tracing_v3.session_tracer import SessionTracer
|
40
|
+
|
42
41
|
INJECTION_RULES = [
|
43
42
|
{"find": "accnt", "replace": "account"},
|
44
43
|
{"find": "atm", "replace": "ATM"},
|
@@ -46,7 +45,7 @@ INJECTION_RULES = [
|
|
46
45
|
]
|
47
46
|
|
48
47
|
|
49
|
-
async def classify_sample(lm: LM, text: str, label_names:
|
48
|
+
async def classify_sample(lm: LM, text: str, label_names: list[str]) -> str:
|
50
49
|
"""Classify one Banking77 utterance and return the predicted label name."""
|
51
50
|
labels_joined = ", ".join(label_names)
|
52
51
|
system_message = (
|
@@ -77,7 +76,7 @@ async def main() -> None:
|
|
77
76
|
# Columns: {"text": str, "label": int}; label names at ds.features["label"].names
|
78
77
|
print("Loading Banking77 dataset (split='test')...")
|
79
78
|
ds = load_dataset("banking77", split="test")
|
80
|
-
label_names:
|
79
|
+
label_names: list[str] = ds.features["label"].names # type: ignore
|
81
80
|
|
82
81
|
# Sample a few items for a quick demo
|
83
82
|
n = int(os.getenv("N_SAMPLES", "8"))
|
@@ -116,7 +115,9 @@ async def main() -> None:
|
|
116
115
|
|
117
116
|
is_correct = pred_label == gold_label
|
118
117
|
correct += int(is_correct)
|
119
|
-
print(
|
118
|
+
print(
|
119
|
+
f"[{i}] text={text!r}\n gold={gold_label}\n pred={pred} -> mapped={pred_label} {'✅' if is_correct else '❌'}"
|
120
|
+
)
|
120
121
|
|
121
122
|
if idxs:
|
122
123
|
acc = correct / len(idxs)
|
@@ -137,7 +138,11 @@ async def main() -> None:
|
|
137
138
|
with LMOverridesContext([{"match": {"contains": "atm"}, "injection_rules": INJECTION_RULES}]):
|
138
139
|
_ = await classify_sample(lm_traced, test_text, label_names)
|
139
140
|
# inspect trace
|
140
|
-
events = [
|
141
|
+
events = [
|
142
|
+
e
|
143
|
+
for e in (tracer.current_session.event_history if tracer.current_session else [])
|
144
|
+
if isinstance(e, LMCAISEvent)
|
145
|
+
]
|
141
146
|
assert events, "No LMCAISEvent recorded by SessionTracer"
|
142
147
|
cr = events[-1].call_records[0]
|
143
148
|
traced_user = ""
|
@@ -145,7 +150,7 @@ async def main() -> None:
|
|
145
150
|
if m.role == "user":
|
146
151
|
for part in m.parts:
|
147
152
|
if getattr(part, "type", None) == "text":
|
148
|
-
traced_user +=
|
153
|
+
traced_user += part.text or ""
|
149
154
|
assert "ATM" in traced_user, f"Expected substitution in traced prompt; got: {traced_user!r}"
|
150
155
|
print("LM path trace verified: substitution present in traced prompt.")
|
151
156
|
await tracer.end_timestep()
|
@@ -155,7 +160,7 @@ async def main() -> None:
|
|
155
160
|
try:
|
156
161
|
import synth_ai.lm.provider_support.openai as _synth_openai_patch # noqa: F401
|
157
162
|
from openai import AsyncOpenAI
|
158
|
-
|
163
|
+
|
159
164
|
base_url = os.getenv("OPENAI_BASE_URL", "https://api.groq.com/openai/v1")
|
160
165
|
api_key = os.getenv("OPENAI_API_KEY") or os.getenv("GROQ_API_KEY") or ""
|
161
166
|
client = AsyncOpenAI(base_url=base_url, api_key=api_key)
|
@@ -163,8 +168,12 @@ async def main() -> None:
|
|
163
168
|
{"role": "system", "content": "Echo user label."},
|
164
169
|
{"role": "user", "content": f"Please classify: {test_text}"},
|
165
170
|
]
|
166
|
-
with LMOverridesContext(
|
167
|
-
|
171
|
+
with LMOverridesContext(
|
172
|
+
[{"match": {"contains": "atm"}, "injection_rules": INJECTION_RULES}]
|
173
|
+
):
|
174
|
+
_ = await client.chat.completions.create(
|
175
|
+
model=model, messages=messages, temperature=0
|
176
|
+
)
|
168
177
|
# Not all models echo input; instead, verify that our injected expectation matches
|
169
178
|
expected_user = messages[1]["content"].replace("atm", "ATM")
|
170
179
|
if messages[1]["content"] == expected_user:
|
@@ -176,13 +185,16 @@ async def main() -> None:
|
|
176
185
|
|
177
186
|
# 3) Anthropic wrapper path (AsyncClient): ensure apply_injection is active
|
178
187
|
try:
|
179
|
-
import synth_ai.lm.provider_support.anthropic as _synth_anthropic_patch # noqa: F401
|
180
188
|
import anthropic
|
189
|
+
import synth_ai.lm.provider_support.anthropic as _synth_anthropic_patch # noqa: F401
|
190
|
+
|
181
191
|
a_model = os.getenv("ANTHROPIC_MODEL", "claude-3-5-haiku-20241022")
|
182
192
|
a_key = os.getenv("ANTHROPIC_API_KEY")
|
183
193
|
if a_key:
|
184
194
|
a_client = anthropic.AsyncClient(api_key=a_key)
|
185
|
-
with LMOverridesContext(
|
195
|
+
with LMOverridesContext(
|
196
|
+
[{"match": {"contains": "atm"}, "injection_rules": INJECTION_RULES}]
|
197
|
+
):
|
186
198
|
_ = await a_client.messages.create(
|
187
199
|
model=a_model,
|
188
200
|
system="Echo user label.",
|
@@ -20,9 +20,9 @@ Notes
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
22
|
import random
|
23
|
+
from collections.abc import Callable, Sequence
|
23
24
|
from dataclasses import dataclass, replace
|
24
|
-
from typing import Any,
|
25
|
-
|
25
|
+
from typing import Any, Protocol
|
26
26
|
|
27
27
|
# ---------------------------
|
28
28
|
# Program adapter and protocols
|
@@ -36,16 +36,16 @@ class PredictProgram(Protocol):
|
|
36
36
|
by wrapping it with `ProgramAdapter` below.
|
37
37
|
"""
|
38
38
|
|
39
|
-
def deepcopy(self) ->
|
39
|
+
def deepcopy(self) -> PredictProgram: ...
|
40
40
|
|
41
|
-
def run(self, x: Any, *, model:
|
41
|
+
def run(self, x: Any, *, model: Any | None = None) -> Any: ...
|
42
42
|
|
43
|
-
def with_instructions(self, instructions:
|
43
|
+
def with_instructions(self, instructions: dict[str, str]) -> PredictProgram: ...
|
44
44
|
|
45
|
-
def with_demos(self, demos:
|
45
|
+
def with_demos(self, demos: list[tuple[Any, Any]]) -> PredictProgram: ...
|
46
46
|
|
47
47
|
@property
|
48
|
-
def predictors(self) ->
|
48
|
+
def predictors(self) -> list[str]: ...
|
49
49
|
|
50
50
|
|
51
51
|
@dataclass
|
@@ -59,28 +59,28 @@ class ProgramAdapter:
|
|
59
59
|
- set_demos: Callable to update demos (global or per predictor)
|
60
60
|
"""
|
61
61
|
|
62
|
-
run_fn: Callable[[Any,
|
63
|
-
state:
|
64
|
-
_predictors:
|
65
|
-
set_instructions: Callable[[
|
66
|
-
set_demos: Callable[[
|
62
|
+
run_fn: Callable[[Any, Any | None], Any]
|
63
|
+
state: dict[str, Any]
|
64
|
+
_predictors: list[str]
|
65
|
+
set_instructions: Callable[[dict[str, str], dict[str, Any]], dict[str, Any]]
|
66
|
+
set_demos: Callable[[list[tuple[Any, Any]], dict[str, Any]], dict[str, Any]]
|
67
67
|
|
68
|
-
def deepcopy(self) ->
|
68
|
+
def deepcopy(self) -> ProgramAdapter:
|
69
69
|
return replace(self, state={**self.state})
|
70
70
|
|
71
|
-
def run(self, x: Any, *, model:
|
71
|
+
def run(self, x: Any, *, model: Any | None = None) -> Any:
|
72
72
|
return self.run_fn(x, model)
|
73
73
|
|
74
|
-
def with_instructions(self, instructions:
|
74
|
+
def with_instructions(self, instructions: dict[str, str]) -> ProgramAdapter:
|
75
75
|
new_state = self.set_instructions(instructions, {**self.state})
|
76
76
|
return replace(self, state=new_state)
|
77
77
|
|
78
|
-
def with_demos(self, demos:
|
78
|
+
def with_demos(self, demos: list[tuple[Any, Any]]) -> ProgramAdapter:
|
79
79
|
new_state = self.set_demos(demos, {**self.state})
|
80
80
|
return replace(self, state=new_state)
|
81
81
|
|
82
82
|
@property
|
83
|
-
def predictors(self) ->
|
83
|
+
def predictors(self) -> list[str]:
|
84
84
|
return list(self._predictors)
|
85
85
|
|
86
86
|
|
@@ -89,9 +89,11 @@ class ProgramAdapter:
|
|
89
89
|
# ---------------------------
|
90
90
|
|
91
91
|
|
92
|
-
def summarize_dataset(trainset: Sequence[
|
92
|
+
def summarize_dataset(trainset: Sequence[tuple[Any, Any]], max_items: int = 50) -> str:
|
93
93
|
n = len(trainset)
|
94
|
-
ex = ", ".join(
|
94
|
+
ex = ", ".join(
|
95
|
+
repr(trainset[i][0])[:40] for i in range(0, min(max_items, n), max(1, n // max_items or 1))
|
96
|
+
)
|
95
97
|
return f"Dataset size: {n}. Example inputs: {ex}"
|
96
98
|
|
97
99
|
|
@@ -109,7 +111,7 @@ def random_tip(rng: random.Random) -> str:
|
|
109
111
|
return rng.choice(tips)
|
110
112
|
|
111
113
|
|
112
|
-
def choose(items: Sequence[Any], rng:
|
114
|
+
def choose(items: Sequence[Any], rng: random.Random | None = None) -> Any:
|
113
115
|
r = rng or random
|
114
116
|
return r.choice(items)
|
115
117
|
|
@@ -122,10 +124,12 @@ def choose(items: Sequence[Any], rng: Optional[random.Random] = None) -> Any:
|
|
122
124
|
@dataclass
|
123
125
|
class EvalResult:
|
124
126
|
score: float
|
125
|
-
subscores:
|
127
|
+
subscores: list[float]
|
126
128
|
|
127
129
|
|
128
|
-
def evaluate_program(
|
130
|
+
def evaluate_program(
|
131
|
+
program: PredictProgram, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
|
132
|
+
) -> EvalResult:
|
129
133
|
subs = []
|
130
134
|
for x, y in dataset:
|
131
135
|
yhat = program.run(x)
|
@@ -140,8 +144,8 @@ def evaluate_program(program: PredictProgram, dataset: Sequence[Tuple[Any, Any]]
|
|
140
144
|
|
141
145
|
def mipro_v2_compile(
|
142
146
|
student: PredictProgram,
|
143
|
-
trainset: Sequence[
|
144
|
-
valset: Sequence[
|
147
|
+
trainset: Sequence[tuple[Any, Any]],
|
148
|
+
valset: Sequence[tuple[Any, Any]],
|
145
149
|
metric: Callable[[Any, Any], float],
|
146
150
|
*,
|
147
151
|
prompt_model: Any,
|
@@ -159,7 +163,7 @@ def mipro_v2_compile(
|
|
159
163
|
data_aware: bool = True,
|
160
164
|
tip_aware: bool = True,
|
161
165
|
fewshot_aware: bool = True,
|
162
|
-
) ->
|
166
|
+
) -> tuple[PredictProgram, list[dict[str, Any]]]:
|
163
167
|
"""MIPROv2-style optimizer.
|
164
168
|
|
165
169
|
Arguments mirror the DSPy pseudocode but remain provider-agnostic. The
|
@@ -171,9 +175,9 @@ def mipro_v2_compile(
|
|
171
175
|
program = student.deepcopy()
|
172
176
|
|
173
177
|
# Step 1: bootstrap few-shot example candidates
|
174
|
-
demo_candidates:
|
178
|
+
demo_candidates: list[dict[str, Any]] = []
|
175
179
|
for _ in range(num_candidates):
|
176
|
-
boot:
|
180
|
+
boot: list[tuple[Any, Any]] = []
|
177
181
|
# collect bootstrapped, self-consistent demos
|
178
182
|
while len(boot) < max_bootstrapped_demos:
|
179
183
|
x, y = rng.choice(trainset)
|
@@ -184,9 +188,9 @@ def mipro_v2_compile(
|
|
184
188
|
demo_candidates.append({"boot": boot, "labeled": labeled})
|
185
189
|
|
186
190
|
# Step 2: propose instruction candidates per predictor
|
187
|
-
instr_candidates:
|
188
|
-
for pred in
|
189
|
-
ctx:
|
191
|
+
instr_candidates: dict[str, list[str]] = {}
|
192
|
+
for pred in program.predictors or ["predictor"]:
|
193
|
+
ctx: dict[str, Any] = {}
|
190
194
|
if data_aware:
|
191
195
|
ctx["dataset_summary"] = summarize_dataset(trainset)
|
192
196
|
if program_aware:
|
@@ -199,12 +203,12 @@ def mipro_v2_compile(
|
|
199
203
|
instr_candidates[pred] = list(cand)
|
200
204
|
|
201
205
|
# Step 3: Bayesian-optimization-like search (random proposer placeholder)
|
202
|
-
history:
|
203
|
-
records:
|
206
|
+
history: list[tuple[dict[str, Any], float]] = []
|
207
|
+
records: list[dict[str, Any]] = []
|
204
208
|
best_score = -1.0
|
205
|
-
best_cfg:
|
209
|
+
best_cfg: dict[str, Any] | None = None
|
206
210
|
|
207
|
-
def propose(history_:
|
211
|
+
def propose(history_: list[tuple[dict[str, Any], float]]) -> dict[str, Any]:
|
208
212
|
# Placeholder: randomly sample from the cartesian product
|
209
213
|
instructions = {pred: choose(instr_candidates[pred], rng) for pred in instr_candidates}
|
210
214
|
demos = choose(demo_candidates, rng) if demo_candidates else None
|
@@ -227,15 +231,17 @@ def mipro_v2_compile(
|
|
227
231
|
batch_res = evaluate_program(program_t, batch, metric)
|
228
232
|
s_t = batch_res.score
|
229
233
|
history.append((theta, s_t))
|
230
|
-
records.append(
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
"
|
236
|
-
|
237
|
-
|
238
|
-
|
234
|
+
records.append(
|
235
|
+
{
|
236
|
+
"trial": t,
|
237
|
+
"evaluation": "batch" if minibatch else "full",
|
238
|
+
"score": s_t,
|
239
|
+
"intervention": {
|
240
|
+
"instructions": theta.get("instructions"),
|
241
|
+
"demo_set": theta.get("demo_set"),
|
242
|
+
},
|
243
|
+
}
|
244
|
+
)
|
239
245
|
|
240
246
|
if (not minibatch) or (t % max(1, minibatch_full_eval_steps) == 0):
|
241
247
|
full_res = evaluate_program(program_t, valset, metric)
|
@@ -243,15 +249,17 @@ def mipro_v2_compile(
|
|
243
249
|
if s_full > best_score:
|
244
250
|
best_score = s_full
|
245
251
|
best_cfg = theta
|
246
|
-
records.append(
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
"
|
252
|
-
|
253
|
-
|
254
|
-
|
252
|
+
records.append(
|
253
|
+
{
|
254
|
+
"trial": t,
|
255
|
+
"evaluation": "full",
|
256
|
+
"score": s_full,
|
257
|
+
"intervention": {
|
258
|
+
"instructions": theta.get("instructions"),
|
259
|
+
"demo_set": theta.get("demo_set"),
|
260
|
+
},
|
261
|
+
}
|
262
|
+
)
|
255
263
|
|
256
264
|
if best_cfg is None:
|
257
265
|
return program, records
|
@@ -275,6 +283,7 @@ __all__ = [
|
|
275
283
|
class ExampleTwoStepDag:
|
276
284
|
pass
|
277
285
|
|
286
|
+
|
278
287
|
"""
|
279
288
|
A -> B
|
280
289
|
"""
|