synth-ai 0.2.4.dev6__py3-none-any.whl → 0.2.4.dev8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +18 -9
- synth_ai/cli/__init__.py +10 -5
- synth_ai/cli/balance.py +25 -32
- synth_ai/cli/calc.py +2 -3
- synth_ai/cli/demo.py +3 -5
- synth_ai/cli/legacy_root_backup.py +58 -32
- synth_ai/cli/man.py +22 -19
- synth_ai/cli/recent.py +9 -8
- synth_ai/cli/root.py +58 -13
- synth_ai/cli/status.py +13 -6
- synth_ai/cli/traces.py +45 -21
- synth_ai/cli/watch.py +40 -37
- synth_ai/config/base_url.py +47 -2
- synth_ai/core/experiment.py +1 -2
- synth_ai/environments/__init__.py +2 -6
- synth_ai/environments/environment/artifacts/base.py +3 -1
- synth_ai/environments/environment/db/sqlite.py +1 -1
- synth_ai/environments/environment/registry.py +19 -20
- synth_ai/environments/environment/resources/sqlite.py +2 -3
- synth_ai/environments/environment/rewards/core.py +3 -2
- synth_ai/environments/environment/tools/__init__.py +6 -4
- synth_ai/environments/examples/crafter_classic/__init__.py +1 -1
- synth_ai/environments/examples/crafter_classic/engine.py +13 -13
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +1 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +2 -1
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +2 -1
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +3 -2
- synth_ai/environments/examples/crafter_classic/environment.py +16 -15
- synth_ai/environments/examples/crafter_classic/taskset.py +2 -2
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +2 -3
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +2 -1
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +2 -2
- synth_ai/environments/examples/crafter_custom/crafter/config.py +2 -2
- synth_ai/environments/examples/crafter_custom/crafter/env.py +1 -5
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +1 -2
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +1 -2
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +5 -5
- synth_ai/environments/examples/crafter_custom/environment.py +13 -13
- synth_ai/environments/examples/crafter_custom/run_dataset.py +5 -5
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +2 -2
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +5 -4
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +2 -1
- synth_ai/environments/examples/enron/engine.py +18 -14
- synth_ai/environments/examples/enron/environment.py +12 -11
- synth_ai/environments/examples/enron/taskset.py +7 -7
- synth_ai/environments/examples/minigrid/__init__.py +6 -6
- synth_ai/environments/examples/minigrid/engine.py +6 -6
- synth_ai/environments/examples/minigrid/environment.py +6 -6
- synth_ai/environments/examples/minigrid/puzzle_loader.py +3 -2
- synth_ai/environments/examples/minigrid/taskset.py +13 -13
- synth_ai/environments/examples/nethack/achievements.py +1 -1
- synth_ai/environments/examples/nethack/engine.py +8 -7
- synth_ai/environments/examples/nethack/environment.py +10 -9
- synth_ai/environments/examples/nethack/helpers/__init__.py +8 -9
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +1 -1
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +2 -1
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +1 -1
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +3 -4
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +6 -5
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +5 -5
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +7 -6
- synth_ai/environments/examples/nethack/taskset.py +5 -5
- synth_ai/environments/examples/red/engine.py +9 -8
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +7 -7
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +3 -2
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +2 -1
- synth_ai/environments/examples/red/environment.py +18 -15
- synth_ai/environments/examples/red/taskset.py +5 -3
- synth_ai/environments/examples/sokoban/engine.py +16 -13
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +3 -2
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +2 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +1 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +7 -5
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +1 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +2 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +5 -4
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +3 -2
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +2 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +5 -4
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +1 -1
- synth_ai/environments/examples/sokoban/environment.py +15 -14
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +5 -3
- synth_ai/environments/examples/sokoban/puzzle_loader.py +3 -2
- synth_ai/environments/examples/sokoban/taskset.py +13 -10
- synth_ai/environments/examples/tictactoe/engine.py +6 -6
- synth_ai/environments/examples/tictactoe/environment.py +8 -7
- synth_ai/environments/examples/tictactoe/taskset.py +6 -5
- synth_ai/environments/examples/verilog/engine.py +4 -3
- synth_ai/environments/examples/verilog/environment.py +11 -10
- synth_ai/environments/examples/verilog/taskset.py +14 -12
- synth_ai/environments/examples/wordle/__init__.py +5 -5
- synth_ai/environments/examples/wordle/engine.py +32 -25
- synth_ai/environments/examples/wordle/environment.py +21 -16
- synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +6 -6
- synth_ai/environments/examples/wordle/taskset.py +20 -12
- synth_ai/environments/reproducibility/core.py +1 -1
- synth_ai/environments/reproducibility/tree.py +21 -21
- synth_ai/environments/service/app.py +3 -2
- synth_ai/environments/service/core_routes.py +104 -110
- synth_ai/environments/service/external_registry.py +1 -2
- synth_ai/environments/service/registry.py +1 -1
- synth_ai/environments/stateful/core.py +1 -2
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/api.py +4 -4
- synth_ai/environments/tasks/core.py +14 -12
- synth_ai/environments/tasks/filters.py +6 -4
- synth_ai/environments/tasks/utils.py +13 -11
- synth_ai/evals/base.py +2 -3
- synth_ai/experimental/synth_oss.py +4 -4
- synth_ai/http.py +102 -0
- synth_ai/inference/__init__.py +7 -0
- synth_ai/inference/client.py +20 -0
- synth_ai/jobs/client.py +246 -0
- synth_ai/learning/__init__.py +24 -0
- synth_ai/learning/client.py +149 -0
- synth_ai/learning/config.py +43 -0
- synth_ai/learning/constants.py +29 -0
- synth_ai/learning/ft_client.py +59 -0
- synth_ai/learning/gateway.py +1 -3
- synth_ai/learning/health.py +43 -0
- synth_ai/learning/jobs.py +205 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +15 -10
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +26 -14
- synth_ai/learning/prompts/mipro.py +61 -52
- synth_ai/learning/prompts/random_search.py +42 -43
- synth_ai/learning/prompts/run_mipro_banking77.py +32 -20
- synth_ai/learning/prompts/run_random_search_banking77.py +71 -52
- synth_ai/learning/rl_client.py +256 -0
- synth_ai/learning/sse.py +58 -0
- synth_ai/learning/validators.py +48 -0
- synth_ai/lm/__init__.py +5 -5
- synth_ai/lm/caching/ephemeral.py +9 -9
- synth_ai/lm/caching/handler.py +20 -20
- synth_ai/lm/caching/persistent.py +10 -10
- synth_ai/lm/config.py +3 -3
- synth_ai/lm/constants.py +7 -7
- synth_ai/lm/core/all.py +17 -3
- synth_ai/lm/core/exceptions.py +0 -2
- synth_ai/lm/core/main.py +26 -41
- synth_ai/lm/core/main_v3.py +33 -10
- synth_ai/lm/core/synth_models.py +48 -0
- synth_ai/lm/core/vendor_clients.py +26 -22
- synth_ai/lm/injection.py +7 -8
- synth_ai/lm/overrides.py +21 -19
- synth_ai/lm/provider_support/__init__.py +1 -1
- synth_ai/lm/provider_support/anthropic.py +15 -15
- synth_ai/lm/provider_support/openai.py +23 -21
- synth_ai/lm/structured_outputs/handler.py +34 -32
- synth_ai/lm/structured_outputs/inject.py +24 -27
- synth_ai/lm/structured_outputs/rehabilitate.py +19 -15
- synth_ai/lm/tools/base.py +17 -16
- synth_ai/lm/unified_interface.py +17 -18
- synth_ai/lm/vendors/base.py +20 -18
- synth_ai/lm/vendors/core/anthropic_api.py +36 -27
- synth_ai/lm/vendors/core/gemini_api.py +31 -36
- synth_ai/lm/vendors/core/mistral_api.py +19 -19
- synth_ai/lm/vendors/core/openai_api.py +42 -13
- synth_ai/lm/vendors/openai_standard.py +158 -101
- synth_ai/lm/vendors/openai_standard_responses.py +74 -61
- synth_ai/lm/vendors/retries.py +9 -1
- synth_ai/lm/vendors/supported/custom_endpoint.py +38 -28
- synth_ai/lm/vendors/supported/deepseek.py +10 -10
- synth_ai/lm/vendors/supported/grok.py +8 -8
- synth_ai/lm/vendors/supported/ollama.py +2 -1
- synth_ai/lm/vendors/supported/openrouter.py +11 -9
- synth_ai/lm/vendors/synth_client.py +425 -75
- synth_ai/lm/warmup.py +8 -7
- synth_ai/rl/__init__.py +30 -0
- synth_ai/rl/contracts.py +32 -0
- synth_ai/rl/env_keys.py +137 -0
- synth_ai/rl/secrets.py +19 -0
- synth_ai/scripts/verify_rewards.py +100 -0
- synth_ai/task/__init__.py +10 -0
- synth_ai/task/contracts.py +120 -0
- synth_ai/task/health.py +28 -0
- synth_ai/task/validators.py +12 -0
- synth_ai/tracing/__init__.py +22 -10
- synth_ai/tracing_v1/__init__.py +22 -20
- synth_ai/tracing_v3/__init__.py +7 -7
- synth_ai/tracing_v3/abstractions.py +56 -52
- synth_ai/tracing_v3/config.py +4 -2
- synth_ai/tracing_v3/db_config.py +6 -8
- synth_ai/tracing_v3/decorators.py +29 -30
- synth_ai/tracing_v3/examples/basic_usage.py +12 -12
- synth_ai/tracing_v3/hooks.py +24 -22
- synth_ai/tracing_v3/llm_call_record_helpers.py +85 -98
- synth_ai/tracing_v3/lm_call_record_abstractions.py +2 -4
- synth_ai/tracing_v3/migration_helper.py +3 -5
- synth_ai/tracing_v3/replica_sync.py +30 -32
- synth_ai/tracing_v3/session_tracer.py +158 -31
- synth_ai/tracing_v3/storage/__init__.py +1 -1
- synth_ai/tracing_v3/storage/base.py +8 -7
- synth_ai/tracing_v3/storage/config.py +4 -4
- synth_ai/tracing_v3/storage/factory.py +4 -4
- synth_ai/tracing_v3/storage/utils.py +9 -9
- synth_ai/tracing_v3/turso/__init__.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +9 -9
- synth_ai/tracing_v3/turso/manager.py +278 -48
- synth_ai/tracing_v3/turso/models.py +77 -19
- synth_ai/tracing_v3/utils.py +5 -5
- synth_ai/v0/tracing/abstractions.py +28 -28
- synth_ai/v0/tracing/base_client.py +9 -9
- synth_ai/v0/tracing/client_manager.py +7 -7
- synth_ai/v0/tracing/config.py +7 -7
- synth_ai/v0/tracing/context.py +6 -6
- synth_ai/v0/tracing/decorators.py +6 -5
- synth_ai/v0/tracing/events/manage.py +1 -1
- synth_ai/v0/tracing/events/store.py +5 -4
- synth_ai/v0/tracing/immediate_client.py +4 -5
- synth_ai/v0/tracing/local.py +3 -3
- synth_ai/v0/tracing/log_client_base.py +4 -5
- synth_ai/v0/tracing/retry_queue.py +5 -6
- synth_ai/v0/tracing/trackers.py +25 -25
- synth_ai/v0/tracing/upload.py +6 -0
- synth_ai/v0/tracing_v1/__init__.py +1 -1
- synth_ai/v0/tracing_v1/abstractions.py +28 -28
- synth_ai/v0/tracing_v1/base_client.py +9 -9
- synth_ai/v0/tracing_v1/client_manager.py +7 -7
- synth_ai/v0/tracing_v1/config.py +7 -7
- synth_ai/v0/tracing_v1/context.py +6 -6
- synth_ai/v0/tracing_v1/decorators.py +7 -6
- synth_ai/v0/tracing_v1/events/manage.py +1 -1
- synth_ai/v0/tracing_v1/events/store.py +5 -4
- synth_ai/v0/tracing_v1/immediate_client.py +4 -5
- synth_ai/v0/tracing_v1/local.py +3 -3
- synth_ai/v0/tracing_v1/log_client_base.py +4 -5
- synth_ai/v0/tracing_v1/retry_queue.py +5 -6
- synth_ai/v0/tracing_v1/trackers.py +25 -25
- synth_ai/v0/tracing_v1/upload.py +25 -24
- synth_ai/zyk/__init__.py +1 -0
- synth_ai-0.2.4.dev8.dist-info/METADATA +635 -0
- synth_ai-0.2.4.dev8.dist-info/RECORD +317 -0
- synth_ai/tui/__init__.py +0 -1
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -165
- synth_ai/tui/cli/query_experiments_v3.py +0 -165
- synth_ai/tui/dashboard.py +0 -329
- synth_ai-0.2.4.dev6.dist-info/METADATA +0 -203
- synth_ai-0.2.4.dev6.dist-info/RECORD +0 -299
- {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev8.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev8.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev8.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev8.dist-info}/top_level.txt +0 -0
@@ -20,9 +20,9 @@ Notes
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
22
|
import random
|
23
|
+
from collections.abc import Callable, Sequence
|
23
24
|
from dataclasses import dataclass, replace
|
24
|
-
from typing import Any,
|
25
|
-
|
25
|
+
from typing import Any, Protocol
|
26
26
|
|
27
27
|
# ---------------------------
|
28
28
|
# Program adapter and protocols
|
@@ -36,16 +36,16 @@ class PredictProgram(Protocol):
|
|
36
36
|
by wrapping it with `ProgramAdapter` below.
|
37
37
|
"""
|
38
38
|
|
39
|
-
def deepcopy(self) ->
|
39
|
+
def deepcopy(self) -> PredictProgram: ...
|
40
40
|
|
41
|
-
def run(self, x: Any, *, model:
|
41
|
+
def run(self, x: Any, *, model: Any | None = None) -> Any: ...
|
42
42
|
|
43
|
-
def with_instructions(self, instructions:
|
43
|
+
def with_instructions(self, instructions: dict[str, str]) -> PredictProgram: ...
|
44
44
|
|
45
|
-
def with_demos(self, demos:
|
45
|
+
def with_demos(self, demos: list[tuple[Any, Any]]) -> PredictProgram: ...
|
46
46
|
|
47
47
|
@property
|
48
|
-
def predictors(self) ->
|
48
|
+
def predictors(self) -> list[str]: ...
|
49
49
|
|
50
50
|
|
51
51
|
@dataclass
|
@@ -59,28 +59,28 @@ class ProgramAdapter:
|
|
59
59
|
- set_demos: Callable to update demos (global or per predictor)
|
60
60
|
"""
|
61
61
|
|
62
|
-
run_fn: Callable[[Any,
|
63
|
-
state:
|
64
|
-
_predictors:
|
65
|
-
set_instructions: Callable[[
|
66
|
-
set_demos: Callable[[
|
62
|
+
run_fn: Callable[[Any, Any | None], Any]
|
63
|
+
state: dict[str, Any]
|
64
|
+
_predictors: list[str]
|
65
|
+
set_instructions: Callable[[dict[str, str], dict[str, Any]], dict[str, Any]]
|
66
|
+
set_demos: Callable[[list[tuple[Any, Any]], dict[str, Any]], dict[str, Any]]
|
67
67
|
|
68
|
-
def deepcopy(self) ->
|
68
|
+
def deepcopy(self) -> ProgramAdapter:
|
69
69
|
return replace(self, state={**self.state})
|
70
70
|
|
71
|
-
def run(self, x: Any, *, model:
|
71
|
+
def run(self, x: Any, *, model: Any | None = None) -> Any:
|
72
72
|
return self.run_fn(x, model)
|
73
73
|
|
74
|
-
def with_instructions(self, instructions:
|
74
|
+
def with_instructions(self, instructions: dict[str, str]) -> ProgramAdapter:
|
75
75
|
new_state = self.set_instructions(instructions, {**self.state})
|
76
76
|
return replace(self, state=new_state)
|
77
77
|
|
78
|
-
def with_demos(self, demos:
|
78
|
+
def with_demos(self, demos: list[tuple[Any, Any]]) -> ProgramAdapter:
|
79
79
|
new_state = self.set_demos(demos, {**self.state})
|
80
80
|
return replace(self, state=new_state)
|
81
81
|
|
82
82
|
@property
|
83
|
-
def predictors(self) ->
|
83
|
+
def predictors(self) -> list[str]:
|
84
84
|
return list(self._predictors)
|
85
85
|
|
86
86
|
|
@@ -89,9 +89,11 @@ class ProgramAdapter:
|
|
89
89
|
# ---------------------------
|
90
90
|
|
91
91
|
|
92
|
-
def summarize_dataset(trainset: Sequence[
|
92
|
+
def summarize_dataset(trainset: Sequence[tuple[Any, Any]], max_items: int = 50) -> str:
|
93
93
|
n = len(trainset)
|
94
|
-
ex = ", ".join(
|
94
|
+
ex = ", ".join(
|
95
|
+
repr(trainset[i][0])[:40] for i in range(0, min(max_items, n), max(1, n // max_items or 1))
|
96
|
+
)
|
95
97
|
return f"Dataset size: {n}. Example inputs: {ex}"
|
96
98
|
|
97
99
|
|
@@ -109,7 +111,7 @@ def random_tip(rng: random.Random) -> str:
|
|
109
111
|
return rng.choice(tips)
|
110
112
|
|
111
113
|
|
112
|
-
def choose(items: Sequence[Any], rng:
|
114
|
+
def choose(items: Sequence[Any], rng: random.Random | None = None) -> Any:
|
113
115
|
r = rng or random
|
114
116
|
return r.choice(items)
|
115
117
|
|
@@ -122,10 +124,12 @@ def choose(items: Sequence[Any], rng: Optional[random.Random] = None) -> Any:
|
|
122
124
|
@dataclass
|
123
125
|
class EvalResult:
|
124
126
|
score: float
|
125
|
-
subscores:
|
127
|
+
subscores: list[float]
|
126
128
|
|
127
129
|
|
128
|
-
def evaluate_program(
|
130
|
+
def evaluate_program(
|
131
|
+
program: PredictProgram, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
|
132
|
+
) -> EvalResult:
|
129
133
|
subs = []
|
130
134
|
for x, y in dataset:
|
131
135
|
yhat = program.run(x)
|
@@ -140,8 +144,8 @@ def evaluate_program(program: PredictProgram, dataset: Sequence[Tuple[Any, Any]]
|
|
140
144
|
|
141
145
|
def mipro_v2_compile(
|
142
146
|
student: PredictProgram,
|
143
|
-
trainset: Sequence[
|
144
|
-
valset: Sequence[
|
147
|
+
trainset: Sequence[tuple[Any, Any]],
|
148
|
+
valset: Sequence[tuple[Any, Any]],
|
145
149
|
metric: Callable[[Any, Any], float],
|
146
150
|
*,
|
147
151
|
prompt_model: Any,
|
@@ -159,7 +163,7 @@ def mipro_v2_compile(
|
|
159
163
|
data_aware: bool = True,
|
160
164
|
tip_aware: bool = True,
|
161
165
|
fewshot_aware: bool = True,
|
162
|
-
) ->
|
166
|
+
) -> tuple[PredictProgram, list[dict[str, Any]]]:
|
163
167
|
"""MIPROv2-style optimizer.
|
164
168
|
|
165
169
|
Arguments mirror the DSPy pseudocode but remain provider-agnostic. The
|
@@ -171,9 +175,9 @@ def mipro_v2_compile(
|
|
171
175
|
program = student.deepcopy()
|
172
176
|
|
173
177
|
# Step 1: bootstrap few-shot example candidates
|
174
|
-
demo_candidates:
|
178
|
+
demo_candidates: list[dict[str, Any]] = []
|
175
179
|
for _ in range(num_candidates):
|
176
|
-
boot:
|
180
|
+
boot: list[tuple[Any, Any]] = []
|
177
181
|
# collect bootstrapped, self-consistent demos
|
178
182
|
while len(boot) < max_bootstrapped_demos:
|
179
183
|
x, y = rng.choice(trainset)
|
@@ -184,9 +188,9 @@ def mipro_v2_compile(
|
|
184
188
|
demo_candidates.append({"boot": boot, "labeled": labeled})
|
185
189
|
|
186
190
|
# Step 2: propose instruction candidates per predictor
|
187
|
-
instr_candidates:
|
188
|
-
for pred in
|
189
|
-
ctx:
|
191
|
+
instr_candidates: dict[str, list[str]] = {}
|
192
|
+
for pred in program.predictors or ["predictor"]:
|
193
|
+
ctx: dict[str, Any] = {}
|
190
194
|
if data_aware:
|
191
195
|
ctx["dataset_summary"] = summarize_dataset(trainset)
|
192
196
|
if program_aware:
|
@@ -199,12 +203,12 @@ def mipro_v2_compile(
|
|
199
203
|
instr_candidates[pred] = list(cand)
|
200
204
|
|
201
205
|
# Step 3: Bayesian-optimization-like search (random proposer placeholder)
|
202
|
-
history:
|
203
|
-
records:
|
206
|
+
history: list[tuple[dict[str, Any], float]] = []
|
207
|
+
records: list[dict[str, Any]] = []
|
204
208
|
best_score = -1.0
|
205
|
-
best_cfg:
|
209
|
+
best_cfg: dict[str, Any] | None = None
|
206
210
|
|
207
|
-
def propose(history_:
|
211
|
+
def propose(history_: list[tuple[dict[str, Any], float]]) -> dict[str, Any]:
|
208
212
|
# Placeholder: randomly sample from the cartesian product
|
209
213
|
instructions = {pred: choose(instr_candidates[pred], rng) for pred in instr_candidates}
|
210
214
|
demos = choose(demo_candidates, rng) if demo_candidates else None
|
@@ -227,15 +231,17 @@ def mipro_v2_compile(
|
|
227
231
|
batch_res = evaluate_program(program_t, batch, metric)
|
228
232
|
s_t = batch_res.score
|
229
233
|
history.append((theta, s_t))
|
230
|
-
records.append(
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
"
|
236
|
-
|
237
|
-
|
238
|
-
|
234
|
+
records.append(
|
235
|
+
{
|
236
|
+
"trial": t,
|
237
|
+
"evaluation": "batch" if minibatch else "full",
|
238
|
+
"score": s_t,
|
239
|
+
"intervention": {
|
240
|
+
"instructions": theta.get("instructions"),
|
241
|
+
"demo_set": theta.get("demo_set"),
|
242
|
+
},
|
243
|
+
}
|
244
|
+
)
|
239
245
|
|
240
246
|
if (not minibatch) or (t % max(1, minibatch_full_eval_steps) == 0):
|
241
247
|
full_res = evaluate_program(program_t, valset, metric)
|
@@ -243,15 +249,17 @@ def mipro_v2_compile(
|
|
243
249
|
if s_full > best_score:
|
244
250
|
best_score = s_full
|
245
251
|
best_cfg = theta
|
246
|
-
records.append(
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
"
|
252
|
-
|
253
|
-
|
254
|
-
|
252
|
+
records.append(
|
253
|
+
{
|
254
|
+
"trial": t,
|
255
|
+
"evaluation": "full",
|
256
|
+
"score": s_full,
|
257
|
+
"intervention": {
|
258
|
+
"instructions": theta.get("instructions"),
|
259
|
+
"demo_set": theta.get("demo_set"),
|
260
|
+
},
|
261
|
+
}
|
262
|
+
)
|
255
263
|
|
256
264
|
if best_cfg is None:
|
257
265
|
return program, records
|
@@ -275,6 +283,7 @@ __all__ = [
|
|
275
283
|
class ExampleTwoStepDag:
|
276
284
|
pass
|
277
285
|
|
286
|
+
|
278
287
|
"""
|
279
288
|
A -> B
|
280
289
|
"""
|
@@ -8,10 +8,11 @@ metric, and this module will explore baselines and bootstrapped few-shot variant
|
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
10
|
|
11
|
+
import contextlib
|
11
12
|
import random
|
13
|
+
from collections.abc import Callable, Sequence
|
12
14
|
from dataclasses import dataclass
|
13
|
-
from typing import Any
|
14
|
-
|
15
|
+
from typing import Any
|
15
16
|
|
16
17
|
# ---------------------------
|
17
18
|
# Protocol-like expectations (duck-typed)
|
@@ -25,7 +26,7 @@ class _ProgramLike:
|
|
25
26
|
def deepcopy(self): # deep copy
|
26
27
|
return self
|
27
28
|
|
28
|
-
def with_demos(self, demos:
|
29
|
+
def with_demos(self, demos: list[tuple[Any, Any]]):
|
29
30
|
return self
|
30
31
|
|
31
32
|
def run(self, x: Any) -> Any:
|
@@ -40,10 +41,12 @@ class _ProgramLike:
|
|
40
41
|
@dataclass
|
41
42
|
class EvalResult:
|
42
43
|
score: float
|
43
|
-
subscores:
|
44
|
+
subscores: list[float]
|
44
45
|
|
45
46
|
|
46
|
-
def evaluate(
|
47
|
+
def evaluate(
|
48
|
+
program: _ProgramLike, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
|
49
|
+
) -> EvalResult:
|
47
50
|
subs = []
|
48
51
|
for x, y in dataset:
|
49
52
|
subs.append(metric(program.run(x), y))
|
@@ -54,7 +57,9 @@ class LabeledFewShot:
|
|
54
57
|
def __init__(self, k: int):
|
55
58
|
self.k = k
|
56
59
|
|
57
|
-
def compile(
|
60
|
+
def compile(
|
61
|
+
self, student: _ProgramLike, trainset: Sequence[tuple[Any, Any]], sample: bool = True
|
62
|
+
) -> _ProgramLike:
|
58
63
|
p = getattr(student, "deepcopy", student.reset_copy)()
|
59
64
|
demos = list(trainset)
|
60
65
|
if sample:
|
@@ -68,10 +73,10 @@ class BootstrapFewShot:
|
|
68
73
|
self,
|
69
74
|
*,
|
70
75
|
metric: Callable[[Any, Any], float],
|
71
|
-
metric_threshold:
|
76
|
+
metric_threshold: float | None = None,
|
72
77
|
max_bootstrapped_demos: int = 8,
|
73
78
|
max_labeled_demos: int = 0,
|
74
|
-
teacher_settings:
|
79
|
+
teacher_settings: dict[str, Any] | None = None,
|
75
80
|
max_rounds: int = 1,
|
76
81
|
):
|
77
82
|
self.metric = metric
|
@@ -84,18 +89,18 @@ class BootstrapFewShot:
|
|
84
89
|
def compile(
|
85
90
|
self,
|
86
91
|
student: _ProgramLike,
|
87
|
-
teacher:
|
88
|
-
trainset: Sequence[
|
92
|
+
teacher: _ProgramLike | None,
|
93
|
+
trainset: Sequence[tuple[Any, Any]],
|
89
94
|
) -> _ProgramLike:
|
90
95
|
p = getattr(student, "deepcopy", student.reset_copy)()
|
91
96
|
rng = random.Random()
|
92
97
|
# If bootstrapped demos disabled, return labeled-only few-shot quickly
|
93
98
|
if self.max_bootstrapped_demos <= 0:
|
94
|
-
demos:
|
99
|
+
demos: list[tuple[Any, Any]] = []
|
95
100
|
if self.max_labeled_demos > 0:
|
96
101
|
demos += rng.sample(list(trainset), k=min(self.max_labeled_demos, len(trainset)))
|
97
102
|
return p.with_demos(demos)
|
98
|
-
boot:
|
103
|
+
boot: list[tuple[Any, Any]] = []
|
99
104
|
# Bootstrap demos by self consistency
|
100
105
|
for _ in range(self.max_rounds):
|
101
106
|
rng.shuffle(trainset := list(trainset))
|
@@ -127,33 +132,29 @@ class BootstrapFewShot:
|
|
127
132
|
@dataclass
|
128
133
|
class Candidate:
|
129
134
|
score: float
|
130
|
-
subscores:
|
135
|
+
subscores: list[float]
|
131
136
|
seed: int
|
132
137
|
program: _ProgramLike
|
133
138
|
|
134
139
|
|
135
140
|
def random_search_compile(
|
136
141
|
student: _ProgramLike,
|
137
|
-
trainset: Sequence[
|
138
|
-
valset: Sequence[
|
142
|
+
trainset: Sequence[tuple[Any, Any]],
|
143
|
+
valset: Sequence[tuple[Any, Any]],
|
139
144
|
metric: Callable[[Any, Any], float],
|
140
145
|
*,
|
141
146
|
max_bootstrapped_demos: int = 8,
|
142
147
|
max_labeled_demos: int = 4,
|
143
148
|
max_rounds: int = 2,
|
144
149
|
num_candidate_programs: int = 16,
|
145
|
-
stop_at_score:
|
146
|
-
evaluate_fn:
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
], EvalResult]] = None,
|
151
|
-
on_candidate_evaluated: Optional[Callable[[int, float, EvalResult, Dict[str, Any]], None]] = None,
|
152
|
-
) -> Tuple[_ProgramLike, List[Dict[str, Any]]]:
|
153
|
-
best_program: Optional[_ProgramLike] = None
|
150
|
+
stop_at_score: float | None = None,
|
151
|
+
evaluate_fn: Callable[[_ProgramLike, Sequence[tuple[Any, Any]], Callable[[Any, Any], float]], EvalResult] | None = None,
|
152
|
+
on_candidate_evaluated: Callable[[int, float, EvalResult, dict[str, Any]], None] | None = None,
|
153
|
+
) -> tuple[_ProgramLike, list[dict[str, Any]]]:
|
154
|
+
best_program: _ProgramLike | None = None
|
154
155
|
best_score = float("-inf")
|
155
|
-
candidates:
|
156
|
-
records:
|
156
|
+
candidates: list[Candidate] = []
|
157
|
+
records: list[dict[str, Any]] = []
|
157
158
|
|
158
159
|
seeds = list(range(num_candidate_programs))
|
159
160
|
seeds = [-3, -2, -1] + seeds # zero-shot, labeled few-shot, bootstrapped few-shot
|
@@ -174,7 +175,9 @@ def random_search_compile(
|
|
174
175
|
if max_bootstrapped_demos <= 0:
|
175
176
|
size = 0
|
176
177
|
else:
|
177
|
-
size =
|
178
|
+
size = (
|
179
|
+
max_bootstrapped_demos if seed == -1 else rng.randint(1, max_bootstrapped_demos)
|
180
|
+
)
|
178
181
|
program = BootstrapFewShot(
|
179
182
|
metric=metric,
|
180
183
|
metric_threshold=None,
|
@@ -184,14 +187,18 @@ def random_search_compile(
|
|
184
187
|
max_rounds=max_rounds,
|
185
188
|
).compile(student, teacher=None, trainset=train_copy)
|
186
189
|
|
187
|
-
res = (
|
190
|
+
res = (
|
191
|
+
evaluate_fn(program, valset, metric)
|
192
|
+
if evaluate_fn
|
193
|
+
else evaluate(program, valset, metric)
|
194
|
+
)
|
188
195
|
cand = Candidate(score=res.score, subscores=res.subscores, seed=seed, program=program)
|
189
196
|
candidates.append(cand)
|
190
197
|
# Record an intervention summary for reproducibility
|
191
|
-
intervention:
|
198
|
+
intervention: dict[str, Any] = {"seed": seed}
|
192
199
|
if hasattr(program, "demos"):
|
193
200
|
try:
|
194
|
-
intervention["demos"] =
|
201
|
+
intervention["demos"] = program.demos # type: ignore
|
195
202
|
except Exception:
|
196
203
|
intervention["demos"] = None
|
197
204
|
# Type of candidate
|
@@ -203,12 +210,6 @@ def random_search_compile(
|
|
203
210
|
intervention["label"] = f"labeled-{max_labeled_demos}"
|
204
211
|
else:
|
205
212
|
intervention["kind"] = "bootstrapped_few_shot"
|
206
|
-
bs = 0
|
207
|
-
try:
|
208
|
-
# try to infer from program demos length if present
|
209
|
-
bs = len(intervention.get("demos") or [])
|
210
|
-
except Exception:
|
211
|
-
bs = 0
|
212
213
|
intervention["label"] = f"boot-b{max_bootstrapped_demos}-l{max_labeled_demos}"
|
213
214
|
record_obj = {
|
214
215
|
"score": cand.score,
|
@@ -224,18 +225,16 @@ def random_search_compile(
|
|
224
225
|
break
|
225
226
|
|
226
227
|
if on_candidate_evaluated is not None:
|
227
|
-
|
228
|
+
with contextlib.suppress(Exception):
|
228
229
|
on_candidate_evaluated(idx + 1, res.score, res, intervention)
|
229
|
-
except Exception:
|
230
|
-
pass
|
231
230
|
|
232
231
|
# Attach candidates for inspection
|
233
232
|
if hasattr(best_program, "candidate_programs"):
|
234
233
|
# If user object supports attribute assignment
|
235
|
-
|
236
|
-
best_program.candidate_programs = sorted(
|
237
|
-
|
238
|
-
|
234
|
+
with contextlib.suppress(Exception):
|
235
|
+
best_program.candidate_programs = sorted(
|
236
|
+
candidates, key=lambda c: c.score, reverse=True
|
237
|
+
) # type: ignore[attr-defined]
|
239
238
|
|
240
239
|
return (best_program or getattr(student, "deepcopy", student)(), records)
|
241
240
|
|
@@ -12,39 +12,41 @@ Run:
|
|
12
12
|
from __future__ import annotations
|
13
13
|
|
14
14
|
import asyncio
|
15
|
+
import json
|
15
16
|
import os
|
16
17
|
import random
|
17
|
-
|
18
|
-
from
|
18
|
+
import time
|
19
|
+
from collections.abc import Sequence
|
20
|
+
from pathlib import Path
|
21
|
+
from typing import Any
|
19
22
|
|
20
|
-
from dotenv import load_dotenv
|
21
23
|
from datasets import load_dataset
|
22
|
-
|
24
|
+
from dotenv import load_dotenv
|
25
|
+
from synth_ai.learning.prompts.mipro import ProgramAdapter, evaluate_program, mipro_v2_compile
|
23
26
|
from synth_ai.lm.core.main_v3 import LM, build_messages
|
24
|
-
import json
|
25
|
-
import time
|
26
|
-
from pathlib import Path
|
27
|
-
from synth_ai.learning.prompts.mipro import ProgramAdapter, mipro_v2_compile, evaluate_program
|
28
27
|
|
29
28
|
|
30
|
-
def choose_label(pred: str, label_names:
|
29
|
+
def choose_label(pred: str, label_names: list[str]) -> str:
|
31
30
|
norm = (pred or "").strip().lower()
|
32
31
|
d = {ln.lower(): ln for ln in label_names}
|
33
32
|
if norm in d:
|
34
33
|
return d[norm]
|
34
|
+
|
35
35
|
def score(cand: str) -> int:
|
36
36
|
c = cand.lower()
|
37
37
|
return sum(1 for w in c.split() if w in norm)
|
38
|
+
|
38
39
|
return max(label_names, key=score)
|
39
40
|
|
40
41
|
|
41
|
-
def accuracy(pred: str, gold: str, labels:
|
42
|
+
def accuracy(pred: str, gold: str, labels: list[str]) -> float:
|
42
43
|
return 1.0 if choose_label(pred, labels) == gold else 0.0
|
43
44
|
|
44
45
|
|
45
46
|
class NaivePromptModel:
|
46
47
|
"""Toy prompt model that returns simple instruction variants."""
|
47
|
-
|
48
|
+
|
49
|
+
def generate_instructions(self, ctx: dict[str, Any], k: int = 8) -> list[str]:
|
48
50
|
base = "Classify the Banking77 intent and return exactly one label."
|
49
51
|
variants = [
|
50
52
|
base,
|
@@ -60,28 +62,33 @@ class NaivePromptModel:
|
|
60
62
|
return variants[:k]
|
61
63
|
|
62
64
|
|
63
|
-
def build_run_fn(lm: LM, label_names:
|
65
|
+
def build_run_fn(lm: LM, label_names: list[str]):
|
64
66
|
def run_fn(x: str, _model: Any | None = None) -> str:
|
65
67
|
# Use instructions and demos from adapter state (set by set_instructions/set_demos)
|
66
68
|
# The adapter passes state via closure; we rebuild messages here
|
67
|
-
instructions = state_ref.get("instructions", {}).get(
|
69
|
+
instructions = state_ref.get("instructions", {}).get(
|
70
|
+
"main", "You are an intent classifier for Banking77."
|
71
|
+
)
|
68
72
|
examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in state_ref.get("demos", []))
|
69
73
|
sys = instructions
|
70
74
|
user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
|
71
75
|
messages = build_messages(sys, user, images_bytes=None, model_name=lm.model)
|
76
|
+
|
72
77
|
async def _call():
|
73
78
|
resp = await lm.respond_async(messages=messages)
|
74
79
|
return (resp.raw_response or "").strip()
|
80
|
+
|
75
81
|
return asyncio.run(_call())
|
82
|
+
|
76
83
|
return run_fn
|
77
84
|
|
78
85
|
|
79
|
-
def set_instructions(new_instr:
|
86
|
+
def set_instructions(new_instr: dict[str, str], state: dict[str, Any]) -> dict[str, Any]:
|
80
87
|
state["instructions"] = {**state.get("instructions", {}), **new_instr}
|
81
88
|
return state
|
82
89
|
|
83
90
|
|
84
|
-
def set_demos(demos:
|
91
|
+
def set_demos(demos: list[tuple[str, str]], state: dict[str, Any]) -> dict[str, Any]:
|
85
92
|
state["demos"] = list(demos)
|
86
93
|
return state
|
87
94
|
|
@@ -96,15 +103,18 @@ def main():
|
|
96
103
|
|
97
104
|
print("Loading Banking77 dataset (train/dev split of test for demo)...")
|
98
105
|
ds = load_dataset("banking77")
|
99
|
-
label_names:
|
106
|
+
label_names: list[str] = ds["test"].features["label"].names # type: ignore
|
100
107
|
|
101
108
|
all_items = [(r["text"], label_names[int(r["label"])]) for r in ds["test"]]
|
102
109
|
random.shuffle(all_items)
|
103
|
-
trainset: Sequence[
|
104
|
-
valset: Sequence[
|
110
|
+
trainset: Sequence[tuple[str, str]] = all_items[:80]
|
111
|
+
valset: Sequence[tuple[str, str]] = all_items[80:160]
|
105
112
|
|
106
113
|
global state_ref
|
107
|
-
state_ref = {
|
114
|
+
state_ref = {
|
115
|
+
"instructions": {"main": "You are an intent classifier for Banking77."},
|
116
|
+
"demos": [],
|
117
|
+
}
|
108
118
|
adapter = ProgramAdapter(
|
109
119
|
run_fn=build_run_fn(lm, label_names),
|
110
120
|
state=state_ref,
|
@@ -138,7 +148,9 @@ def main():
|
|
138
148
|
)
|
139
149
|
|
140
150
|
res = evaluate_program(best, valset, metric)
|
141
|
-
print(
|
151
|
+
print(
|
152
|
+
f"Best program accuracy on val: {res.score:.2%} ({sum(res.subscores)}/{len(res.subscores)})"
|
153
|
+
)
|
142
154
|
|
143
155
|
out = {
|
144
156
|
"context": {
|