synth-ai 0.2.4.dev5__py3-none-any.whl → 0.2.4.dev7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +18 -9
- synth_ai/cli/__init__.py +10 -5
- synth_ai/cli/balance.py +22 -17
- synth_ai/cli/calc.py +2 -3
- synth_ai/cli/demo.py +3 -5
- synth_ai/cli/legacy_root_backup.py +58 -32
- synth_ai/cli/man.py +22 -19
- synth_ai/cli/recent.py +9 -8
- synth_ai/cli/root.py +58 -13
- synth_ai/cli/status.py +13 -6
- synth_ai/cli/traces.py +45 -21
- synth_ai/cli/watch.py +40 -37
- synth_ai/config/base_url.py +1 -3
- synth_ai/core/experiment.py +1 -2
- synth_ai/environments/__init__.py +2 -6
- synth_ai/environments/environment/artifacts/base.py +3 -1
- synth_ai/environments/environment/db/sqlite.py +1 -1
- synth_ai/environments/environment/registry.py +19 -20
- synth_ai/environments/environment/resources/sqlite.py +2 -3
- synth_ai/environments/environment/rewards/core.py +3 -2
- synth_ai/environments/environment/tools/__init__.py +6 -4
- synth_ai/environments/examples/crafter_classic/__init__.py +1 -1
- synth_ai/environments/examples/crafter_classic/engine.py +21 -17
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +1 -0
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +2 -1
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +2 -1
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +3 -2
- synth_ai/environments/examples/crafter_classic/environment.py +16 -15
- synth_ai/environments/examples/crafter_classic/taskset.py +2 -2
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +2 -3
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +2 -1
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +2 -2
- synth_ai/environments/examples/crafter_custom/crafter/config.py +2 -2
- synth_ai/environments/examples/crafter_custom/crafter/env.py +1 -5
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +1 -2
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +1 -2
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +5 -5
- synth_ai/environments/examples/crafter_custom/environment.py +13 -13
- synth_ai/environments/examples/crafter_custom/run_dataset.py +5 -5
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +2 -2
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +5 -4
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +2 -1
- synth_ai/environments/examples/enron/engine.py +18 -14
- synth_ai/environments/examples/enron/environment.py +12 -11
- synth_ai/environments/examples/enron/taskset.py +7 -7
- synth_ai/environments/examples/minigrid/__init__.py +6 -6
- synth_ai/environments/examples/minigrid/engine.py +6 -6
- synth_ai/environments/examples/minigrid/environment.py +6 -6
- synth_ai/environments/examples/minigrid/puzzle_loader.py +3 -2
- synth_ai/environments/examples/minigrid/taskset.py +13 -13
- synth_ai/environments/examples/nethack/achievements.py +1 -1
- synth_ai/environments/examples/nethack/engine.py +8 -7
- synth_ai/environments/examples/nethack/environment.py +10 -9
- synth_ai/environments/examples/nethack/helpers/__init__.py +8 -9
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +1 -1
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +2 -1
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +1 -1
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +3 -4
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +6 -5
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +5 -5
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +7 -6
- synth_ai/environments/examples/nethack/taskset.py +5 -5
- synth_ai/environments/examples/red/engine.py +9 -8
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +7 -7
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +2 -1
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +3 -2
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +2 -1
- synth_ai/environments/examples/red/environment.py +18 -15
- synth_ai/environments/examples/red/taskset.py +5 -3
- synth_ai/environments/examples/sokoban/engine.py +16 -13
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +3 -2
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +2 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +1 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +7 -5
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +1 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +2 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +5 -4
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +3 -2
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +2 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +5 -4
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +1 -1
- synth_ai/environments/examples/sokoban/environment.py +15 -14
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +5 -3
- synth_ai/environments/examples/sokoban/puzzle_loader.py +3 -2
- synth_ai/environments/examples/sokoban/taskset.py +13 -10
- synth_ai/environments/examples/tictactoe/engine.py +6 -6
- synth_ai/environments/examples/tictactoe/environment.py +8 -7
- synth_ai/environments/examples/tictactoe/taskset.py +6 -5
- synth_ai/environments/examples/verilog/engine.py +4 -3
- synth_ai/environments/examples/verilog/environment.py +11 -10
- synth_ai/environments/examples/verilog/taskset.py +14 -12
- synth_ai/environments/examples/wordle/__init__.py +29 -0
- synth_ai/environments/examples/wordle/engine.py +398 -0
- synth_ai/environments/examples/wordle/environment.py +159 -0
- synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +75 -0
- synth_ai/environments/examples/wordle/taskset.py +230 -0
- synth_ai/environments/reproducibility/core.py +1 -1
- synth_ai/environments/reproducibility/tree.py +21 -21
- synth_ai/environments/service/app.py +11 -2
- synth_ai/environments/service/core_routes.py +137 -105
- synth_ai/environments/service/external_registry.py +1 -2
- synth_ai/environments/service/registry.py +1 -1
- synth_ai/environments/stateful/core.py +1 -2
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/api.py +4 -4
- synth_ai/environments/tasks/core.py +14 -12
- synth_ai/environments/tasks/filters.py +6 -4
- synth_ai/environments/tasks/utils.py +13 -11
- synth_ai/evals/base.py +2 -3
- synth_ai/experimental/synth_oss.py +4 -4
- synth_ai/learning/gateway.py +1 -3
- synth_ai/learning/prompts/banking77_injection_eval.py +168 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +213 -0
- synth_ai/learning/prompts/mipro.py +282 -1
- synth_ai/learning/prompts/random_search.py +246 -0
- synth_ai/learning/prompts/run_mipro_banking77.py +172 -0
- synth_ai/learning/prompts/run_random_search_banking77.py +324 -0
- synth_ai/lm/__init__.py +5 -5
- synth_ai/lm/caching/ephemeral.py +9 -9
- synth_ai/lm/caching/handler.py +20 -20
- synth_ai/lm/caching/persistent.py +10 -10
- synth_ai/lm/config.py +3 -3
- synth_ai/lm/constants.py +7 -7
- synth_ai/lm/core/all.py +17 -3
- synth_ai/lm/core/exceptions.py +0 -2
- synth_ai/lm/core/main.py +26 -41
- synth_ai/lm/core/main_v3.py +20 -10
- synth_ai/lm/core/vendor_clients.py +18 -17
- synth_ai/lm/injection.py +80 -0
- synth_ai/lm/overrides.py +206 -0
- synth_ai/lm/provider_support/__init__.py +1 -1
- synth_ai/lm/provider_support/anthropic.py +51 -24
- synth_ai/lm/provider_support/openai.py +51 -22
- synth_ai/lm/structured_outputs/handler.py +34 -32
- synth_ai/lm/structured_outputs/inject.py +24 -27
- synth_ai/lm/structured_outputs/rehabilitate.py +19 -15
- synth_ai/lm/tools/base.py +17 -16
- synth_ai/lm/unified_interface.py +17 -18
- synth_ai/lm/vendors/base.py +20 -18
- synth_ai/lm/vendors/core/anthropic_api.py +50 -25
- synth_ai/lm/vendors/core/gemini_api.py +31 -36
- synth_ai/lm/vendors/core/mistral_api.py +19 -19
- synth_ai/lm/vendors/core/openai_api.py +11 -10
- synth_ai/lm/vendors/openai_standard.py +144 -88
- synth_ai/lm/vendors/openai_standard_responses.py +74 -61
- synth_ai/lm/vendors/retries.py +9 -1
- synth_ai/lm/vendors/supported/custom_endpoint.py +26 -26
- synth_ai/lm/vendors/supported/deepseek.py +10 -10
- synth_ai/lm/vendors/supported/grok.py +8 -8
- synth_ai/lm/vendors/supported/ollama.py +2 -1
- synth_ai/lm/vendors/supported/openrouter.py +11 -9
- synth_ai/lm/vendors/synth_client.py +69 -63
- synth_ai/lm/warmup.py +8 -7
- synth_ai/tracing/__init__.py +22 -10
- synth_ai/tracing_v1/__init__.py +22 -20
- synth_ai/tracing_v3/__init__.py +7 -7
- synth_ai/tracing_v3/abstractions.py +56 -52
- synth_ai/tracing_v3/config.py +4 -2
- synth_ai/tracing_v3/db_config.py +6 -8
- synth_ai/tracing_v3/decorators.py +29 -30
- synth_ai/tracing_v3/examples/basic_usage.py +12 -12
- synth_ai/tracing_v3/hooks.py +21 -21
- synth_ai/tracing_v3/llm_call_record_helpers.py +85 -98
- synth_ai/tracing_v3/lm_call_record_abstractions.py +2 -4
- synth_ai/tracing_v3/migration_helper.py +3 -5
- synth_ai/tracing_v3/replica_sync.py +30 -32
- synth_ai/tracing_v3/session_tracer.py +35 -29
- synth_ai/tracing_v3/storage/__init__.py +1 -1
- synth_ai/tracing_v3/storage/base.py +8 -7
- synth_ai/tracing_v3/storage/config.py +4 -4
- synth_ai/tracing_v3/storage/factory.py +4 -4
- synth_ai/tracing_v3/storage/utils.py +9 -9
- synth_ai/tracing_v3/turso/__init__.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +9 -9
- synth_ai/tracing_v3/turso/manager.py +60 -48
- synth_ai/tracing_v3/turso/models.py +24 -19
- synth_ai/tracing_v3/utils.py +5 -5
- synth_ai/tui/__main__.py +1 -1
- synth_ai/tui/cli/query_experiments.py +2 -3
- synth_ai/tui/cli/query_experiments_v3.py +2 -3
- synth_ai/tui/dashboard.py +97 -86
- synth_ai/v0/tracing/abstractions.py +28 -28
- synth_ai/v0/tracing/base_client.py +9 -9
- synth_ai/v0/tracing/client_manager.py +7 -7
- synth_ai/v0/tracing/config.py +7 -7
- synth_ai/v0/tracing/context.py +6 -6
- synth_ai/v0/tracing/decorators.py +6 -5
- synth_ai/v0/tracing/events/manage.py +1 -1
- synth_ai/v0/tracing/events/store.py +5 -4
- synth_ai/v0/tracing/immediate_client.py +4 -5
- synth_ai/v0/tracing/local.py +3 -3
- synth_ai/v0/tracing/log_client_base.py +4 -5
- synth_ai/v0/tracing/retry_queue.py +5 -6
- synth_ai/v0/tracing/trackers.py +25 -25
- synth_ai/v0/tracing/upload.py +6 -0
- synth_ai/v0/tracing_v1/__init__.py +1 -1
- synth_ai/v0/tracing_v1/abstractions.py +28 -28
- synth_ai/v0/tracing_v1/base_client.py +9 -9
- synth_ai/v0/tracing_v1/client_manager.py +7 -7
- synth_ai/v0/tracing_v1/config.py +7 -7
- synth_ai/v0/tracing_v1/context.py +6 -6
- synth_ai/v0/tracing_v1/decorators.py +7 -6
- synth_ai/v0/tracing_v1/events/manage.py +1 -1
- synth_ai/v0/tracing_v1/events/store.py +5 -4
- synth_ai/v0/tracing_v1/immediate_client.py +4 -5
- synth_ai/v0/tracing_v1/local.py +3 -3
- synth_ai/v0/tracing_v1/log_client_base.py +4 -5
- synth_ai/v0/tracing_v1/retry_queue.py +5 -6
- synth_ai/v0/tracing_v1/trackers.py +25 -25
- synth_ai/v0/tracing_v1/upload.py +25 -24
- synth_ai/zyk/__init__.py +1 -0
- {synth_ai-0.2.4.dev5.dist-info → synth_ai-0.2.4.dev7.dist-info}/METADATA +2 -11
- synth_ai-0.2.4.dev7.dist-info/RECORD +299 -0
- synth_ai-0.2.4.dev5.dist-info/RECORD +0 -287
- {synth_ai-0.2.4.dev5.dist-info → synth_ai-0.2.4.dev7.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev5.dist-info → synth_ai-0.2.4.dev7.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.4.dev5.dist-info → synth_ai-0.2.4.dev7.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev5.dist-info → synth_ai-0.2.4.dev7.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,289 @@
|
|
1
|
+
"""
|
2
|
+
MIPROv2-style prompt optimizer (modular, DSPy-inspired).
|
3
|
+
|
4
|
+
This module provides a modular implementation of the MIPROv2 pseudocode from DSPy,
|
5
|
+
adapted to a provider-agnostic "program" interface. The goal is to keep the
|
6
|
+
bootstrapping and search process pluggable so it can be swapped for alternatives.
|
7
|
+
|
8
|
+
Key ideas
|
9
|
+
- Program adapter: unify how we set instructions/demos and run predictions.
|
10
|
+
- Demo bootstrapping: gather high-confidence examples (by metric) as candidates.
|
11
|
+
- Instruction proposals: generated by a prompt model from contextual summaries.
|
12
|
+
- Search (placeholder): random/Bayesian-like search over (instructions × demos).
|
13
|
+
|
14
|
+
Notes
|
15
|
+
- The implementation is intentionally lightweight and dependency-free.
|
16
|
+
- "BayesOpt" here is a placeholder randomized proposer that uses history; you
|
17
|
+
can plug in a real optimizer later.
|
18
|
+
"""
|
19
|
+
|
20
|
+
from __future__ import annotations
|
21
|
+
|
22
|
+
import random
|
23
|
+
from collections.abc import Callable, Sequence
|
24
|
+
from dataclasses import dataclass, replace
|
25
|
+
from typing import Any, Protocol
|
26
|
+
|
27
|
+
# ---------------------------
|
28
|
+
# Program adapter and protocols
|
29
|
+
# ---------------------------
|
30
|
+
|
31
|
+
|
32
|
+
class PredictProgram(Protocol):
    """Minimal protocol a program must satisfy for MIPRO.

    You can adapt your own pipeline to this by implementing these methods or
    by wrapping it with `ProgramAdapter` below.

    The `with_*` methods return a program rather than `None`; the optimizer
    in this module always uses the returned object.
    """

    # Return a copy of the program. `mipro_v2_compile` calls this once on the
    # student before it starts configuring candidates.
    def deepcopy(self) -> PredictProgram: ...

    # Produce a prediction for input `x`; `model` is an optional keyword-only
    # model handle (the bootstrapping step passes the task model here).
    def run(self, x: Any, *, model: Any | None = None) -> Any: ...

    # Return a program configured with `instructions`; the optimizer keys this
    # mapping by the identifiers listed in `predictors`.
    def with_instructions(self, instructions: dict[str, str]) -> PredictProgram: ...

    # Return a program configured with `demos`, a list of (input, label) pairs.
    def with_demos(self, demos: list[tuple[Any, Any]]) -> PredictProgram: ...

    # Identifiers of the program's predictors.
    @property
    def predictors(self) -> list[str]: ...
|
49
|
+
|
50
|
+
|
51
|
+
@dataclass
class ProgramAdapter:
    """Wrap plain callables and a state dict so they satisfy `PredictProgram`.

    Fields
    - run_fn: invoked as ``run_fn(x, model)`` to produce a prediction
    - state: dict holding the program's current configuration
    - _predictors: predictor identifiers exposed through ``predictors``
    - set_instructions: invoked as ``set_instructions(instructions, state_copy)``;
      whatever it returns becomes the state of the new adapter
    - set_demos: invoked as ``set_demos(demos, state_copy)``; same contract

    No method mutates ``self``: each one returns a new adapter created with
    ``dataclasses.replace``. State copies are shallow (top-level keys only),
    so nested containers are shared between the original and the copy.
    """

    run_fn: Callable[[Any, Any | None], Any]
    state: dict[str, Any]
    _predictors: list[str]
    set_instructions: Callable[[dict[str, str], dict[str, Any]], dict[str, Any]]
    set_demos: Callable[[list[tuple[Any, Any]], dict[str, Any]], dict[str, Any]]

    def deepcopy(self) -> ProgramAdapter:
        """Return a new adapter whose ``state`` is a shallow copy of this one's."""
        copied_state = dict(self.state)
        return replace(self, state=copied_state)

    def run(self, x: Any, *, model: Any | None = None) -> Any:
        """Delegate to ``run_fn``, forwarding ``model`` positionally."""
        return self.run_fn(x, model)

    def with_instructions(self, instructions: dict[str, str]) -> ProgramAdapter:
        """Return a new adapter whose state is ``set_instructions(instructions, copy)``."""
        scratch = dict(self.state)
        updated = self.set_instructions(instructions, scratch)
        return replace(self, state=updated)

    def with_demos(self, demos: list[tuple[Any, Any]]) -> ProgramAdapter:
        """Return a new adapter whose state is ``set_demos(demos, copy)``."""
        scratch = dict(self.state)
        updated = self.set_demos(demos, scratch)
        return replace(self, state=updated)

    @property
    def predictors(self) -> list[str]:
        """A fresh list of predictor identifiers (callers cannot mutate ours)."""
        return [*self._predictors]
|
85
|
+
|
86
|
+
|
87
|
+
# ---------------------------
|
88
|
+
# Utility helpers
|
89
|
+
# ---------------------------
|
90
|
+
|
91
|
+
|
92
|
+
def summarize_dataset(trainset: Sequence[tuple[Any, Any]], max_items: int = 50) -> str:
    """Return a one-line summary of ``trainset``: its size plus sample inputs.

    At most ``max_items`` example inputs are shown, taken at an even stride
    across the *whole* dataset. Each example is the ``repr`` of the input (the
    first element of the pair) truncated to 40 characters.

    Args:
        trainset: Sequence of ``(input, label)`` pairs.
        max_items: Upper bound on the number of example inputs listed. Values
            ``<= 0`` list no examples.

    Returns:
        ``"Dataset size: <n>. Example inputs: <comma-separated previews>"``.
    """
    n = len(trainset)
    if n == 0 or max_items <= 0:
        previews: list[str] = []
    else:
        # Ceiling division: the smallest stride that yields <= max_items
        # indices while still reaching the tail of the dataset.
        step = -(-n // max_items)
        previews = [repr(trainset[i][0])[:40] for i in range(0, n, step)]
    ex = ", ".join(previews)
    return f"Dataset size: {n}. Example inputs: {ex}"
|
98
|
+
|
99
|
+
|
100
|
+
def summarize_program(prog: PredictProgram) -> str:
    """Return a one-line description listing the program's predictor identifiers."""
    predictor_names = prog.predictors
    return f"Program predictors: {predictor_names}"
|
102
|
+
|
103
|
+
|
104
|
+
def random_tip(rng: random.Random) -> str:
    """Pick one of four fixed prompt-writing tips using the supplied generator."""
    available_tips = (
        "Be concise.",
        "Focus on the task definition.",
        "Use the provided examples as guidance.",
        "Avoid unnecessary verbosity.",
    )
    picked = rng.choice(available_tips)
    return picked
|
112
|
+
|
113
|
+
|
114
|
+
def choose(items: Sequence[Any], rng: random.Random | None = None) -> Any:
    """Return a random element of ``items``.

    Draws from ``rng`` when one is supplied, otherwise from the module-level
    ``random`` generator.
    """
    if rng:
        picker = rng.choice
    else:
        picker = random.choice
    return picker(items)
|
117
|
+
|
118
|
+
|
119
|
+
# ---------------------------
|
120
|
+
# Evaluator
|
121
|
+
# ---------------------------
|
122
|
+
|
123
|
+
|
124
|
+
@dataclass
class EvalResult:
    """Outcome of scoring a program over a dataset (see `evaluate_program`)."""

    # Mean of the per-example metric values; 0.0 when the dataset was empty.
    score: float
    # Per-example metric values, in dataset order.
    subscores: list[float]
|
128
|
+
|
129
|
+
|
130
|
+
def evaluate_program(
    program: PredictProgram, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
) -> EvalResult:
    """Run ``program`` on every ``(x, y)`` pair in ``dataset`` and score it.

    ``metric`` is called as ``metric(prediction, y)``. The returned
    ``EvalResult`` carries the mean of those values (0.0 for an empty dataset)
    and the individual values in dataset order.
    """
    per_example = [metric(program.run(x), y) for x, y in dataset]
    total = float(sum(per_example))
    # max(1, ...) keeps the empty-dataset case from dividing by zero.
    mean = total / max(1, len(per_example))
    return EvalResult(score=mean, subscores=per_example)
|
138
|
+
|
139
|
+
|
140
|
+
# ---------------------------
|
141
|
+
# MIPROv2 compile
|
142
|
+
# ---------------------------
|
143
|
+
|
144
|
+
|
145
|
+
def mipro_v2_compile(
    student: PredictProgram,
    trainset: Sequence[tuple[Any, Any]],
    valset: Sequence[tuple[Any, Any]],
    metric: Callable[[Any, Any], float],
    *,
    prompt_model: Any,
    task_model: Any,
    max_bootstrapped_demos: int = 8,
    max_labeled_demos: int = 4,
    num_candidates: int = 8,
    num_trials: int = 20,
    minibatch: bool = True,
    minibatch_size: int = 16,
    minibatch_full_eval_steps: int = 5,
    seed: int = 0,
    auto: str = "light",
    program_aware: bool = True,
    data_aware: bool = True,
    tip_aware: bool = True,
    fewshot_aware: bool = True,
) -> tuple[PredictProgram, list[dict[str, Any]]]:
    """MIPROv2-style optimizer.

    Arguments mirror the DSPy pseudocode but remain provider-agnostic. The
    `prompt_model` must expose `generate_instructions(ctx, k)`; the `student`
    program must implement the `PredictProgram` protocol.

    Args:
        student: Program to optimize; it is copied via ``deepcopy()`` first.
        trainset: ``(input, label)`` pairs used for demos and dataset summary.
        valset: ``(input, label)`` pairs used to score candidate configurations.
        metric: Called as ``metric(prediction, label)``; a value equal to 1
            marks a training example as a usable bootstrapped demo.
        prompt_model: Object providing ``generate_instructions(ctx, k=...)``.
        task_model: Passed as ``model=`` to ``program.run`` while bootstrapping.
        max_bootstrapped_demos: Cap on bootstrapped demos per demo candidate.
        max_labeled_demos: Cap on randomly sampled labeled demos per candidate.
        num_candidates: Number of demo candidates, and ``k`` for instructions.
        num_trials: Number of search trials.
        minibatch: Score each trial on a random subset of ``valset``.
        minibatch_size: Size of that subset.
        minibatch_full_eval_steps: Run a full evaluation every this many trials.
        seed: Seed for every random choice made by this function.
        auto: Accepted for signature compatibility; currently unused.
        program_aware / data_aware / tip_aware / fewshot_aware: Toggle which
            entries are placed in the context given to ``prompt_model``.

    Returns:
        ``(best_program, records)``. ``records`` holds one dict per evaluation
        with keys ``trial``, ``evaluation`` (``"batch"``/``"full"``), ``score``
        and ``intervention``. The best configuration is the one with the
        highest full-evaluation score; if no full evaluation ran, the best
        minibatch score is used; if there were no trials at all, the
        unmodified program copy is returned.
    """

    rng = random.Random(seed)
    program = student.deepcopy()
    train_pool = list(trainset)
    val_pool = list(valset)

    def _apply(cfg: dict[str, Any]) -> PredictProgram:
        """Return ``program`` configured with ``cfg``'s instructions and demos."""
        configured = program.with_instructions(cfg["instructions"])
        ds = cfg.get("demo_set")
        if ds is not None:
            # Combine bootstrapped + labeled demos
            demo_set = list(ds.get("boot", [])) + list(ds.get("labeled", []))
            configured = configured.with_demos(demo_set)
        return configured

    # Step 1: bootstrap few-shot example candidates
    demo_candidates: list[dict[str, Any]] = []
    for _ in range(num_candidates):
        boot: list[tuple[Any, Any]] = []
        if max_bootstrapped_demos > 0:
            # One pass over a shuffled copy: bounded even when the program
            # never earns a perfect score, and no example is picked twice.
            pool = list(train_pool)
            rng.shuffle(pool)
            for x, y in pool:
                yhat = program.run(x, model=task_model)
                if metric(yhat, y) == 1:  # perfect match
                    boot.append((x, y))
                    if len(boot) >= max_bootstrapped_demos:
                        break
        labeled = rng.sample(train_pool, k=max(0, min(max_labeled_demos, len(train_pool))))
        demo_candidates.append({"boot": boot, "labeled": labeled})

    # Step 2: propose instruction candidates per predictor
    # The two summaries do not depend on the predictor, so build them once.
    dataset_summary = summarize_dataset(trainset) if data_aware else None
    program_summary = summarize_program(program) if program_aware else None
    instr_candidates: dict[str, list[str]] = {}
    for pred in program.predictors or ["predictor"]:
        ctx: dict[str, Any] = {}
        if data_aware:
            ctx["dataset_summary"] = dataset_summary
        if program_aware:
            ctx["program_summary"] = program_summary
        if fewshot_aware and demo_candidates:
            ctx["examples"] = choose(demo_candidates, rng)
        if tip_aware:
            ctx["tip"] = random_tip(rng)
        cand = prompt_model.generate_instructions(ctx, k=num_candidates)
        instr_candidates[pred] = list(cand)

    # Step 3: Bayesian-optimization-like search (random proposer placeholder)
    history: list[tuple[dict[str, Any], float]] = []
    records: list[dict[str, Any]] = []
    best_score = -1.0
    best_cfg: dict[str, Any] | None = None

    def propose(history_: list[tuple[dict[str, Any], float]]) -> dict[str, Any]:
        # Placeholder: randomly sample from the cartesian product
        # (`history_` is accepted so a real optimizer can be dropped in later).
        instructions = {pred: choose(instr_candidates[pred], rng) for pred in instr_candidates}
        demos = choose(demo_candidates, rng) if demo_candidates else None
        return {"instructions": instructions, "demo_set": demos}

    for t in range(1, num_trials + 1):
        theta = propose(history)
        program_t = _apply(theta)

        if minibatch:
            # Use the seeded generator so runs are reproducible for a given seed.
            batch = rng.sample(val_pool, k=max(0, min(minibatch_size, len(val_pool))))
        else:
            batch = valset
        batch_res = evaluate_program(program_t, batch, metric)
        s_t = batch_res.score
        history.append((theta, s_t))
        records.append(
            {
                "trial": t,
                "evaluation": "batch" if minibatch else "full",
                "score": s_t,
                "intervention": {
                    "instructions": theta.get("instructions"),
                    "demo_set": theta.get("demo_set"),
                },
            }
        )

        if (not minibatch) or (t % max(1, minibatch_full_eval_steps) == 0):
            # Without minibatching the "batch" above already was the full
            # validation set, so reuse it instead of paying for a second pass.
            full_res = batch_res if not minibatch else evaluate_program(program_t, valset, metric)
            s_full = full_res.score
            if s_full > best_score:
                best_score = s_full
                best_cfg = theta
            records.append(
                {
                    "trial": t,
                    "evaluation": "full",
                    "score": s_full,
                    "intervention": {
                        "instructions": theta.get("instructions"),
                        "demo_set": theta.get("demo_set"),
                    },
                }
            )

    if best_cfg is None and history:
        # No full evaluation selected a winner (e.g. fewer trials than
        # `minibatch_full_eval_steps`): fall back to the best minibatch score
        # rather than discarding the whole search.
        best_cfg = max(history, key=lambda entry: entry[1])[0]

    if best_cfg is None:
        return program, records

    return _apply(best_cfg), records
|
273
|
+
|
274
|
+
|
275
|
+
# Names exported by `from ... import *`; the summary/tip helpers and
# `EvalResult` are left out of this list.
__all__ = [
    "PredictProgram",
    "ProgramAdapter",
    "evaluate_program",
    "mipro_v2_compile",
]
|
1
281
|
|
2
282
|
|
3
283
|
class ExampleTwoStepDag:
|
4
284
|
pass
|
5
285
|
|
286
|
+
|
6
287
|
"""
|
7
288
|
A -> B
|
8
|
-
"""
|
289
|
+
"""
|
@@ -0,0 +1,246 @@
|
|
1
|
+
"""
|
2
|
+
Random-search prompt optimizer (BootstrapFewShotWithRandomSearch), DSPy-inspired.
|
3
|
+
|
4
|
+
Implements the high-level pseudocode of DSPy's Random Search optimizer in a
|
5
|
+
provider-agnostic, modular style. You can plug in your own student/program and
|
6
|
+
metric, and this module will explore baselines and bootstrapped few-shot variants.
|
7
|
+
"""
|
8
|
+
|
9
|
+
from __future__ import annotations
|
10
|
+
|
11
|
+
import contextlib
|
12
|
+
import random
|
13
|
+
from collections.abc import Callable, Sequence
|
14
|
+
from dataclasses import dataclass
|
15
|
+
from typing import Any
|
16
|
+
|
17
|
+
# ---------------------------
|
18
|
+
# Protocol-like expectations (duck-typed)
|
19
|
+
# ---------------------------
|
20
|
+
|
21
|
+
|
22
|
+
class _ProgramLike:
    """Duck-typed shape of the programs this module works with.

    The defaults here are placeholders: the copy/configure methods return
    ``self`` unchanged and ``run`` is not implemented.
    """

    def reset_copy(self):  # zero-shot copy
        """Return a program copy with no demos; this default returns ``self``."""
        return self

    def deepcopy(self):  # deep copy
        """Return a copy of the program; this default returns ``self``."""
        return self

    def with_demos(self, demos: list[tuple[Any, Any]]):
        """Return a program using ``demos``; this default ignores them."""
        return self

    def run(self, x: Any) -> Any:
        """Produce a prediction for ``x``; must be provided by real programs."""
        raise NotImplementedError
|
34
|
+
|
35
|
+
|
36
|
+
# ---------------------------
|
37
|
+
# Helpers and lightweight components
|
38
|
+
# ---------------------------
|
39
|
+
|
40
|
+
|
41
|
+
@dataclass
class EvalResult:
    """Outcome of scoring a program over a dataset (see `evaluate`)."""

    # Mean of the per-example metric values; 0 when the dataset was empty.
    score: float
    # Per-example metric values, in dataset order.
    subscores: list[float]
|
45
|
+
|
46
|
+
|
47
|
+
def evaluate(
    program: _ProgramLike, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
) -> EvalResult:
    """Score ``program`` on every ``(x, y)`` pair of ``dataset``.

    ``metric`` is called as ``metric(program.run(x), y)``. The result holds the
    mean of those values (the divisor is clamped to 1 for an empty dataset)
    and the individual values in dataset order.
    """
    per_example = [metric(program.run(x), y) for x, y in dataset]
    denominator = max(1, len(per_example))
    return EvalResult(sum(per_example) / denominator, per_example)
|
54
|
+
|
55
|
+
|
56
|
+
class LabeledFewShot:
    """Attach up to ``k`` labeled training examples to a program as demos."""

    def __init__(self, k: int):
        # Maximum number of demos to attach; values <= 0 attach none.
        self.k = k

    def compile(
        self,
        student: _ProgramLike,
        trainset: Sequence[tuple[Any, Any]],
        sample: bool = True,
        *,
        rng: random.Random | None = None,
    ) -> _ProgramLike:
        """Return a copy of ``student`` carrying up to ``k`` demos from ``trainset``.

        Args:
            student: Program to copy. ``deepcopy()`` is preferred and
                ``reset_copy()`` is used only when ``deepcopy`` is missing.
            trainset: ``(input, label)`` pairs to draw demos from.
            sample: When true, shuffle before taking the first ``k``; when
                false, take the first ``k`` in the given order.
            rng: Optional generator for a reproducible shuffle; the
                module-level ``random`` is used when omitted.

        Returns:
            The result of ``with_demos(...)`` on the copied program.
        """
        # Resolve the fallback lazily: `getattr(obj, "deepcopy", obj.reset_copy)`
        # would evaluate `obj.reset_copy` even when `deepcopy` exists and so
        # fail for programs that only implement `deepcopy`.
        copier = getattr(student, "deepcopy", None)
        if copier is None:
            copier = student.reset_copy
        p = copier()

        demos = list(trainset)
        if sample:
            shuffler = rng if rng is not None else random
            shuffler.shuffle(demos)
        # Clamp at zero so a negative k cannot turn into a "drop the last |k|" slice.
        count = max(0, min(self.k, len(demos)))
        return p.with_demos(demos[:count])
|
69
|
+
|
70
|
+
|
71
|
+
class BootstrapFewShot:
    """Build few-shot demos from training examples the program already solves.

    An example is accepted as a bootstrapped demo when ``metric(prediction, y)``
    equals 1, or, if ``metric_threshold`` is set, when it is ``>=`` that
    threshold. Randomly sampled labeled examples can be appended on top.
    """

    def __init__(
        self,
        *,
        metric: Callable[[Any, Any], float],
        metric_threshold: float | None = None,
        max_bootstrapped_demos: int = 8,
        max_labeled_demos: int = 0,
        teacher_settings: dict[str, Any] | None = None,
        max_rounds: int = 1,
        seed: int | None = None,
    ):
        """Store the configuration.

        Args:
            metric: Called as ``metric(prediction, label)``.
            metric_threshold: Acceptance threshold; ``None`` means "exactly 1".
            max_bootstrapped_demos: Cap on bootstrapped demos; ``<= 0`` disables
                bootstrapping.
            max_labeled_demos: Number of extra labeled demos to sample.
            teacher_settings: Stored but not used by ``compile``.
            max_rounds: Number of passes over the training set.
            seed: Optional seed for shuffling/sampling; ``None`` keeps the
                previous behaviour of seeding from system entropy.
        """
        self.metric = metric
        self.metric_threshold = metric_threshold
        self.max_bootstrapped_demos = max_bootstrapped_demos
        self.max_labeled_demos = max_labeled_demos
        self.teacher_settings = teacher_settings or {}
        self.max_rounds = max_rounds
        self.seed = seed

    def _accepts(self, value: float) -> bool:
        """Whether a metric value qualifies an example as a bootstrapped demo."""
        if self.metric_threshold is None:
            return value == 1
        return value >= self.metric_threshold

    def compile(
        self,
        student: _ProgramLike,
        teacher: _ProgramLike | None,
        trainset: Sequence[tuple[Any, Any]],
    ) -> _ProgramLike:
        """Return a copy of ``student`` with bootstrapped (and labeled) demos.

        Args:
            student: Program to copy and to run on training inputs.
            teacher: Accepted for interface compatibility; not used here.
            trainset: ``(input, label)`` pairs; the caller's sequence is not
                modified.

        Returns:
            The result of ``with_demos(...)`` on the copied program.
        """
        # Resolve the fallback lazily so programs that only implement
        # `deepcopy` (and not `reset_copy`) work.
        copier = getattr(student, "deepcopy", None)
        if copier is None:
            copier = student.reset_copy
        p = copier()

        rng = random.Random(self.seed)
        pool = list(trainset)

        boot: list[tuple[Any, Any]] = []
        if self.max_bootstrapped_demos > 0:
            # Bootstrap demos by self consistency. Track accepted positions so
            # later rounds retry only the examples that have not been accepted
            # yet, instead of appending the same demo once per round.
            order = list(range(len(pool)))
            accepted: set[int] = set()
            for _ in range(self.max_rounds):
                if len(boot) >= self.max_bootstrapped_demos:
                    break
                rng.shuffle(order)
                for i in order:
                    if i in accepted:
                        continue
                    x, y = pool[i]
                    yhat = p.run(x)
                    if self._accepts(self.metric(yhat, y)):
                        accepted.add(i)
                        boot.append((x, y))
                        if len(boot) >= self.max_bootstrapped_demos:
                            break

        # Optionally add labeled demos
        demos = list(boot)
        if self.max_labeled_demos > 0:
            demos += rng.sample(pool, k=min(self.max_labeled_demos, len(pool)))

        return p.with_demos(demos)
|
125
|
+
|
126
|
+
|
127
|
+
# ---------------------------
|
128
|
+
# Random-search compile (BootstrapFewShotWithRandomSearch)
|
129
|
+
# ---------------------------
|
130
|
+
|
131
|
+
|
132
|
+
@dataclass
class Candidate:
    """One evaluated candidate program produced by `random_search_compile`."""

    # Aggregate validation score of the candidate.
    score: float
    # Per-example validation scores.
    subscores: list[float]
    # Search seed: -3 zero-shot, -2 labeled few-shot, -1 bootstrapped few-shot
    # at the maximum size, >= 0 bootstrapped on a shuffled trainset with a
    # random size.
    seed: int
    # The compiled program that was evaluated.
    program: _ProgramLike
|
138
|
+
|
139
|
+
|
140
|
+
def random_search_compile(
    student: _ProgramLike,
    trainset: Sequence[tuple[Any, Any]],
    valset: Sequence[tuple[Any, Any]],
    metric: Callable[[Any, Any], float],
    *,
    max_bootstrapped_demos: int = 8,
    max_labeled_demos: int = 4,
    max_rounds: int = 2,
    num_candidate_programs: int = 16,
    stop_at_score: float | None = None,
    evaluate_fn: Callable[
        [_ProgramLike, Sequence[tuple[Any, Any]], Callable[[Any, Any], float]], EvalResult
    ]
    | None = None,
    on_candidate_evaluated: Callable[[int, float, EvalResult, dict[str, Any]], None] | None = None,
) -> tuple[_ProgramLike, list[dict[str, Any]]]:
    """Random search over zero-shot, labeled and bootstrapped few-shot programs.

    Three baselines are tried first (zero-shot, labeled few-shot, bootstrapped
    few-shot at the maximum size), followed by ``num_candidate_programs``
    bootstrapped variants built from a shuffled trainset with a random demo
    count. Every candidate is scored on ``valset``.

    Args:
        student: Program to optimize. It should provide ``deepcopy`` and/or
            ``reset_copy``; either one alone is enough.
        trainset: ``(input, label)`` pairs used to build demos.
        valset: ``(input, label)`` pairs used to score candidates.
        metric: Called as ``metric(prediction, label)``.
        max_bootstrapped_demos: Upper bound on bootstrapped demos.
        max_labeled_demos: Number of labeled demos to add.
        max_rounds: Bootstrapping passes over the trainset.
        num_candidate_programs: Number of randomized candidates after the
            three baselines.
        stop_at_score: Stop early once the best score reaches this value.
        evaluate_fn: Optional replacement for the module's ``evaluate``.
        on_candidate_evaluated: Optional callback invoked after each candidate
            as ``(index_from_1, score, result, intervention)``. Exceptions it
            raises are suppressed so reporting cannot abort the search.

    Returns:
        ``(best_program, records)`` where ``records`` has one dict per
        evaluated candidate with ``score``, ``subscores`` and ``intervention``.
    """

    def _copy_student(preferred: str, fallback: str) -> _ProgramLike:
        """Copy ``student`` via the first available method, else return it as is."""
        # Looked up lazily: `getattr(obj, a, obj.b)` evaluates `obj.b` even
        # when `a` exists, which breaks programs implementing only one of them.
        for method_name in (preferred, fallback):
            method = getattr(student, method_name, None)
            if callable(method):
                return method()
        return student

    best_program: _ProgramLike | None = None
    best_score = float("-inf")
    candidates: list[Candidate] = []
    records: list[dict[str, Any]] = []

    seeds = list(range(num_candidate_programs))
    seeds = [-3, -2, -1] + seeds  # zero-shot, labeled few-shot, bootstrapped few-shot

    rng = random.Random(0)
    for idx, seed in enumerate(seeds):
        train_copy = list(trainset)

        if seed == -3:
            program = _copy_student("reset_copy", "deepcopy")

        elif seed == -2:
            program = LabeledFewShot(k=max_labeled_demos).compile(student, train_copy, sample=True)

        else:
            if seed >= 0:
                rng.shuffle(train_copy)
            if max_bootstrapped_demos <= 0:
                size = 0
            else:
                size = (
                    max_bootstrapped_demos if seed == -1 else rng.randint(1, max_bootstrapped_demos)
                )
            program = BootstrapFewShot(
                metric=metric,
                metric_threshold=None,
                max_bootstrapped_demos=size,
                max_labeled_demos=max_labeled_demos,
                teacher_settings={},
                max_rounds=max_rounds,
            ).compile(student, teacher=None, trainset=train_copy)

        res = (
            evaluate_fn(program, valset, metric)
            if evaluate_fn
            else evaluate(program, valset, metric)
        )
        cand = Candidate(score=res.score, subscores=res.subscores, seed=seed, program=program)
        candidates.append(cand)
        # Record an intervention summary for reproducibility
        intervention: dict[str, Any] = {"seed": seed}
        if hasattr(program, "demos"):
            try:
                intervention["demos"] = program.demos  # type: ignore
            except Exception:
                intervention["demos"] = None
        # Type of candidate
        if seed == -3:
            intervention["kind"] = "zero_shot"
            intervention["label"] = "zero-shot"
        elif seed == -2:
            intervention["kind"] = "labeled_few_shot"
            intervention["label"] = f"labeled-{max_labeled_demos}"
        else:
            intervention["kind"] = "bootstrapped_few_shot"
            intervention["label"] = f"boot-b{max_bootstrapped_demos}-l{max_labeled_demos}"
        record_obj = {
            "score": cand.score,
            "subscores": cand.subscores,
            "intervention": intervention,
        }
        records.append(record_obj)

        if res.score > best_score:
            best_score, best_program = res.score, program

        # Report before checking the stop condition; otherwise the candidate
        # that triggers the early stop (the winner) is never reported.
        if on_candidate_evaluated is not None:
            with contextlib.suppress(Exception):
                on_candidate_evaluated(idx + 1, res.score, res, intervention)

        if stop_at_score is not None and best_score >= stop_at_score:
            break

    # Attach candidates for inspection
    if hasattr(best_program, "candidate_programs"):
        # If user object supports attribute assignment
        with contextlib.suppress(Exception):
            best_program.candidate_programs = sorted(
                candidates, key=lambda c: c.score, reverse=True
            )  # type: ignore[attr-defined]

    if best_program is None:
        # Only reachable when no candidate produced a comparable score
        # (e.g. every score was NaN); hand back a plain copy of the student.
        best_program = _copy_student("deepcopy", "reset_copy")
    return (best_program, records)
|
240
|
+
|
241
|
+
|
242
|
+
# Names exported by `from ... import *`; `evaluate`, `EvalResult` and
# `Candidate` are left out of this list.
__all__ = [
    "random_search_compile",
    "LabeledFewShot",
    "BootstrapFewShot",
]
|
@@ -0,0 +1,172 @@
|
|
1
|
+
"""
|
2
|
+
Example: MIPROv2-style optimizer on Banking77 using Groq gpt-oss-20b.
|
3
|
+
|
4
|
+
Requires:
|
5
|
+
- .env with GROQ_API_KEY
|
6
|
+
- the `datasets` and `python-dotenv` packages installed
|
7
|
+
|
8
|
+
Run:
|
9
|
+
- uv run -q python -m synth_ai.learning.prompts.run_mipro_banking77
|
10
|
+
"""
|
11
|
+
|
12
|
+
from __future__ import annotations
|
13
|
+
|
14
|
+
import asyncio
|
15
|
+
import json
|
16
|
+
import os
|
17
|
+
import random
|
18
|
+
import time
|
19
|
+
from collections.abc import Sequence
|
20
|
+
from pathlib import Path
|
21
|
+
from typing import Any
|
22
|
+
|
23
|
+
from datasets import load_dataset
|
24
|
+
from dotenv import load_dotenv
|
25
|
+
from synth_ai.learning.prompts.mipro import ProgramAdapter, evaluate_program, mipro_v2_compile
|
26
|
+
from synth_ai.lm.core.main_v3 import LM, build_messages
|
27
|
+
|
28
|
+
|
29
|
+
def choose_label(pred: str, label_names: list[str]) -> str:
    """Map a raw model prediction onto one of the known label names.

    The prediction is stripped and lower-cased. If it equals a label
    (case-insensitively) that label is returned. Otherwise every label is
    scored by how many of its whitespace-separated, lower-cased words occur
    as substrings of the normalized prediction, and the best-scoring label
    wins; ties resolve to the earliest entry in ``label_names``.
    """
    cleaned = (pred or "").strip().lower()

    # Case-insensitive index; a later label with the same lower-cased form
    # replaces an earlier one.
    by_lower: dict[str, str] = {}
    for name in label_names:
        by_lower[name.lower()] = name

    try:
        return by_lower[cleaned]
    except KeyError:
        pass

    def overlap(label: str) -> int:
        # Count the label's words that appear somewhere in the prediction.
        hits = 0
        for word in label.lower().split():
            if word in cleaned:
                hits += 1
        return hits

    return max(label_names, key=overlap)
|
40
|
+
|
41
|
+
|
42
|
+
def accuracy(pred: str, gold: str, labels: list[str]) -> float:
    """Return 1.0 if the label resolved from ``pred`` equals ``gold``, else 0.0."""
    resolved = choose_label(pred, labels)
    if resolved == gold:
        return 1.0
    return 0.0
|
44
|
+
|
45
|
+
|
46
|
+
class NaivePromptModel:
    """Toy prompt model that hands back canned instruction variants."""

    def generate_instructions(self, ctx: dict[str, Any], k: int = 8) -> list[str]:
        """Return the first ``k`` of eight fixed instruction strings, shuffled.

        ``ctx`` is accepted but not consulted. Shuffling uses the module-level
        ``random`` generator, so results depend on its current seed/state.
        """
        base = "Classify the Banking77 intent and return exactly one label."
        suffixes = (
            "",
            " Be concise.",
            " Use examples to guide your reasoning.",
            " Return only the label text.",
            " Follow the label names strictly.",
            " Do not include explanations.",
            " Think about similar intents before answering.",
            " Carefully consider the user's message.",
        )
        variants = [base + suffix for suffix in suffixes]
        random.shuffle(variants)
        return variants[:k]
|
63
|
+
|
64
|
+
|
65
|
+
def build_run_fn(lm: LM, label_names: list[str], state: dict[str, Any] | None = None):
    """Build the callable that classifies a single message with ``lm``.

    Args:
        lm: Language model; ``lm.model`` and ``lm.respond_async`` are used.
        label_names: Known label names. Not used by the returned function;
            kept so existing callers keep working.
        state: Optional mapping holding ``"instructions"`` (a dict whose
            ``"main"`` entry is the system prompt) and ``"demos"`` (an
            iterable of ``(input, label)`` pairs). When omitted, the
            module-level ``state_ref`` is looked up on every call; if that
            global has not been created yet, defaults are used instead of
            raising ``NameError``.

    Returns:
        ``run_fn(x, _model=None) -> str``, which sends the prompt to ``lm``
        and returns the stripped raw response text ("" when there is none).
    """
    default_instruction = "You are an intent classifier for Banking77."

    def run_fn(x: str, _model: Any | None = None) -> str:
        # Resolve the state at call time so in-place updates made through
        # set_instructions/set_demos are picked up. Without an explicit
        # ``state`` this falls back to the module global set by main().
        current = state if state is not None else (globals().get("state_ref") or {})

        system_prompt = current.get("instructions", {}).get("main", default_instruction)
        examples = "\n".join(
            f"Input: {a}\nLabel: {b}" for a, b in current.get("demos", [])
        )
        user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
        messages = build_messages(system_prompt, user, images_bytes=None, model_name=lm.model)

        async def _call() -> str:
            resp = await lm.respond_async(messages=messages)
            return (resp.raw_response or "").strip()

        # asyncio.run creates a fresh event loop per call; it cannot be used
        # from code that is already running inside an event loop.
        return asyncio.run(_call())

    return run_fn
|
84
|
+
|
85
|
+
|
86
|
+
def set_instructions(new_instr: dict[str, str], state: dict[str, Any]) -> dict[str, Any]:
    """Merge ``new_instr`` into ``state["instructions"]`` and return ``state``.

    ``state`` is mutated in place. The stored instructions dict is replaced
    by a fresh one (the previous dict object is left untouched), and entries
    from ``new_instr`` win on key conflicts.
    """
    merged = dict(state.get("instructions", {}))
    merged.update(new_instr)
    state["instructions"] = merged
    return state
|
89
|
+
|
90
|
+
|
91
|
+
def set_demos(demos: list[tuple[str, str]], state: dict[str, Any]) -> dict[str, Any]:
    """Store a list copy of ``demos`` under ``state["demos"]`` and return ``state``.

    ``state`` is mutated in place; the caller's ``demos`` object is not kept.
    """
    copied = [*demos]
    state["demos"] = copied
    return state
|
94
|
+
|
95
|
+
|
96
|
+
def main():
    """Run the MIPROv2-style optimizer on a Banking77 sample and save the trials.

    Loads environment variables, builds the LM from the ``MODEL`` / ``VENDOR``
    environment variables (defaults ``openai/gpt-oss-20b`` / ``groq``),
    shuffles the Banking77 test split with a fixed seed and takes 80 training
    and 80 validation items, runs ``mipro_v2_compile``, prints the best
    program's validation accuracy, and writes the trial records to a
    timestamped JSON file next to this module.
    """
    # run_fn (from build_run_fn) reads this module-level name at call time.
    global state_ref

    load_dotenv()
    random.seed(0)

    model = os.getenv("MODEL", "openai/gpt-oss-20b")
    vendor = os.getenv("VENDOR", "groq")
    lm = LM(model=model, vendor=vendor, temperature=0.0)

    print("Loading Banking77 dataset (train/dev split of test for demo)...")
    dataset = load_dataset("banking77")
    test_split = dataset["test"]
    label_names: list[str] = test_split.features["label"].names  # type: ignore

    # Pair each text with its label name, then carve two disjoint slices.
    pool = []
    for row in test_split:
        pool.append((row["text"], label_names[int(row["label"])]))
    random.shuffle(pool)
    trainset: Sequence[tuple[str, str]] = pool[:80]
    valset: Sequence[tuple[str, str]] = pool[80:160]

    state_ref = {
        "instructions": {"main": "You are an intent classifier for Banking77."},
        "demos": [],
    }
    adapter = ProgramAdapter(
        run_fn=build_run_fn(lm, label_names),
        state=state_ref,
        _predictors=["main"],
        set_instructions=set_instructions,
        set_demos=set_demos,
    )

    def metric(yhat: str, y: str) -> float:
        return accuracy(yhat, y, label_names)

    prompt_model = NaivePromptModel()
    task_model = None  # not used in this minimal example

    print("Running MIPROv2-style optimizer...")
    search_options = {
        "max_bootstrapped_demos": 6,
        "max_labeled_demos": 4,
        "num_candidates": 6,
        "num_trials": 12,
        "minibatch": True,
        "minibatch_size": 16,
        "minibatch_full_eval_steps": 3,
        "seed": 0,
    }
    best, records = mipro_v2_compile(
        student=adapter,
        trainset=trainset,
        valset=valset,
        metric=metric,
        prompt_model=prompt_model,
        task_model=task_model,
        **search_options,
    )

    res = evaluate_program(best, valset, metric)
    correct = sum(res.subscores)
    total = len(res.subscores)
    print(f"Best program accuracy on val: {res.score:.2%} ({correct}/{total})")

    run_context = {
        "model": model,
        "vendor": vendor,
        "train_size": len(trainset),
        "val_size": len(valset),
    }
    out = {"context": run_context, "trials": records}

    # The output lands beside this source file, named by the current Unix time.
    target = Path(__file__).parent / f"mipro_banking77_{int(time.time())}.json"
    fname = str(target)
    with open(fname, "w") as f:
        json.dump(out, f, indent=2)
    print(f"Saved trial records to {fname}")
|
169
|
+
|
170
|
+
|
171
|
+
if __name__ == "__main__":
|
172
|
+
main()
|