synth-ai 0.2.4.dev4__py3-none-any.whl → 0.2.4.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. synth_ai/environments/examples/__init__.py +1 -0
  2. synth_ai/environments/examples/crafter_classic/__init__.py +8 -0
  3. synth_ai/environments/examples/crafter_classic/config_logging.py +111 -0
  4. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  5. synth_ai/environments/examples/crafter_classic/engine.py +579 -0
  6. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +63 -0
  7. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +5 -0
  8. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +74 -0
  9. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +266 -0
  10. synth_ai/environments/examples/crafter_classic/environment.py +364 -0
  11. synth_ai/environments/examples/crafter_classic/taskset.py +233 -0
  12. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +229 -0
  13. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +298 -0
  14. synth_ai/environments/examples/crafter_custom/__init__.py +4 -0
  15. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +7 -0
  16. synth_ai/environments/examples/crafter_custom/crafter/config.py +182 -0
  17. synth_ai/environments/examples/crafter_custom/crafter/constants.py +8 -0
  18. synth_ai/environments/examples/crafter_custom/crafter/engine.py +269 -0
  19. synth_ai/environments/examples/crafter_custom/crafter/env.py +266 -0
  20. synth_ai/environments/examples/crafter_custom/crafter/objects.py +418 -0
  21. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +187 -0
  22. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +119 -0
  23. synth_ai/environments/examples/crafter_custom/dataset_builder.py +373 -0
  24. synth_ai/environments/examples/crafter_custom/environment.py +312 -0
  25. synth_ai/environments/examples/crafter_custom/run_dataset.py +305 -0
  26. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +156 -0
  27. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +280 -0
  28. synth_ai/environments/examples/enron/art_helpers/types_enron.py +24 -0
  29. synth_ai/environments/examples/enron/engine.py +291 -0
  30. synth_ai/environments/examples/enron/environment.py +165 -0
  31. synth_ai/environments/examples/enron/taskset.py +112 -0
  32. synth_ai/environments/examples/minigrid/__init__.py +48 -0
  33. synth_ai/environments/examples/minigrid/engine.py +589 -0
  34. synth_ai/environments/examples/minigrid/environment.py +274 -0
  35. synth_ai/environments/examples/minigrid/environment_mapping.py +242 -0
  36. synth_ai/environments/examples/minigrid/puzzle_loader.py +416 -0
  37. synth_ai/environments/examples/minigrid/taskset.py +583 -0
  38. synth_ai/environments/examples/nethack/__init__.py +7 -0
  39. synth_ai/environments/examples/nethack/achievements.py +337 -0
  40. synth_ai/environments/examples/nethack/engine.py +738 -0
  41. synth_ai/environments/examples/nethack/environment.py +255 -0
  42. synth_ai/environments/examples/nethack/helpers/__init__.py +42 -0
  43. synth_ai/environments/examples/nethack/helpers/action_mapping.py +301 -0
  44. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +401 -0
  45. synth_ai/environments/examples/nethack/helpers/observation_utils.py +433 -0
  46. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +201 -0
  47. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +268 -0
  48. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +308 -0
  49. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +430 -0
  50. synth_ai/environments/examples/nethack/taskset.py +323 -0
  51. synth_ai/environments/examples/red/__init__.py +7 -0
  52. synth_ai/environments/examples/red/config_logging.py +110 -0
  53. synth_ai/environments/examples/red/engine.py +693 -0
  54. synth_ai/environments/examples/red/engine_helpers/__init__.py +1 -0
  55. synth_ai/environments/examples/red/engine_helpers/memory_map.py +28 -0
  56. synth_ai/environments/examples/red/engine_helpers/reward_components.py +275 -0
  57. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +142 -0
  58. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +56 -0
  59. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +283 -0
  60. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +149 -0
  61. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +137 -0
  62. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +56 -0
  63. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +330 -0
  64. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +120 -0
  65. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +558 -0
  66. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +312 -0
  67. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +147 -0
  68. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +246 -0
  69. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +367 -0
  70. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +139 -0
  71. synth_ai/environments/examples/red/environment.py +235 -0
  72. synth_ai/environments/examples/red/taskset.py +77 -0
  73. synth_ai/environments/examples/sokoban/__init__.py +1 -0
  74. synth_ai/environments/examples/sokoban/engine.py +675 -0
  75. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +1 -0
  76. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +656 -0
  77. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +17 -0
  78. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +3 -0
  79. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +129 -0
  80. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +370 -0
  81. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +331 -0
  82. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +305 -0
  83. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +66 -0
  84. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +114 -0
  85. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +122 -0
  86. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +394 -0
  87. synth_ai/environments/examples/sokoban/environment.py +228 -0
  88. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +438 -0
  89. synth_ai/environments/examples/sokoban/puzzle_loader.py +311 -0
  90. synth_ai/environments/examples/sokoban/taskset.py +425 -0
  91. synth_ai/environments/examples/tictactoe/__init__.py +1 -0
  92. synth_ai/environments/examples/tictactoe/engine.py +368 -0
  93. synth_ai/environments/examples/tictactoe/environment.py +239 -0
  94. synth_ai/environments/examples/tictactoe/taskset.py +214 -0
  95. synth_ai/environments/examples/verilog/__init__.py +10 -0
  96. synth_ai/environments/examples/verilog/engine.py +328 -0
  97. synth_ai/environments/examples/verilog/environment.py +349 -0
  98. synth_ai/environments/examples/verilog/taskset.py +418 -0
  99. synth_ai/environments/examples/wordle/__init__.py +29 -0
  100. synth_ai/environments/examples/wordle/engine.py +391 -0
  101. synth_ai/environments/examples/wordle/environment.py +154 -0
  102. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +75 -0
  103. synth_ai/environments/examples/wordle/taskset.py +222 -0
  104. synth_ai/environments/service/app.py +8 -0
  105. synth_ai/environments/service/core_routes.py +38 -0
  106. synth_ai/learning/prompts/banking77_injection_eval.py +163 -0
  107. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +201 -0
  108. synth_ai/learning/prompts/mipro.py +273 -1
  109. synth_ai/learning/prompts/random_search.py +247 -0
  110. synth_ai/learning/prompts/run_mipro_banking77.py +160 -0
  111. synth_ai/learning/prompts/run_random_search_banking77.py +305 -0
  112. synth_ai/lm/injection.py +81 -0
  113. synth_ai/lm/overrides.py +204 -0
  114. synth_ai/lm/provider_support/anthropic.py +39 -12
  115. synth_ai/lm/provider_support/openai.py +31 -4
  116. synth_ai/lm/vendors/core/anthropic_api.py +16 -0
  117. synth_ai/lm/vendors/openai_standard.py +35 -5
  118. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/METADATA +2 -1
  119. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/RECORD +123 -13
  120. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/WHEEL +0 -0
  121. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/entry_points.txt +0 -0
  122. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/licenses/LICENSE +0 -0
  123. {synth_ai-0.2.4.dev4.dist-info → synth_ai-0.2.4.dev6.dist-info}/top_level.txt +0 -0
synth_ai/learning/prompts/mipro.py
@@ -1,3 +1,275 @@
+ """
+ MIPROv2-style prompt optimizer (modular, DSPy-inspired).
+
+ This module provides a modular implementation of the MIPROv2 pseudocode from DSPy,
+ adapted to a provider-agnostic "program" interface. The goal is to keep the
+ bootstrapping and search process pluggable so it can be swapped for alternatives.
+
+ Key ideas
+ - Program adapter: unify how we set instructions/demos and run predictions.
+ - Demo bootstrapping: gather high-confidence examples (by metric) as candidates.
+ - Instruction proposals: generated by a prompt model from contextual summaries.
+ - Search (placeholder): random/Bayesian-like search over (instructions × demos).
+
+ Notes
+ - The implementation is intentionally lightweight and dependency-free.
+ - "BayesOpt" here is a placeholder randomized proposer that uses history; you
+   can plug in a real optimizer later.
+ """
+
+ from __future__ import annotations
+
+ import random
+ from dataclasses import dataclass, replace
+ from typing import Any, Callable, Dict, Iterable, List, Optional, Protocol, Sequence, Tuple
+
+
+ # ---------------------------
+ # Program adapter and protocols
+ # ---------------------------
+
+
+ class PredictProgram(Protocol):
+     """Minimal protocol a program must satisfy for MIPRO.
+
+     You can adapt your own pipeline to this by implementing these methods or
+     by wrapping it with `ProgramAdapter` below.
+     """
+
+     def deepcopy(self) -> "PredictProgram": ...
+
+     def run(self, x: Any, *, model: Optional[Any] = None) -> Any: ...
+
+     def with_instructions(self, instructions: Dict[str, str]) -> "PredictProgram": ...
+
+     def with_demos(self, demos: List[Tuple[Any, Any]]) -> "PredictProgram": ...
+
+     @property
+     def predictors(self) -> List[str]: ...
+
+
+ @dataclass
+ class ProgramAdapter:
+     """Adapter that turns a set of callables/state into a `PredictProgram`.
+
+     - run_fn: Callable[[x, model], y]
+     - state: arbitrary dict; supports `instructions` and `demos` keys
+     - predictors: list of predictor identifiers (e.g., names of prompt blocks)
+     - set_instructions: Callable to update instructions (per predictor)
+     - set_demos: Callable to update demos (global or per predictor)
+     """
+
+     run_fn: Callable[[Any, Optional[Any]], Any]
+     state: Dict[str, Any]
+     _predictors: List[str]
+     set_instructions: Callable[[Dict[str, str], Dict[str, Any]], Dict[str, Any]]
+     set_demos: Callable[[List[Tuple[Any, Any]], Dict[str, Any]], Dict[str, Any]]
+
+     def deepcopy(self) -> "ProgramAdapter":
+         return replace(self, state={**self.state})
+
+     def run(self, x: Any, *, model: Optional[Any] = None) -> Any:
+         return self.run_fn(x, model)
+
+     def with_instructions(self, instructions: Dict[str, str]) -> "ProgramAdapter":
+         new_state = self.set_instructions(instructions, {**self.state})
+         return replace(self, state=new_state)
+
+     def with_demos(self, demos: List[Tuple[Any, Any]]) -> "ProgramAdapter":
+         new_state = self.set_demos(demos, {**self.state})
+         return replace(self, state=new_state)
+
+     @property
+     def predictors(self) -> List[str]:
+         return list(self._predictors)
+
+
+ # ---------------------------
+ # Utility helpers
+ # ---------------------------
+
+
+ def summarize_dataset(trainset: Sequence[Tuple[Any, Any]], max_items: int = 50) -> str:
+     n = len(trainset)
+     ex = ", ".join(repr(trainset[i][0])[:40] for i in range(0, min(max_items, n), max(1, n // max_items or 1)))
+     return f"Dataset size: {n}. Example inputs: {ex}"
+
+
+ def summarize_program(prog: PredictProgram) -> str:
+     return f"Program predictors: {prog.predictors}"
+
+
+ def random_tip(rng: random.Random) -> str:
+     tips = [
+         "Be concise.",
+         "Focus on the task definition.",
+         "Use the provided examples as guidance.",
+         "Avoid unnecessary verbosity.",
+     ]
+     return rng.choice(tips)
+
+
+ def choose(items: Sequence[Any], rng: Optional[random.Random] = None) -> Any:
+     r = rng or random
+     return r.choice(items)
+
+
+ # ---------------------------
+ # Evaluator
+ # ---------------------------
+
+
+ @dataclass
+ class EvalResult:
+     score: float
+     subscores: List[float]
+
+
+ def evaluate_program(program: PredictProgram, dataset: Sequence[Tuple[Any, Any]], metric: Callable[[Any, Any], float]) -> EvalResult:
+     subs = []
+     for x, y in dataset:
+         yhat = program.run(x)
+         subs.append(metric(yhat, y))
+     return EvalResult(score=float(sum(subs)) / max(1, len(subs)), subscores=subs)
+
+
+ # ---------------------------
+ # MIPROv2 compile
+ # ---------------------------
+
+
+ def mipro_v2_compile(
+     student: PredictProgram,
+     trainset: Sequence[Tuple[Any, Any]],
+     valset: Sequence[Tuple[Any, Any]],
+     metric: Callable[[Any, Any], float],
+     *,
+     prompt_model: Any,
+     task_model: Any,
+     max_bootstrapped_demos: int = 8,
+     max_labeled_demos: int = 4,
+     num_candidates: int = 8,
+     num_trials: int = 20,
+     minibatch: bool = True,
+     minibatch_size: int = 16,
+     minibatch_full_eval_steps: int = 5,
+     seed: int = 0,
+     auto: str = "light",
+     program_aware: bool = True,
+     data_aware: bool = True,
+     tip_aware: bool = True,
+     fewshot_aware: bool = True,
+ ) -> Tuple[PredictProgram, List[Dict[str, Any]]]:
+     """MIPROv2-style optimizer.
+
+     Arguments mirror the DSPy pseudocode but remain provider-agnostic. The
+     `prompt_model` must expose `generate_instructions(ctx, k)`; the `student`
+     program must implement the `PredictProgram` protocol.
+     """
+
+     rng = random.Random(seed)
+     program = student.deepcopy()
+
+     # Step 1: bootstrap few-shot example candidates
+     demo_candidates: List[Dict[str, Any]] = []
+     for _ in range(num_candidates):
+         boot: List[Tuple[Any, Any]] = []
+         # collect bootstrapped, self-consistent demos
+         while len(boot) < max_bootstrapped_demos:
+             x, y = rng.choice(trainset)
+             yhat = program.run(x, model=task_model)
+             if metric(yhat, y) == 1:  # perfect match
+                 boot.append((x, y))
+         labeled = rng.sample(list(trainset), k=min(max_labeled_demos, len(trainset)))
+         demo_candidates.append({"boot": boot, "labeled": labeled})
+
+     # Step 2: propose instruction candidates per predictor
+     instr_candidates: Dict[str, List[str]] = {}
+     for pred in (program.predictors or ["predictor"]):
+         ctx: Dict[str, Any] = {}
+         if data_aware:
+             ctx["dataset_summary"] = summarize_dataset(trainset)
+         if program_aware:
+             ctx["program_summary"] = summarize_program(program)
+         if fewshot_aware and demo_candidates:
+             ctx["examples"] = choose(demo_candidates, rng)
+         if tip_aware:
+             ctx["tip"] = random_tip(rng)
+         cand = prompt_model.generate_instructions(ctx, k=num_candidates)
+         instr_candidates[pred] = list(cand)
+
+     # Step 3: Bayesian-optimization-like search (random proposer placeholder)
+     history: List[Tuple[Dict[str, Any], float]] = []
+     records: List[Dict[str, Any]] = []
+     best_score = -1.0
+     best_cfg: Optional[Dict[str, Any]] = None
+
+     def propose(history_: List[Tuple[Dict[str, Any], float]]) -> Dict[str, Any]:
+         # Placeholder: randomly sample from the cartesian product
+         instructions = {pred: choose(instr_candidates[pred], rng) for pred in instr_candidates}
+         demos = choose(demo_candidates, rng) if demo_candidates else None
+         return {"instructions": instructions, "demo_set": demos}
+
+     for t in range(1, num_trials + 1):
+         theta = propose(history)
+         program_t = program.with_instructions(theta["instructions"])
+         if theta.get("demo_set") is not None:
+             # Combine bootstrapped + labeled demos
+             ds = theta["demo_set"]
+             demo_set = list(ds.get("boot", [])) + list(ds.get("labeled", []))
+             program_t = program_t.with_demos(demo_set)
+
+         batch = (
+             valset
+             if not minibatch
+             else random.sample(list(valset), k=min(minibatch_size, len(valset)))
+         )
+         batch_res = evaluate_program(program_t, batch, metric)
+         s_t = batch_res.score
+         history.append((theta, s_t))
+         records.append({
+             "trial": t,
+             "evaluation": "batch" if minibatch else "full",
+             "score": s_t,
+             "intervention": {
+                 "instructions": theta.get("instructions"),
+                 "demo_set": theta.get("demo_set"),
+             },
+         })
+
+         if (not minibatch) or (t % max(1, minibatch_full_eval_steps) == 0):
+             full_res = evaluate_program(program_t, valset, metric)
+             s_full = full_res.score
+             if s_full > best_score:
+                 best_score = s_full
+                 best_cfg = theta
+             records.append({
+                 "trial": t,
+                 "evaluation": "full",
+                 "score": s_full,
+                 "intervention": {
+                     "instructions": theta.get("instructions"),
+                     "demo_set": theta.get("demo_set"),
+                 },
+             })
+
+     if best_cfg is None:
+         return program, records
+
+     best_program = program.with_instructions(best_cfg["instructions"])
+     if best_cfg.get("demo_set") is not None:
+         ds = best_cfg["demo_set"]
+         demo_set = list(ds.get("boot", [])) + list(ds.get("labeled", []))
+         best_program = best_program.with_demos(demo_set)
+     return best_program, records
+
+
+ __all__ = [
+     "PredictProgram",
+     "ProgramAdapter",
+     "evaluate_program",
+     "mipro_v2_compile",
+ ]
 
 
  class ExampleTwoStepDag:
@@ -5,4 +277,4 @@ class ExampleTwoStepDag:
 
      """
      A -> B
-     """
+     """
synth_ai/learning/prompts/random_search.py
@@ -0,0 +1,247 @@
+ """
+ Random-search prompt optimizer (BootstrapFewShotWithRandomSearch), DSPy-inspired.
+
+ Implements the high-level pseudocode of DSPy's Random Search optimizer in a
+ provider-agnostic, modular style. You can plug in your own student/program and
+ metric, and this module will explore baselines and bootstrapped few-shot variants.
+ """
+
+ from __future__ import annotations
+
+ import random
+ from dataclasses import dataclass
+ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
+
+
+ # ---------------------------
+ # Protocol-like expectations (duck-typed)
+ # ---------------------------
+
+
+ class _ProgramLike:
+     def reset_copy(self):  # zero-shot copy
+         return self
+
+     def deepcopy(self):  # deep copy
+         return self
+
+     def with_demos(self, demos: List[Tuple[Any, Any]]):
+         return self
+
+     def run(self, x: Any) -> Any:
+         raise NotImplementedError
+
+
+ # ---------------------------
+ # Helpers and lightweight components
+ # ---------------------------
+
+
+ @dataclass
+ class EvalResult:
+     score: float
+     subscores: List[float]
+
+
+ def evaluate(program: _ProgramLike, dataset: Sequence[Tuple[Any, Any]], metric: Callable[[Any, Any], float]) -> EvalResult:
+     subs = []
+     for x, y in dataset:
+         subs.append(metric(program.run(x), y))
+     return EvalResult(sum(subs) / max(1, len(subs)), subs)
+
+
+ class LabeledFewShot:
+     def __init__(self, k: int):
+         self.k = k
+
+     def compile(self, student: _ProgramLike, trainset: Sequence[Tuple[Any, Any]], sample: bool = True) -> _ProgramLike:
+         p = getattr(student, "deepcopy", student.reset_copy)()
+         demos = list(trainset)
+         if sample:
+             random.shuffle(demos)
+         p = p.with_demos(demos[: min(self.k, len(demos))])
+         return p
+
+
+ class BootstrapFewShot:
+     def __init__(
+         self,
+         *,
+         metric: Callable[[Any, Any], float],
+         metric_threshold: Optional[float] = None,
+         max_bootstrapped_demos: int = 8,
+         max_labeled_demos: int = 0,
+         teacher_settings: Optional[Dict[str, Any]] = None,
+         max_rounds: int = 1,
+     ):
+         self.metric = metric
+         self.metric_threshold = metric_threshold
+         self.max_bootstrapped_demos = max_bootstrapped_demos
+         self.max_labeled_demos = max_labeled_demos
+         self.teacher_settings = teacher_settings or {}
+         self.max_rounds = max_rounds
+
+     def compile(
+         self,
+         student: _ProgramLike,
+         teacher: Optional[_ProgramLike],
+         trainset: Sequence[Tuple[Any, Any]],
+     ) -> _ProgramLike:
+         p = getattr(student, "deepcopy", student.reset_copy)()
+         rng = random.Random()
+         # If bootstrapped demos disabled, return labeled-only few-shot quickly
+         if self.max_bootstrapped_demos <= 0:
+             demos: List[Tuple[Any, Any]] = []
+             if self.max_labeled_demos > 0:
+                 demos += rng.sample(list(trainset), k=min(self.max_labeled_demos, len(trainset)))
+             return p.with_demos(demos)
+         boot: List[Tuple[Any, Any]] = []
+         # Bootstrap demos by self consistency
+         for _ in range(self.max_rounds):
+             rng.shuffle(trainset := list(trainset))
+             for x, y in trainset:
+                 yhat = p.run(x)
+                 ok = self.metric(yhat, y)
+                 if (self.metric_threshold is None and ok == 1) or (
+                     self.metric_threshold is not None and ok >= self.metric_threshold
+                 ):
+                     boot.append((x, y))
+                 if len(boot) >= self.max_bootstrapped_demos:
+                     break
+             if len(boot) >= self.max_bootstrapped_demos:
+                 break
+
+         # Optionally add labeled demos
+         demos = list(boot)
+         if self.max_labeled_demos > 0:
+             demos += rng.sample(list(trainset), k=min(self.max_labeled_demos, len(trainset)))
+
+         return p.with_demos(demos)
+
+
+ # ---------------------------
+ # Random-search compile (BootstrapFewShotWithRandomSearch)
+ # ---------------------------
+
+
+ @dataclass
+ class Candidate:
+     score: float
+     subscores: List[float]
+     seed: int
+     program: _ProgramLike
+
+
+ def random_search_compile(
+     student: _ProgramLike,
+     trainset: Sequence[Tuple[Any, Any]],
+     valset: Sequence[Tuple[Any, Any]],
+     metric: Callable[[Any, Any], float],
+     *,
+     max_bootstrapped_demos: int = 8,
+     max_labeled_demos: int = 4,
+     max_rounds: int = 2,
+     num_candidate_programs: int = 16,
+     stop_at_score: Optional[float] = None,
+     evaluate_fn: Optional[Callable[[
+         _ProgramLike,
+         Sequence[Tuple[Any, Any]],
+         Callable[[Any, Any], float]
+     ], EvalResult]] = None,
+     on_candidate_evaluated: Optional[Callable[[int, float, EvalResult, Dict[str, Any]], None]] = None,
+ ) -> Tuple[_ProgramLike, List[Dict[str, Any]]]:
+     best_program: Optional[_ProgramLike] = None
+     best_score = float("-inf")
+     candidates: List[Candidate] = []
+     records: List[Dict[str, Any]] = []
+
+     seeds = list(range(num_candidate_programs))
+     seeds = [-3, -2, -1] + seeds  # zero-shot, labeled few-shot, bootstrapped few-shot
+
+     rng = random.Random(0)
+     for idx, seed in enumerate(seeds):
+         train_copy = list(trainset)
+
+         if seed == -3:
+             program = getattr(student, "reset_copy", student.deepcopy)()
+
+         elif seed == -2:
+             program = LabeledFewShot(k=max_labeled_demos).compile(student, train_copy, sample=True)
+
+         else:
+             if seed >= 0:
+                 rng.shuffle(train_copy)
+             if max_bootstrapped_demos <= 0:
+                 size = 0
+             else:
+                 size = max_bootstrapped_demos if seed == -1 else rng.randint(1, max_bootstrapped_demos)
+             program = BootstrapFewShot(
+                 metric=metric,
+                 metric_threshold=None,
+                 max_bootstrapped_demos=size,
+                 max_labeled_demos=max_labeled_demos,
+                 teacher_settings={},
+                 max_rounds=max_rounds,
+             ).compile(student, teacher=None, trainset=train_copy)
+
+         res = (evaluate_fn(program, valset, metric) if evaluate_fn else evaluate(program, valset, metric))
+         cand = Candidate(score=res.score, subscores=res.subscores, seed=seed, program=program)
+         candidates.append(cand)
+         # Record an intervention summary for reproducibility
+         intervention: Dict[str, Any] = {"seed": seed}
+         if hasattr(program, "demos"):
+             try:
+                 intervention["demos"] = getattr(program, "demos")  # type: ignore
+             except Exception:
+                 intervention["demos"] = None
+         # Type of candidate
+         if seed == -3:
+             intervention["kind"] = "zero_shot"
+             intervention["label"] = "zero-shot"
+         elif seed == -2:
+             intervention["kind"] = "labeled_few_shot"
+             intervention["label"] = f"labeled-{max_labeled_demos}"
+         else:
+             intervention["kind"] = "bootstrapped_few_shot"
+             bs = 0
+             try:
+                 # try to infer from program demos length if present
+                 bs = len(intervention.get("demos") or [])
+             except Exception:
+                 bs = 0
+             intervention["label"] = f"boot-b{max_bootstrapped_demos}-l{max_labeled_demos}"
+         record_obj = {
+             "score": cand.score,
+             "subscores": cand.subscores,
+             "intervention": intervention,
+         }
+         records.append(record_obj)
+
+         if res.score > best_score:
+             best_score, best_program = res.score, program
+
+         if stop_at_score is not None and best_score >= stop_at_score:
+             break
+
+         if on_candidate_evaluated is not None:
+             try:
+                 on_candidate_evaluated(idx + 1, res.score, res, intervention)
+             except Exception:
+                 pass
+
+     # Attach candidates for inspection
+     if hasattr(best_program, "candidate_programs"):
+         # If user object supports attribute assignment
+         try:
+             best_program.candidate_programs = sorted(candidates, key=lambda c: c.score, reverse=True)  # type: ignore[attr-defined]
+         except Exception:
+             pass
+
+     return (best_program or getattr(student, "deepcopy", student)(), records)
+
+
+ __all__ = [
+     "random_search_compile",
+     "LabeledFewShot",
+     "BootstrapFewShot",
+ ]
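
A comparable sketch for the new random_search.py module is below. Only random_search_compile (and the duck-typed program contract it expects) comes from the module above; TinyProgram, the toy dataset, and the exact-match metric are illustrative assumptions.

# Hypothetical driver; TinyProgram, the toy data, and the metric are assumptions.
import copy
from synth_ai.learning.prompts.random_search import random_search_compile

class TinyProgram:
    # Duck-typed program exposing reset_copy/deepcopy/with_demos/run, as the module expects.
    def __init__(self, demos=None):
        self.demos = list(demos or [])
    def reset_copy(self):
        return TinyProgram()
    def deepcopy(self):
        return copy.deepcopy(self)
    def with_demos(self, demos):
        return TinyProgram(demos)
    def run(self, x):
        # "Predict" by echoing the label of a matching demo, else a default.
        for dx, dy in self.demos:
            if dx == x:
                return dy
        return "unknown"

data = [(f"q{i}", f"a{i}") for i in range(10)]

def metric(yhat, y):
    return 1.0 if yhat == y else 0.0

best, records = random_search_compile(
    TinyProgram(), trainset=data, valset=data, metric=metric,
    max_bootstrapped_demos=2, max_labeled_demos=2, num_candidate_programs=4,
)
print(len(records), max(r["score"] for r in records))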
synth_ai/learning/prompts/run_mipro_banking77.py
@@ -0,0 +1,160 @@
+ """
+ Example: MIPROv2-style optimizer on Banking77 using Groq gpt-oss-20b.
+
+ Requires:
+ - .env with GROQ_API_KEY
+ - datasets
+
+ Run:
+ - uv run -q python -m synth_ai.learning.prompts.run_mipro_banking77
+ """
+
+ from __future__ import annotations
+
+ import asyncio
+ import os
+ import random
+ from dataclasses import dataclass, replace
+ from typing import Any, Dict, List, Sequence, Tuple
+
+ from dotenv import load_dotenv
+ from datasets import load_dataset
+
+ from synth_ai.lm.core.main_v3 import LM, build_messages
+ import json
+ import time
+ from pathlib import Path
+ from synth_ai.learning.prompts.mipro import ProgramAdapter, mipro_v2_compile, evaluate_program
+
+
+ def choose_label(pred: str, label_names: List[str]) -> str:
+     norm = (pred or "").strip().lower()
+     d = {ln.lower(): ln for ln in label_names}
+     if norm in d:
+         return d[norm]
+     def score(cand: str) -> int:
+         c = cand.lower()
+         return sum(1 for w in c.split() if w in norm)
+     return max(label_names, key=score)
+
+
+ def accuracy(pred: str, gold: str, labels: List[str]) -> float:
+     return 1.0 if choose_label(pred, labels) == gold else 0.0
+
+
+ class NaivePromptModel:
+     """Toy prompt model that returns simple instruction variants."""
+     def generate_instructions(self, ctx: Dict[str, Any], k: int = 8) -> List[str]:
+         base = "Classify the Banking77 intent and return exactly one label."
+         variants = [
+             base,
+             base + " Be concise.",
+             base + " Use examples to guide your reasoning.",
+             base + " Return only the label text.",
+             base + " Follow the label names strictly.",
+             base + " Do not include explanations.",
+             base + " Think about similar intents before answering.",
+             base + " Carefully consider the user's message.",
+         ]
+         random.shuffle(variants)
+         return variants[:k]
+
+
+ def build_run_fn(lm: LM, label_names: List[str]):
+     def run_fn(x: str, _model: Any | None = None) -> str:
+         # Use instructions and demos from adapter state (set by set_instructions/set_demos)
+         # The adapter passes state via closure; we rebuild messages here
+         instructions = state_ref.get("instructions", {}).get("main", "You are an intent classifier for Banking77.")
+         examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in state_ref.get("demos", []))
+         sys = instructions
+         user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
+         messages = build_messages(sys, user, images_bytes=None, model_name=lm.model)
+         async def _call():
+             resp = await lm.respond_async(messages=messages)
+             return (resp.raw_response or "").strip()
+         return asyncio.run(_call())
+     return run_fn
+
+
+ def set_instructions(new_instr: Dict[str, str], state: Dict[str, Any]) -> Dict[str, Any]:
+     state["instructions"] = {**state.get("instructions", {}), **new_instr}
+     return state
+
+
+ def set_demos(demos: List[Tuple[str, str]], state: Dict[str, Any]) -> Dict[str, Any]:
+     state["demos"] = list(demos)
+     return state
+
+
+ def main():
+     load_dotenv()
+     random.seed(0)
+
+     model = os.getenv("MODEL", "openai/gpt-oss-20b")
+     vendor = os.getenv("VENDOR", "groq")
+     lm = LM(model=model, vendor=vendor, temperature=0.0)
+
+     print("Loading Banking77 dataset (train/dev split of test for demo)...")
+     ds = load_dataset("banking77")
+     label_names: List[str] = ds["test"].features["label"].names  # type: ignore
+
+     all_items = [(r["text"], label_names[int(r["label"])]) for r in ds["test"]]
+     random.shuffle(all_items)
+     trainset: Sequence[Tuple[str, str]] = all_items[:80]
+     valset: Sequence[Tuple[str, str]] = all_items[80:160]
+
+     global state_ref
+     state_ref = {"instructions": {"main": "You are an intent classifier for Banking77."}, "demos": []}
+     adapter = ProgramAdapter(
+         run_fn=build_run_fn(lm, label_names),
+         state=state_ref,
+         _predictors=["main"],
+         set_instructions=set_instructions,
+         set_demos=set_demos,
+     )
+
+     def metric(yhat: str, y: str) -> float:
+         return accuracy(yhat, y, label_names)
+
+     prompt_model = NaivePromptModel()
+     task_model = None  # not used in this minimal example
+
+     print("Running MIPROv2-style optimizer...")
+     best, records = mipro_v2_compile(
+         student=adapter,
+         trainset=trainset,
+         valset=valset,
+         metric=metric,
+         prompt_model=prompt_model,
+         task_model=task_model,
+         max_bootstrapped_demos=6,
+         max_labeled_demos=4,
+         num_candidates=6,
+         num_trials=12,
+         minibatch=True,
+         minibatch_size=16,
+         minibatch_full_eval_steps=3,
+         seed=0,
+     )
+
+     res = evaluate_program(best, valset, metric)
+     print(f"Best program accuracy on val: {res.score:.2%} ({sum(res.subscores)}/{len(res.subscores)})")
+
+     out = {
+         "context": {
+             "model": model,
+             "vendor": vendor,
+             "train_size": len(trainset),
+             "val_size": len(valset),
+         },
+         "trials": records,
+     }
+     out_dir = Path(__file__).parent
+     fname = str(out_dir / f"mipro_banking77_{int(time.time())}.json")
+     with open(fname, "w") as f:
+         json.dump(out, f, indent=2)
+     print(f"Saved trial records to {fname}")
+
+
+ if __name__ == "__main__":
+     main()
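
One detail worth noting in the script above: choose_label maps the raw model output back to a label either by an exact case-insensitive match or by a crude token score, so only outputs that contain a label verbatim resolve by overlap, while anything else falls back to the first label. A small illustration (the label subset is hand-picked for the example, not the full Banking77 set):

# Illustration of choose_label's matching behavior; hand-picked label subset.
from synth_ai.learning.prompts.run_mipro_banking77 import choose_label  # assumes the module's dependencies are installed

labels = ["card_arrival", "card_linking", "exchange_rate"]
print(choose_label("Card_Arrival", labels))           # exact case-insensitive match -> "card_arrival"
print(choose_label("label: exchange_rate.", labels))  # label appears verbatim in the output -> "exchange_rate"
print(choose_label("no idea", labels))                # no overlap; falls back to the first label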