synth-ai 0.2.4.dev6__py3-none-any.whl → 0.2.4.dev8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256)
  1. synth_ai/__init__.py +18 -9
  2. synth_ai/cli/__init__.py +10 -5
  3. synth_ai/cli/balance.py +25 -32
  4. synth_ai/cli/calc.py +2 -3
  5. synth_ai/cli/demo.py +3 -5
  6. synth_ai/cli/legacy_root_backup.py +58 -32
  7. synth_ai/cli/man.py +22 -19
  8. synth_ai/cli/recent.py +9 -8
  9. synth_ai/cli/root.py +58 -13
  10. synth_ai/cli/status.py +13 -6
  11. synth_ai/cli/traces.py +45 -21
  12. synth_ai/cli/watch.py +40 -37
  13. synth_ai/config/base_url.py +47 -2
  14. synth_ai/core/experiment.py +1 -2
  15. synth_ai/environments/__init__.py +2 -6
  16. synth_ai/environments/environment/artifacts/base.py +3 -1
  17. synth_ai/environments/environment/db/sqlite.py +1 -1
  18. synth_ai/environments/environment/registry.py +19 -20
  19. synth_ai/environments/environment/resources/sqlite.py +2 -3
  20. synth_ai/environments/environment/rewards/core.py +3 -2
  21. synth_ai/environments/environment/tools/__init__.py +6 -4
  22. synth_ai/environments/examples/crafter_classic/__init__.py +1 -1
  23. synth_ai/environments/examples/crafter_classic/engine.py +13 -13
  24. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +1 -0
  25. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +2 -1
  26. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +2 -1
  27. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +3 -2
  28. synth_ai/environments/examples/crafter_classic/environment.py +16 -15
  29. synth_ai/environments/examples/crafter_classic/taskset.py +2 -2
  30. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +2 -3
  31. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +2 -1
  32. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +2 -2
  33. synth_ai/environments/examples/crafter_custom/crafter/config.py +2 -2
  34. synth_ai/environments/examples/crafter_custom/crafter/env.py +1 -5
  35. synth_ai/environments/examples/crafter_custom/crafter/objects.py +1 -2
  36. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +1 -2
  37. synth_ai/environments/examples/crafter_custom/dataset_builder.py +5 -5
  38. synth_ai/environments/examples/crafter_custom/environment.py +13 -13
  39. synth_ai/environments/examples/crafter_custom/run_dataset.py +5 -5
  40. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +2 -2
  41. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +5 -4
  42. synth_ai/environments/examples/enron/art_helpers/types_enron.py +2 -1
  43. synth_ai/environments/examples/enron/engine.py +18 -14
  44. synth_ai/environments/examples/enron/environment.py +12 -11
  45. synth_ai/environments/examples/enron/taskset.py +7 -7
  46. synth_ai/environments/examples/minigrid/__init__.py +6 -6
  47. synth_ai/environments/examples/minigrid/engine.py +6 -6
  48. synth_ai/environments/examples/minigrid/environment.py +6 -6
  49. synth_ai/environments/examples/minigrid/puzzle_loader.py +3 -2
  50. synth_ai/environments/examples/minigrid/taskset.py +13 -13
  51. synth_ai/environments/examples/nethack/achievements.py +1 -1
  52. synth_ai/environments/examples/nethack/engine.py +8 -7
  53. synth_ai/environments/examples/nethack/environment.py +10 -9
  54. synth_ai/environments/examples/nethack/helpers/__init__.py +8 -9
  55. synth_ai/environments/examples/nethack/helpers/action_mapping.py +1 -1
  56. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +2 -1
  57. synth_ai/environments/examples/nethack/helpers/observation_utils.py +1 -1
  58. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +3 -4
  59. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +6 -5
  60. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +5 -5
  61. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +7 -6
  62. synth_ai/environments/examples/nethack/taskset.py +5 -5
  63. synth_ai/environments/examples/red/engine.py +9 -8
  64. synth_ai/environments/examples/red/engine_helpers/reward_components.py +2 -1
  65. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +7 -7
  66. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +2 -1
  67. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +2 -1
  68. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +2 -1
  69. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +2 -1
  70. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +2 -1
  71. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +2 -1
  72. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +2 -1
  73. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +2 -1
  74. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +2 -1
  75. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +2 -1
  76. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +2 -1
  77. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +3 -2
  78. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +2 -1
  79. synth_ai/environments/examples/red/environment.py +18 -15
  80. synth_ai/environments/examples/red/taskset.py +5 -3
  81. synth_ai/environments/examples/sokoban/engine.py +16 -13
  82. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +3 -2
  83. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +2 -1
  84. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +1 -1
  85. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +7 -5
  86. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +1 -1
  87. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +2 -1
  88. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +5 -4
  89. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +3 -2
  90. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +2 -1
  91. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +5 -4
  92. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +1 -1
  93. synth_ai/environments/examples/sokoban/environment.py +15 -14
  94. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +5 -3
  95. synth_ai/environments/examples/sokoban/puzzle_loader.py +3 -2
  96. synth_ai/environments/examples/sokoban/taskset.py +13 -10
  97. synth_ai/environments/examples/tictactoe/engine.py +6 -6
  98. synth_ai/environments/examples/tictactoe/environment.py +8 -7
  99. synth_ai/environments/examples/tictactoe/taskset.py +6 -5
  100. synth_ai/environments/examples/verilog/engine.py +4 -3
  101. synth_ai/environments/examples/verilog/environment.py +11 -10
  102. synth_ai/environments/examples/verilog/taskset.py +14 -12
  103. synth_ai/environments/examples/wordle/__init__.py +5 -5
  104. synth_ai/environments/examples/wordle/engine.py +32 -25
  105. synth_ai/environments/examples/wordle/environment.py +21 -16
  106. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +6 -6
  107. synth_ai/environments/examples/wordle/taskset.py +20 -12
  108. synth_ai/environments/reproducibility/core.py +1 -1
  109. synth_ai/environments/reproducibility/tree.py +21 -21
  110. synth_ai/environments/service/app.py +3 -2
  111. synth_ai/environments/service/core_routes.py +104 -110
  112. synth_ai/environments/service/external_registry.py +1 -2
  113. synth_ai/environments/service/registry.py +1 -1
  114. synth_ai/environments/stateful/core.py +1 -2
  115. synth_ai/environments/stateful/engine.py +1 -1
  116. synth_ai/environments/tasks/api.py +4 -4
  117. synth_ai/environments/tasks/core.py +14 -12
  118. synth_ai/environments/tasks/filters.py +6 -4
  119. synth_ai/environments/tasks/utils.py +13 -11
  120. synth_ai/evals/base.py +2 -3
  121. synth_ai/experimental/synth_oss.py +4 -4
  122. synth_ai/http.py +102 -0
  123. synth_ai/inference/__init__.py +7 -0
  124. synth_ai/inference/client.py +20 -0
  125. synth_ai/jobs/client.py +246 -0
  126. synth_ai/learning/__init__.py +24 -0
  127. synth_ai/learning/client.py +149 -0
  128. synth_ai/learning/config.py +43 -0
  129. synth_ai/learning/constants.py +29 -0
  130. synth_ai/learning/ft_client.py +59 -0
  131. synth_ai/learning/gateway.py +1 -3
  132. synth_ai/learning/health.py +43 -0
  133. synth_ai/learning/jobs.py +205 -0
  134. synth_ai/learning/prompts/banking77_injection_eval.py +15 -10
  135. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +26 -14
  136. synth_ai/learning/prompts/mipro.py +61 -52
  137. synth_ai/learning/prompts/random_search.py +42 -43
  138. synth_ai/learning/prompts/run_mipro_banking77.py +32 -20
  139. synth_ai/learning/prompts/run_random_search_banking77.py +71 -52
  140. synth_ai/learning/rl_client.py +256 -0
  141. synth_ai/learning/sse.py +58 -0
  142. synth_ai/learning/validators.py +48 -0
  143. synth_ai/lm/__init__.py +5 -5
  144. synth_ai/lm/caching/ephemeral.py +9 -9
  145. synth_ai/lm/caching/handler.py +20 -20
  146. synth_ai/lm/caching/persistent.py +10 -10
  147. synth_ai/lm/config.py +3 -3
  148. synth_ai/lm/constants.py +7 -7
  149. synth_ai/lm/core/all.py +17 -3
  150. synth_ai/lm/core/exceptions.py +0 -2
  151. synth_ai/lm/core/main.py +26 -41
  152. synth_ai/lm/core/main_v3.py +33 -10
  153. synth_ai/lm/core/synth_models.py +48 -0
  154. synth_ai/lm/core/vendor_clients.py +26 -22
  155. synth_ai/lm/injection.py +7 -8
  156. synth_ai/lm/overrides.py +21 -19
  157. synth_ai/lm/provider_support/__init__.py +1 -1
  158. synth_ai/lm/provider_support/anthropic.py +15 -15
  159. synth_ai/lm/provider_support/openai.py +23 -21
  160. synth_ai/lm/structured_outputs/handler.py +34 -32
  161. synth_ai/lm/structured_outputs/inject.py +24 -27
  162. synth_ai/lm/structured_outputs/rehabilitate.py +19 -15
  163. synth_ai/lm/tools/base.py +17 -16
  164. synth_ai/lm/unified_interface.py +17 -18
  165. synth_ai/lm/vendors/base.py +20 -18
  166. synth_ai/lm/vendors/core/anthropic_api.py +36 -27
  167. synth_ai/lm/vendors/core/gemini_api.py +31 -36
  168. synth_ai/lm/vendors/core/mistral_api.py +19 -19
  169. synth_ai/lm/vendors/core/openai_api.py +42 -13
  170. synth_ai/lm/vendors/openai_standard.py +158 -101
  171. synth_ai/lm/vendors/openai_standard_responses.py +74 -61
  172. synth_ai/lm/vendors/retries.py +9 -1
  173. synth_ai/lm/vendors/supported/custom_endpoint.py +38 -28
  174. synth_ai/lm/vendors/supported/deepseek.py +10 -10
  175. synth_ai/lm/vendors/supported/grok.py +8 -8
  176. synth_ai/lm/vendors/supported/ollama.py +2 -1
  177. synth_ai/lm/vendors/supported/openrouter.py +11 -9
  178. synth_ai/lm/vendors/synth_client.py +425 -75
  179. synth_ai/lm/warmup.py +8 -7
  180. synth_ai/rl/__init__.py +30 -0
  181. synth_ai/rl/contracts.py +32 -0
  182. synth_ai/rl/env_keys.py +137 -0
  183. synth_ai/rl/secrets.py +19 -0
  184. synth_ai/scripts/verify_rewards.py +100 -0
  185. synth_ai/task/__init__.py +10 -0
  186. synth_ai/task/contracts.py +120 -0
  187. synth_ai/task/health.py +28 -0
  188. synth_ai/task/validators.py +12 -0
  189. synth_ai/tracing/__init__.py +22 -10
  190. synth_ai/tracing_v1/__init__.py +22 -20
  191. synth_ai/tracing_v3/__init__.py +7 -7
  192. synth_ai/tracing_v3/abstractions.py +56 -52
  193. synth_ai/tracing_v3/config.py +4 -2
  194. synth_ai/tracing_v3/db_config.py +6 -8
  195. synth_ai/tracing_v3/decorators.py +29 -30
  196. synth_ai/tracing_v3/examples/basic_usage.py +12 -12
  197. synth_ai/tracing_v3/hooks.py +24 -22
  198. synth_ai/tracing_v3/llm_call_record_helpers.py +85 -98
  199. synth_ai/tracing_v3/lm_call_record_abstractions.py +2 -4
  200. synth_ai/tracing_v3/migration_helper.py +3 -5
  201. synth_ai/tracing_v3/replica_sync.py +30 -32
  202. synth_ai/tracing_v3/session_tracer.py +158 -31
  203. synth_ai/tracing_v3/storage/__init__.py +1 -1
  204. synth_ai/tracing_v3/storage/base.py +8 -7
  205. synth_ai/tracing_v3/storage/config.py +4 -4
  206. synth_ai/tracing_v3/storage/factory.py +4 -4
  207. synth_ai/tracing_v3/storage/utils.py +9 -9
  208. synth_ai/tracing_v3/turso/__init__.py +3 -3
  209. synth_ai/tracing_v3/turso/daemon.py +9 -9
  210. synth_ai/tracing_v3/turso/manager.py +278 -48
  211. synth_ai/tracing_v3/turso/models.py +77 -19
  212. synth_ai/tracing_v3/utils.py +5 -5
  213. synth_ai/v0/tracing/abstractions.py +28 -28
  214. synth_ai/v0/tracing/base_client.py +9 -9
  215. synth_ai/v0/tracing/client_manager.py +7 -7
  216. synth_ai/v0/tracing/config.py +7 -7
  217. synth_ai/v0/tracing/context.py +6 -6
  218. synth_ai/v0/tracing/decorators.py +6 -5
  219. synth_ai/v0/tracing/events/manage.py +1 -1
  220. synth_ai/v0/tracing/events/store.py +5 -4
  221. synth_ai/v0/tracing/immediate_client.py +4 -5
  222. synth_ai/v0/tracing/local.py +3 -3
  223. synth_ai/v0/tracing/log_client_base.py +4 -5
  224. synth_ai/v0/tracing/retry_queue.py +5 -6
  225. synth_ai/v0/tracing/trackers.py +25 -25
  226. synth_ai/v0/tracing/upload.py +6 -0
  227. synth_ai/v0/tracing_v1/__init__.py +1 -1
  228. synth_ai/v0/tracing_v1/abstractions.py +28 -28
  229. synth_ai/v0/tracing_v1/base_client.py +9 -9
  230. synth_ai/v0/tracing_v1/client_manager.py +7 -7
  231. synth_ai/v0/tracing_v1/config.py +7 -7
  232. synth_ai/v0/tracing_v1/context.py +6 -6
  233. synth_ai/v0/tracing_v1/decorators.py +7 -6
  234. synth_ai/v0/tracing_v1/events/manage.py +1 -1
  235. synth_ai/v0/tracing_v1/events/store.py +5 -4
  236. synth_ai/v0/tracing_v1/immediate_client.py +4 -5
  237. synth_ai/v0/tracing_v1/local.py +3 -3
  238. synth_ai/v0/tracing_v1/log_client_base.py +4 -5
  239. synth_ai/v0/tracing_v1/retry_queue.py +5 -6
  240. synth_ai/v0/tracing_v1/trackers.py +25 -25
  241. synth_ai/v0/tracing_v1/upload.py +25 -24
  242. synth_ai/zyk/__init__.py +1 -0
  243. synth_ai-0.2.4.dev8.dist-info/METADATA +635 -0
  244. synth_ai-0.2.4.dev8.dist-info/RECORD +317 -0
  245. synth_ai/tui/__init__.py +0 -1
  246. synth_ai/tui/__main__.py +0 -13
  247. synth_ai/tui/cli/__init__.py +0 -1
  248. synth_ai/tui/cli/query_experiments.py +0 -165
  249. synth_ai/tui/cli/query_experiments_v3.py +0 -165
  250. synth_ai/tui/dashboard.py +0 -329
  251. synth_ai-0.2.4.dev6.dist-info/METADATA +0 -203
  252. synth_ai-0.2.4.dev6.dist-info/RECORD +0 -299
  253. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev8.dist-info}/WHEEL +0 -0
  254. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev8.dist-info}/entry_points.txt +0 -0
  255. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev8.dist-info}/licenses/LICENSE +0 -0
  256. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev8.dist-info}/top_level.txt +0 -0
@@ -20,9 +20,9 @@ Notes
20
20
  from __future__ import annotations
21
21
 
22
22
  import random
23
+ from collections.abc import Callable, Sequence
23
24
  from dataclasses import dataclass, replace
24
- from typing import Any, Callable, Dict, Iterable, List, Optional, Protocol, Sequence, Tuple
25
-
25
+ from typing import Any, Protocol
26
26
 
27
27
  # ---------------------------
28
28
  # Program adapter and protocols
@@ -36,16 +36,16 @@ class PredictProgram(Protocol):
36
36
  by wrapping it with `ProgramAdapter` below.
37
37
  """
38
38
 
39
- def deepcopy(self) -> "PredictProgram": ...
39
+ def deepcopy(self) -> PredictProgram: ...
40
40
 
41
- def run(self, x: Any, *, model: Optional[Any] = None) -> Any: ...
41
+ def run(self, x: Any, *, model: Any | None = None) -> Any: ...
42
42
 
43
- def with_instructions(self, instructions: Dict[str, str]) -> "PredictProgram": ...
43
+ def with_instructions(self, instructions: dict[str, str]) -> PredictProgram: ...
44
44
 
45
- def with_demos(self, demos: List[Tuple[Any, Any]]) -> "PredictProgram": ...
45
+ def with_demos(self, demos: list[tuple[Any, Any]]) -> PredictProgram: ...
46
46
 
47
47
  @property
48
- def predictors(self) -> List[str]: ...
48
+ def predictors(self) -> list[str]: ...
49
49
 
50
50
 
51
51
  @dataclass
@@ -59,28 +59,28 @@ class ProgramAdapter:
59
59
  - set_demos: Callable to update demos (global or per predictor)
60
60
  """
61
61
 
62
- run_fn: Callable[[Any, Optional[Any]], Any]
63
- state: Dict[str, Any]
64
- _predictors: List[str]
65
- set_instructions: Callable[[Dict[str, str], Dict[str, Any]], Dict[str, Any]]
66
- set_demos: Callable[[List[Tuple[Any, Any]], Dict[str, Any]], Dict[str, Any]]
62
+ run_fn: Callable[[Any, Any | None], Any]
63
+ state: dict[str, Any]
64
+ _predictors: list[str]
65
+ set_instructions: Callable[[dict[str, str], dict[str, Any]], dict[str, Any]]
66
+ set_demos: Callable[[list[tuple[Any, Any]], dict[str, Any]], dict[str, Any]]
67
67
 
68
- def deepcopy(self) -> "ProgramAdapter":
68
+ def deepcopy(self) -> ProgramAdapter:
69
69
  return replace(self, state={**self.state})
70
70
 
71
- def run(self, x: Any, *, model: Optional[Any] = None) -> Any:
71
+ def run(self, x: Any, *, model: Any | None = None) -> Any:
72
72
  return self.run_fn(x, model)
73
73
 
74
- def with_instructions(self, instructions: Dict[str, str]) -> "ProgramAdapter":
74
+ def with_instructions(self, instructions: dict[str, str]) -> ProgramAdapter:
75
75
  new_state = self.set_instructions(instructions, {**self.state})
76
76
  return replace(self, state=new_state)
77
77
 
78
- def with_demos(self, demos: List[Tuple[Any, Any]]) -> "ProgramAdapter":
78
+ def with_demos(self, demos: list[tuple[Any, Any]]) -> ProgramAdapter:
79
79
  new_state = self.set_demos(demos, {**self.state})
80
80
  return replace(self, state=new_state)
81
81
 
82
82
  @property
83
- def predictors(self) -> List[str]:
83
+ def predictors(self) -> list[str]:
84
84
  return list(self._predictors)
85
85
 
86
86
 
@@ -89,9 +89,11 @@ class ProgramAdapter:
89
89
  # ---------------------------
90
90
 
91
91
 
92
- def summarize_dataset(trainset: Sequence[Tuple[Any, Any]], max_items: int = 50) -> str:
92
+ def summarize_dataset(trainset: Sequence[tuple[Any, Any]], max_items: int = 50) -> str:
93
93
  n = len(trainset)
94
- ex = ", ".join(repr(trainset[i][0])[:40] for i in range(0, min(max_items, n), max(1, n // max_items or 1)))
94
+ ex = ", ".join(
95
+ repr(trainset[i][0])[:40] for i in range(0, min(max_items, n), max(1, n // max_items or 1))
96
+ )
95
97
  return f"Dataset size: {n}. Example inputs: {ex}"
96
98
 
97
99
 
@@ -109,7 +111,7 @@ def random_tip(rng: random.Random) -> str:
109
111
  return rng.choice(tips)
110
112
 
111
113
 
112
- def choose(items: Sequence[Any], rng: Optional[random.Random] = None) -> Any:
114
+ def choose(items: Sequence[Any], rng: random.Random | None = None) -> Any:
113
115
  r = rng or random
114
116
  return r.choice(items)
115
117
 
@@ -122,10 +124,12 @@ def choose(items: Sequence[Any], rng: Optional[random.Random] = None) -> Any:
122
124
  @dataclass
123
125
  class EvalResult:
124
126
  score: float
125
- subscores: List[float]
127
+ subscores: list[float]
126
128
 
127
129
 
128
- def evaluate_program(program: PredictProgram, dataset: Sequence[Tuple[Any, Any]], metric: Callable[[Any, Any], float]) -> EvalResult:
130
+ def evaluate_program(
131
+ program: PredictProgram, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
132
+ ) -> EvalResult:
129
133
  subs = []
130
134
  for x, y in dataset:
131
135
  yhat = program.run(x)
@@ -140,8 +144,8 @@ def evaluate_program(program: PredictProgram, dataset: Sequence[Tuple[Any, Any]]
140
144
 
141
145
  def mipro_v2_compile(
142
146
  student: PredictProgram,
143
- trainset: Sequence[Tuple[Any, Any]],
144
- valset: Sequence[Tuple[Any, Any]],
147
+ trainset: Sequence[tuple[Any, Any]],
148
+ valset: Sequence[tuple[Any, Any]],
145
149
  metric: Callable[[Any, Any], float],
146
150
  *,
147
151
  prompt_model: Any,
@@ -159,7 +163,7 @@ def mipro_v2_compile(
159
163
  data_aware: bool = True,
160
164
  tip_aware: bool = True,
161
165
  fewshot_aware: bool = True,
162
- ) -> Tuple[PredictProgram, List[Dict[str, Any]]]:
166
+ ) -> tuple[PredictProgram, list[dict[str, Any]]]:
163
167
  """MIPROv2-style optimizer.
164
168
 
165
169
  Arguments mirror the DSPy pseudocode but remain provider-agnostic. The
@@ -171,9 +175,9 @@ def mipro_v2_compile(
171
175
  program = student.deepcopy()
172
176
 
173
177
  # Step 1: bootstrap few-shot example candidates
174
- demo_candidates: List[Dict[str, Any]] = []
178
+ demo_candidates: list[dict[str, Any]] = []
175
179
  for _ in range(num_candidates):
176
- boot: List[Tuple[Any, Any]] = []
180
+ boot: list[tuple[Any, Any]] = []
177
181
  # collect bootstrapped, self-consistent demos
178
182
  while len(boot) < max_bootstrapped_demos:
179
183
  x, y = rng.choice(trainset)
@@ -184,9 +188,9 @@ def mipro_v2_compile(
184
188
  demo_candidates.append({"boot": boot, "labeled": labeled})
185
189
 
186
190
  # Step 2: propose instruction candidates per predictor
187
- instr_candidates: Dict[str, List[str]] = {}
188
- for pred in (program.predictors or ["predictor"]):
189
- ctx: Dict[str, Any] = {}
191
+ instr_candidates: dict[str, list[str]] = {}
192
+ for pred in program.predictors or ["predictor"]:
193
+ ctx: dict[str, Any] = {}
190
194
  if data_aware:
191
195
  ctx["dataset_summary"] = summarize_dataset(trainset)
192
196
  if program_aware:
@@ -199,12 +203,12 @@ def mipro_v2_compile(
199
203
  instr_candidates[pred] = list(cand)
200
204
 
201
205
  # Step 3: Bayesian-optimization-like search (random proposer placeholder)
202
- history: List[Tuple[Dict[str, Any], float]] = []
203
- records: List[Dict[str, Any]] = []
206
+ history: list[tuple[dict[str, Any], float]] = []
207
+ records: list[dict[str, Any]] = []
204
208
  best_score = -1.0
205
- best_cfg: Optional[Dict[str, Any]] = None
209
+ best_cfg: dict[str, Any] | None = None
206
210
 
207
- def propose(history_: List[Tuple[Dict[str, Any], float]]) -> Dict[str, Any]:
211
+ def propose(history_: list[tuple[dict[str, Any], float]]) -> dict[str, Any]:
208
212
  # Placeholder: randomly sample from the cartesian product
209
213
  instructions = {pred: choose(instr_candidates[pred], rng) for pred in instr_candidates}
210
214
  demos = choose(demo_candidates, rng) if demo_candidates else None
@@ -227,15 +231,17 @@ def mipro_v2_compile(
227
231
  batch_res = evaluate_program(program_t, batch, metric)
228
232
  s_t = batch_res.score
229
233
  history.append((theta, s_t))
230
- records.append({
231
- "trial": t,
232
- "evaluation": "batch" if minibatch else "full",
233
- "score": s_t,
234
- "intervention": {
235
- "instructions": theta.get("instructions"),
236
- "demo_set": theta.get("demo_set"),
237
- },
238
- })
234
+ records.append(
235
+ {
236
+ "trial": t,
237
+ "evaluation": "batch" if minibatch else "full",
238
+ "score": s_t,
239
+ "intervention": {
240
+ "instructions": theta.get("instructions"),
241
+ "demo_set": theta.get("demo_set"),
242
+ },
243
+ }
244
+ )
239
245
 
240
246
  if (not minibatch) or (t % max(1, minibatch_full_eval_steps) == 0):
241
247
  full_res = evaluate_program(program_t, valset, metric)
@@ -243,15 +249,17 @@ def mipro_v2_compile(
243
249
  if s_full > best_score:
244
250
  best_score = s_full
245
251
  best_cfg = theta
246
- records.append({
247
- "trial": t,
248
- "evaluation": "full",
249
- "score": s_full,
250
- "intervention": {
251
- "instructions": theta.get("instructions"),
252
- "demo_set": theta.get("demo_set"),
253
- },
254
- })
252
+ records.append(
253
+ {
254
+ "trial": t,
255
+ "evaluation": "full",
256
+ "score": s_full,
257
+ "intervention": {
258
+ "instructions": theta.get("instructions"),
259
+ "demo_set": theta.get("demo_set"),
260
+ },
261
+ }
262
+ )
255
263
 
256
264
  if best_cfg is None:
257
265
  return program, records
@@ -275,6 +283,7 @@ __all__ = [
275
283
  class ExampleTwoStepDag:
276
284
  pass
277
285
 
286
+
278
287
  """
279
288
  A -> B
280
289
  """
@@ -8,10 +8,11 @@ metric, and this module will explore baselines and bootstrapped few-shot variant
8
8
 
9
9
  from __future__ import annotations
10
10
 
11
+ import contextlib
11
12
  import random
13
+ from collections.abc import Callable, Sequence
12
14
  from dataclasses import dataclass
13
- from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
14
-
15
+ from typing import Any
15
16
 
16
17
  # ---------------------------
17
18
  # Protocol-like expectations (duck-typed)
@@ -25,7 +26,7 @@ class _ProgramLike:
25
26
  def deepcopy(self): # deep copy
26
27
  return self
27
28
 
28
- def with_demos(self, demos: List[Tuple[Any, Any]]):
29
+ def with_demos(self, demos: list[tuple[Any, Any]]):
29
30
  return self
30
31
 
31
32
  def run(self, x: Any) -> Any:
@@ -40,10 +41,12 @@ class _ProgramLike:
40
41
  @dataclass
41
42
  class EvalResult:
42
43
  score: float
43
- subscores: List[float]
44
+ subscores: list[float]
44
45
 
45
46
 
46
- def evaluate(program: _ProgramLike, dataset: Sequence[Tuple[Any, Any]], metric: Callable[[Any, Any], float]) -> EvalResult:
47
+ def evaluate(
48
+ program: _ProgramLike, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
49
+ ) -> EvalResult:
47
50
  subs = []
48
51
  for x, y in dataset:
49
52
  subs.append(metric(program.run(x), y))
@@ -54,7 +57,9 @@ class LabeledFewShot:
54
57
  def __init__(self, k: int):
55
58
  self.k = k
56
59
 
57
- def compile(self, student: _ProgramLike, trainset: Sequence[Tuple[Any, Any]], sample: bool = True) -> _ProgramLike:
60
+ def compile(
61
+ self, student: _ProgramLike, trainset: Sequence[tuple[Any, Any]], sample: bool = True
62
+ ) -> _ProgramLike:
58
63
  p = getattr(student, "deepcopy", student.reset_copy)()
59
64
  demos = list(trainset)
60
65
  if sample:
@@ -68,10 +73,10 @@ class BootstrapFewShot:
68
73
  self,
69
74
  *,
70
75
  metric: Callable[[Any, Any], float],
71
- metric_threshold: Optional[float] = None,
76
+ metric_threshold: float | None = None,
72
77
  max_bootstrapped_demos: int = 8,
73
78
  max_labeled_demos: int = 0,
74
- teacher_settings: Optional[Dict[str, Any]] = None,
79
+ teacher_settings: dict[str, Any] | None = None,
75
80
  max_rounds: int = 1,
76
81
  ):
77
82
  self.metric = metric
@@ -84,18 +89,18 @@ class BootstrapFewShot:
84
89
  def compile(
85
90
  self,
86
91
  student: _ProgramLike,
87
- teacher: Optional[_ProgramLike],
88
- trainset: Sequence[Tuple[Any, Any]],
92
+ teacher: _ProgramLike | None,
93
+ trainset: Sequence[tuple[Any, Any]],
89
94
  ) -> _ProgramLike:
90
95
  p = getattr(student, "deepcopy", student.reset_copy)()
91
96
  rng = random.Random()
92
97
  # If bootstrapped demos disabled, return labeled-only few-shot quickly
93
98
  if self.max_bootstrapped_demos <= 0:
94
- demos: List[Tuple[Any, Any]] = []
99
+ demos: list[tuple[Any, Any]] = []
95
100
  if self.max_labeled_demos > 0:
96
101
  demos += rng.sample(list(trainset), k=min(self.max_labeled_demos, len(trainset)))
97
102
  return p.with_demos(demos)
98
- boot: List[Tuple[Any, Any]] = []
103
+ boot: list[tuple[Any, Any]] = []
99
104
  # Bootstrap demos by self consistency
100
105
  for _ in range(self.max_rounds):
101
106
  rng.shuffle(trainset := list(trainset))
@@ -127,33 +132,29 @@ class BootstrapFewShot:
127
132
  @dataclass
128
133
  class Candidate:
129
134
  score: float
130
- subscores: List[float]
135
+ subscores: list[float]
131
136
  seed: int
132
137
  program: _ProgramLike
133
138
 
134
139
 
135
140
  def random_search_compile(
136
141
  student: _ProgramLike,
137
- trainset: Sequence[Tuple[Any, Any]],
138
- valset: Sequence[Tuple[Any, Any]],
142
+ trainset: Sequence[tuple[Any, Any]],
143
+ valset: Sequence[tuple[Any, Any]],
139
144
  metric: Callable[[Any, Any], float],
140
145
  *,
141
146
  max_bootstrapped_demos: int = 8,
142
147
  max_labeled_demos: int = 4,
143
148
  max_rounds: int = 2,
144
149
  num_candidate_programs: int = 16,
145
- stop_at_score: Optional[float] = None,
146
- evaluate_fn: Optional[Callable[[
147
- _ProgramLike,
148
- Sequence[Tuple[Any, Any]],
149
- Callable[[Any, Any], float]
150
- ], EvalResult]] = None,
151
- on_candidate_evaluated: Optional[Callable[[int, float, EvalResult, Dict[str, Any]], None]] = None,
152
- ) -> Tuple[_ProgramLike, List[Dict[str, Any]]]:
153
- best_program: Optional[_ProgramLike] = None
150
+ stop_at_score: float | None = None,
151
+ evaluate_fn: Callable[[_ProgramLike, Sequence[tuple[Any, Any]], Callable[[Any, Any], float]], EvalResult] | None = None,
152
+ on_candidate_evaluated: Callable[[int, float, EvalResult, dict[str, Any]], None] | None = None,
153
+ ) -> tuple[_ProgramLike, list[dict[str, Any]]]:
154
+ best_program: _ProgramLike | None = None
154
155
  best_score = float("-inf")
155
- candidates: List[Candidate] = []
156
- records: List[Dict[str, Any]] = []
156
+ candidates: list[Candidate] = []
157
+ records: list[dict[str, Any]] = []
157
158
 
158
159
  seeds = list(range(num_candidate_programs))
159
160
  seeds = [-3, -2, -1] + seeds # zero-shot, labeled few-shot, bootstrapped few-shot
@@ -174,7 +175,9 @@ def random_search_compile(
174
175
  if max_bootstrapped_demos <= 0:
175
176
  size = 0
176
177
  else:
177
- size = max_bootstrapped_demos if seed == -1 else rng.randint(1, max_bootstrapped_demos)
178
+ size = (
179
+ max_bootstrapped_demos if seed == -1 else rng.randint(1, max_bootstrapped_demos)
180
+ )
178
181
  program = BootstrapFewShot(
179
182
  metric=metric,
180
183
  metric_threshold=None,
@@ -184,14 +187,18 @@ def random_search_compile(
184
187
  max_rounds=max_rounds,
185
188
  ).compile(student, teacher=None, trainset=train_copy)
186
189
 
187
- res = (evaluate_fn(program, valset, metric) if evaluate_fn else evaluate(program, valset, metric))
190
+ res = (
191
+ evaluate_fn(program, valset, metric)
192
+ if evaluate_fn
193
+ else evaluate(program, valset, metric)
194
+ )
188
195
  cand = Candidate(score=res.score, subscores=res.subscores, seed=seed, program=program)
189
196
  candidates.append(cand)
190
197
  # Record an intervention summary for reproducibility
191
- intervention: Dict[str, Any] = {"seed": seed}
198
+ intervention: dict[str, Any] = {"seed": seed}
192
199
  if hasattr(program, "demos"):
193
200
  try:
194
- intervention["demos"] = getattr(program, "demos") # type: ignore
201
+ intervention["demos"] = program.demos # type: ignore
195
202
  except Exception:
196
203
  intervention["demos"] = None
197
204
  # Type of candidate
@@ -203,12 +210,6 @@ def random_search_compile(
203
210
  intervention["label"] = f"labeled-{max_labeled_demos}"
204
211
  else:
205
212
  intervention["kind"] = "bootstrapped_few_shot"
206
- bs = 0
207
- try:
208
- # try to infer from program demos length if present
209
- bs = len(intervention.get("demos") or [])
210
- except Exception:
211
- bs = 0
212
213
  intervention["label"] = f"boot-b{max_bootstrapped_demos}-l{max_labeled_demos}"
213
214
  record_obj = {
214
215
  "score": cand.score,
@@ -224,18 +225,16 @@ def random_search_compile(
224
225
  break
225
226
 
226
227
  if on_candidate_evaluated is not None:
227
- try:
228
+ with contextlib.suppress(Exception):
228
229
  on_candidate_evaluated(idx + 1, res.score, res, intervention)
229
- except Exception:
230
- pass
231
230
 
232
231
  # Attach candidates for inspection
233
232
  if hasattr(best_program, "candidate_programs"):
234
233
  # If user object supports attribute assignment
235
- try:
236
- best_program.candidate_programs = sorted(candidates, key=lambda c: c.score, reverse=True) # type: ignore[attr-defined]
237
- except Exception:
238
- pass
234
+ with contextlib.suppress(Exception):
235
+ best_program.candidate_programs = sorted(
236
+ candidates, key=lambda c: c.score, reverse=True
237
+ ) # type: ignore[attr-defined]
239
238
 
240
239
  return (best_program or getattr(student, "deepcopy", student)(), records)
241
240
 
@@ -12,39 +12,41 @@ Run:
12
12
  from __future__ import annotations
13
13
 
14
14
  import asyncio
15
+ import json
15
16
  import os
16
17
  import random
17
- from dataclasses import dataclass, replace
18
- from typing import Any, Dict, List, Sequence, Tuple
18
+ import time
19
+ from collections.abc import Sequence
20
+ from pathlib import Path
21
+ from typing import Any
19
22
 
20
- from dotenv import load_dotenv
21
23
  from datasets import load_dataset
22
-
24
+ from dotenv import load_dotenv
25
+ from synth_ai.learning.prompts.mipro import ProgramAdapter, evaluate_program, mipro_v2_compile
23
26
  from synth_ai.lm.core.main_v3 import LM, build_messages
24
- import json
25
- import time
26
- from pathlib import Path
27
- from synth_ai.learning.prompts.mipro import ProgramAdapter, mipro_v2_compile, evaluate_program
28
27
 
29
28
 
30
- def choose_label(pred: str, label_names: List[str]) -> str:
29
+ def choose_label(pred: str, label_names: list[str]) -> str:
31
30
  norm = (pred or "").strip().lower()
32
31
  d = {ln.lower(): ln for ln in label_names}
33
32
  if norm in d:
34
33
  return d[norm]
34
+
35
35
  def score(cand: str) -> int:
36
36
  c = cand.lower()
37
37
  return sum(1 for w in c.split() if w in norm)
38
+
38
39
  return max(label_names, key=score)
39
40
 
40
41
 
41
- def accuracy(pred: str, gold: str, labels: List[str]) -> float:
42
+ def accuracy(pred: str, gold: str, labels: list[str]) -> float:
42
43
  return 1.0 if choose_label(pred, labels) == gold else 0.0
43
44
 
44
45
 
45
46
  class NaivePromptModel:
46
47
  """Toy prompt model that returns simple instruction variants."""
47
- def generate_instructions(self, ctx: Dict[str, Any], k: int = 8) -> List[str]:
48
+
49
+ def generate_instructions(self, ctx: dict[str, Any], k: int = 8) -> list[str]:
48
50
  base = "Classify the Banking77 intent and return exactly one label."
49
51
  variants = [
50
52
  base,
@@ -60,28 +62,33 @@ class NaivePromptModel:
60
62
  return variants[:k]
61
63
 
62
64
 
63
- def build_run_fn(lm: LM, label_names: List[str]):
65
+ def build_run_fn(lm: LM, label_names: list[str]):
64
66
  def run_fn(x: str, _model: Any | None = None) -> str:
65
67
  # Use instructions and demos from adapter state (set by set_instructions/set_demos)
66
68
  # The adapter passes state via closure; we rebuild messages here
67
- instructions = state_ref.get("instructions", {}).get("main", "You are an intent classifier for Banking77.")
69
+ instructions = state_ref.get("instructions", {}).get(
70
+ "main", "You are an intent classifier for Banking77."
71
+ )
68
72
  examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in state_ref.get("demos", []))
69
73
  sys = instructions
70
74
  user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
71
75
  messages = build_messages(sys, user, images_bytes=None, model_name=lm.model)
76
+
72
77
  async def _call():
73
78
  resp = await lm.respond_async(messages=messages)
74
79
  return (resp.raw_response or "").strip()
80
+
75
81
  return asyncio.run(_call())
82
+
76
83
  return run_fn
77
84
 
78
85
 
79
- def set_instructions(new_instr: Dict[str, str], state: Dict[str, Any]) -> Dict[str, Any]:
86
+ def set_instructions(new_instr: dict[str, str], state: dict[str, Any]) -> dict[str, Any]:
80
87
  state["instructions"] = {**state.get("instructions", {}), **new_instr}
81
88
  return state
82
89
 
83
90
 
84
- def set_demos(demos: List[Tuple[str, str]], state: Dict[str, Any]) -> Dict[str, Any]:
91
+ def set_demos(demos: list[tuple[str, str]], state: dict[str, Any]) -> dict[str, Any]:
85
92
  state["demos"] = list(demos)
86
93
  return state
87
94
 
@@ -96,15 +103,18 @@ def main():
96
103
 
97
104
  print("Loading Banking77 dataset (train/dev split of test for demo)...")
98
105
  ds = load_dataset("banking77")
99
- label_names: List[str] = ds["test"].features["label"].names # type: ignore
106
+ label_names: list[str] = ds["test"].features["label"].names # type: ignore
100
107
 
101
108
  all_items = [(r["text"], label_names[int(r["label"])]) for r in ds["test"]]
102
109
  random.shuffle(all_items)
103
- trainset: Sequence[Tuple[str, str]] = all_items[:80]
104
- valset: Sequence[Tuple[str, str]] = all_items[80:160]
110
+ trainset: Sequence[tuple[str, str]] = all_items[:80]
111
+ valset: Sequence[tuple[str, str]] = all_items[80:160]
105
112
 
106
113
  global state_ref
107
- state_ref = {"instructions": {"main": "You are an intent classifier for Banking77."}, "demos": []}
114
+ state_ref = {
115
+ "instructions": {"main": "You are an intent classifier for Banking77."},
116
+ "demos": [],
117
+ }
108
118
  adapter = ProgramAdapter(
109
119
  run_fn=build_run_fn(lm, label_names),
110
120
  state=state_ref,
@@ -138,7 +148,9 @@ def main():
138
148
  )
139
149
 
140
150
  res = evaluate_program(best, valset, metric)
141
- print(f"Best program accuracy on val: {res.score:.2%} ({sum(res.subscores)}/{len(res.subscores)})")
151
+ print(
152
+ f"Best program accuracy on val: {res.score:.2%} ({sum(res.subscores)}/{len(res.subscores)})"
153
+ )
142
154
 
143
155
  out = {
144
156
  "context": {