synth-ai 0.2.4.dev6__py3-none-any.whl → 0.2.4.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229)
  1. synth_ai/__init__.py +18 -9
  2. synth_ai/cli/__init__.py +10 -5
  3. synth_ai/cli/balance.py +22 -17
  4. synth_ai/cli/calc.py +2 -3
  5. synth_ai/cli/demo.py +3 -5
  6. synth_ai/cli/legacy_root_backup.py +58 -32
  7. synth_ai/cli/man.py +22 -19
  8. synth_ai/cli/recent.py +9 -8
  9. synth_ai/cli/root.py +58 -13
  10. synth_ai/cli/status.py +13 -6
  11. synth_ai/cli/traces.py +45 -21
  12. synth_ai/cli/watch.py +40 -37
  13. synth_ai/config/base_url.py +1 -3
  14. synth_ai/core/experiment.py +1 -2
  15. synth_ai/environments/__init__.py +2 -6
  16. synth_ai/environments/environment/artifacts/base.py +3 -1
  17. synth_ai/environments/environment/db/sqlite.py +1 -1
  18. synth_ai/environments/environment/registry.py +19 -20
  19. synth_ai/environments/environment/resources/sqlite.py +2 -3
  20. synth_ai/environments/environment/rewards/core.py +3 -2
  21. synth_ai/environments/environment/tools/__init__.py +6 -4
  22. synth_ai/environments/examples/crafter_classic/__init__.py +1 -1
  23. synth_ai/environments/examples/crafter_classic/engine.py +13 -13
  24. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +1 -0
  25. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +2 -1
  26. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +2 -1
  27. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +3 -2
  28. synth_ai/environments/examples/crafter_classic/environment.py +16 -15
  29. synth_ai/environments/examples/crafter_classic/taskset.py +2 -2
  30. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +2 -3
  31. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +2 -1
  32. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +2 -2
  33. synth_ai/environments/examples/crafter_custom/crafter/config.py +2 -2
  34. synth_ai/environments/examples/crafter_custom/crafter/env.py +1 -5
  35. synth_ai/environments/examples/crafter_custom/crafter/objects.py +1 -2
  36. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +1 -2
  37. synth_ai/environments/examples/crafter_custom/dataset_builder.py +5 -5
  38. synth_ai/environments/examples/crafter_custom/environment.py +13 -13
  39. synth_ai/environments/examples/crafter_custom/run_dataset.py +5 -5
  40. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +2 -2
  41. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +5 -4
  42. synth_ai/environments/examples/enron/art_helpers/types_enron.py +2 -1
  43. synth_ai/environments/examples/enron/engine.py +18 -14
  44. synth_ai/environments/examples/enron/environment.py +12 -11
  45. synth_ai/environments/examples/enron/taskset.py +7 -7
  46. synth_ai/environments/examples/minigrid/__init__.py +6 -6
  47. synth_ai/environments/examples/minigrid/engine.py +6 -6
  48. synth_ai/environments/examples/minigrid/environment.py +6 -6
  49. synth_ai/environments/examples/minigrid/puzzle_loader.py +3 -2
  50. synth_ai/environments/examples/minigrid/taskset.py +13 -13
  51. synth_ai/environments/examples/nethack/achievements.py +1 -1
  52. synth_ai/environments/examples/nethack/engine.py +8 -7
  53. synth_ai/environments/examples/nethack/environment.py +10 -9
  54. synth_ai/environments/examples/nethack/helpers/__init__.py +8 -9
  55. synth_ai/environments/examples/nethack/helpers/action_mapping.py +1 -1
  56. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +2 -1
  57. synth_ai/environments/examples/nethack/helpers/observation_utils.py +1 -1
  58. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +3 -4
  59. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +6 -5
  60. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +5 -5
  61. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +7 -6
  62. synth_ai/environments/examples/nethack/taskset.py +5 -5
  63. synth_ai/environments/examples/red/engine.py +9 -8
  64. synth_ai/environments/examples/red/engine_helpers/reward_components.py +2 -1
  65. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +7 -7
  66. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +2 -1
  67. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +2 -1
  68. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +2 -1
  69. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +2 -1
  70. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +2 -1
  71. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +2 -1
  72. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +2 -1
  73. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +2 -1
  74. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +2 -1
  75. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +2 -1
  76. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +2 -1
  77. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +3 -2
  78. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +2 -1
  79. synth_ai/environments/examples/red/environment.py +18 -15
  80. synth_ai/environments/examples/red/taskset.py +5 -3
  81. synth_ai/environments/examples/sokoban/engine.py +16 -13
  82. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +3 -2
  83. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +2 -1
  84. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +1 -1
  85. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +7 -5
  86. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +1 -1
  87. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +2 -1
  88. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +5 -4
  89. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +3 -2
  90. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +2 -1
  91. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +5 -4
  92. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +1 -1
  93. synth_ai/environments/examples/sokoban/environment.py +15 -14
  94. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +5 -3
  95. synth_ai/environments/examples/sokoban/puzzle_loader.py +3 -2
  96. synth_ai/environments/examples/sokoban/taskset.py +13 -10
  97. synth_ai/environments/examples/tictactoe/engine.py +6 -6
  98. synth_ai/environments/examples/tictactoe/environment.py +8 -7
  99. synth_ai/environments/examples/tictactoe/taskset.py +6 -5
  100. synth_ai/environments/examples/verilog/engine.py +4 -3
  101. synth_ai/environments/examples/verilog/environment.py +11 -10
  102. synth_ai/environments/examples/verilog/taskset.py +14 -12
  103. synth_ai/environments/examples/wordle/__init__.py +5 -5
  104. synth_ai/environments/examples/wordle/engine.py +32 -25
  105. synth_ai/environments/examples/wordle/environment.py +21 -16
  106. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +6 -6
  107. synth_ai/environments/examples/wordle/taskset.py +20 -12
  108. synth_ai/environments/reproducibility/core.py +1 -1
  109. synth_ai/environments/reproducibility/tree.py +21 -21
  110. synth_ai/environments/service/app.py +3 -2
  111. synth_ai/environments/service/core_routes.py +104 -110
  112. synth_ai/environments/service/external_registry.py +1 -2
  113. synth_ai/environments/service/registry.py +1 -1
  114. synth_ai/environments/stateful/core.py +1 -2
  115. synth_ai/environments/stateful/engine.py +1 -1
  116. synth_ai/environments/tasks/api.py +4 -4
  117. synth_ai/environments/tasks/core.py +14 -12
  118. synth_ai/environments/tasks/filters.py +6 -4
  119. synth_ai/environments/tasks/utils.py +13 -11
  120. synth_ai/evals/base.py +2 -3
  121. synth_ai/experimental/synth_oss.py +4 -4
  122. synth_ai/learning/gateway.py +1 -3
  123. synth_ai/learning/prompts/banking77_injection_eval.py +15 -10
  124. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +26 -14
  125. synth_ai/learning/prompts/mipro.py +61 -52
  126. synth_ai/learning/prompts/random_search.py +42 -43
  127. synth_ai/learning/prompts/run_mipro_banking77.py +32 -20
  128. synth_ai/learning/prompts/run_random_search_banking77.py +71 -52
  129. synth_ai/lm/__init__.py +5 -5
  130. synth_ai/lm/caching/ephemeral.py +9 -9
  131. synth_ai/lm/caching/handler.py +20 -20
  132. synth_ai/lm/caching/persistent.py +10 -10
  133. synth_ai/lm/config.py +3 -3
  134. synth_ai/lm/constants.py +7 -7
  135. synth_ai/lm/core/all.py +17 -3
  136. synth_ai/lm/core/exceptions.py +0 -2
  137. synth_ai/lm/core/main.py +26 -41
  138. synth_ai/lm/core/main_v3.py +20 -10
  139. synth_ai/lm/core/vendor_clients.py +18 -17
  140. synth_ai/lm/injection.py +7 -8
  141. synth_ai/lm/overrides.py +21 -19
  142. synth_ai/lm/provider_support/__init__.py +1 -1
  143. synth_ai/lm/provider_support/anthropic.py +15 -15
  144. synth_ai/lm/provider_support/openai.py +23 -21
  145. synth_ai/lm/structured_outputs/handler.py +34 -32
  146. synth_ai/lm/structured_outputs/inject.py +24 -27
  147. synth_ai/lm/structured_outputs/rehabilitate.py +19 -15
  148. synth_ai/lm/tools/base.py +17 -16
  149. synth_ai/lm/unified_interface.py +17 -18
  150. synth_ai/lm/vendors/base.py +20 -18
  151. synth_ai/lm/vendors/core/anthropic_api.py +36 -27
  152. synth_ai/lm/vendors/core/gemini_api.py +31 -36
  153. synth_ai/lm/vendors/core/mistral_api.py +19 -19
  154. synth_ai/lm/vendors/core/openai_api.py +11 -10
  155. synth_ai/lm/vendors/openai_standard.py +113 -87
  156. synth_ai/lm/vendors/openai_standard_responses.py +74 -61
  157. synth_ai/lm/vendors/retries.py +9 -1
  158. synth_ai/lm/vendors/supported/custom_endpoint.py +26 -26
  159. synth_ai/lm/vendors/supported/deepseek.py +10 -10
  160. synth_ai/lm/vendors/supported/grok.py +8 -8
  161. synth_ai/lm/vendors/supported/ollama.py +2 -1
  162. synth_ai/lm/vendors/supported/openrouter.py +11 -9
  163. synth_ai/lm/vendors/synth_client.py +69 -63
  164. synth_ai/lm/warmup.py +8 -7
  165. synth_ai/tracing/__init__.py +22 -10
  166. synth_ai/tracing_v1/__init__.py +22 -20
  167. synth_ai/tracing_v3/__init__.py +7 -7
  168. synth_ai/tracing_v3/abstractions.py +56 -52
  169. synth_ai/tracing_v3/config.py +4 -2
  170. synth_ai/tracing_v3/db_config.py +6 -8
  171. synth_ai/tracing_v3/decorators.py +29 -30
  172. synth_ai/tracing_v3/examples/basic_usage.py +12 -12
  173. synth_ai/tracing_v3/hooks.py +21 -21
  174. synth_ai/tracing_v3/llm_call_record_helpers.py +85 -98
  175. synth_ai/tracing_v3/lm_call_record_abstractions.py +2 -4
  176. synth_ai/tracing_v3/migration_helper.py +3 -5
  177. synth_ai/tracing_v3/replica_sync.py +30 -32
  178. synth_ai/tracing_v3/session_tracer.py +35 -29
  179. synth_ai/tracing_v3/storage/__init__.py +1 -1
  180. synth_ai/tracing_v3/storage/base.py +8 -7
  181. synth_ai/tracing_v3/storage/config.py +4 -4
  182. synth_ai/tracing_v3/storage/factory.py +4 -4
  183. synth_ai/tracing_v3/storage/utils.py +9 -9
  184. synth_ai/tracing_v3/turso/__init__.py +3 -3
  185. synth_ai/tracing_v3/turso/daemon.py +9 -9
  186. synth_ai/tracing_v3/turso/manager.py +60 -48
  187. synth_ai/tracing_v3/turso/models.py +24 -19
  188. synth_ai/tracing_v3/utils.py +5 -5
  189. synth_ai/tui/__main__.py +1 -1
  190. synth_ai/tui/cli/query_experiments.py +2 -3
  191. synth_ai/tui/cli/query_experiments_v3.py +2 -3
  192. synth_ai/tui/dashboard.py +97 -86
  193. synth_ai/v0/tracing/abstractions.py +28 -28
  194. synth_ai/v0/tracing/base_client.py +9 -9
  195. synth_ai/v0/tracing/client_manager.py +7 -7
  196. synth_ai/v0/tracing/config.py +7 -7
  197. synth_ai/v0/tracing/context.py +6 -6
  198. synth_ai/v0/tracing/decorators.py +6 -5
  199. synth_ai/v0/tracing/events/manage.py +1 -1
  200. synth_ai/v0/tracing/events/store.py +5 -4
  201. synth_ai/v0/tracing/immediate_client.py +4 -5
  202. synth_ai/v0/tracing/local.py +3 -3
  203. synth_ai/v0/tracing/log_client_base.py +4 -5
  204. synth_ai/v0/tracing/retry_queue.py +5 -6
  205. synth_ai/v0/tracing/trackers.py +25 -25
  206. synth_ai/v0/tracing/upload.py +6 -0
  207. synth_ai/v0/tracing_v1/__init__.py +1 -1
  208. synth_ai/v0/tracing_v1/abstractions.py +28 -28
  209. synth_ai/v0/tracing_v1/base_client.py +9 -9
  210. synth_ai/v0/tracing_v1/client_manager.py +7 -7
  211. synth_ai/v0/tracing_v1/config.py +7 -7
  212. synth_ai/v0/tracing_v1/context.py +6 -6
  213. synth_ai/v0/tracing_v1/decorators.py +7 -6
  214. synth_ai/v0/tracing_v1/events/manage.py +1 -1
  215. synth_ai/v0/tracing_v1/events/store.py +5 -4
  216. synth_ai/v0/tracing_v1/immediate_client.py +4 -5
  217. synth_ai/v0/tracing_v1/local.py +3 -3
  218. synth_ai/v0/tracing_v1/log_client_base.py +4 -5
  219. synth_ai/v0/tracing_v1/retry_queue.py +5 -6
  220. synth_ai/v0/tracing_v1/trackers.py +25 -25
  221. synth_ai/v0/tracing_v1/upload.py +25 -24
  222. synth_ai/zyk/__init__.py +1 -0
  223. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/METADATA +1 -11
  224. synth_ai-0.2.4.dev7.dist-info/RECORD +299 -0
  225. synth_ai-0.2.4.dev6.dist-info/RECORD +0 -299
  226. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/WHEEL +0 -0
  227. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/entry_points.txt +0 -0
  228. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/licenses/LICENSE +0 -0
  229. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/top_level.txt +0 -0
synth_ai/learning/prompts/random_search.py CHANGED
@@ -8,10 +8,11 @@ metric, and this module will explore baselines and bootstrapped few-shot variant
 
 from __future__ import annotations
 
+import contextlib
 import random
+from collections.abc import Callable, Sequence
 from dataclasses import dataclass
-from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple
-
+from typing import Any
 
 # ---------------------------
 # Protocol-like expectations (duck-typed)
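Note: the recurring change across these hunks is the move from the typing.List / Dict / Tuple / Optional aliases to built-in generics (PEP 585) and "X | None" unions (PEP 604), with abstract containers imported from collections.abc. A minimal sketch of the style on Python 3.10+, using illustrative names that are not taken from the package:

    from collections.abc import Callable, Sequence

    def evaluate_all(
        items: Sequence[tuple[str, str]], metric: Callable[[str, str], float]
    ) -> list[float]:
        # built-in generics (list, tuple) replace typing.List / typing.Tuple
        return [metric(pred, gold) for pred, gold in items]

    def pick(threshold: float | None = None) -> float:
        # "float | None" replaces typing.Optional[float]
        return threshold if threshold is not None else 0.0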
@@ -25,7 +26,7 @@ class _ProgramLike:
     def deepcopy(self):  # deep copy
         return self
 
-    def with_demos(self, demos: List[Tuple[Any, Any]]):
+    def with_demos(self, demos: list[tuple[Any, Any]]):
         return self
 
     def run(self, x: Any) -> Any:
@@ -40,10 +41,12 @@ class _ProgramLike:
 @dataclass
 class EvalResult:
     score: float
-    subscores: List[float]
+    subscores: list[float]
 
 
-def evaluate(program: _ProgramLike, dataset: Sequence[Tuple[Any, Any]], metric: Callable[[Any, Any], float]) -> EvalResult:
+def evaluate(
+    program: _ProgramLike, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
+) -> EvalResult:
     subs = []
     for x, y in dataset:
         subs.append(metric(program.run(x), y))
@@ -54,7 +57,9 @@ class LabeledFewShot:
     def __init__(self, k: int):
         self.k = k
 
-    def compile(self, student: _ProgramLike, trainset: Sequence[Tuple[Any, Any]], sample: bool = True) -> _ProgramLike:
+    def compile(
+        self, student: _ProgramLike, trainset: Sequence[tuple[Any, Any]], sample: bool = True
+    ) -> _ProgramLike:
         p = getattr(student, "deepcopy", student.reset_copy)()
         demos = list(trainset)
         if sample:
@@ -68,10 +73,10 @@ class BootstrapFewShot:
         self,
         *,
         metric: Callable[[Any, Any], float],
-        metric_threshold: Optional[float] = None,
+        metric_threshold: float | None = None,
         max_bootstrapped_demos: int = 8,
         max_labeled_demos: int = 0,
-        teacher_settings: Optional[Dict[str, Any]] = None,
+        teacher_settings: dict[str, Any] | None = None,
         max_rounds: int = 1,
     ):
         self.metric = metric
@@ -84,18 +89,18 @@ class BootstrapFewShot:
     def compile(
         self,
         student: _ProgramLike,
-        teacher: Optional[_ProgramLike],
-        trainset: Sequence[Tuple[Any, Any]],
+        teacher: _ProgramLike | None,
+        trainset: Sequence[tuple[Any, Any]],
     ) -> _ProgramLike:
         p = getattr(student, "deepcopy", student.reset_copy)()
         rng = random.Random()
         # If bootstrapped demos disabled, return labeled-only few-shot quickly
         if self.max_bootstrapped_demos <= 0:
-            demos: List[Tuple[Any, Any]] = []
+            demos: list[tuple[Any, Any]] = []
             if self.max_labeled_demos > 0:
                 demos += rng.sample(list(trainset), k=min(self.max_labeled_demos, len(trainset)))
             return p.with_demos(demos)
-        boot: List[Tuple[Any, Any]] = []
+        boot: list[tuple[Any, Any]] = []
         # Bootstrap demos by self consistency
         for _ in range(self.max_rounds):
             rng.shuffle(trainset := list(trainset))
@@ -127,33 +132,29 @@ class BootstrapFewShot:
 @dataclass
 class Candidate:
     score: float
-    subscores: List[float]
+    subscores: list[float]
     seed: int
     program: _ProgramLike
 
 
 def random_search_compile(
     student: _ProgramLike,
-    trainset: Sequence[Tuple[Any, Any]],
-    valset: Sequence[Tuple[Any, Any]],
+    trainset: Sequence[tuple[Any, Any]],
+    valset: Sequence[tuple[Any, Any]],
     metric: Callable[[Any, Any], float],
     *,
     max_bootstrapped_demos: int = 8,
     max_labeled_demos: int = 4,
     max_rounds: int = 2,
     num_candidate_programs: int = 16,
-    stop_at_score: Optional[float] = None,
-    evaluate_fn: Optional[Callable[[
-        _ProgramLike,
-        Sequence[Tuple[Any, Any]],
-        Callable[[Any, Any], float]
-    ], EvalResult]] = None,
-    on_candidate_evaluated: Optional[Callable[[int, float, EvalResult, Dict[str, Any]], None]] = None,
-) -> Tuple[_ProgramLike, List[Dict[str, Any]]]:
-    best_program: Optional[_ProgramLike] = None
+    stop_at_score: float | None = None,
+    evaluate_fn: Callable[[_ProgramLike, Sequence[tuple[Any, Any]], Callable[[Any, Any], float]], EvalResult] | None = None,
+    on_candidate_evaluated: Callable[[int, float, EvalResult, dict[str, Any]], None] | None = None,
+) -> tuple[_ProgramLike, list[dict[str, Any]]]:
+    best_program: _ProgramLike | None = None
     best_score = float("-inf")
-    candidates: List[Candidate] = []
-    records: List[Dict[str, Any]] = []
+    candidates: list[Candidate] = []
+    records: list[dict[str, Any]] = []
 
     seeds = list(range(num_candidate_programs))
     seeds = [-3, -2, -1] + seeds  # zero-shot, labeled few-shot, bootstrapped few-shot
@@ -174,7 +175,9 @@ def random_search_compile(
         if max_bootstrapped_demos <= 0:
             size = 0
         else:
-            size = max_bootstrapped_demos if seed == -1 else rng.randint(1, max_bootstrapped_demos)
+            size = (
+                max_bootstrapped_demos if seed == -1 else rng.randint(1, max_bootstrapped_demos)
+            )
         program = BootstrapFewShot(
             metric=metric,
             metric_threshold=None,
@@ -184,14 +187,18 @@ def random_search_compile(
             max_rounds=max_rounds,
         ).compile(student, teacher=None, trainset=train_copy)
 
-        res = (evaluate_fn(program, valset, metric) if evaluate_fn else evaluate(program, valset, metric))
+        res = (
+            evaluate_fn(program, valset, metric)
+            if evaluate_fn
+            else evaluate(program, valset, metric)
+        )
         cand = Candidate(score=res.score, subscores=res.subscores, seed=seed, program=program)
         candidates.append(cand)
         # Record an intervention summary for reproducibility
-        intervention: Dict[str, Any] = {"seed": seed}
+        intervention: dict[str, Any] = {"seed": seed}
         if hasattr(program, "demos"):
             try:
-                intervention["demos"] = getattr(program, "demos")  # type: ignore
+                intervention["demos"] = program.demos  # type: ignore
             except Exception:
                 intervention["demos"] = None
         # Type of candidate
@@ -203,12 +210,6 @@ def random_search_compile(
             intervention["label"] = f"labeled-{max_labeled_demos}"
         else:
             intervention["kind"] = "bootstrapped_few_shot"
-            bs = 0
-            try:
-                # try to infer from program demos length if present
-                bs = len(intervention.get("demos") or [])
-            except Exception:
-                bs = 0
             intervention["label"] = f"boot-b{max_bootstrapped_demos}-l{max_labeled_demos}"
         record_obj = {
             "score": cand.score,
@@ -224,18 +225,16 @@ def random_search_compile(
             break
 
         if on_candidate_evaluated is not None:
-            try:
+            with contextlib.suppress(Exception):
                 on_candidate_evaluated(idx + 1, res.score, res, intervention)
-            except Exception:
-                pass
 
     # Attach candidates for inspection
     if hasattr(best_program, "candidate_programs"):
         # If user object supports attribute assignment
-        try:
-            best_program.candidate_programs = sorted(candidates, key=lambda c: c.score, reverse=True)  # type: ignore[attr-defined]
-        except Exception:
-            pass
+        with contextlib.suppress(Exception):
+            best_program.candidate_programs = sorted(
+                candidates, key=lambda c: c.score, reverse=True
+            )  # type: ignore[attr-defined]
 
     return (best_program or getattr(student, "deepcopy", student)(), records)
 
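Note: several hunks in this diff replace try / except Exception / pass blocks with contextlib.suppress. A minimal sketch of the equivalence, with an illustrative callback name that is not from the package:

    import contextlib

    def notify(callback, *args):
        # Behaves like: try: callback(*args) / except Exception: pass
        with contextlib.suppress(Exception):
            callback(*args)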
synth_ai/learning/prompts/run_mipro_banking77.py CHANGED
@@ -12,39 +12,41 @@ Run:
 from __future__ import annotations
 
 import asyncio
+import json
 import os
 import random
-from dataclasses import dataclass, replace
-from typing import Any, Dict, List, Sequence, Tuple
+import time
+from collections.abc import Sequence
+from pathlib import Path
+from typing import Any
 
-from dotenv import load_dotenv
 from datasets import load_dataset
-
+from dotenv import load_dotenv
+from synth_ai.learning.prompts.mipro import ProgramAdapter, evaluate_program, mipro_v2_compile
 from synth_ai.lm.core.main_v3 import LM, build_messages
-import json
-import time
-from pathlib import Path
-from synth_ai.learning.prompts.mipro import ProgramAdapter, mipro_v2_compile, evaluate_program
 
 
-def choose_label(pred: str, label_names: List[str]) -> str:
+def choose_label(pred: str, label_names: list[str]) -> str:
     norm = (pred or "").strip().lower()
     d = {ln.lower(): ln for ln in label_names}
     if norm in d:
         return d[norm]
+
     def score(cand: str) -> int:
         c = cand.lower()
         return sum(1 for w in c.split() if w in norm)
+
     return max(label_names, key=score)
 
 
-def accuracy(pred: str, gold: str, labels: List[str]) -> float:
+def accuracy(pred: str, gold: str, labels: list[str]) -> float:
     return 1.0 if choose_label(pred, labels) == gold else 0.0
 
 
 class NaivePromptModel:
     """Toy prompt model that returns simple instruction variants."""
-    def generate_instructions(self, ctx: Dict[str, Any], k: int = 8) -> List[str]:
+
+    def generate_instructions(self, ctx: dict[str, Any], k: int = 8) -> list[str]:
         base = "Classify the Banking77 intent and return exactly one label."
         variants = [
             base,
@@ -60,28 +62,33 @@ class NaivePromptModel:
         return variants[:k]
 
 
-def build_run_fn(lm: LM, label_names: List[str]):
+def build_run_fn(lm: LM, label_names: list[str]):
     def run_fn(x: str, _model: Any | None = None) -> str:
         # Use instructions and demos from adapter state (set by set_instructions/set_demos)
         # The adapter passes state via closure; we rebuild messages here
-        instructions = state_ref.get("instructions", {}).get("main", "You are an intent classifier for Banking77.")
+        instructions = state_ref.get("instructions", {}).get(
+            "main", "You are an intent classifier for Banking77."
+        )
         examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in state_ref.get("demos", []))
         sys = instructions
         user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
         messages = build_messages(sys, user, images_bytes=None, model_name=lm.model)
+
         async def _call():
             resp = await lm.respond_async(messages=messages)
             return (resp.raw_response or "").strip()
+
         return asyncio.run(_call())
+
     return run_fn
 
 
-def set_instructions(new_instr: Dict[str, str], state: Dict[str, Any]) -> Dict[str, Any]:
+def set_instructions(new_instr: dict[str, str], state: dict[str, Any]) -> dict[str, Any]:
     state["instructions"] = {**state.get("instructions", {}), **new_instr}
     return state
 
 
-def set_demos(demos: List[Tuple[str, str]], state: Dict[str, Any]) -> Dict[str, Any]:
+def set_demos(demos: list[tuple[str, str]], state: dict[str, Any]) -> dict[str, Any]:
     state["demos"] = list(demos)
     return state
 
@@ -96,15 +103,18 @@ def main():
 
     print("Loading Banking77 dataset (train/dev split of test for demo)...")
     ds = load_dataset("banking77")
-    label_names: List[str] = ds["test"].features["label"].names  # type: ignore
+    label_names: list[str] = ds["test"].features["label"].names  # type: ignore
 
     all_items = [(r["text"], label_names[int(r["label"])]) for r in ds["test"]]
    random.shuffle(all_items)
-    trainset: Sequence[Tuple[str, str]] = all_items[:80]
-    valset: Sequence[Tuple[str, str]] = all_items[80:160]
+    trainset: Sequence[tuple[str, str]] = all_items[:80]
+    valset: Sequence[tuple[str, str]] = all_items[80:160]
 
     global state_ref
-    state_ref = {"instructions": {"main": "You are an intent classifier for Banking77."}, "demos": []}
+    state_ref = {
+        "instructions": {"main": "You are an intent classifier for Banking77."},
+        "demos": [],
+    }
     adapter = ProgramAdapter(
         run_fn=build_run_fn(lm, label_names),
         state=state_ref,
@@ -138,7 +148,9 @@ def main():
     )
 
     res = evaluate_program(best, valset, metric)
-    print(f"Best program accuracy on val: {res.score:.2%} ({sum(res.subscores)}/{len(res.subscores)})")
+    print(
+        f"Best program accuracy on val: {res.score:.2%} ({sum(res.subscores)}/{len(res.subscores)})"
+    )
 
     out = {
         "context": {
synth_ai/learning/prompts/run_random_search_banking77.py CHANGED
@@ -12,44 +12,46 @@ Run:
 from __future__ import annotations
 
 import asyncio
+import json
 import os
 import random
+import time
+from collections.abc import Sequence
 from dataclasses import dataclass, replace
+from pathlib import Path
 from types import SimpleNamespace
-from tqdm import tqdm
-from typing import Any, Dict, List, Sequence, Tuple
+from typing import Any
 
-from dotenv import load_dotenv
 from datasets import load_dataset
-
-from synth_ai.lm.core.main_v3 import LM, build_messages
-import json
-import time
-from pathlib import Path
+from dotenv import load_dotenv
 from synth_ai.learning.prompts.random_search import random_search_compile
+from synth_ai.lm.core.main_v3 import LM, build_messages
+from tqdm import tqdm
 
 
-def choose_label(pred: str, label_names: List[str]) -> str:
+def choose_label(pred: str, label_names: list[str]) -> str:
     norm = (pred or "").strip().lower()
     d = {ln.lower(): ln for ln in label_names}
     if norm in d:
         return d[norm]
+
     def score(cand: str) -> int:
         c = cand.lower()
         return sum(1 for w in c.split() if w in norm)
+
     return max(label_names, key=score)
 
 
-def accuracy(pred: str, gold: str, labels: List[str]) -> float:
+def accuracy(pred: str, gold: str, labels: list[str]) -> float:
     return 1.0 if choose_label(pred, labels) == gold else 0.0
 
 
 @dataclass
 class StudentProgram:
     lm: LM
-    label_names: List[str]
+    label_names: list[str]
     instruction: str
-    demos: List[Tuple[str, str]]
+    demos: list[tuple[str, str]]
 
     def reset_copy(self):
         return replace(self, instruction=self.instruction, demos=list(self.demos))
@@ -57,7 +59,7 @@ class StudentProgram:
     def deepcopy(self):
         return replace(self, instruction=str(self.instruction), demos=list(self.demos))
 
-    def with_demos(self, demos: List[Tuple[str, str]]):
+    def with_demos(self, demos: list[tuple[str, str]]):
         return replace(self, demos=list(demos))
 
     def run(self, x: str) -> str:
@@ -66,10 +68,12 @@ class StudentProgram:
         sys = self.instruction or "You are an intent classifier for Banking77."
         user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
         messages = build_messages(sys, user, images_bytes=None, model_name=self.lm.model)
+
         # Call LM synchronously via asyncio
         async def _call():
             resp = await self.lm.respond_async(messages=messages)
             return (resp.raw_response or "").strip()
+
         return asyncio.run(_call())
 
     async def _apredict(self, x: str):
@@ -91,13 +95,13 @@ def main():
 
     print("Loading Banking77 dataset (train/dev split of test for demo)...")
     ds = load_dataset("banking77")
-    label_names: List[str] = ds["test"].features["label"].names  # type: ignore
+    label_names: list[str] = ds["test"].features["label"].names  # type: ignore
 
     # Create small train/val from the test split for speed
     all_items = [(r["text"], label_names[int(r["label"])]) for r in ds["test"]]
     random.shuffle(all_items)
-    trainset: Sequence[Tuple[str, str]] = all_items[:40]
-    valset: Sequence[Tuple[str, str]] = all_items[40:60]  # 20 examples
+    trainset: Sequence[tuple[str, str]] = all_items[:40]
+    valset: Sequence[tuple[str, str]] = all_items[40:60]  # 20 examples
 
     student = StudentProgram(
         lm=lm,
@@ -110,17 +114,20 @@ def main():
         return accuracy(yhat, y, label_names)
 
     total_candidates = 3 + 3  # zero-shot, labeled few-shot, bootstrapped + 3 random seeds
-    print(f"Running Random Search optimizer ({total_candidates} candidates, parallel eval of 20 questions)...")
+    print(
+        f"Running Random Search optimizer ({total_candidates} candidates, parallel eval of 20 questions)..."
+    )
 
-    def eval_parallel(program: StudentProgram, dataset: Sequence[Tuple[str, str]], metric_fn):
+    def eval_parallel(program: StudentProgram, dataset: Sequence[tuple[str, str]], metric_fn):
         async def _run():
             xs = [x for x, _ in dataset]
             ys = [y for _, y in dataset]
-            preds: List[Optional[str]] = [None] * len(xs)
+            preds: list[Optional[str]] = [None] * len(xs)
             sem = asyncio.Semaphore(int(os.getenv("CONCURRENCY", "5")))
 
             async def worker(i: int, x: str, y: str):
                 import time
+
                 t_start = time.monotonic()
                 try:
                     async with sem:
@@ -138,16 +145,18 @@ def main():
                     t_end = time.monotonic()
                    return i, y, "", t_start, t_end, {}
 
-            tasks = [asyncio.create_task(worker(i, x, y)) for i, (x, y) in enumerate(zip(xs, ys))]
+            tasks = [asyncio.create_task(worker(i, x, y)) for i, (x, y) in enumerate(zip(xs, ys, strict=False))]
             correct_sum = 0.0
             processed = 0
-            import time, statistics
-            durations: List[float] = []
+            import statistics
+            import time
+
+            durations: list[float] = []
             in_tok_sum = 0
             out_tok_sum = 0
             in_tok_count = 0
             out_tok_count = 0
-            details: List[Dict[str, Any]] = []
+            details: list[dict[str, Any]] = []
             t_batch_start = time.monotonic()
             deadline = float(os.getenv("BATCH_DEADLINE_S", "20"))
             with tqdm(total=len(tasks), desc="Rollouts", leave=False) as pbar:
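Note: the zip(xs, ys, strict=False) change in the hunk above uses the strict keyword added to zip in Python 3.10. strict=False keeps the historical behaviour of stopping at the shortest iterable, while strict=True would raise ValueError on a length mismatch. A minimal sketch with illustrative values:

    xs = ["a", "b", "c"]
    ys = [1, 2]
    pairs = list(zip(xs, ys, strict=False))  # [("a", 1), ("b", 2)]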
@@ -172,7 +181,10 @@ def main():
                        break
                    # Wait for at least one completion within remaining time (polling granularity <= 1s)
                    timeout = min(1.0, remaining)
-                    done, pending = await asyncio.wait(pending, timeout=timeout, return_when=asyncio.FIRST_COMPLETED)
+                    done, pending = await asyncio.wait(
+                        pending, timeout=timeout, return_when=asyncio.FIRST_COMPLETED
+                    )
+                    import contextlib
                    for task in done:
                        try:
                            i, y_true, pred, t_start, t_end, usage = task.result()
@@ -182,11 +194,9 @@ def main():
                            durations.append(max(0.0, t_end - t_start))
                            preds[i] = pred
                            processed += 1
-                            try:
+                            with contextlib.suppress(Exception):
                                correct_sum += float(metric_fn(pred, y_true))
-                            except Exception:
-                                pass
-                            try:
+                            with contextlib.suppress(Exception):
                                pt = usage.get("prompt_tokens") or usage.get("input_tokens")
                                ct = usage.get("completion_tokens") or usage.get("output_tokens")
                                if isinstance(pt, (int, float)):
@@ -195,30 +205,34 @@ def main():
                                if isinstance(ct, (int, float)):
                                    out_tok_sum += int(ct)
                                    out_tok_count += 1
-                            except Exception:
-                                pass
-                            details.append({
-                                "index": i,
-                                "seconds": max(0.0, t_end - t_start),
-                                "score": float(metric_fn(pred, y_true)),
-                                "usage": {
-                                    "prompt_tokens": usage.get("prompt_tokens") or usage.get("input_tokens"),
-                                    "completion_tokens": usage.get("completion_tokens") or usage.get("output_tokens"),
-                                },
-                            })
+                            details.append(
+                                {
+                                    "index": i,
+                                    "seconds": max(0.0, t_end - t_start),
+                                    "score": float(metric_fn(pred, y_true)),
+                                    "usage": {
+                                        "prompt_tokens": usage.get("prompt_tokens")
+                                        or usage.get("input_tokens"),
+                                        "completion_tokens": usage.get("completion_tokens")
+                                        or usage.get("output_tokens"),
+                                    },
+                                }
+                            )
                            pbar.update(1)
                    med = statistics.median(durations) if durations else 0.0
                    mx = max(durations) if durations else 0.0
                    avg_in = (in_tok_sum / in_tok_count) if in_tok_count else 0.0
                    avg_out = (out_tok_sum / out_tok_count) if out_tok_count else 0.0
-                    pbar.set_postfix({
-                        "acc": f"{(correct_sum/processed):.2f}",
-                        "done": f"{processed}/{len(tasks)}",
-                        "med_s": f"{med:.1f}",
-                        "max_s": f"{mx:.1f}",
-                        "tin": f"{avg_in:.1f}",
-                        "tout": f"{avg_out:.1f}",
-                    })
+                    pbar.set_postfix(
+                        {
+                            "acc": f"{(correct_sum / processed):.2f}",
+                            "done": f"{processed}/{len(tasks)}",
+                            "med_s": f"{med:.1f}",
+                            "max_s": f"{mx:.1f}",
+                            "tin": f"{avg_in:.1f}",
+                            "tout": f"{avg_out:.1f}",
+                        }
+                    )
            # Compute score only from completed/successful rollouts (drop timeouts/cancelled)
            subs = [float(d.get("score", 0.0)) for d in details]
            result = SimpleNamespace(score=(sum(subs) / max(1, len(subs))), subscores=subs)
@@ -226,28 +240,33 @@ def main():
            result.mean_in = (in_tok_sum / in_tok_count) if in_tok_count else 0.0
            result.mean_out = (out_tok_sum / out_tok_count) if out_tok_count else 0.0
            return result
+
        return asyncio.run(_run())
+
    pbar = tqdm(total=total_candidates, desc="Candidates")
-    candidate_eval_details: Dict[int, Any] = {}
+    candidate_eval_details: dict[int, Any] = {}
+
    def on_cand(idx: int, score: float, res, intervention):
        pbar.update(1)
        pbar.set_postfix({"score": f"{score:.2f}"})
        # store per-instance details (for apples-to-apples)
-        try:
+        import contextlib
+        with contextlib.suppress(Exception):
            candidate_eval_details[idx] = {
                "score": score,
                "mean_in": getattr(res, "mean_in", None),
                "mean_out": getattr(res, "mean_out", None),
                "instances": getattr(res, "details", None),
            }
-        except Exception:
-            pass
        # visible summary line per candidate
-        kind = intervention.get("kind", "candidate") if isinstance(intervention, dict) else "candidate"
+        kind = (
+            intervention.get("kind", "candidate") if isinstance(intervention, dict) else "candidate"
+        )
        label = intervention.get("label") if isinstance(intervention, dict) else None
        seed = intervention.get("seed") if isinstance(intervention, dict) else None
        processed = len(getattr(res, "details", []) or [])
        from tqdm import tqdm as _tqdm
+
        _tqdm.write(
            f"Candidate {idx}/{total_candidates} [{kind}{'' if label is None else f', label={label}'}{'' if seed is None else f', seed={seed}'}]: "
            f"score={score:.2f} | mean tin/tout={getattr(res, 'mean_in', 0):.1f}/{getattr(res, 'mean_out', 0):.1f} | N={processed}"
synth_ai/lm/__init__.py CHANGED
@@ -4,24 +4,24 @@ Synth AI Language Model Interface.
 Provides a unified interface for multiple LLM providers including OpenAI and Synth.
 """
 
-from .config import SynthConfig, OpenAIConfig
-from .warmup import warmup_synth_model, get_warmup_status
+from .config import OpenAIConfig, SynthConfig
+from .core.main_v3 import LM
 from .unified_interface import (
-    UnifiedLMProvider,
     OpenAIProvider,
     SynthProvider,
     UnifiedLMClient,
+    UnifiedLMProvider,
     create_provider,
 )
 from .vendors.synth_client import (
     AsyncSynthClient,
     SyncSynthClient,
     create_async_client,
-    create_sync_client,
     create_chat_completion_async,
     create_chat_completion_sync,
+    create_sync_client,
 )
-from .core.main_v3 import LM
+from .warmup import get_warmup_status, warmup_synth_model
 
 __all__ = [
     # Configuration
synth_ai/lm/caching/ephemeral.py CHANGED
@@ -7,7 +7,6 @@ of the application run, useful for avoiding redundant API calls within a session
 
 import os
 from dataclasses import dataclass
-from typing import Optional, Union
 
 from diskcache import Cache
 from pydantic import BaseModel
@@ -20,24 +19,25 @@ from synth_ai.lm.vendors.base import BaseLMResponse
 class EphemeralCache:
     """
     Ephemeral cache implementation using diskcache.
-    
+
     This cache stores LM responses temporarily on disk with a size limit.
     The cache is cleared when the application restarts.
     """
+
     def __init__(self, fast_cache_dir: str = ".cache/ephemeral_cache"):
         os.makedirs(fast_cache_dir, exist_ok=True)
         self.fast_cache = Cache(fast_cache_dir, size_limit=DISKCACHE_SIZE_LIMIT)
 
     def hit_cache(
-        self, key: str, response_model: Optional[BaseModel] = None
-    ) -> Optional[BaseLMResponse]:
+        self, key: str, response_model: BaseModel | None = None
+    ) -> BaseLMResponse | None:
         """
         Check if a response exists in cache for the given key.
-        
+
         Args:
             key: Cache key to look up
             response_model: Optional Pydantic model to reconstruct structured output
-        
+
         Returns:
             BaseLMResponse if found in cache, None otherwise
         """
@@ -65,14 +65,14 @@ class EphemeralCache:
             tool_calls=tool_calls,
         )
 
-    def add_to_cache(self, key: str, response: Union[BaseLMResponse, str]) -> None:
+    def add_to_cache(self, key: str, response: BaseLMResponse | str) -> None:
         """
         Add a response to the cache.
-        
+
         Args:
             key: Cache key to store under
             response: Either a BaseLMResponse object or raw string response
-        
+
         Raises:
             ValueError: If response type is not supported
         """