synth-ai 0.2.4.dev6__py3-none-any.whl → 0.2.4.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229)
  1. synth_ai/__init__.py +18 -9
  2. synth_ai/cli/__init__.py +10 -5
  3. synth_ai/cli/balance.py +22 -17
  4. synth_ai/cli/calc.py +2 -3
  5. synth_ai/cli/demo.py +3 -5
  6. synth_ai/cli/legacy_root_backup.py +58 -32
  7. synth_ai/cli/man.py +22 -19
  8. synth_ai/cli/recent.py +9 -8
  9. synth_ai/cli/root.py +58 -13
  10. synth_ai/cli/status.py +13 -6
  11. synth_ai/cli/traces.py +45 -21
  12. synth_ai/cli/watch.py +40 -37
  13. synth_ai/config/base_url.py +1 -3
  14. synth_ai/core/experiment.py +1 -2
  15. synth_ai/environments/__init__.py +2 -6
  16. synth_ai/environments/environment/artifacts/base.py +3 -1
  17. synth_ai/environments/environment/db/sqlite.py +1 -1
  18. synth_ai/environments/environment/registry.py +19 -20
  19. synth_ai/environments/environment/resources/sqlite.py +2 -3
  20. synth_ai/environments/environment/rewards/core.py +3 -2
  21. synth_ai/environments/environment/tools/__init__.py +6 -4
  22. synth_ai/environments/examples/crafter_classic/__init__.py +1 -1
  23. synth_ai/environments/examples/crafter_classic/engine.py +13 -13
  24. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +1 -0
  25. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +2 -1
  26. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +2 -1
  27. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +3 -2
  28. synth_ai/environments/examples/crafter_classic/environment.py +16 -15
  29. synth_ai/environments/examples/crafter_classic/taskset.py +2 -2
  30. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +2 -3
  31. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +2 -1
  32. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +2 -2
  33. synth_ai/environments/examples/crafter_custom/crafter/config.py +2 -2
  34. synth_ai/environments/examples/crafter_custom/crafter/env.py +1 -5
  35. synth_ai/environments/examples/crafter_custom/crafter/objects.py +1 -2
  36. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +1 -2
  37. synth_ai/environments/examples/crafter_custom/dataset_builder.py +5 -5
  38. synth_ai/environments/examples/crafter_custom/environment.py +13 -13
  39. synth_ai/environments/examples/crafter_custom/run_dataset.py +5 -5
  40. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +2 -2
  41. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +5 -4
  42. synth_ai/environments/examples/enron/art_helpers/types_enron.py +2 -1
  43. synth_ai/environments/examples/enron/engine.py +18 -14
  44. synth_ai/environments/examples/enron/environment.py +12 -11
  45. synth_ai/environments/examples/enron/taskset.py +7 -7
  46. synth_ai/environments/examples/minigrid/__init__.py +6 -6
  47. synth_ai/environments/examples/minigrid/engine.py +6 -6
  48. synth_ai/environments/examples/minigrid/environment.py +6 -6
  49. synth_ai/environments/examples/minigrid/puzzle_loader.py +3 -2
  50. synth_ai/environments/examples/minigrid/taskset.py +13 -13
  51. synth_ai/environments/examples/nethack/achievements.py +1 -1
  52. synth_ai/environments/examples/nethack/engine.py +8 -7
  53. synth_ai/environments/examples/nethack/environment.py +10 -9
  54. synth_ai/environments/examples/nethack/helpers/__init__.py +8 -9
  55. synth_ai/environments/examples/nethack/helpers/action_mapping.py +1 -1
  56. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +2 -1
  57. synth_ai/environments/examples/nethack/helpers/observation_utils.py +1 -1
  58. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +3 -4
  59. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +6 -5
  60. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +5 -5
  61. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +7 -6
  62. synth_ai/environments/examples/nethack/taskset.py +5 -5
  63. synth_ai/environments/examples/red/engine.py +9 -8
  64. synth_ai/environments/examples/red/engine_helpers/reward_components.py +2 -1
  65. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +7 -7
  66. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +2 -1
  67. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +2 -1
  68. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +2 -1
  69. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +2 -1
  70. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +2 -1
  71. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +2 -1
  72. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +2 -1
  73. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +2 -1
  74. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +2 -1
  75. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +2 -1
  76. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +2 -1
  77. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +3 -2
  78. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +2 -1
  79. synth_ai/environments/examples/red/environment.py +18 -15
  80. synth_ai/environments/examples/red/taskset.py +5 -3
  81. synth_ai/environments/examples/sokoban/engine.py +16 -13
  82. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +3 -2
  83. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +2 -1
  84. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +1 -1
  85. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +7 -5
  86. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +1 -1
  87. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +2 -1
  88. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +5 -4
  89. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +3 -2
  90. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +2 -1
  91. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +5 -4
  92. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +1 -1
  93. synth_ai/environments/examples/sokoban/environment.py +15 -14
  94. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +5 -3
  95. synth_ai/environments/examples/sokoban/puzzle_loader.py +3 -2
  96. synth_ai/environments/examples/sokoban/taskset.py +13 -10
  97. synth_ai/environments/examples/tictactoe/engine.py +6 -6
  98. synth_ai/environments/examples/tictactoe/environment.py +8 -7
  99. synth_ai/environments/examples/tictactoe/taskset.py +6 -5
  100. synth_ai/environments/examples/verilog/engine.py +4 -3
  101. synth_ai/environments/examples/verilog/environment.py +11 -10
  102. synth_ai/environments/examples/verilog/taskset.py +14 -12
  103. synth_ai/environments/examples/wordle/__init__.py +5 -5
  104. synth_ai/environments/examples/wordle/engine.py +32 -25
  105. synth_ai/environments/examples/wordle/environment.py +21 -16
  106. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +6 -6
  107. synth_ai/environments/examples/wordle/taskset.py +20 -12
  108. synth_ai/environments/reproducibility/core.py +1 -1
  109. synth_ai/environments/reproducibility/tree.py +21 -21
  110. synth_ai/environments/service/app.py +3 -2
  111. synth_ai/environments/service/core_routes.py +104 -110
  112. synth_ai/environments/service/external_registry.py +1 -2
  113. synth_ai/environments/service/registry.py +1 -1
  114. synth_ai/environments/stateful/core.py +1 -2
  115. synth_ai/environments/stateful/engine.py +1 -1
  116. synth_ai/environments/tasks/api.py +4 -4
  117. synth_ai/environments/tasks/core.py +14 -12
  118. synth_ai/environments/tasks/filters.py +6 -4
  119. synth_ai/environments/tasks/utils.py +13 -11
  120. synth_ai/evals/base.py +2 -3
  121. synth_ai/experimental/synth_oss.py +4 -4
  122. synth_ai/learning/gateway.py +1 -3
  123. synth_ai/learning/prompts/banking77_injection_eval.py +15 -10
  124. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +26 -14
  125. synth_ai/learning/prompts/mipro.py +61 -52
  126. synth_ai/learning/prompts/random_search.py +42 -43
  127. synth_ai/learning/prompts/run_mipro_banking77.py +32 -20
  128. synth_ai/learning/prompts/run_random_search_banking77.py +71 -52
  129. synth_ai/lm/__init__.py +5 -5
  130. synth_ai/lm/caching/ephemeral.py +9 -9
  131. synth_ai/lm/caching/handler.py +20 -20
  132. synth_ai/lm/caching/persistent.py +10 -10
  133. synth_ai/lm/config.py +3 -3
  134. synth_ai/lm/constants.py +7 -7
  135. synth_ai/lm/core/all.py +17 -3
  136. synth_ai/lm/core/exceptions.py +0 -2
  137. synth_ai/lm/core/main.py +26 -41
  138. synth_ai/lm/core/main_v3.py +20 -10
  139. synth_ai/lm/core/vendor_clients.py +18 -17
  140. synth_ai/lm/injection.py +7 -8
  141. synth_ai/lm/overrides.py +21 -19
  142. synth_ai/lm/provider_support/__init__.py +1 -1
  143. synth_ai/lm/provider_support/anthropic.py +15 -15
  144. synth_ai/lm/provider_support/openai.py +23 -21
  145. synth_ai/lm/structured_outputs/handler.py +34 -32
  146. synth_ai/lm/structured_outputs/inject.py +24 -27
  147. synth_ai/lm/structured_outputs/rehabilitate.py +19 -15
  148. synth_ai/lm/tools/base.py +17 -16
  149. synth_ai/lm/unified_interface.py +17 -18
  150. synth_ai/lm/vendors/base.py +20 -18
  151. synth_ai/lm/vendors/core/anthropic_api.py +36 -27
  152. synth_ai/lm/vendors/core/gemini_api.py +31 -36
  153. synth_ai/lm/vendors/core/mistral_api.py +19 -19
  154. synth_ai/lm/vendors/core/openai_api.py +11 -10
  155. synth_ai/lm/vendors/openai_standard.py +113 -87
  156. synth_ai/lm/vendors/openai_standard_responses.py +74 -61
  157. synth_ai/lm/vendors/retries.py +9 -1
  158. synth_ai/lm/vendors/supported/custom_endpoint.py +26 -26
  159. synth_ai/lm/vendors/supported/deepseek.py +10 -10
  160. synth_ai/lm/vendors/supported/grok.py +8 -8
  161. synth_ai/lm/vendors/supported/ollama.py +2 -1
  162. synth_ai/lm/vendors/supported/openrouter.py +11 -9
  163. synth_ai/lm/vendors/synth_client.py +69 -63
  164. synth_ai/lm/warmup.py +8 -7
  165. synth_ai/tracing/__init__.py +22 -10
  166. synth_ai/tracing_v1/__init__.py +22 -20
  167. synth_ai/tracing_v3/__init__.py +7 -7
  168. synth_ai/tracing_v3/abstractions.py +56 -52
  169. synth_ai/tracing_v3/config.py +4 -2
  170. synth_ai/tracing_v3/db_config.py +6 -8
  171. synth_ai/tracing_v3/decorators.py +29 -30
  172. synth_ai/tracing_v3/examples/basic_usage.py +12 -12
  173. synth_ai/tracing_v3/hooks.py +21 -21
  174. synth_ai/tracing_v3/llm_call_record_helpers.py +85 -98
  175. synth_ai/tracing_v3/lm_call_record_abstractions.py +2 -4
  176. synth_ai/tracing_v3/migration_helper.py +3 -5
  177. synth_ai/tracing_v3/replica_sync.py +30 -32
  178. synth_ai/tracing_v3/session_tracer.py +35 -29
  179. synth_ai/tracing_v3/storage/__init__.py +1 -1
  180. synth_ai/tracing_v3/storage/base.py +8 -7
  181. synth_ai/tracing_v3/storage/config.py +4 -4
  182. synth_ai/tracing_v3/storage/factory.py +4 -4
  183. synth_ai/tracing_v3/storage/utils.py +9 -9
  184. synth_ai/tracing_v3/turso/__init__.py +3 -3
  185. synth_ai/tracing_v3/turso/daemon.py +9 -9
  186. synth_ai/tracing_v3/turso/manager.py +60 -48
  187. synth_ai/tracing_v3/turso/models.py +24 -19
  188. synth_ai/tracing_v3/utils.py +5 -5
  189. synth_ai/tui/__main__.py +1 -1
  190. synth_ai/tui/cli/query_experiments.py +2 -3
  191. synth_ai/tui/cli/query_experiments_v3.py +2 -3
  192. synth_ai/tui/dashboard.py +97 -86
  193. synth_ai/v0/tracing/abstractions.py +28 -28
  194. synth_ai/v0/tracing/base_client.py +9 -9
  195. synth_ai/v0/tracing/client_manager.py +7 -7
  196. synth_ai/v0/tracing/config.py +7 -7
  197. synth_ai/v0/tracing/context.py +6 -6
  198. synth_ai/v0/tracing/decorators.py +6 -5
  199. synth_ai/v0/tracing/events/manage.py +1 -1
  200. synth_ai/v0/tracing/events/store.py +5 -4
  201. synth_ai/v0/tracing/immediate_client.py +4 -5
  202. synth_ai/v0/tracing/local.py +3 -3
  203. synth_ai/v0/tracing/log_client_base.py +4 -5
  204. synth_ai/v0/tracing/retry_queue.py +5 -6
  205. synth_ai/v0/tracing/trackers.py +25 -25
  206. synth_ai/v0/tracing/upload.py +6 -0
  207. synth_ai/v0/tracing_v1/__init__.py +1 -1
  208. synth_ai/v0/tracing_v1/abstractions.py +28 -28
  209. synth_ai/v0/tracing_v1/base_client.py +9 -9
  210. synth_ai/v0/tracing_v1/client_manager.py +7 -7
  211. synth_ai/v0/tracing_v1/config.py +7 -7
  212. synth_ai/v0/tracing_v1/context.py +6 -6
  213. synth_ai/v0/tracing_v1/decorators.py +7 -6
  214. synth_ai/v0/tracing_v1/events/manage.py +1 -1
  215. synth_ai/v0/tracing_v1/events/store.py +5 -4
  216. synth_ai/v0/tracing_v1/immediate_client.py +4 -5
  217. synth_ai/v0/tracing_v1/local.py +3 -3
  218. synth_ai/v0/tracing_v1/log_client_base.py +4 -5
  219. synth_ai/v0/tracing_v1/retry_queue.py +5 -6
  220. synth_ai/v0/tracing_v1/trackers.py +25 -25
  221. synth_ai/v0/tracing_v1/upload.py +25 -24
  222. synth_ai/zyk/__init__.py +1 -0
  223. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/METADATA +1 -11
  224. synth_ai-0.2.4.dev7.dist-info/RECORD +299 -0
  225. synth_ai-0.2.4.dev6.dist-info/RECORD +0 -299
  226. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/WHEEL +0 -0
  227. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/entry_points.txt +0 -0
  228. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/licenses/LICENSE +0 -0
  229. {synth_ai-0.2.4.dev6.dist-info → synth_ai-0.2.4.dev7.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,8 @@
1
- from typing import Any, Collection, Optional
1
+ from collections.abc import Collection
2
2
  from dataclasses import dataclass
3
- from synth_ai.environments.tasks.core import TaskInstanceMetadataFilter, TaskInstance
3
+ from typing import Any
4
+
5
+ from synth_ai.environments.tasks.core import TaskInstance, TaskInstanceMetadataFilter
4
6
 
5
7
 
6
8
  @dataclass
@@ -18,8 +20,8 @@ class ValueFilter(TaskInstanceMetadataFilter):
18
20
  @dataclass
19
21
  class RangeFilter(TaskInstanceMetadataFilter):
20
22
  key: str
21
- min_val: Optional[float] = None
22
- max_val: Optional[float] = None
23
+ min_val: float | None = None
24
+ max_val: float | None = None
23
25
 
24
26
  def __call__(self, instance: TaskInstance) -> bool:
25
27
  instance_value = getattr(instance.metadata, self.key, None)
@@ -2,17 +2,19 @@
2
2
  Utility functions and generic filters for taskset creation.
3
3
  """
4
4
 
5
- from typing import Any, Collection, Optional, List, Set
5
+ from collections.abc import Collection
6
+ from typing import Any
6
7
  from uuid import UUID, uuid4
8
+
7
9
  from synth_ai.environments.tasks.core import (
8
- TaskInstanceMetadataFilter,
9
- TaskInstanceSet,
10
10
  SplitInfo,
11
11
  TaskInstance,
12
+ TaskInstanceMetadataFilter,
13
+ TaskInstanceSet,
12
14
  )
13
15
 
14
16
 
15
- def parse_or_new_uuid(raw_id: Optional[str]) -> UUID:
17
+ def parse_or_new_uuid(raw_id: str | None) -> UUID:
16
18
  """
17
19
  Parse a raw ID string into a UUID, or generate a new one if invalid or missing.
18
20
  """
@@ -43,8 +45,8 @@ class RangeFilter(TaskInstanceMetadataFilter):
43
45
  def __init__(
44
46
  self,
45
47
  key: str,
46
- min_value: Optional[float] = None,
47
- max_value: Optional[float] = None,
48
+ min_value: float | None = None,
49
+ max_value: float | None = None,
48
50
  ):
49
51
  self.key = key
50
52
  self.min_value = min_value
@@ -62,15 +64,15 @@ class RangeFilter(TaskInstanceMetadataFilter):
62
64
  def make_taskset(
63
65
  name: str,
64
66
  description: str,
65
- instances: List[TaskInstance],
66
- val_filter: Optional[TaskInstanceMetadataFilter] = None,
67
- test_filter: Optional[TaskInstanceMetadataFilter] = None,
67
+ instances: list[TaskInstance],
68
+ val_filter: TaskInstanceMetadataFilter | None = None,
69
+ test_filter: TaskInstanceMetadataFilter | None = None,
68
70
  ) -> TaskInstanceSet:
69
71
  """
70
72
  Assemble a TaskInstanceSet by applying optional validation and test filters.
71
73
  """
72
- val_ids: Set[Any] = set()
73
- test_ids: Set[Any] = set()
74
+ val_ids: set[Any] = set()
75
+ test_ids: set[Any] = set()
74
76
  if val_filter:
75
77
  val_ids = {inst.id for inst in instances if val_filter(inst)}
76
78
  if test_filter:
synth_ai/evals/base.py CHANGED
@@ -1,9 +1,8 @@
1
- from typing import List
2
1
 
3
2
 
4
3
  class Judgement:
5
4
  def __init__(
6
- self, criteria: str, score: float, reasoning: str = "", evidence: List[str] = None
5
+ self, criteria: str, score: float, reasoning: str = "", evidence: list[str] = None
7
6
  ):
8
7
  self.criteria = criteria
9
8
  self.score = score
@@ -12,5 +11,5 @@ class Judgement:
12
11
 
13
12
 
14
13
  class BaseEval:
15
- async def run(self, data: any) -> List[Judgement]:
14
+ async def run(self, data: any) -> list[Judgement]:
16
15
  pass
@@ -1,5 +1,5 @@
1
-
2
- """
1
+ # ruff: noqa
2
+ '''
3
3
  Synth OSS Integration Module
4
4
 
5
5
  This module provides integration with Synth's open-source inference and training APIs
@@ -336,7 +336,7 @@ Implementation sketch (backend == "synth")
336
336
  The method is a *no-op* for the default (OpenAI) backend so existing code keeps
337
337
  working.
338
338
 
339
- """
339
+ '''
340
340
 
341
341
 
342
342
  """
@@ -443,4 +443,4 @@ async def warmup(
443
443
  So: **the existing endpoint does not yet support GPU selection; we need to add
444
444
  the small change above on the `learning_v2` side and then LM.warmup can request
445
445
  specific GPUs.**
446
- """
446
+ """
@@ -1,4 +1,2 @@
1
-
2
-
3
1
  class OfflineGateway:
4
- pass
2
+ pass
@@ -18,16 +18,15 @@ from __future__ import annotations
18
18
  import asyncio
19
19
  import os
20
20
  import random
21
- from typing import List, Dict, Any, Tuple
21
+ from typing import Any
22
22
 
23
- from dotenv import load_dotenv
24
23
  from datasets import load_dataset
25
-
24
+ from dotenv import load_dotenv
26
25
  from synth_ai.lm.core.main_v3 import LM, build_messages
27
26
  from synth_ai.lm.overrides import LMOverridesContext
28
27
 
29
28
 
30
- async def classify_one(lm: LM, text: str, label_names: List[str]) -> str:
29
+ async def classify_one(lm: LM, text: str, label_names: list[str]) -> str:
31
30
  labels_joined = ", ".join(label_names)
32
31
  system_message = (
33
32
  "You are an intent classifier for the Banking77 dataset. "
@@ -41,7 +40,7 @@ async def classify_one(lm: LM, text: str, label_names: List[str]) -> str:
41
40
  return (resp.raw_response or "").strip()
42
41
 
43
42
 
44
- def choose_label(pred: str, label_names: List[str]) -> str:
43
+ def choose_label(pred: str, label_names: list[str]) -> str:
45
44
  norm_pred = pred.strip().lower()
46
45
  label_lookup = {ln.lower(): ln for ln in label_names}
47
46
  mapped = label_lookup.get(norm_pred)
@@ -56,12 +55,18 @@ def choose_label(pred: str, label_names: List[str]) -> str:
56
55
  return max(label_names, key=score)
57
56
 
58
57
 
59
- async def eval_context(lm: LM, items: List[Tuple[str, str]], label_names: List[str], ctx_name: str, specs: List[Dict[str, Any]]) -> Tuple[str, int, int]:
58
+ async def eval_context(
59
+ lm: LM,
60
+ items: list[tuple[str, str]],
61
+ label_names: list[str],
62
+ ctx_name: str,
63
+ specs: list[dict[str, Any]],
64
+ ) -> tuple[str, int, int]:
60
65
  correct = 0
61
66
  with LMOverridesContext(specs):
62
67
  tasks = [classify_one(lm, text, label_names) for text, _ in items]
63
68
  results = await asyncio.gather(*tasks, return_exceptions=True)
64
- for (text, gold), pred in zip(items, results):
69
+ for (text, gold), pred in zip(items, results, strict=False):
65
70
  if isinstance(pred, Exception):
66
71
  # Treat exceptions as incorrect
67
72
  continue
@@ -81,7 +86,7 @@ async def main() -> None:
81
86
 
82
87
  print("Loading Banking77 dataset (split='test')...")
83
88
  ds = load_dataset("banking77", split="test")
84
- label_names: List[str] = ds.features["label"].names # type: ignore
89
+ label_names: list[str] = ds.features["label"].names # type: ignore
85
90
 
86
91
  idxs = random.sample(range(len(ds)), k=min(n, len(ds)))
87
92
  items = [
@@ -90,7 +95,7 @@ async def main() -> None:
90
95
  ]
91
96
 
92
97
  # Define a few override contexts to compare
93
- contexts: List[Dict[str, Any]] = [
98
+ contexts: list[dict[str, Any]] = [
94
99
  {
95
100
  "name": "baseline (no overrides)",
96
101
  "overrides": [],
@@ -145,7 +150,7 @@ async def main() -> None:
145
150
  print(f"\nEvaluating {len(contexts)} contexts on {len(items)} Banking77 samples (async)...")
146
151
 
147
152
  # Evaluate each context sequentially but batched (each context classifies in parallel)
148
- results: List[Tuple[str, int, int]] = []
153
+ results: list[tuple[str, int, int]] = []
149
154
  for ctx in contexts:
150
155
  name = ctx["name"]
151
156
  specs = ctx["overrides"]
@@ -27,18 +27,17 @@ from __future__ import annotations
27
27
  import asyncio
28
28
  import os
29
29
  import random
30
- from typing import Any, Dict, List, Optional
31
30
 
32
31
  from datasets import load_dataset
33
32
 
34
33
  # Use the v3 LM class present in this repo
35
34
  from synth_ai.lm.core.main_v3 import LM, build_messages
36
- from synth_ai.tracing_v3.session_tracer import SessionTracer
37
- from synth_ai.tracing_v3.abstractions import LMCAISEvent
38
-
39
35
 
40
36
  # Use Overrides context to demonstrate matching by content
41
37
  from synth_ai.lm.overrides import LMOverridesContext
38
+ from synth_ai.tracing_v3.abstractions import LMCAISEvent
39
+ from synth_ai.tracing_v3.session_tracer import SessionTracer
40
+
42
41
  INJECTION_RULES = [
43
42
  {"find": "accnt", "replace": "account"},
44
43
  {"find": "atm", "replace": "ATM"},
@@ -46,7 +45,7 @@ INJECTION_RULES = [
46
45
  ]
47
46
 
48
47
 
49
- async def classify_sample(lm: LM, text: str, label_names: List[str]) -> str:
48
+ async def classify_sample(lm: LM, text: str, label_names: list[str]) -> str:
50
49
  """Classify one Banking77 utterance and return the predicted label name."""
51
50
  labels_joined = ", ".join(label_names)
52
51
  system_message = (
@@ -77,7 +76,7 @@ async def main() -> None:
77
76
  # Columns: {"text": str, "label": int}; label names at ds.features["label"].names
78
77
  print("Loading Banking77 dataset (split='test')...")
79
78
  ds = load_dataset("banking77", split="test")
80
- label_names: List[str] = ds.features["label"].names # type: ignore
79
+ label_names: list[str] = ds.features["label"].names # type: ignore
81
80
 
82
81
  # Sample a few items for a quick demo
83
82
  n = int(os.getenv("N_SAMPLES", "8"))
@@ -116,7 +115,9 @@ async def main() -> None:
116
115
 
117
116
  is_correct = pred_label == gold_label
118
117
  correct += int(is_correct)
119
- print(f"[{i}] text={text!r}\n gold={gold_label}\n pred={pred} -> mapped={pred_label} {'✅' if is_correct else '❌'}")
118
+ print(
119
+ f"[{i}] text={text!r}\n gold={gold_label}\n pred={pred} -> mapped={pred_label} {'✅' if is_correct else '❌'}"
120
+ )
120
121
 
121
122
  if idxs:
122
123
  acc = correct / len(idxs)
@@ -137,7 +138,11 @@ async def main() -> None:
137
138
  with LMOverridesContext([{"match": {"contains": "atm"}, "injection_rules": INJECTION_RULES}]):
138
139
  _ = await classify_sample(lm_traced, test_text, label_names)
139
140
  # inspect trace
140
- events = [e for e in (tracer.current_session.event_history if tracer.current_session else []) if isinstance(e, LMCAISEvent)]
141
+ events = [
142
+ e
143
+ for e in (tracer.current_session.event_history if tracer.current_session else [])
144
+ if isinstance(e, LMCAISEvent)
145
+ ]
141
146
  assert events, "No LMCAISEvent recorded by SessionTracer"
142
147
  cr = events[-1].call_records[0]
143
148
  traced_user = ""
@@ -145,7 +150,7 @@ async def main() -> None:
145
150
  if m.role == "user":
146
151
  for part in m.parts:
147
152
  if getattr(part, "type", None) == "text":
148
- traced_user += (part.text or "")
153
+ traced_user += part.text or ""
149
154
  assert "ATM" in traced_user, f"Expected substitution in traced prompt; got: {traced_user!r}"
150
155
  print("LM path trace verified: substitution present in traced prompt.")
151
156
  await tracer.end_timestep()
@@ -155,7 +160,7 @@ async def main() -> None:
155
160
  try:
156
161
  import synth_ai.lm.provider_support.openai as _synth_openai_patch # noqa: F401
157
162
  from openai import AsyncOpenAI
158
- from datasets import load_dataset as _ld # ensure datasets present
163
+
159
164
  base_url = os.getenv("OPENAI_BASE_URL", "https://api.groq.com/openai/v1")
160
165
  api_key = os.getenv("OPENAI_API_KEY") or os.getenv("GROQ_API_KEY") or ""
161
166
  client = AsyncOpenAI(base_url=base_url, api_key=api_key)
@@ -163,8 +168,12 @@ async def main() -> None:
163
168
  {"role": "system", "content": "Echo user label."},
164
169
  {"role": "user", "content": f"Please classify: {test_text}"},
165
170
  ]
166
- with LMOverridesContext([{"match": {"contains": "atm"}, "injection_rules": INJECTION_RULES}]):
167
- resp = await client.chat.completions.create(model=model, messages=messages, temperature=0)
171
+ with LMOverridesContext(
172
+ [{"match": {"contains": "atm"}, "injection_rules": INJECTION_RULES}]
173
+ ):
174
+ _ = await client.chat.completions.create(
175
+ model=model, messages=messages, temperature=0
176
+ )
168
177
  # Not all models echo input; instead, verify that our injected expectation matches
169
178
  expected_user = messages[1]["content"].replace("atm", "ATM")
170
179
  if messages[1]["content"] == expected_user:
@@ -176,13 +185,16 @@ async def main() -> None:
176
185
 
177
186
  # 3) Anthropic wrapper path (AsyncClient): ensure apply_injection is active
178
187
  try:
179
- import synth_ai.lm.provider_support.anthropic as _synth_anthropic_patch # noqa: F401
180
188
  import anthropic
189
+ import synth_ai.lm.provider_support.anthropic as _synth_anthropic_patch # noqa: F401
190
+
181
191
  a_model = os.getenv("ANTHROPIC_MODEL", "claude-3-5-haiku-20241022")
182
192
  a_key = os.getenv("ANTHROPIC_API_KEY")
183
193
  if a_key:
184
194
  a_client = anthropic.AsyncClient(api_key=a_key)
185
- with LMOverridesContext([{"match": {"contains": "atm"}, "injection_rules": INJECTION_RULES}]):
195
+ with LMOverridesContext(
196
+ [{"match": {"contains": "atm"}, "injection_rules": INJECTION_RULES}]
197
+ ):
186
198
  _ = await a_client.messages.create(
187
199
  model=a_model,
188
200
  system="Echo user label.",
@@ -20,9 +20,9 @@ Notes
20
20
  from __future__ import annotations
21
21
 
22
22
  import random
23
+ from collections.abc import Callable, Sequence
23
24
  from dataclasses import dataclass, replace
24
- from typing import Any, Callable, Dict, Iterable, List, Optional, Protocol, Sequence, Tuple
25
-
25
+ from typing import Any, Protocol
26
26
 
27
27
  # ---------------------------
28
28
  # Program adapter and protocols
@@ -36,16 +36,16 @@ class PredictProgram(Protocol):
36
36
  by wrapping it with `ProgramAdapter` below.
37
37
  """
38
38
 
39
- def deepcopy(self) -> "PredictProgram": ...
39
+ def deepcopy(self) -> PredictProgram: ...
40
40
 
41
- def run(self, x: Any, *, model: Optional[Any] = None) -> Any: ...
41
+ def run(self, x: Any, *, model: Any | None = None) -> Any: ...
42
42
 
43
- def with_instructions(self, instructions: Dict[str, str]) -> "PredictProgram": ...
43
+ def with_instructions(self, instructions: dict[str, str]) -> PredictProgram: ...
44
44
 
45
- def with_demos(self, demos: List[Tuple[Any, Any]]) -> "PredictProgram": ...
45
+ def with_demos(self, demos: list[tuple[Any, Any]]) -> PredictProgram: ...
46
46
 
47
47
  @property
48
- def predictors(self) -> List[str]: ...
48
+ def predictors(self) -> list[str]: ...
49
49
 
50
50
 
51
51
  @dataclass
@@ -59,28 +59,28 @@ class ProgramAdapter:
59
59
  - set_demos: Callable to update demos (global or per predictor)
60
60
  """
61
61
 
62
- run_fn: Callable[[Any, Optional[Any]], Any]
63
- state: Dict[str, Any]
64
- _predictors: List[str]
65
- set_instructions: Callable[[Dict[str, str], Dict[str, Any]], Dict[str, Any]]
66
- set_demos: Callable[[List[Tuple[Any, Any]], Dict[str, Any]], Dict[str, Any]]
62
+ run_fn: Callable[[Any, Any | None], Any]
63
+ state: dict[str, Any]
64
+ _predictors: list[str]
65
+ set_instructions: Callable[[dict[str, str], dict[str, Any]], dict[str, Any]]
66
+ set_demos: Callable[[list[tuple[Any, Any]], dict[str, Any]], dict[str, Any]]
67
67
 
68
- def deepcopy(self) -> "ProgramAdapter":
68
+ def deepcopy(self) -> ProgramAdapter:
69
69
  return replace(self, state={**self.state})
70
70
 
71
- def run(self, x: Any, *, model: Optional[Any] = None) -> Any:
71
+ def run(self, x: Any, *, model: Any | None = None) -> Any:
72
72
  return self.run_fn(x, model)
73
73
 
74
- def with_instructions(self, instructions: Dict[str, str]) -> "ProgramAdapter":
74
+ def with_instructions(self, instructions: dict[str, str]) -> ProgramAdapter:
75
75
  new_state = self.set_instructions(instructions, {**self.state})
76
76
  return replace(self, state=new_state)
77
77
 
78
- def with_demos(self, demos: List[Tuple[Any, Any]]) -> "ProgramAdapter":
78
+ def with_demos(self, demos: list[tuple[Any, Any]]) -> ProgramAdapter:
79
79
  new_state = self.set_demos(demos, {**self.state})
80
80
  return replace(self, state=new_state)
81
81
 
82
82
  @property
83
- def predictors(self) -> List[str]:
83
+ def predictors(self) -> list[str]:
84
84
  return list(self._predictors)
85
85
 
86
86
 
@@ -89,9 +89,11 @@ class ProgramAdapter:
89
89
  # ---------------------------
90
90
 
91
91
 
92
- def summarize_dataset(trainset: Sequence[Tuple[Any, Any]], max_items: int = 50) -> str:
92
+ def summarize_dataset(trainset: Sequence[tuple[Any, Any]], max_items: int = 50) -> str:
93
93
  n = len(trainset)
94
- ex = ", ".join(repr(trainset[i][0])[:40] for i in range(0, min(max_items, n), max(1, n // max_items or 1)))
94
+ ex = ", ".join(
95
+ repr(trainset[i][0])[:40] for i in range(0, min(max_items, n), max(1, n // max_items or 1))
96
+ )
95
97
  return f"Dataset size: {n}. Example inputs: {ex}"
96
98
 
97
99
 
@@ -109,7 +111,7 @@ def random_tip(rng: random.Random) -> str:
109
111
  return rng.choice(tips)
110
112
 
111
113
 
112
- def choose(items: Sequence[Any], rng: Optional[random.Random] = None) -> Any:
114
+ def choose(items: Sequence[Any], rng: random.Random | None = None) -> Any:
113
115
  r = rng or random
114
116
  return r.choice(items)
115
117
 
@@ -122,10 +124,12 @@ def choose(items: Sequence[Any], rng: Optional[random.Random] = None) -> Any:
122
124
  @dataclass
123
125
  class EvalResult:
124
126
  score: float
125
- subscores: List[float]
127
+ subscores: list[float]
126
128
 
127
129
 
128
- def evaluate_program(program: PredictProgram, dataset: Sequence[Tuple[Any, Any]], metric: Callable[[Any, Any], float]) -> EvalResult:
130
+ def evaluate_program(
131
+ program: PredictProgram, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
132
+ ) -> EvalResult:
129
133
  subs = []
130
134
  for x, y in dataset:
131
135
  yhat = program.run(x)
@@ -140,8 +144,8 @@ def evaluate_program(program: PredictProgram, dataset: Sequence[Tuple[Any, Any]]
140
144
 
141
145
  def mipro_v2_compile(
142
146
  student: PredictProgram,
143
- trainset: Sequence[Tuple[Any, Any]],
144
- valset: Sequence[Tuple[Any, Any]],
147
+ trainset: Sequence[tuple[Any, Any]],
148
+ valset: Sequence[tuple[Any, Any]],
145
149
  metric: Callable[[Any, Any], float],
146
150
  *,
147
151
  prompt_model: Any,
@@ -159,7 +163,7 @@ def mipro_v2_compile(
159
163
  data_aware: bool = True,
160
164
  tip_aware: bool = True,
161
165
  fewshot_aware: bool = True,
162
- ) -> Tuple[PredictProgram, List[Dict[str, Any]]]:
166
+ ) -> tuple[PredictProgram, list[dict[str, Any]]]:
163
167
  """MIPROv2-style optimizer.
164
168
 
165
169
  Arguments mirror the DSPy pseudocode but remain provider-agnostic. The
@@ -171,9 +175,9 @@ def mipro_v2_compile(
171
175
  program = student.deepcopy()
172
176
 
173
177
  # Step 1: bootstrap few-shot example candidates
174
- demo_candidates: List[Dict[str, Any]] = []
178
+ demo_candidates: list[dict[str, Any]] = []
175
179
  for _ in range(num_candidates):
176
- boot: List[Tuple[Any, Any]] = []
180
+ boot: list[tuple[Any, Any]] = []
177
181
  # collect bootstrapped, self-consistent demos
178
182
  while len(boot) < max_bootstrapped_demos:
179
183
  x, y = rng.choice(trainset)
@@ -184,9 +188,9 @@ def mipro_v2_compile(
184
188
  demo_candidates.append({"boot": boot, "labeled": labeled})
185
189
 
186
190
  # Step 2: propose instruction candidates per predictor
187
- instr_candidates: Dict[str, List[str]] = {}
188
- for pred in (program.predictors or ["predictor"]):
189
- ctx: Dict[str, Any] = {}
191
+ instr_candidates: dict[str, list[str]] = {}
192
+ for pred in program.predictors or ["predictor"]:
193
+ ctx: dict[str, Any] = {}
190
194
  if data_aware:
191
195
  ctx["dataset_summary"] = summarize_dataset(trainset)
192
196
  if program_aware:
@@ -199,12 +203,12 @@ def mipro_v2_compile(
199
203
  instr_candidates[pred] = list(cand)
200
204
 
201
205
  # Step 3: Bayesian-optimization-like search (random proposer placeholder)
202
- history: List[Tuple[Dict[str, Any], float]] = []
203
- records: List[Dict[str, Any]] = []
206
+ history: list[tuple[dict[str, Any], float]] = []
207
+ records: list[dict[str, Any]] = []
204
208
  best_score = -1.0
205
- best_cfg: Optional[Dict[str, Any]] = None
209
+ best_cfg: dict[str, Any] | None = None
206
210
 
207
- def propose(history_: List[Tuple[Dict[str, Any], float]]) -> Dict[str, Any]:
211
+ def propose(history_: list[tuple[dict[str, Any], float]]) -> dict[str, Any]:
208
212
  # Placeholder: randomly sample from the cartesian product
209
213
  instructions = {pred: choose(instr_candidates[pred], rng) for pred in instr_candidates}
210
214
  demos = choose(demo_candidates, rng) if demo_candidates else None
@@ -227,15 +231,17 @@ def mipro_v2_compile(
227
231
  batch_res = evaluate_program(program_t, batch, metric)
228
232
  s_t = batch_res.score
229
233
  history.append((theta, s_t))
230
- records.append({
231
- "trial": t,
232
- "evaluation": "batch" if minibatch else "full",
233
- "score": s_t,
234
- "intervention": {
235
- "instructions": theta.get("instructions"),
236
- "demo_set": theta.get("demo_set"),
237
- },
238
- })
234
+ records.append(
235
+ {
236
+ "trial": t,
237
+ "evaluation": "batch" if minibatch else "full",
238
+ "score": s_t,
239
+ "intervention": {
240
+ "instructions": theta.get("instructions"),
241
+ "demo_set": theta.get("demo_set"),
242
+ },
243
+ }
244
+ )
239
245
 
240
246
  if (not minibatch) or (t % max(1, minibatch_full_eval_steps) == 0):
241
247
  full_res = evaluate_program(program_t, valset, metric)
@@ -243,15 +249,17 @@ def mipro_v2_compile(
243
249
  if s_full > best_score:
244
250
  best_score = s_full
245
251
  best_cfg = theta
246
- records.append({
247
- "trial": t,
248
- "evaluation": "full",
249
- "score": s_full,
250
- "intervention": {
251
- "instructions": theta.get("instructions"),
252
- "demo_set": theta.get("demo_set"),
253
- },
254
- })
252
+ records.append(
253
+ {
254
+ "trial": t,
255
+ "evaluation": "full",
256
+ "score": s_full,
257
+ "intervention": {
258
+ "instructions": theta.get("instructions"),
259
+ "demo_set": theta.get("demo_set"),
260
+ },
261
+ }
262
+ )
255
263
 
256
264
  if best_cfg is None:
257
265
  return program, records
@@ -275,6 +283,7 @@ __all__ = [
275
283
  class ExampleTwoStepDag:
276
284
  pass
277
285
 
286
+
278
287
  """
279
288
  A -> B
280
289
  """