synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic; consult the advisory on the package registry's page for this release for more details.

Files changed (351)
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +23 -17
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/configs/eval_base_qwen.toml +1 -1
  19. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  20. examples/rl/download_dataset.py +26 -10
  21. examples/rl/run_eval.py +53 -52
  22. examples/rl/run_rl_and_save.py +29 -12
  23. examples/rl/task_app/math_single_step.py +180 -41
  24. examples/rl/task_app/math_task_app.py +14 -6
  25. examples/sft/README.md +139 -0
  26. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  27. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  28. examples/sft/evaluate.py +117 -0
  29. examples/sft/export_dataset.py +117 -0
  30. examples/sft/generate_traces.py +162 -0
  31. examples/swe/__init__.py +12 -0
  32. examples/swe/task_app/README.md +105 -0
  33. examples/swe/task_app/__init__.py +2 -0
  34. examples/swe/task_app/grpo_swe_mini.py +571 -0
  35. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  36. examples/swe/task_app/hosted/README.md +173 -0
  37. examples/swe/task_app/hosted/__init__.py +5 -0
  38. examples/swe/task_app/hosted/branching.py +143 -0
  39. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  40. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  41. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  42. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  43. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  44. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  45. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  46. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  47. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  50. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  51. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  52. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  53. examples/swe/task_app/hosted/hosted_app.py +204 -0
  54. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  55. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  56. examples/swe/task_app/hosted/main.py +100 -0
  57. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  58. examples/swe/task_app/hosted/registry.py +195 -0
  59. examples/swe/task_app/hosted/rollout.py +1869 -0
  60. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  61. examples/swe/task_app/hosted/storage/volume.py +211 -0
  62. examples/swe/task_app/hosted/test_agents.py +161 -0
  63. examples/swe/task_app/hosted/test_service.py +137 -0
  64. examples/swe/task_app/hosted/utils.py +62 -0
  65. examples/vlm/README.md +68 -0
  66. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  67. examples/vlm/crafter_image_only_agent.py +207 -0
  68. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  69. examples/vlm/filter_image_rows.py +63 -0
  70. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  71. examples/warming_up_to_rl/analyze_trace_db.py +12 -10
  72. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  73. examples/warming_up_to_rl/export_trace_sft.py +218 -36
  74. examples/warming_up_to_rl/groq_test.py +15 -8
  75. examples/warming_up_to_rl/manage_secrets.py +29 -25
  76. examples/warming_up_to_rl/readme.md +9 -2
  77. examples/warming_up_to_rl/run_eval.py +137 -61
  78. examples/warming_up_to_rl/run_fft_and_save.py +131 -60
  79. examples/warming_up_to_rl/run_local_rollout.py +88 -39
  80. examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
  81. examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
  82. examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
  83. examples/warming_up_to_rl/run_rl_and_save.py +35 -12
  84. examples/warming_up_to_rl/run_rollout_remote.py +44 -19
  85. examples/warming_up_to_rl/task_app/README.md +6 -2
  86. examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
  87. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  111. synth/__init__.py +14 -0
  112. synth_ai/__init__.py +20 -4
  113. synth_ai/api/models/supported.py +376 -0
  114. synth_ai/api/train/builders.py +157 -26
  115. synth_ai/api/train/cli.py +213 -57
  116. synth_ai/api/train/config_finder.py +65 -5
  117. synth_ai/api/train/env_resolver.py +33 -15
  118. synth_ai/api/train/pollers.py +13 -4
  119. synth_ai/api/train/supported_algos.py +139 -0
  120. synth_ai/api/train/task_app.py +5 -3
  121. synth_ai/api/train/utils.py +33 -48
  122. synth_ai/cli/__init__.py +19 -4
  123. synth_ai/cli/_modal_wrapper.py +28 -0
  124. synth_ai/cli/_typer_patch.py +49 -0
  125. synth_ai/cli/balance.py +2 -3
  126. synth_ai/cli/calc.py +1 -1
  127. synth_ai/cli/demo.py +21 -6
  128. synth_ai/cli/recent.py +2 -2
  129. synth_ai/cli/rl_demo.py +77 -17
  130. synth_ai/cli/root.py +116 -39
  131. synth_ai/cli/status.py +2 -2
  132. synth_ai/cli/task_apps.py +1699 -259
  133. synth_ai/cli/traces.py +7 -4
  134. synth_ai/cli/turso.py +73 -0
  135. synth_ai/cli/watch.py +12 -18
  136. synth_ai/core/experiment.py +0 -2
  137. synth_ai/demo_registry.py +68 -31
  138. synth_ai/demos/core/cli.py +516 -194
  139. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  140. synth_ai/demos/demo_task_apps/core.py +64 -28
  141. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  142. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
  143. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  144. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  145. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  146. synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
  147. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  148. synth_ai/environments/examples/bandit/engine.py +12 -4
  149. synth_ai/environments/examples/bandit/taskset.py +4 -4
  150. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  151. synth_ai/environments/reproducibility/tree.py +5 -6
  152. synth_ai/environments/service/app.py +11 -12
  153. synth_ai/environments/service/core_routes.py +10 -9
  154. synth_ai/environments/stateful/engine.py +1 -1
  155. synth_ai/environments/tasks/core.py +1 -0
  156. synth_ai/environments/tasks/filters.py +5 -6
  157. synth_ai/environments/tasks/utils.py +4 -5
  158. synth_ai/evals/base.py +0 -2
  159. synth_ai/handshake.py +11 -9
  160. synth_ai/http.py +1 -1
  161. synth_ai/http_client.py +43 -11
  162. synth_ai/inference/__init__.py +0 -2
  163. synth_ai/inference/client.py +20 -6
  164. synth_ai/jobs/client.py +103 -78
  165. synth_ai/learning/__init__.py +41 -6
  166. synth_ai/learning/algorithms.py +14 -0
  167. synth_ai/learning/client.py +121 -29
  168. synth_ai/learning/config.py +2 -40
  169. synth_ai/learning/constants.py +0 -2
  170. synth_ai/learning/ft_client.py +4 -56
  171. synth_ai/learning/health.py +13 -7
  172. synth_ai/learning/jobs.py +43 -47
  173. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  174. synth_ai/learning/rl/client.py +267 -0
  175. synth_ai/learning/rl/config.py +31 -0
  176. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  177. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  178. synth_ai/learning/rl/secrets.py +13 -0
  179. synth_ai/learning/rl_client.py +2 -253
  180. synth_ai/learning/sft/__init__.py +29 -0
  181. synth_ai/learning/sft/client.py +68 -0
  182. synth_ai/learning/sft/config.py +270 -0
  183. synth_ai/learning/sft/data.py +295 -0
  184. synth_ai/learning/sse.py +25 -26
  185. synth_ai/learning/validators.py +25 -24
  186. synth_ai/lm/__init__.py +21 -47
  187. synth_ai/task/__init__.py +26 -27
  188. synth_ai/task/apps/__init__.py +18 -19
  189. synth_ai/task/auth.py +35 -23
  190. synth_ai/task/client.py +15 -13
  191. synth_ai/task/contracts.py +37 -35
  192. synth_ai/task/datasets.py +9 -6
  193. synth_ai/task/errors.py +11 -10
  194. synth_ai/task/health.py +17 -11
  195. synth_ai/task/json.py +58 -24
  196. synth_ai/task/proxy.py +15 -14
  197. synth_ai/task/rubrics.py +22 -15
  198. synth_ai/task/server.py +43 -17
  199. synth_ai/task/tracing_utils.py +12 -7
  200. synth_ai/task/validators.py +0 -1
  201. synth_ai/task/vendors.py +5 -7
  202. synth_ai/tracing_v3/__init__.py +2 -0
  203. synth_ai/tracing_v3/abstractions.py +21 -4
  204. synth_ai/tracing_v3/db_config.py +26 -1
  205. synth_ai/tracing_v3/decorators.py +18 -15
  206. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  207. synth_ai/tracing_v3/hooks.py +6 -4
  208. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  209. synth_ai/tracing_v3/replica_sync.py +1 -0
  210. synth_ai/tracing_v3/session_tracer.py +63 -16
  211. synth_ai/tracing_v3/storage/base.py +89 -1
  212. synth_ai/tracing_v3/storage/config.py +21 -8
  213. synth_ai/tracing_v3/storage/factory.py +10 -8
  214. synth_ai/tracing_v3/storage/utils.py +4 -2
  215. synth_ai/tracing_v3/turso/daemon.py +7 -2
  216. synth_ai/tracing_v3/turso/models.py +5 -2
  217. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  218. synth_ai/tracing_v3/utils.py +4 -3
  219. synth_ai/v0/api/__init__.py +8 -0
  220. synth_ai/v0/api/models/__init__.py +8 -0
  221. synth_ai/v0/api/models/supported.py +8 -0
  222. synth_ai/v0/config/__init__.py +15 -0
  223. synth_ai/v0/config/base_url.py +12 -0
  224. synth_ai/v0/lm/__init__.py +51 -0
  225. synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
  226. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  227. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  228. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  229. synth_ai/{lm → v0/lm}/config.py +6 -1
  230. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  231. synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
  232. synth_ai/{lm → v0/lm}/core/main.py +19 -7
  233. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  234. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
  235. synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
  236. synth_ai/{lm → v0/lm}/overrides.py +4 -4
  237. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  238. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  239. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  240. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  241. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
  242. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  243. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  244. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
  245. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
  246. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
  247. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
  248. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  249. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  250. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  251. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  252. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  253. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  254. synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
  255. synth_ai/v0/tracing/upload.py +32 -135
  256. synth_ai/v0/tracing_v3/__init__.py +10 -0
  257. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  258. synth_ai/v0/tracing_v3/decorators.py +3 -0
  259. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  260. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  261. synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
  262. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -262
  263. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
  264. examples/common_old/backend.py +0 -21
  265. examples/evals_old/README.md +0 -98
  266. examples/evals_old/__init__.py +0 -6
  267. examples/evals_old/compare_models.py +0 -1037
  268. examples/evals_old/example_log.md +0 -145
  269. examples/evals_old/run_demo.sh +0 -126
  270. examples/evals_old/trace_analysis.py +0 -270
  271. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  272. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  273. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  274. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
  275. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  276. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  277. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  278. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  279. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  280. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
  281. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  282. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  283. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
  284. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  285. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  286. examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
  287. examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
  288. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  289. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  290. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
  291. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
  292. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
  293. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  294. examples/finetuning_old/synth_qwen_v1/util.py +0 -147
  295. examples/rl_old/task_app.py +0 -962
  296. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  297. examples/warming_up_to_rl/old/notes.md +0 -73
  298. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  299. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  300. synth_ai/experimental/synth_oss.py +0 -446
  301. synth_ai/install_sqld.sh +0 -40
  302. synth_ai/learning/filtering.py +0 -0
  303. synth_ai/learning/offline/dpo.py +0 -0
  304. synth_ai/learning/offline/providers.py +0 -7
  305. synth_ai/learning/offline/sft.py +0 -0
  306. synth_ai/learning/offline/shared.py +0 -0
  307. synth_ai/learning/online/grpo.py +0 -0
  308. synth_ai/learning/online/irft.py +0 -0
  309. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  310. synth_ai/learning/prompts/gepa.py +0 -0
  311. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  312. synth_ai/learning/prompts/mipro.py +0 -289
  313. synth_ai/learning/prompts/random_search.py +0 -246
  314. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  315. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  316. synth_ai/rl/secrets.py +0 -19
  317. synth_ai/scripts/verify_rewards.py +0 -100
  318. synth_ai/tracing/__init__.py +0 -30
  319. synth_ai/tracing_v1/__init__.py +0 -33
  320. synth_ai/tracing_v3/turso/__init__.py +0 -25
  321. synth_ai/tracing_v3/turso/manager.py +0 -774
  322. synth_ai/zyk/__init__.py +0 -30
  323. synth_ai-0.2.9.dev5.dist-info/METADATA +0 -131
  324. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  325. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  326. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  327. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  328. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  329. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  330. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  331. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  332. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  333. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  334. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  335. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  336. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  337. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  338. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  339. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  340. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  341. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  342. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  343. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  344. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  345. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  346. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  347. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  348. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  349. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
  350. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
  351. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
@@ -1,446 +0,0 @@
1
- # ruff: noqa
2
- '''
3
- Synth OSS Integration Module
4
-
5
- This module provides integration with Synth's open-source inference and training APIs
6
- from the monorepo learning_v2 service. All APIs are OpenAI-compatible.
7
-
8
- Learning V2 APIs available for integration via lm/:
9
- """
10
-
11
- # API Configuration
12
- SYNTH_BACKEND_URL = ""
13
-
14
- # Learning V2 Modal Service URLs
15
- LEARNING_V2_URLS = {
16
- "dev": "https://synth-laboratories-dev--learning-v2-service-fastapi-app.modal.run",
17
- "prod": "https://synth-laboratories-prod--learning-v2-service-fastapi-app.modal.run",
18
- "main": "https://synth-laboratories--learning-v2-service-fastapi-app.modal.run"
19
- }
20
-
21
- # ============================================================================
22
- # HEALTH & STATUS APIS
23
- # ============================================================================
24
-
25
- HEALTH_APIS = {
26
- "basic_health": {
27
- "method": "GET",
28
- "endpoint": "/health",
29
- "description": "Basic health check",
30
- "response": {"status": "healthy"}
31
- },
32
- "detailed_health": {
33
- "method": "GET",
34
- "endpoint": "/learning/health",
35
- "description": "Detailed health check including GPU function availability",
36
- "response": {"status": "healthy", "components": {...}}
37
- }
38
- }
39
-
40
- # ============================================================================
41
- # FILE MANAGEMENT APIS
42
- # ============================================================================
43
-
44
- FILE_MANAGEMENT_APIS = {
45
- "upload_file": {
46
- "method": "POST",
47
- "endpoint": "/files",
48
- "description": "Upload a file for fine-tuning (JSONL format)",
49
- "request": "multipart/form-data with 'file' and 'purpose'='fine-tune'",
50
- "response": {
51
- "id": "file-abc123",
52
- "object": "file",
53
- "bytes": 1234,
54
- "created_at": 1638360000,
55
- "filename": "data.jsonl",
56
- "purpose": "fine-tune"
57
- }
58
- },
59
- "list_files": {
60
- "method": "GET",
61
- "endpoint": "/files",
62
- "description": "List all uploaded files",
63
- "params": {"limit": "optional"},
64
- "response": {"object": "list", "data": ["file_objects"]}
65
- },
66
- "get_file": {
67
- "method": "GET",
68
- "endpoint": "/files/{file_id}",
69
- "description": "Get file metadata by ID",
70
- "response": "Single file object with metadata"
71
- },
72
- "delete_file": {
73
- "method": "DELETE",
74
- "endpoint": "/files/{file_id}",
75
- "description": "Delete a file",
76
- "response": {"id": "file-abc123", "object": "file", "deleted": True}
77
- },
78
- "get_file_content": {
79
- "method": "GET",
80
- "endpoint": "/files/{file_id}/content",
81
- "description": "Download raw file content",
82
- "response": "Raw file content stream"
83
- }
84
- }
85
-
86
- # ============================================================================
87
- # TRAINING/FINE-TUNING APIS
88
- # ============================================================================
89
-
90
- TRAINING_APIS = {
91
- "create_training_job": {
92
- "method": "POST",
93
- "endpoint": "/fine_tuning/jobs",
94
- "description": "Create a fine-tuning job",
95
- "request": {
96
- "model": "Qwen/Qwen3-0.5B",
97
- "training_file": "file-abc123",
98
- "training_type": "sft", # or "dpo"
99
- "hyperparameters": {...},
100
- "suffix": "optional"
101
- },
102
- "response": {
103
- "object": "fine_tuning.job",
104
- "id": "ftjob-xyz789",
105
- "model": "...",
106
- "status": "validating_files",
107
- "training_file": "file-abc123",
108
- "hyperparameters": {...}
109
- }
110
- },
111
- "list_training_jobs": {
112
- "method": "GET",
113
- "endpoint": "/fine_tuning/jobs",
114
- "description": "List all training jobs",
115
- "response": {"object": "list", "data": ["job_objects"]}
116
- },
117
- "get_training_job": {
118
- "method": "GET",
119
- "endpoint": "/fine_tuning/jobs/{job_id}",
120
- "description": "Get training job status",
121
- "response": {
122
- "object": "fine_tuning.job",
123
- "id": "ftjob-xyz789",
124
- "status": "running", # or "completed", "failed", "cancelled"
125
- "fine_tuned_model": "ft:model:suffix" # when completed
126
- }
127
- },
128
- "cancel_training_job": {
129
- "method": "POST",
130
- "endpoint": "/fine_tuning/jobs/{job_id}/cancel",
131
- "description": "Cancel a running training job",
132
- "response": {"object": "fine_tuning.job", "id": "...", "status": "cancelled"}
133
- },
134
- "get_training_events": {
135
- "method": "GET",
136
- "endpoint": "/fine_tuning/jobs/{job_id}/events",
137
- "description": "Get training logs/events",
138
- "response": {
139
- "object": "list",
140
- "data": [{
141
- "object": "fine_tuning.job.event",
142
- "level": "info",
143
- "message": "Training started",
144
- "created_at": 1638360000
145
- }]
146
- }
147
- }
148
- }
149
-
150
- # ============================================================================
151
- # INFERENCE APIS
152
- # ============================================================================
153
-
154
- INFERENCE_APIS = {
155
- "chat_completions": {
156
- "method": "POST",
157
- "endpoint": "/chat/completions",
158
- "description": "OpenAI-compatible chat completions for base and fine-tuned models",
159
- "request": {
160
- "model": "Qwen/Qwen3-0.5B", # or "ft:Qwen/Qwen3-0.5B:suffix"
161
- "messages": [{"role": "user", "content": "Hello"}],
162
- "temperature": 0.7,
163
- "max_tokens": 100,
164
- "top_p": 1.0,
165
- "stream": False, # Set to True for streaming
166
- "tools": [], # For tool calling
167
- "tool_choice": "auto"
168
- },
169
- "response": {
170
- "id": "chatcmpl-123",
171
- "object": "chat.completion",
172
- "created": 1638360000,
173
- "model": "Qwen/Qwen3-0.5B",
174
- "choices": [{
175
- "index": 0,
176
- "message": {
177
- "role": "assistant",
178
- "content": "Hello! How can I help you?",
179
- "tool_calls": [] # If tools were used
180
- },
181
- "finish_reason": "stop"
182
- }],
183
- "usage": {
184
- "prompt_tokens": 10,
185
- "completion_tokens": 20,
186
- "total_tokens": 30
187
- }
188
- },
189
- "streaming": "Server-sent events with data: {...} format when stream=True"
190
- }
191
- }
192
-
193
- # ============================================================================
194
- # MODEL MANAGEMENT APIS
195
- # ============================================================================
196
-
197
- MODEL_APIS = {
198
- "list_models": {
199
- "method": "GET",
200
- "endpoint": "/models",
201
- "description": "List all available models (base and fine-tuned)",
202
- "response": {
203
- "object": "list",
204
- "data": [{
205
- "id": "Qwen/Qwen3-0.5B",
206
- "object": "model",
207
- "created": 1638360000,
208
- "owned_by": "learning_v2"
209
- }]
210
- }
211
- },
212
- "delete_model": {
213
- "method": "DELETE",
214
- "endpoint": "/models/{model_id}",
215
- "description": "Delete a fine-tuned model",
216
- "response": {"id": "ft:model:suffix", "object": "model", "deleted": True}
217
- }
218
- }
219
-
220
- # ============================================================================
221
- # SUPPORTED MODELS
222
- # ============================================================================
223
-
224
- SUPPORTED_MODELS = {
225
- "base_models": [
226
- # Qwen 3 family
227
- "Qwen/Qwen3-0.6B",
228
- "Qwen/Qwen3-1.8B",
229
- "Qwen/Qwen3-8B",
230
- "Qwen/Qwen3-14B",
231
- "Qwen/Qwen3-32B",
232
- # Qwen 2.5 family
233
- "Qwen/Qwen2.5-0.5B-Instruct",
234
- "Qwen/Qwen2.5-1.5B-Instruct",
235
- "Qwen/Qwen2.5-3B-Instruct",
236
- "Qwen/Qwen2.5-7B-Instruct",
237
- "Qwen/Qwen2.5-14B-Instruct",
238
- "Qwen/Qwen2.5-32B-Instruct",
239
- "Qwen/Qwen2.5-72B-Instruct",
240
- # OLMo 2 family
241
- "allenai/OLMo-2-0425-1B-Instruct",
242
- "allenai/OLMo-2-1124-7B-Instruct",
243
- "allenai/OLMo-2-1124-13B-Instruct"
244
- ],
245
- "training_types": ["sft", "dpo"],
246
- "gpu_types": ["A10G", "L40S", "A100", "H100"],
247
- "features": [
248
- "Tool calling",
249
- "Streaming responses",
250
- "Fine-tuning",
251
- "Multi-GPU training",
252
- "JSONL data format",
253
- "OpenAI compatibility"
254
- ]
255
- }
256
-
257
- # ============================================================================
258
- # INTEGRATION PLAN – Synth OSS
259
- # ==========================================================================
260
- """
261
- GPU & Resource Selection
262
- ------------------------
263
- Synth OSS decides the GPU based on the `ModelFamily` definition:
264
- • Each `ModelConfig` lists `inference_gpus` and `training_gpus`.
265
- • The API’s `InferenceRouter` calls `_select_gpu_for_model`, which chooses the **first recommended GPU** returned by `get_model_gpu_recommendations` (usually the `default_inference_gpu`).
266
- • By default the server picks the first recommended GPU, **but** we can request
267
- another GPU type via a custom header that the server *can* opt to honor:
268
-
269
- X-GPU-Preference: L40S # or A10G, A100, H100
270
-
271
- The current dev deployment already forwards this header to `InferenceRouter`,
272
- so adding it makes the GPU configurable without breaking existing behaviour.
273
-
274
- `InferenceConfig` therefore gets a new optional field:
275
-
276
- ```python
277
- class InferenceConfig(BaseModel):
278
- stream: bool = False
279
- gpu_preference: Optional[str] = None # "A10G", "L40S", "A100", "H100"
280
- # ...future knobs (temperature, max_tokens, etc.)
281
- ```
282
-
283
- LM will include `gpu_preference` as that header when `backend="synth"`. If the
284
- header is omitted or the value is not valid for the chosen model, the server
285
- falls back to its default selection. This keeps the API forward-compatible and
286
- provides explicit GPU control when supported.
287
-
288
- Only two parts of synth-ai need to change for Synth OSS inference:
289
-
290
- 1. LM() class (synth_ai.lm)
291
- 2. The async respond(...) coroutine on that class
292
-
293
- Extend LM with backend="synth"; when selected, issue POST requests to
294
- `${LEARNING_V2_URL}/chat/completions`, supporting both streaming and
295
- non-streaming modes and returning the same dict structure as today.
296
-
297
- Everything else (file upload, fine-tuning, model listing) lives in the
298
- `synth_ai.learning` package and does NOT affect LM:
299
-
300
- synth_ai/learning/
301
- ├─ files.py
302
- ├─ training.py
303
- ├─ models.py
304
- ├─ client.py
305
- └─ types.py
306
-
307
- Warm-up flow
308
- ~~~~~~~~~~~~
309
- `learning_v2` exposes `POST /warmup/{model_id}` and `GET /warmup/status/{model_id}`
310
- (via the Render proxy). We can exploit that to reduce first-token latency.
311
-
312
- LM API addition:
313
-
314
- ```python
315
- async def warmup(self, model: str | None = None, gpu_preference: str | None = None) -> dict:
316
- """Pre-spin the container & load weights for *model* on the requested GPU.
317
- Returns the JSON response from /warmup. If *model* is None we warm-up
318
- `self.model`.
319
- """
320
- ```
321
-
322
- Implementation sketch (backend == "synth")
323
- ------------------------------------------
324
- 1. Determine `model_id = model or self.model`.
325
- 2. Build headers:
326
- ```python
327
- headers = {}
328
- if gpu_preference:
329
- headers["X-GPU-Preference"] = gpu_preference
330
- ```
331
- 3. `POST f"{url}/warmup/{model_id}"`.
332
- 4. Optionally call `GET /warmup/status/{model_id}` in a loop until
333
- `status == "ready"` (exponential backoff) – or expose a separate
334
- `await LM.warmup_status(model)` helper.
335
-
336
- The method is a *no-op* for the default (OpenAI) backend so existing code keeps
337
- working.
338
-
339
- '''
340
-
341
-
342
- """
343
- LEARNING_v2 server-side changes required to honor `X-GPU-Preference`
344
- ====================================================================
345
- 1. **Add header extraction in API layer**
346
- • File: `backend/app/services/learning_v2/modal_service/api_openai_complete.py`
347
- • In `@app.post("/chat/completions")` add:
348
- ```python
349
- gpu_pref = req.headers.get("X-GPU-Preference")
350
- ```
351
- • Pass `gpu_pref` to `inference_router.route_completion(...)` and
352
- `route_streaming_completion(...)`.
353
-
354
- 2. **Propagate preference through `InferenceRouter`**
355
- • Update signatures of `route_completion`, `route_streaming_completion` and
356
- `_select_gpu_for_model` to accept `gpu_preference: Optional[str] = None`.
357
- • Inside `_select_gpu_for_model`:
358
- ```python
359
- if gpu_preference and gpu_preference in valid_gpu_types_for_model:
360
- return gpu_preference
361
- ```
362
- falling back to current auto-selection logic otherwise.
363
-
364
- 3. **Validate against model config**
365
- • Retrieve `model_cfg.inference_gpus` and refuse unsupported choices with
366
- `HTTP 400` ("GPU type not supported for this model").
367
-
368
- 4. **Expose preference in usage / logging**
369
- • Include the chosen GPU in the response’s `system_fingerprint` or add a
370
- field `selected_gpu` so clients can confirm the assignment.
371
- • Log header + selected GPU for observability.
372
-
373
- 5. **(Optional) Accept preference in JSON body**
374
- • If we want parity with LM’s `InferenceConfig`, extend
375
- `ChatCompletionRequest` with an optional `gpu_preference` field and treat
376
- it as a fallback if the header is missing.
377
-
378
- 6. **Update automated tests**
379
- • Modify `tests/dev/learning_v2/test_multi_gpu_inference_v2.py` to set the
380
- header and assert that the correct GPU function (`*_L40S_generate`, etc.)
381
- was invoked (can mock `get_gpu_function`).
382
-
383
- 7. **Documentation**
384
- • Add the header to `learning_v2_api_structure.txt` and README so external
385
- callers know how to request a GPU.
386
-
387
- These changes are backwards-compatible: calls without the header behave exactly
388
- as before.
389
- """
390
-
391
- """
392
- Ran tool
393
-
394
- Ran tool
395
-
396
- Read file: /Users/joshuapurtell/Documents/GitHub/monorepo/backend/app/services/learning/unified_ft_service/api.py
397
-
398
- Ran tool
399
- Read file: /Users/joshuapurtell/Documents/GitHub/monorepo/backend/app/services/learning/unified_ft_service/api.py
400
-
401
- Ran tool
402
-
403
- Warm-up today:
404
-
405
- • `POST /warmup/{model_name}` in `unified_ft_service/api.py` kicks off an async worker that:
406
- 1. Calls `get_model_routing(model_name)` → this falls back to `choose_gpu(model_id, req_gpu=None)`
407
- so no GPU can be requested.
408
- 2. Fires the routed Modal function once (“dummy request”) to cache weights.
409
-
410
- Because the handler never accepts a GPU argument and never reads `X-GPU-Preference`, the server always warms the model with its default GPU.
411
-
412
- What has to change
413
- ==================
414
-
415
- Server-side (learning_v2)
416
- 1. Add an optional header or query field
417
- `X-GPU-Preference` **or** `?gpu=A100` to `POST /warmup/{model_id}`.
418
- 2. Pass the value into `choose_gpu(model_id, req_gpu)` inside
419
- `warmup_worker()`. The helper already validates the choice, so no other
420
- change is required.
421
- 3. Expose the selected GPU in the JSON response for visibility.
422
-
423
- Client-side (synth-ai LM)
424
- ```python
425
- async def warmup(
426
- self,
427
- model: str | None = None,
428
- gpu_preference: str | None = None,
429
- wait_until_ready: bool = False,
430
- ) -> dict:
431
- mdl = model or self.model
432
- headers = {}
433
- if gpu_preference:
434
- headers["X-GPU-Preference"] = gpu_preference
435
- resp = await _client.post(f"{url}/warmup/{mdl}", headers=headers)
436
- if wait_until_ready:
437
- while resp.json()["status"] != "warmed":
438
- await asyncio.sleep(2)
439
- resp = await _client.get(f"{url}/warmup/status/{mdl}")
440
- return resp.json()
441
- ```
442
-
443
- So: **the existing endpoint does not yet support GPU selection; we need to add
444
- the small change above on the `learning_v2` side and then LM.warmup can request
445
- specific GPUs.**
446
- """
synth_ai/install_sqld.sh DELETED
@@ -1,40 +0,0 @@
1
- #!/bin/bash
2
- # Install sqld binary for Synth AI
3
-
4
- set -e
5
-
6
- SQLD_VERSION="v0.26.2"
7
- OS=$(uname -s | tr '[:upper:]' '[:lower:]')
8
- ARCH=$(uname -m)
9
-
10
- # Map architecture names
11
- case "$ARCH" in
12
- x86_64) ARCH="x86_64" ;;
13
- aarch64|arm64) ARCH="aarch64" ;;
14
- *) echo "Unsupported architecture: $ARCH"; exit 1 ;;
15
- esac
16
-
17
- # Construct download URL
18
- URL="https://github.com/tursodatabase/libsql/releases/download/libsql-server-${SQLD_VERSION}/sqld-${OS}-${ARCH}.tar.xz"
19
-
20
- echo "📥 Downloading sqld ${SQLD_VERSION} for ${OS}-${ARCH}..."
21
-
22
- # Download and extract
23
- TMP_DIR=$(mktemp -d)
24
- cd "$TMP_DIR"
25
- curl -L -o sqld.tar.xz "$URL"
26
- tar -xf sqld.tar.xz
27
-
28
- # Install to user's local bin
29
- mkdir -p ~/.local/bin
30
- mv sqld ~/.local/bin/
31
- chmod +x ~/.local/bin/sqld
32
-
33
- # Clean up
34
- cd -
35
- rm -rf "$TMP_DIR"
36
-
37
- echo "✅ sqld installed to ~/.local/bin/sqld"
38
- echo ""
39
- echo "🔧 Add ~/.local/bin to your PATH if needed:"
40
- echo " export PATH=\"\$HOME/.local/bin:\$PATH\""
File without changes
File without changes
@@ -1,7 +0,0 @@
1
- ## Gemini
2
-
3
-
4
- ## OpenAI
5
-
6
-
7
- ## Synth
File without changes
File without changes
File without changes
File without changes
@@ -1,168 +0,0 @@
1
- """
2
- Banking77 in-context injection evals (async, not tests)
3
-
4
- Samples a handful of Banking77 prompts and evaluates multiple override
5
- contexts in parallel, printing simple accuracy for each.
6
-
7
- Usage
8
- - Keys in .env (GROQ_API_KEY, etc.)
9
- - Run: uv run -q python -m synth_ai.learning.prompts.banking77_injection_eval
10
- Optional env:
11
- - N_SAMPLES=20 (default)
12
- - MODEL=openai/gpt-oss-20b (default)
13
- - VENDOR=groq (default)
14
- """
15
-
16
- from __future__ import annotations
17
-
18
- import asyncio
19
- import os
20
- import random
21
- from typing import Any
22
-
23
- from datasets import load_dataset
24
- from dotenv import load_dotenv
25
- from synth_ai.lm.core.main_v3 import LM, build_messages
26
- from synth_ai.lm.overrides import LMOverridesContext
27
-
28
-
29
- async def classify_one(lm: LM, text: str, label_names: list[str]) -> str:
30
- labels_joined = ", ".join(label_names)
31
- system_message = (
32
- "You are an intent classifier for the Banking77 dataset. "
33
- "Given a customer message, respond with exactly one label from the list. "
34
- "Return only the label text with no extra words.\n\n"
35
- f"Valid labels: {labels_joined}"
36
- )
37
- user_message = f"Message: {text}\nLabel:"
38
- messages = build_messages(system_message, user_message, images_bytes=None, model_name=lm.model)
39
- resp = await lm.respond_async(messages=messages)
40
- return (resp.raw_response or "").strip()
41
-
42
-
43
- def choose_label(pred: str, label_names: list[str]) -> str:
44
- norm_pred = pred.strip().lower()
45
- label_lookup = {ln.lower(): ln for ln in label_names}
46
- mapped = label_lookup.get(norm_pred)
47
- if mapped is not None:
48
- return mapped
49
-
50
- # Fallback: choose the label with the highest naive token overlap
51
- def score(cand: str) -> int:
52
- c = cand.lower()
53
- return sum(1 for w in c.split() if w in norm_pred)
54
-
55
- return max(label_names, key=score)
56
-
57
-
58
- async def eval_context(
59
- lm: LM,
60
- items: list[tuple[str, str]],
61
- label_names: list[str],
62
- ctx_name: str,
63
- specs: list[dict[str, Any]],
64
- ) -> tuple[str, int, int]:
65
- correct = 0
66
- with LMOverridesContext(specs):
67
- tasks = [classify_one(lm, text, label_names) for text, _ in items]
68
- results = await asyncio.gather(*tasks, return_exceptions=True)
69
- for (text, gold), pred in zip(items, results, strict=False):
70
- if isinstance(pred, Exception):
71
- # Treat exceptions as incorrect
72
- continue
73
- mapped = choose_label(pred, label_names)
74
- correct += int(mapped == gold)
75
- return (ctx_name, correct, len(items))
76
-
77
-
78
- async def main() -> None:
79
- load_dotenv()
80
-
81
- n = int(os.getenv("N_SAMPLES", "20"))
82
- model = os.getenv("MODEL", "openai/gpt-oss-20b")
83
- vendor = os.getenv("VENDOR", "groq")
84
-
85
- lm = LM(model=model, vendor=vendor, temperature=0.0)
86
-
87
- print("Loading Banking77 dataset (split='test')...")
88
- ds = load_dataset("banking77", split="test")
89
- label_names: list[str] = ds.features["label"].names # type: ignore
90
-
91
- idxs = random.sample(range(len(ds)), k=min(n, len(ds)))
92
- items = [
93
- (ds[i]["text"], label_names[int(ds[i]["label"])]) # (text, gold_label)
94
- for i in idxs
95
- ]
96
-
97
- # Define a few override contexts to compare
98
- contexts: list[dict[str, Any]] = [
99
- {
100
- "name": "baseline (no overrides)",
101
- "overrides": [],
102
- },
103
- {
104
- "name": "nonsense prompt injection (expected worse)",
105
- "overrides": [
106
- {
107
- "match": {"contains": "", "role": "user"},
108
- "injection_rules": [
109
- # Heavily corrupt user text by replacing vowels
110
- {"find": "a", "replace": "x"},
111
- {"find": "e", "replace": "x"},
112
- {"find": "i", "replace": "x"},
113
- {"find": "o", "replace": "x"},
114
- {"find": "u", "replace": "x"},
115
- {"find": "A", "replace": "X"},
116
- {"find": "E", "replace": "X"},
117
- {"find": "I", "replace": "X"},
118
- {"find": "O", "replace": "X"},
119
- {"find": "U", "replace": "X"},
120
- ],
121
- }
122
- ],
123
- },
124
- {
125
- "name": "injection: atm->ATM, txn->transaction",
126
- "overrides": [
127
- {
128
- "match": {"contains": "atm", "role": "user"},
129
- "injection_rules": [
130
- {"find": "atm", "replace": "ATM"},
131
- {"find": "txn", "replace": "transaction"},
132
- ],
133
- }
134
- ],
135
- },
136
- {
137
- "name": "params: temperature=0.0",
138
- "overrides": [
139
- {"match": {"contains": ""}, "params": {"temperature": 0.0}},
140
- ],
141
- },
142
- {
143
- "name": "model override: 20b->120b",
144
- "overrides": [
145
- {"match": {"contains": ""}, "params": {"model": "openai/gpt-oss-120b"}},
146
- ],
147
- },
148
- ]
149
-
150
- print(f"\nEvaluating {len(contexts)} contexts on {len(items)} Banking77 samples (async)...")
151
-
152
- # Evaluate each context sequentially but batched (each context classifies in parallel)
153
- results: list[tuple[str, int, int]] = []
154
- for ctx in contexts:
155
- name = ctx["name"]
156
- specs = ctx["overrides"]
157
- print(f"Evaluating: {name} ...")
158
- res = await eval_context(lm, items, label_names, name, specs)
159
- results.append(res)
160
-
161
- print("\nResults:")
162
- for name, correct, total in results:
163
- acc = correct / total if total else 0.0
164
- print(f"- {name}: {correct}/{total} correct ({acc:.2%})")
165
-
166
-
167
- if __name__ == "__main__":
168
- asyncio.run(main())
File without changes