synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (351)
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +23 -17
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/configs/eval_base_qwen.toml +1 -1
  19. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  20. examples/rl/download_dataset.py +26 -10
  21. examples/rl/run_eval.py +53 -52
  22. examples/rl/run_rl_and_save.py +29 -12
  23. examples/rl/task_app/math_single_step.py +180 -41
  24. examples/rl/task_app/math_task_app.py +14 -6
  25. examples/sft/README.md +139 -0
  26. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  27. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  28. examples/sft/evaluate.py +117 -0
  29. examples/sft/export_dataset.py +117 -0
  30. examples/sft/generate_traces.py +162 -0
  31. examples/swe/__init__.py +12 -0
  32. examples/swe/task_app/README.md +105 -0
  33. examples/swe/task_app/__init__.py +2 -0
  34. examples/swe/task_app/grpo_swe_mini.py +571 -0
  35. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  36. examples/swe/task_app/hosted/README.md +173 -0
  37. examples/swe/task_app/hosted/__init__.py +5 -0
  38. examples/swe/task_app/hosted/branching.py +143 -0
  39. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  40. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  41. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  42. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  43. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  44. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  45. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  46. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  47. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  50. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  51. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  52. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  53. examples/swe/task_app/hosted/hosted_app.py +204 -0
  54. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  55. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  56. examples/swe/task_app/hosted/main.py +100 -0
  57. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  58. examples/swe/task_app/hosted/registry.py +195 -0
  59. examples/swe/task_app/hosted/rollout.py +1869 -0
  60. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  61. examples/swe/task_app/hosted/storage/volume.py +211 -0
  62. examples/swe/task_app/hosted/test_agents.py +161 -0
  63. examples/swe/task_app/hosted/test_service.py +137 -0
  64. examples/swe/task_app/hosted/utils.py +62 -0
  65. examples/vlm/README.md +68 -0
  66. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  67. examples/vlm/crafter_image_only_agent.py +207 -0
  68. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  69. examples/vlm/filter_image_rows.py +63 -0
  70. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  71. examples/warming_up_to_rl/analyze_trace_db.py +12 -10
  72. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  73. examples/warming_up_to_rl/export_trace_sft.py +218 -36
  74. examples/warming_up_to_rl/groq_test.py +15 -8
  75. examples/warming_up_to_rl/manage_secrets.py +29 -25
  76. examples/warming_up_to_rl/readme.md +9 -2
  77. examples/warming_up_to_rl/run_eval.py +137 -61
  78. examples/warming_up_to_rl/run_fft_and_save.py +131 -60
  79. examples/warming_up_to_rl/run_local_rollout.py +88 -39
  80. examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
  81. examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
  82. examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
  83. examples/warming_up_to_rl/run_rl_and_save.py +35 -12
  84. examples/warming_up_to_rl/run_rollout_remote.py +44 -19
  85. examples/warming_up_to_rl/task_app/README.md +6 -2
  86. examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
  87. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  111. synth/__init__.py +14 -0
  112. synth_ai/__init__.py +20 -4
  113. synth_ai/api/models/supported.py +376 -0
  114. synth_ai/api/train/builders.py +157 -26
  115. synth_ai/api/train/cli.py +213 -57
  116. synth_ai/api/train/config_finder.py +65 -5
  117. synth_ai/api/train/env_resolver.py +33 -15
  118. synth_ai/api/train/pollers.py +13 -4
  119. synth_ai/api/train/supported_algos.py +139 -0
  120. synth_ai/api/train/task_app.py +5 -3
  121. synth_ai/api/train/utils.py +33 -48
  122. synth_ai/cli/__init__.py +19 -4
  123. synth_ai/cli/_modal_wrapper.py +28 -0
  124. synth_ai/cli/_typer_patch.py +49 -0
  125. synth_ai/cli/balance.py +2 -3
  126. synth_ai/cli/calc.py +1 -1
  127. synth_ai/cli/demo.py +21 -6
  128. synth_ai/cli/recent.py +2 -2
  129. synth_ai/cli/rl_demo.py +77 -17
  130. synth_ai/cli/root.py +116 -39
  131. synth_ai/cli/status.py +2 -2
  132. synth_ai/cli/task_apps.py +1699 -259
  133. synth_ai/cli/traces.py +7 -4
  134. synth_ai/cli/turso.py +73 -0
  135. synth_ai/cli/watch.py +12 -18
  136. synth_ai/core/experiment.py +0 -2
  137. synth_ai/demo_registry.py +68 -31
  138. synth_ai/demos/core/cli.py +516 -194
  139. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  140. synth_ai/demos/demo_task_apps/core.py +64 -28
  141. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  142. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
  143. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  144. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  145. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  146. synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
  147. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  148. synth_ai/environments/examples/bandit/engine.py +12 -4
  149. synth_ai/environments/examples/bandit/taskset.py +4 -4
  150. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  151. synth_ai/environments/reproducibility/tree.py +5 -6
  152. synth_ai/environments/service/app.py +11 -12
  153. synth_ai/environments/service/core_routes.py +10 -9
  154. synth_ai/environments/stateful/engine.py +1 -1
  155. synth_ai/environments/tasks/core.py +1 -0
  156. synth_ai/environments/tasks/filters.py +5 -6
  157. synth_ai/environments/tasks/utils.py +4 -5
  158. synth_ai/evals/base.py +0 -2
  159. synth_ai/handshake.py +11 -9
  160. synth_ai/http.py +1 -1
  161. synth_ai/http_client.py +43 -11
  162. synth_ai/inference/__init__.py +0 -2
  163. synth_ai/inference/client.py +20 -6
  164. synth_ai/jobs/client.py +103 -78
  165. synth_ai/learning/__init__.py +41 -6
  166. synth_ai/learning/algorithms.py +14 -0
  167. synth_ai/learning/client.py +121 -29
  168. synth_ai/learning/config.py +2 -40
  169. synth_ai/learning/constants.py +0 -2
  170. synth_ai/learning/ft_client.py +4 -56
  171. synth_ai/learning/health.py +13 -7
  172. synth_ai/learning/jobs.py +43 -47
  173. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  174. synth_ai/learning/rl/client.py +267 -0
  175. synth_ai/learning/rl/config.py +31 -0
  176. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  177. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  178. synth_ai/learning/rl/secrets.py +13 -0
  179. synth_ai/learning/rl_client.py +2 -253
  180. synth_ai/learning/sft/__init__.py +29 -0
  181. synth_ai/learning/sft/client.py +68 -0
  182. synth_ai/learning/sft/config.py +270 -0
  183. synth_ai/learning/sft/data.py +295 -0
  184. synth_ai/learning/sse.py +25 -26
  185. synth_ai/learning/validators.py +25 -24
  186. synth_ai/lm/__init__.py +21 -47
  187. synth_ai/task/__init__.py +26 -27
  188. synth_ai/task/apps/__init__.py +18 -19
  189. synth_ai/task/auth.py +35 -23
  190. synth_ai/task/client.py +15 -13
  191. synth_ai/task/contracts.py +37 -35
  192. synth_ai/task/datasets.py +9 -6
  193. synth_ai/task/errors.py +11 -10
  194. synth_ai/task/health.py +17 -11
  195. synth_ai/task/json.py +58 -24
  196. synth_ai/task/proxy.py +15 -14
  197. synth_ai/task/rubrics.py +22 -15
  198. synth_ai/task/server.py +43 -17
  199. synth_ai/task/tracing_utils.py +12 -7
  200. synth_ai/task/validators.py +0 -1
  201. synth_ai/task/vendors.py +5 -7
  202. synth_ai/tracing_v3/__init__.py +2 -0
  203. synth_ai/tracing_v3/abstractions.py +21 -4
  204. synth_ai/tracing_v3/db_config.py +26 -1
  205. synth_ai/tracing_v3/decorators.py +18 -15
  206. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  207. synth_ai/tracing_v3/hooks.py +6 -4
  208. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  209. synth_ai/tracing_v3/replica_sync.py +1 -0
  210. synth_ai/tracing_v3/session_tracer.py +63 -16
  211. synth_ai/tracing_v3/storage/base.py +89 -1
  212. synth_ai/tracing_v3/storage/config.py +21 -8
  213. synth_ai/tracing_v3/storage/factory.py +10 -8
  214. synth_ai/tracing_v3/storage/utils.py +4 -2
  215. synth_ai/tracing_v3/turso/daemon.py +7 -2
  216. synth_ai/tracing_v3/turso/models.py +5 -2
  217. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  218. synth_ai/tracing_v3/utils.py +4 -3
  219. synth_ai/v0/api/__init__.py +8 -0
  220. synth_ai/v0/api/models/__init__.py +8 -0
  221. synth_ai/v0/api/models/supported.py +8 -0
  222. synth_ai/v0/config/__init__.py +15 -0
  223. synth_ai/v0/config/base_url.py +12 -0
  224. synth_ai/v0/lm/__init__.py +51 -0
  225. synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
  226. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  227. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  228. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  229. synth_ai/{lm → v0/lm}/config.py +6 -1
  230. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  231. synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
  232. synth_ai/{lm → v0/lm}/core/main.py +19 -7
  233. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  234. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
  235. synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
  236. synth_ai/{lm → v0/lm}/overrides.py +4 -4
  237. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  238. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  239. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  240. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  241. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
  242. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  243. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  244. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
  245. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
  246. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
  247. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
  248. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  249. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  250. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  251. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  252. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  253. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  254. synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
  255. synth_ai/v0/tracing/upload.py +32 -135
  256. synth_ai/v0/tracing_v3/__init__.py +10 -0
  257. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  258. synth_ai/v0/tracing_v3/decorators.py +3 -0
  259. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  260. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  261. synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
  262. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -262
  263. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
  264. examples/common_old/backend.py +0 -21
  265. examples/evals_old/README.md +0 -98
  266. examples/evals_old/__init__.py +0 -6
  267. examples/evals_old/compare_models.py +0 -1037
  268. examples/evals_old/example_log.md +0 -145
  269. examples/evals_old/run_demo.sh +0 -126
  270. examples/evals_old/trace_analysis.py +0 -270
  271. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  272. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  273. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  274. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
  275. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  276. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  277. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  278. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  279. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  280. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
  281. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  282. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  283. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
  284. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  285. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  286. examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
  287. examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
  288. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  289. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  290. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
  291. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
  292. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
  293. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  294. examples/finetuning_old/synth_qwen_v1/util.py +0 -147
  295. examples/rl_old/task_app.py +0 -962
  296. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  297. examples/warming_up_to_rl/old/notes.md +0 -73
  298. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  299. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  300. synth_ai/experimental/synth_oss.py +0 -446
  301. synth_ai/install_sqld.sh +0 -40
  302. synth_ai/learning/filtering.py +0 -0
  303. synth_ai/learning/offline/dpo.py +0 -0
  304. synth_ai/learning/offline/providers.py +0 -7
  305. synth_ai/learning/offline/sft.py +0 -0
  306. synth_ai/learning/offline/shared.py +0 -0
  307. synth_ai/learning/online/grpo.py +0 -0
  308. synth_ai/learning/online/irft.py +0 -0
  309. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  310. synth_ai/learning/prompts/gepa.py +0 -0
  311. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  312. synth_ai/learning/prompts/mipro.py +0 -289
  313. synth_ai/learning/prompts/random_search.py +0 -246
  314. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  315. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  316. synth_ai/rl/secrets.py +0 -19
  317. synth_ai/scripts/verify_rewards.py +0 -100
  318. synth_ai/tracing/__init__.py +0 -30
  319. synth_ai/tracing_v1/__init__.py +0 -33
  320. synth_ai/tracing_v3/turso/__init__.py +0 -25
  321. synth_ai/tracing_v3/turso/manager.py +0 -774
  322. synth_ai/zyk/__init__.py +0 -30
  323. synth_ai-0.2.9.dev5.dist-info/METADATA +0 -131
  324. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  325. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  326. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  327. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  328. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  329. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  330. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  331. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  332. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  333. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  334. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  335. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  336. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  337. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  338. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  339. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  340. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  341. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  342. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  343. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  344. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  345. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  346. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  347. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  348. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  349. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
  350. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
  351. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
synth_ai/learning/prompts/random_search.py
@@ -1,246 +0,0 @@
1
- """
2
- Random-search prompt optimizer (BootstrapFewShotWithRandomSearch), DSPy-inspired.
3
-
4
- Implements the high-level pseudocode of DSPy's Random Search optimizer in a
5
- provider-agnostic, modular style. You can plug in your own student/program and
6
- metric, and this module will explore baselines and bootstrapped few-shot variants.
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- import contextlib
12
- import random
13
- from collections.abc import Callable, Sequence
14
- from dataclasses import dataclass
15
- from typing import Any
16
-
17
- # ---------------------------
18
- # Protocol-like expectations (duck-typed)
19
- # ---------------------------
20
-
21
-
22
- class _ProgramLike:
23
- def reset_copy(self): # zero-shot copy
24
- return self
25
-
26
- def deepcopy(self): # deep copy
27
- return self
28
-
29
- def with_demos(self, demos: list[tuple[Any, Any]]):
30
- return self
31
-
32
- def run(self, x: Any) -> Any:
33
- raise NotImplementedError
34
-
35
-
36
- # ---------------------------
37
- # Helpers and lightweight components
38
- # ---------------------------
39
-
40
-
41
@dataclass
class EvalResult:
    """Result of scoring a program over a dataset."""

    # Aggregate score for the whole dataset.
    score: float
    # Individual metric values, one per evaluated example.
    subscores: list[float]
45
-
46
-
47
def evaluate(
    program: _ProgramLike, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
) -> EvalResult:
    """Score ``program`` on every ``(x, y)`` pair in ``dataset``.

    Args:
        program: Object exposing ``run(x)``.
        dataset: Sequence of ``(input, expected)`` pairs.
        metric: Called as ``metric(program.run(x), y)`` for each pair.

    Returns:
        An ``EvalResult`` whose ``subscores`` are the per-pair metric values
        and whose ``score`` is their mean (0 for an empty dataset).
    """
    subscores = [metric(program.run(x), y) for x, y in dataset]
    # max(1, ...) guards the division when the dataset is empty.
    return EvalResult(sum(subscores) / max(1, len(subscores)), subscores)
54
-
55
-
56
class LabeledFewShot:
    """Compiler that attaches up to ``k`` labeled training pairs as demos."""

    def __init__(self, k: int):
        """Store ``k``, the maximum number of demos to attach."""
        self.k = k

    def compile(
        self, student: _ProgramLike, trainset: Sequence[tuple[Any, Any]], sample: bool = True
    ) -> _ProgramLike:
        """Return a copy of ``student`` configured with labeled demos.

        Args:
            student: Program exposing ``deepcopy()`` or ``reset_copy()``, and
                ``with_demos(demos)``.
            trainset: Labeled ``(input, expected)`` pairs to draw demos from.
                It is copied, never mutated.
            sample: When true, shuffle the pairs (module-level ``random``)
                before taking the first ``k``; otherwise keep their order.

        Returns:
            Whatever ``with_demos`` returns for the selected pairs.
        """
        # Look the fallback up lazily: passing ``student.reset_copy`` as the
        # getattr default would evaluate it even when ``deepcopy`` exists and
        # raise AttributeError for students that only implement ``deepcopy``.
        copier = getattr(student, "deepcopy", None)
        if copier is None:
            copier = student.reset_copy
        program = copier()

        demos = list(trainset)
        if sample:
            random.shuffle(demos)
        return program.with_demos(demos[: min(self.k, len(demos))])
69
-
70
-
71
class BootstrapFewShot:
    """Compiler that collects demos from training pairs the program already solves.

    A training pair is kept as a bootstrapped demo when the metric applied to
    the program's own output passes (see ``_passes``). Randomly sampled
    labeled pairs can be appended on top.
    """

    def __init__(
        self,
        *,
        metric: Callable[[Any, Any], float],
        metric_threshold: float | None = None,
        max_bootstrapped_demos: int = 8,
        max_labeled_demos: int = 0,
        teacher_settings: dict[str, Any] | None = None,
        max_rounds: int = 1,
    ):
        """Store the bootstrapping configuration.

        Args:
            metric: Called as ``metric(prediction, expected)``.
            metric_threshold: Minimum metric value for a pair to qualify. When
                ``None`` the metric must equal 1.
            max_bootstrapped_demos: Cap on bootstrapped demos; ``<= 0``
                disables bootstrapping.
            max_labeled_demos: Number of randomly sampled labeled pairs to
                append; ``<= 0`` appends none.
            teacher_settings: Stored but not used by ``compile``.
            max_rounds: Number of passes over the training set.
        """
        self.metric = metric
        self.metric_threshold = metric_threshold
        self.max_bootstrapped_demos = max_bootstrapped_demos
        self.max_labeled_demos = max_labeled_demos
        self.teacher_settings = teacher_settings or {}
        self.max_rounds = max_rounds

    def _passes(self, value: float) -> bool:
        """Return whether a metric value qualifies a pair as a demo."""
        if self.metric_threshold is None:
            return value == 1
        return value >= self.metric_threshold

    def compile(
        self,
        student: _ProgramLike,
        teacher: _ProgramLike | None,
        trainset: Sequence[tuple[Any, Any]],
    ) -> _ProgramLike:
        """Return a copy of ``student`` configured with bootstrapped demos.

        Args:
            student: Program exposing ``deepcopy()`` or ``reset_copy()``,
                ``run(x)`` and ``with_demos(demos)``.
            teacher: Accepted for interface compatibility; not used here.
            trainset: Labeled ``(input, expected)`` pairs. Copied, not mutated.

        Returns:
            Whatever ``with_demos`` returns for the bootstrapped demos followed
            by any sampled labeled demos.
        """
        # Lazy fallback: a getattr default of ``student.reset_copy`` would be
        # evaluated eagerly and fail for students that only have ``deepcopy``.
        copier = getattr(student, "deepcopy", None)
        if copier is None:
            copier = student.reset_copy
        program = copier()

        rng = random.Random()
        examples = list(trainset)
        limit = self.max_bootstrapped_demos

        boot: list[tuple[Any, Any]] = []
        if limit > 0:
            # Track accepted positions so that later rounds cannot add the
            # same training pair a second time.
            accepted: set[int] = set()
            order = list(range(len(examples)))
            for _ in range(self.max_rounds):
                if len(boot) >= limit:
                    break
                rng.shuffle(order)
                for position in order:
                    if position in accepted:
                        continue
                    x, y = examples[position]
                    if self._passes(self.metric(program.run(x), y)):
                        accepted.add(position)
                        boot.append((x, y))
                        if len(boot) >= limit:
                            break

        # Optionally add labeled demos (these may repeat bootstrapped pairs).
        demos = list(boot)
        if self.max_labeled_demos > 0:
            demos += rng.sample(examples, k=min(self.max_labeled_demos, len(examples)))
        return program.with_demos(demos)
125
-
126
-
127
- # ---------------------------
128
- # Random-search compile (BootstrapFewShotWithRandomSearch)
129
- # ---------------------------
130
-
131
-
132
@dataclass
class Candidate:
    """One evaluated candidate program together with its scores."""

    # Aggregate validation score of the candidate.
    score: float
    # Per-example validation scores.
    subscores: list[float]
    # Seed identifying how the candidate was built.
    seed: int
    # The candidate program itself.
    program: _ProgramLike
138
-
139
-
140
def random_search_compile(
    student: _ProgramLike,
    trainset: Sequence[tuple[Any, Any]],
    valset: Sequence[tuple[Any, Any]],
    metric: Callable[[Any, Any], float],
    *,
    max_bootstrapped_demos: int = 8,
    max_labeled_demos: int = 4,
    max_rounds: int = 2,
    num_candidate_programs: int = 16,
    stop_at_score: float | None = None,
    evaluate_fn: Callable[
        [_ProgramLike, Sequence[tuple[Any, Any]], Callable[[Any, Any], float]], EvalResult
    ]
    | None = None,
    on_candidate_evaluated: Callable[[int, float, EvalResult, dict[str, Any]], None] | None = None,
) -> tuple[_ProgramLike, list[dict[str, Any]]]:
    """Build candidate programs, score each on ``valset`` and return the best.

    Three baseline candidates are always tried first, identified by negative
    seeds: ``-3`` zero-shot copy, ``-2`` labeled few-shot, ``-1`` bootstrapped
    few-shot at the full demo budget. They are followed by
    ``num_candidate_programs`` bootstrapped candidates (seeds ``0..n-1``),
    each built from a shuffled training set with a random demo budget.

    Args:
        student: Program to optimise (see ``_ProgramLike``).
        trainset: Labeled pairs used to build demos.
        valset: Labeled pairs used to score each candidate.
        metric: Called as ``metric(prediction, expected)``.
        max_bootstrapped_demos: Upper bound on bootstrapped demos.
        max_labeled_demos: Number of labeled demos per candidate.
        max_rounds: Bootstrapping passes per candidate.
        num_candidate_programs: Number of randomised candidates after the
            three baselines.
        stop_at_score: Stop as soon as the best score reaches this value.
        evaluate_fn: Optional replacement for ``evaluate``.
        on_candidate_evaluated: Optional callback invoked after each candidate
            as ``(ordinal, score, result, intervention)``. Exceptions it
            raises are suppressed.

    Returns:
        ``(best_program, records)`` where ``records`` has one dict per
        evaluated candidate with keys ``score``, ``subscores`` and
        ``intervention``.
    """
    best_program: _ProgramLike | None = None
    best_score = float("-inf")
    candidates: list[Candidate] = []
    records: list[dict[str, Any]] = []

    # zero-shot, labeled few-shot, bootstrapped few-shot, then random variants
    seeds = [-3, -2, -1, *range(num_candidate_programs)]
    score_fn = evaluate_fn if evaluate_fn else evaluate

    rng = random.Random(0)
    for idx, seed in enumerate(seeds):
        train_copy = list(trainset)

        if seed == -3:
            # Lazy fallback: a getattr default of ``student.deepcopy`` would
            # be evaluated eagerly and fail for students lacking it.
            copier = getattr(student, "reset_copy", None)
            if copier is None:
                copier = student.deepcopy
            program = copier()
        elif seed == -2:
            program = LabeledFewShot(k=max_labeled_demos).compile(student, train_copy, sample=True)
        else:
            if seed >= 0:
                rng.shuffle(train_copy)
            if max_bootstrapped_demos <= 0:
                size = 0
            elif seed == -1:
                size = max_bootstrapped_demos
            else:
                size = rng.randint(1, max_bootstrapped_demos)
            program = BootstrapFewShot(
                metric=metric,
                metric_threshold=None,
                max_bootstrapped_demos=size,
                max_labeled_demos=max_labeled_demos,
                teacher_settings={},
                max_rounds=max_rounds,
            ).compile(student, teacher=None, trainset=train_copy)

        res = score_fn(program, valset, metric)
        cand = Candidate(score=res.score, subscores=res.subscores, seed=seed, program=program)
        candidates.append(cand)

        # Record an intervention summary for reproducibility
        intervention: dict[str, Any] = {"seed": seed}
        if hasattr(program, "demos"):
            try:
                intervention["demos"] = program.demos  # type: ignore
            except Exception:
                intervention["demos"] = None
        if seed == -3:
            intervention["kind"] = "zero_shot"
            intervention["label"] = "zero-shot"
        elif seed == -2:
            intervention["kind"] = "labeled_few_shot"
            intervention["label"] = f"labeled-{max_labeled_demos}"
        else:
            intervention["kind"] = "bootstrapped_few_shot"
            intervention["label"] = f"boot-b{max_bootstrapped_demos}-l{max_labeled_demos}"
        records.append(
            {
                "score": cand.score,
                "subscores": cand.subscores,
                "intervention": intervention,
            }
        )

        if res.score > best_score:
            best_score, best_program = res.score, program

        # Notify before the early-stop check so the candidate that reaches
        # ``stop_at_score`` is reported too. The callback is best-effort.
        if on_candidate_evaluated is not None:
            with contextlib.suppress(Exception):
                on_candidate_evaluated(idx + 1, res.score, res, intervention)

        if stop_at_score is not None and best_score >= stop_at_score:
            break

    # Attach candidates for inspection, only when the program already exposes
    # a ``candidate_programs`` attribute; assignment failures are ignored.
    if hasattr(best_program, "candidate_programs"):
        with contextlib.suppress(Exception):
            best_program.candidate_programs = sorted(  # type: ignore[union-attr]
                candidates, key=lambda c: c.score, reverse=True
            )

    # Compare against None rather than truthiness so a program object that
    # happens to be falsy (e.g. defines __len__) is still returned.
    if best_program is not None:
        return (best_program, records)

    # No candidate beat -inf (e.g. every score was NaN): hand back a copy of
    # the student, or the student itself when it cannot be copied.
    copier = getattr(student, "deepcopy", None)
    return (copier() if callable(copier) else student, records)
240
-
241
-
242
- __all__ = [
243
- "random_search_compile",
244
- "LabeledFewShot",
245
- "BootstrapFewShot",
246
- ]
synth_ai/learning/prompts/run_mipro_banking77.py
@@ -1,172 +0,0 @@
1
- """
2
- Example: MIPROv2-style optimizer on Banking77 using Groq gpt-oss-20b.
3
-
4
- Requires:
5
- - .env with GROQ_API_KEY
6
- - datasets
7
-
8
- Run:
9
- - uv run -q python -m synth_ai.learning.prompts.run_mipro_banking77
10
- """
11
-
12
- from __future__ import annotations
13
-
14
- import asyncio
15
- import json
16
- import os
17
- import random
18
- import time
19
- from collections.abc import Sequence
20
- from pathlib import Path
21
- from typing import Any
22
-
23
- from datasets import load_dataset
24
- from dotenv import load_dotenv
25
- from synth_ai.learning.prompts.mipro import ProgramAdapter, evaluate_program, mipro_v2_compile
26
- from synth_ai.lm.core.main_v3 import LM, build_messages
27
-
28
-
29
def choose_label(pred: str, label_names: list[str]) -> str:
    """Map a free-text prediction onto one of ``label_names``.

    Matching is attempted in this order:

    1. Case-insensitive exact match after stripping whitespace.
    2. Exact match after also treating ``_`` and ``-`` as spaces, so
       ``"age limit"`` matches the label ``"age_limit"``.
    3. The label with the most word tokens occurring in the prediction
       (ties go to the earliest label).

    Args:
        pred: Raw model output; ``None`` or empty is treated as ``""``.
        label_names: Candidate labels, returned verbatim.

    Returns:
        The chosen element of ``label_names``.

    Raises:
        ValueError: If ``label_names`` is empty and no exact match exists.
    """
    norm = (pred or "").strip().lower()
    by_lower = {name.lower(): name for name in label_names}
    if norm in by_lower:
        return by_lower[norm]

    def canon(text: str) -> str:
        # Lower-case, turn separators into spaces and collapse whitespace.
        return " ".join(text.lower().replace("_", " ").replace("-", " ").split())

    pred_text = canon(norm)
    by_canon: dict[str, str] = {}
    for name in label_names:
        by_canon.setdefault(canon(name), name)
    if pred_text in by_canon:
        return by_canon[pred_text]

    def score(cand: str) -> int:
        # Splitting on whitespace alone would leave a snake_case label as one
        # token that never occurs in natural-language output.
        return sum(1 for word in canon(cand).split() if word in pred_text)

    return max(label_names, key=score)
40
-
41
-
42
def accuracy(pred: str, gold: str, labels: list[str]) -> float:
    """Return 1.0 when ``pred`` resolves to ``gold`` via ``choose_label``, else 0.0."""
    return 1.0 if choose_label(pred, labels) == gold else 0.0
44
-
45
-
46
class NaivePromptModel:
    """Toy prompt model that returns simple instruction variants."""

    def generate_instructions(self, ctx: dict[str, Any], k: int = 8) -> list[str]:
        """Return up to ``k`` instruction strings in random order.

        The variants come from a fixed list of eight: one base sentence plus
        the same sentence with a short suffix. The order is shuffled with the
        module-level ``random`` generator.

        Args:
            ctx: Accepted for interface compatibility; not used.
            k: Maximum number of variants to return. Values above eight yield
                all eight; zero or negative values yield an empty list.

        Returns:
            A list of at most ``min(k, 8)`` distinct instruction strings.
        """
        base = "Classify the Banking77 intent and return exactly one label."
        suffixes = [
            "",
            " Be concise.",
            " Use examples to guide your reasoning.",
            " Return only the label text.",
            " Follow the label names strictly.",
            " Do not include explanations.",
            " Think about similar intents before answering.",
            " Carefully consider the user's message.",
        ]
        variants = [base + suffix for suffix in suffixes]
        random.shuffle(variants)
        # Clamp at 0: a negative slice bound would silently drop items from
        # the end instead of returning nothing.
        return variants[: max(0, k)]
63
-
64
-
65
def build_run_fn(lm: LM, label_names: list[str], state: dict[str, Any] | None = None):
    """Build the ``run_fn`` callable that classifies one message with ``lm``.

    Args:
        lm: Language model exposing ``model`` and ``respond_async(messages=...)``.
        label_names: Accepted for interface compatibility; not used here.
        state: Optional dict holding ``"instructions"`` and ``"demos"``. When
            omitted, the module-level ``state_ref`` is looked up at call time,
            which is the original behaviour once ``main()`` has set it.

    Returns:
        ``run_fn(x, _model=None) -> str``, which builds a prompt from the
        current instructions and demos, calls the model and returns the
        stripped raw response.
    """

    def run_fn(x: str, _model: Any | None = None) -> str:
        # Resolve the state on every call so updates to instructions/demos are
        # picked up. Fall back to an empty dict (defaults below) instead of
        # raising NameError when the module global has not been created yet.
        current = state if state is not None else globals().get("state_ref", {})
        instructions = current.get("instructions", {}).get(
            "main", "You are an intent classifier for Banking77."
        )
        examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in current.get("demos", []))
        sys = instructions
        user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
        messages = build_messages(sys, user, images_bytes=None, model_name=lm.model)

        async def _call():
            resp = await lm.respond_async(messages=messages)
            return (resp.raw_response or "").strip()

        # NOTE(review): asyncio.run starts a fresh event loop per call and
        # cannot be used from inside an already running loop.
        return asyncio.run(_call())

    return run_fn
84
-
85
-
86
def set_instructions(new_instr: dict[str, str], state: dict[str, Any]) -> dict[str, Any]:
    """Merge ``new_instr`` into ``state["instructions"]`` and return ``state``.

    Existing instruction entries are kept unless ``new_instr`` overrides them.
    ``state`` is updated in place; the stored instructions dict is a new
    object.
    """
    state["instructions"] = {**state.get("instructions", {}), **new_instr}
    return state
89
-
90
-
91
def set_demos(demos: list[tuple[str, str]], state: dict[str, Any]) -> dict[str, Any]:
    """Store a copy of ``demos`` under ``state["demos"]`` and return ``state``.

    ``state`` is updated in place. The demos are copied into a new list so
    later changes to the caller's list do not affect the stored value.
    """
    state["demos"] = list(demos)
    return state
94
-
95
-
96
def main():
    """Run the MIPROv2-style optimizer on a Banking77 sample and save the trials.

    Steps: load environment variables, build the LM from ``MODEL``/``VENDOR``
    (defaulting to ``openai/gpt-oss-20b`` on ``groq``), take the first 160
    shuffled items of the Banking77 test split as 80 train / 80 validation
    pairs, run ``mipro_v2_compile``, print the best program's validation
    accuracy and write the trial records to a timestamped JSON file next to
    this module.
    """
    # The run function reads the shared state through this module global.
    global state_ref

    load_dotenv()
    random.seed(0)

    model = os.getenv("MODEL", "openai/gpt-oss-20b")
    vendor = os.getenv("VENDOR", "groq")
    lm = LM(model=model, vendor=vendor, temperature=0.0)

    print("Loading Banking77 dataset (train/dev split of test for demo)...")
    ds = load_dataset("banking77")
    label_names: list[str] = ds["test"].features["label"].names  # type: ignore

    all_items = [(r["text"], label_names[int(r["label"])]) for r in ds["test"]]
    random.shuffle(all_items)
    trainset: Sequence[tuple[str, str]] = all_items[:80]
    valset: Sequence[tuple[str, str]] = all_items[80:160]

    state_ref = {
        "instructions": {"main": "You are an intent classifier for Banking77."},
        "demos": [],
    }
    adapter = ProgramAdapter(
        run_fn=build_run_fn(lm, label_names),
        state=state_ref,
        _predictors=["main"],
        set_instructions=set_instructions,
        set_demos=set_demos,
    )

    def metric(yhat: str, y: str) -> float:
        return accuracy(yhat, y, label_names)

    prompt_model = NaivePromptModel()
    task_model = None  # not used in this minimal example

    print("Running MIPROv2-style optimizer...")
    best, records = mipro_v2_compile(
        student=adapter,
        trainset=trainset,
        valset=valset,
        metric=metric,
        prompt_model=prompt_model,
        task_model=task_model,
        max_bootstrapped_demos=6,
        max_labeled_demos=4,
        num_candidates=6,
        num_trials=12,
        minibatch=True,
        minibatch_size=16,
        minibatch_full_eval_steps=3,
        seed=0,
    )

    res = evaluate_program(best, valset, metric)
    print(
        f"Best program accuracy on val: {res.score:.2%} ({sum(res.subscores)}/{len(res.subscores)})"
    )

    out = {
        "context": {
            "model": model,
            "vendor": vendor,
            "train_size": len(trainset),
            "val_size": len(valset),
        },
        "trials": records,
    }
    out_dir = Path(__file__).parent
    fname = str(out_dir / f"mipro_banking77_{int(time.time())}.json")
    # Explicit encoding keeps the output independent of the platform default.
    with open(fname, "w", encoding="utf-8") as f:
        json.dump(out, f, indent=2)
    print(f"Saved trial records to {fname}")
169
-
170
-
171
- if __name__ == "__main__":
172
- main()