synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (349)
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +23 -17
  3. examples/dev/qwen3_32b_qlora_4xh100.toml +40 -0
  4. examples/multi_step/crafter_rl_lora.md +29 -0
  5. examples/qwen_coder/README.md +102 -0
  6. examples/qwen_coder/_shared.py +113 -0
  7. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  8. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  9. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  10. examples/qwen_coder/generate_dataset.py +98 -0
  11. examples/qwen_coder/infer_ft_smoke.py +65 -0
  12. examples/qwen_coder/infer_prod_proxy.py +73 -0
  13. examples/qwen_coder/infer_via_synth.py +87 -0
  14. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  15. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  16. examples/qwen_coder/sft_full_17b.py +103 -0
  17. examples/qwen_coder/sft_lora_30b.py +110 -0
  18. examples/qwen_coder/subset_jsonl.py +39 -0
  19. examples/qwen_coder/todos.md +38 -0
  20. examples/qwen_coder/validate_jsonl.py +60 -0
  21. examples/rl/configs/eval_base_qwen.toml +1 -1
  22. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  23. examples/rl/download_dataset.py +26 -10
  24. examples/rl/run_eval.py +53 -52
  25. examples/rl/run_rl_and_save.py +29 -12
  26. examples/rl/task_app/math_single_step.py +180 -41
  27. examples/rl/task_app/math_task_app.py +14 -6
  28. examples/sft/README.md +139 -0
  29. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  30. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  31. examples/sft/evaluate.py +117 -0
  32. examples/sft/export_dataset.py +117 -0
  33. examples/sft/generate_traces.py +162 -0
  34. examples/swe/__init__.py +12 -0
  35. examples/swe/task_app/README.md +105 -0
  36. examples/swe/task_app/__init__.py +2 -0
  37. examples/swe/task_app/grpo_swe_mini.py +571 -0
  38. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  39. examples/swe/task_app/hosted/README.md +173 -0
  40. examples/swe/task_app/hosted/__init__.py +5 -0
  41. examples/swe/task_app/hosted/branching.py +143 -0
  42. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  43. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  44. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  45. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  46. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  47. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  48. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  49. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  50. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  51. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  52. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  53. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  54. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  55. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  56. examples/swe/task_app/hosted/hosted_app.py +204 -0
  57. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  58. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  59. examples/swe/task_app/hosted/main.py +100 -0
  60. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  61. examples/swe/task_app/hosted/registry.py +195 -0
  62. examples/swe/task_app/hosted/rollout.py +1869 -0
  63. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  64. examples/swe/task_app/hosted/storage/volume.py +211 -0
  65. examples/swe/task_app/hosted/test_agents.py +161 -0
  66. examples/swe/task_app/hosted/test_service.py +137 -0
  67. examples/swe/task_app/hosted/utils.py +62 -0
  68. examples/vlm/PROPOSAL.md +53 -0
  69. examples/vlm/README.md +68 -0
  70. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  71. examples/vlm/crafter_image_only_agent.py +207 -0
  72. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  73. examples/vlm/filter_image_rows.py +63 -0
  74. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  75. examples/warming_up_to_rl/analyze_trace_db.py +12 -10
  76. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  77. examples/warming_up_to_rl/export_trace_sft.py +218 -36
  78. examples/warming_up_to_rl/groq_test.py +15 -8
  79. examples/warming_up_to_rl/manage_secrets.py +29 -25
  80. examples/warming_up_to_rl/readme.md +9 -2
  81. examples/warming_up_to_rl/run_eval.py +137 -61
  82. examples/warming_up_to_rl/run_fft_and_save.py +131 -60
  83. examples/warming_up_to_rl/run_local_rollout.py +88 -39
  84. examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
  85. examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
  86. examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
  87. examples/warming_up_to_rl/run_rl_and_save.py +35 -12
  88. examples/warming_up_to_rl/run_rollout_remote.py +44 -19
  89. examples/warming_up_to_rl/task_app/README.md +6 -2
  90. examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
  91. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  111. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
  112. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  113. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
  114. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  115. synth_ai/__init__.py +1 -0
  116. synth_ai/api/models/supported.py +376 -0
  117. synth_ai/api/train/builders.py +157 -26
  118. synth_ai/api/train/cli.py +213 -57
  119. synth_ai/api/train/config_finder.py +65 -5
  120. synth_ai/api/train/env_resolver.py +33 -15
  121. synth_ai/api/train/pollers.py +13 -4
  122. synth_ai/api/train/supported_algos.py +139 -0
  123. synth_ai/api/train/task_app.py +5 -3
  124. synth_ai/api/train/utils.py +33 -48
  125. synth_ai/cli/__init__.py +19 -4
  126. synth_ai/cli/_modal_wrapper.py +28 -0
  127. synth_ai/cli/_typer_patch.py +49 -0
  128. synth_ai/cli/balance.py +2 -3
  129. synth_ai/cli/calc.py +1 -1
  130. synth_ai/cli/demo.py +21 -6
  131. synth_ai/cli/recent.py +2 -2
  132. synth_ai/cli/rl_demo.py +77 -17
  133. synth_ai/cli/root.py +116 -39
  134. synth_ai/cli/status.py +2 -2
  135. synth_ai/cli/task_apps.py +1699 -259
  136. synth_ai/cli/traces.py +7 -4
  137. synth_ai/cli/turso.py +73 -0
  138. synth_ai/cli/watch.py +12 -18
  139. synth_ai/core/experiment.py +0 -2
  140. synth_ai/demo_registry.py +68 -31
  141. synth_ai/demos/core/cli.py +516 -194
  142. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  143. synth_ai/demos/demo_task_apps/core.py +64 -28
  144. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  145. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
  146. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  147. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  148. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  149. synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
  150. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  151. synth_ai/environments/examples/bandit/engine.py +12 -4
  152. synth_ai/environments/examples/bandit/taskset.py +4 -4
  153. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  154. synth_ai/environments/reproducibility/tree.py +5 -6
  155. synth_ai/environments/service/app.py +11 -12
  156. synth_ai/environments/service/core_routes.py +10 -9
  157. synth_ai/environments/stateful/engine.py +1 -1
  158. synth_ai/environments/tasks/core.py +1 -0
  159. synth_ai/environments/tasks/filters.py +5 -6
  160. synth_ai/environments/tasks/utils.py +4 -5
  161. synth_ai/evals/base.py +0 -2
  162. synth_ai/handshake.py +11 -9
  163. synth_ai/http.py +1 -1
  164. synth_ai/http_client.py +43 -11
  165. synth_ai/inference/__init__.py +0 -2
  166. synth_ai/inference/client.py +20 -6
  167. synth_ai/jobs/client.py +103 -78
  168. synth_ai/learning/__init__.py +41 -6
  169. synth_ai/learning/algorithms.py +14 -0
  170. synth_ai/learning/client.py +121 -29
  171. synth_ai/learning/config.py +2 -40
  172. synth_ai/learning/constants.py +0 -2
  173. synth_ai/learning/ft_client.py +4 -56
  174. synth_ai/learning/health.py +13 -7
  175. synth_ai/learning/jobs.py +43 -47
  176. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  177. synth_ai/learning/rl/client.py +267 -0
  178. synth_ai/learning/rl/config.py +31 -0
  179. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  180. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  181. synth_ai/learning/rl/secrets.py +13 -0
  182. synth_ai/learning/rl_client.py +2 -253
  183. synth_ai/learning/sft/__init__.py +29 -0
  184. synth_ai/learning/sft/client.py +68 -0
  185. synth_ai/learning/sft/config.py +270 -0
  186. synth_ai/learning/sft/data.py +295 -0
  187. synth_ai/learning/sse.py +25 -26
  188. synth_ai/learning/validators.py +25 -24
  189. synth_ai/lm/__init__.py +21 -47
  190. synth_ai/task/__init__.py +26 -27
  191. synth_ai/task/apps/__init__.py +18 -19
  192. synth_ai/task/auth.py +35 -23
  193. synth_ai/task/client.py +15 -13
  194. synth_ai/task/contracts.py +37 -35
  195. synth_ai/task/datasets.py +9 -6
  196. synth_ai/task/errors.py +11 -10
  197. synth_ai/task/health.py +17 -11
  198. synth_ai/task/json.py +58 -24
  199. synth_ai/task/proxy.py +15 -14
  200. synth_ai/task/rubrics.py +22 -15
  201. synth_ai/task/server.py +43 -17
  202. synth_ai/task/tracing_utils.py +12 -7
  203. synth_ai/task/validators.py +0 -1
  204. synth_ai/task/vendors.py +5 -7
  205. synth_ai/tracing_v3/__init__.py +2 -0
  206. synth_ai/tracing_v3/abstractions.py +21 -4
  207. synth_ai/tracing_v3/db_config.py +26 -1
  208. synth_ai/tracing_v3/decorators.py +18 -15
  209. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  210. synth_ai/tracing_v3/hooks.py +6 -4
  211. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  212. synth_ai/tracing_v3/replica_sync.py +1 -0
  213. synth_ai/tracing_v3/session_tracer.py +63 -16
  214. synth_ai/tracing_v3/storage/base.py +89 -1
  215. synth_ai/tracing_v3/storage/config.py +21 -8
  216. synth_ai/tracing_v3/storage/factory.py +10 -8
  217. synth_ai/tracing_v3/storage/utils.py +4 -2
  218. synth_ai/tracing_v3/turso/daemon.py +7 -2
  219. synth_ai/tracing_v3/turso/models.py +5 -2
  220. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  221. synth_ai/tracing_v3/utils.py +4 -3
  222. synth_ai/v0/api/__init__.py +8 -0
  223. synth_ai/v0/api/models/__init__.py +8 -0
  224. synth_ai/v0/api/models/supported.py +8 -0
  225. synth_ai/v0/config/__init__.py +15 -0
  226. synth_ai/v0/config/base_url.py +12 -0
  227. synth_ai/v0/lm/__init__.py +51 -0
  228. synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
  229. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  230. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  231. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  232. synth_ai/{lm → v0/lm}/config.py +6 -1
  233. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  234. synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
  235. synth_ai/{lm → v0/lm}/core/main.py +19 -7
  236. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  237. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
  238. synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
  239. synth_ai/{lm → v0/lm}/overrides.py +4 -4
  240. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  241. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  242. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  243. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  244. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
  245. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  246. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  247. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
  248. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
  249. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
  250. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
  251. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  252. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  253. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  254. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  255. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  256. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  257. synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
  258. synth_ai/v0/tracing/upload.py +32 -135
  259. synth_ai/v0/tracing_v3/__init__.py +10 -0
  260. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  261. synth_ai/v0/tracing_v3/decorators.py +3 -0
  262. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  263. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  264. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/METADATA +10 -7
  265. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/RECORD +294 -258
  266. examples/common_old/backend.py +0 -21
  267. examples/evals_old/README.md +0 -98
  268. examples/evals_old/__init__.py +0 -6
  269. examples/evals_old/compare_models.py +0 -1037
  270. examples/evals_old/example_log.md +0 -145
  271. examples/evals_old/run_demo.sh +0 -126
  272. examples/evals_old/trace_analysis.py +0 -270
  273. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  274. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  275. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  276. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
  277. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  278. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  279. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  280. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  281. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  282. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
  283. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  284. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  285. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
  286. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  287. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  288. examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
  289. examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
  290. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  291. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  292. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
  293. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
  294. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
  295. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  296. examples/finetuning_old/synth_qwen_v1/util.py +0 -147
  297. examples/rl_old/task_app.py +0 -962
  298. synth_ai/experimental/synth_oss.py +0 -446
  299. synth_ai/install_sqld.sh +0 -40
  300. synth_ai/learning/filtering.py +0 -0
  301. synth_ai/learning/offline/dpo.py +0 -0
  302. synth_ai/learning/offline/providers.py +0 -7
  303. synth_ai/learning/offline/sft.py +0 -0
  304. synth_ai/learning/offline/shared.py +0 -0
  305. synth_ai/learning/online/grpo.py +0 -0
  306. synth_ai/learning/online/irft.py +0 -0
  307. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  308. synth_ai/learning/prompts/gepa.py +0 -0
  309. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  310. synth_ai/learning/prompts/mipro.py +0 -289
  311. synth_ai/learning/prompts/random_search.py +0 -246
  312. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  313. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  314. synth_ai/rl/secrets.py +0 -19
  315. synth_ai/scripts/verify_rewards.py +0 -100
  316. synth_ai/tracing/__init__.py +0 -30
  317. synth_ai/tracing_v1/__init__.py +0 -33
  318. synth_ai/tracing_v3/turso/__init__.py +0 -25
  319. synth_ai/tracing_v3/turso/manager.py +0 -774
  320. synth_ai/zyk/__init__.py +0 -30
  321. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  322. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  323. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  324. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  325. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  326. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  327. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  328. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  329. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  330. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  331. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  332. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  333. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  334. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  335. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  336. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  337. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  338. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  339. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  340. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  341. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  342. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  343. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  344. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  345. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  346. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/WHEEL +0 -0
  347. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/entry_points.txt +0 -0
  348. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/licenses/LICENSE +0 -0
  349. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/top_level.txt +0 -0
--- synth_ai/learning/prompts/random_search.py
+++ /dev/null
@@ -1,246 +0,0 @@
1
- """
2
- Random-search prompt optimizer (BootstrapFewShotWithRandomSearch), DSPy-inspired.
3
-
4
- Implements the high-level pseudocode of DSPy's Random Search optimizer in a
5
- provider-agnostic, modular style. You can plug in your own student/program and
6
- metric, and this module will explore baselines and bootstrapped few-shot variants.
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- import contextlib
12
- import random
13
- from collections.abc import Callable, Sequence
14
- from dataclasses import dataclass
15
- from typing import Any
16
-
17
- # ---------------------------
18
- # Protocol-like expectations (duck-typed)
19
- # ---------------------------
20
-
21
-
22
- class _ProgramLike:
23
- def reset_copy(self): # zero-shot copy
24
- return self
25
-
26
- def deepcopy(self): # deep copy
27
- return self
28
-
29
- def with_demos(self, demos: list[tuple[Any, Any]]):
30
- return self
31
-
32
- def run(self, x: Any) -> Any:
33
- raise NotImplementedError
34
-
35
-
36
- # ---------------------------
37
- # Helpers and lightweight components
38
- # ---------------------------
39
-
40
-
41
- @dataclass
42
- class EvalResult:
43
- score: float
44
- subscores: list[float]
45
-
46
-
47
- def evaluate(
48
- program: _ProgramLike, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
49
- ) -> EvalResult:
50
- subs = []
51
- for x, y in dataset:
52
- subs.append(metric(program.run(x), y))
53
- return EvalResult(sum(subs) / max(1, len(subs)), subs)
54
-
55
-
56
- class LabeledFewShot:
57
- def __init__(self, k: int):
58
- self.k = k
59
-
60
- def compile(
61
- self, student: _ProgramLike, trainset: Sequence[tuple[Any, Any]], sample: bool = True
62
- ) -> _ProgramLike:
63
- p = getattr(student, "deepcopy", student.reset_copy)()
64
- demos = list(trainset)
65
- if sample:
66
- random.shuffle(demos)
67
- p = p.with_demos(demos[: min(self.k, len(demos))])
68
- return p
69
-
70
-
71
- class BootstrapFewShot:
72
- def __init__(
73
- self,
74
- *,
75
- metric: Callable[[Any, Any], float],
76
- metric_threshold: float | None = None,
77
- max_bootstrapped_demos: int = 8,
78
- max_labeled_demos: int = 0,
79
- teacher_settings: dict[str, Any] | None = None,
80
- max_rounds: int = 1,
81
- ):
82
- self.metric = metric
83
- self.metric_threshold = metric_threshold
84
- self.max_bootstrapped_demos = max_bootstrapped_demos
85
- self.max_labeled_demos = max_labeled_demos
86
- self.teacher_settings = teacher_settings or {}
87
- self.max_rounds = max_rounds
88
-
89
- def compile(
90
- self,
91
- student: _ProgramLike,
92
- teacher: _ProgramLike | None,
93
- trainset: Sequence[tuple[Any, Any]],
94
- ) -> _ProgramLike:
95
- p = getattr(student, "deepcopy", student.reset_copy)()
96
- rng = random.Random()
97
- # If bootstrapped demos disabled, return labeled-only few-shot quickly
98
- if self.max_bootstrapped_demos <= 0:
99
- demos: list[tuple[Any, Any]] = []
100
- if self.max_labeled_demos > 0:
101
- demos += rng.sample(list(trainset), k=min(self.max_labeled_demos, len(trainset)))
102
- return p.with_demos(demos)
103
- boot: list[tuple[Any, Any]] = []
104
- # Bootstrap demos by self consistency
105
- for _ in range(self.max_rounds):
106
- rng.shuffle(trainset := list(trainset))
107
- for x, y in trainset:
108
- yhat = p.run(x)
109
- ok = self.metric(yhat, y)
110
- if (self.metric_threshold is None and ok == 1) or (
111
- self.metric_threshold is not None and ok >= self.metric_threshold
112
- ):
113
- boot.append((x, y))
114
- if len(boot) >= self.max_bootstrapped_demos:
115
- break
116
- if len(boot) >= self.max_bootstrapped_demos:
117
- break
118
-
119
- # Optionally add labeled demos
120
- demos = list(boot)
121
- if self.max_labeled_demos > 0:
122
- demos += rng.sample(list(trainset), k=min(self.max_labeled_demos, len(trainset)))
123
-
124
- return p.with_demos(demos)
125
-
126
-
127
- # ---------------------------
128
- # Random-search compile (BootstrapFewShotWithRandomSearch)
129
- # ---------------------------
130
-
131
-
132
- @dataclass
133
- class Candidate:
134
- score: float
135
- subscores: list[float]
136
- seed: int
137
- program: _ProgramLike
138
-
139
-
140
- def random_search_compile(
141
- student: _ProgramLike,
142
- trainset: Sequence[tuple[Any, Any]],
143
- valset: Sequence[tuple[Any, Any]],
144
- metric: Callable[[Any, Any], float],
145
- *,
146
- max_bootstrapped_demos: int = 8,
147
- max_labeled_demos: int = 4,
148
- max_rounds: int = 2,
149
- num_candidate_programs: int = 16,
150
- stop_at_score: float | None = None,
151
- evaluate_fn: Callable[[_ProgramLike, Sequence[tuple[Any, Any]], Callable[[Any, Any], float]], EvalResult] | None = None,
152
- on_candidate_evaluated: Callable[[int, float, EvalResult, dict[str, Any]], None] | None = None,
153
- ) -> tuple[_ProgramLike, list[dict[str, Any]]]:
154
- best_program: _ProgramLike | None = None
155
- best_score = float("-inf")
156
- candidates: list[Candidate] = []
157
- records: list[dict[str, Any]] = []
158
-
159
- seeds = list(range(num_candidate_programs))
160
- seeds = [-3, -2, -1] + seeds # zero-shot, labeled few-shot, bootstrapped few-shot
161
-
162
- rng = random.Random(0)
163
- for idx, seed in enumerate(seeds):
164
- train_copy = list(trainset)
165
-
166
- if seed == -3:
167
- program = getattr(student, "reset_copy", student.deepcopy)()
168
-
169
- elif seed == -2:
170
- program = LabeledFewShot(k=max_labeled_demos).compile(student, train_copy, sample=True)
171
-
172
- else:
173
- if seed >= 0:
174
- rng.shuffle(train_copy)
175
- if max_bootstrapped_demos <= 0:
176
- size = 0
177
- else:
178
- size = (
179
- max_bootstrapped_demos if seed == -1 else rng.randint(1, max_bootstrapped_demos)
180
- )
181
- program = BootstrapFewShot(
182
- metric=metric,
183
- metric_threshold=None,
184
- max_bootstrapped_demos=size,
185
- max_labeled_demos=max_labeled_demos,
186
- teacher_settings={},
187
- max_rounds=max_rounds,
188
- ).compile(student, teacher=None, trainset=train_copy)
189
-
190
- res = (
191
- evaluate_fn(program, valset, metric)
192
- if evaluate_fn
193
- else evaluate(program, valset, metric)
194
- )
195
- cand = Candidate(score=res.score, subscores=res.subscores, seed=seed, program=program)
196
- candidates.append(cand)
197
- # Record an intervention summary for reproducibility
198
- intervention: dict[str, Any] = {"seed": seed}
199
- if hasattr(program, "demos"):
200
- try:
201
- intervention["demos"] = program.demos # type: ignore
202
- except Exception:
203
- intervention["demos"] = None
204
- # Type of candidate
205
- if seed == -3:
206
- intervention["kind"] = "zero_shot"
207
- intervention["label"] = "zero-shot"
208
- elif seed == -2:
209
- intervention["kind"] = "labeled_few_shot"
210
- intervention["label"] = f"labeled-{max_labeled_demos}"
211
- else:
212
- intervention["kind"] = "bootstrapped_few_shot"
213
- intervention["label"] = f"boot-b{max_bootstrapped_demos}-l{max_labeled_demos}"
214
- record_obj = {
215
- "score": cand.score,
216
- "subscores": cand.subscores,
217
- "intervention": intervention,
218
- }
219
- records.append(record_obj)
220
-
221
- if res.score > best_score:
222
- best_score, best_program = res.score, program
223
-
224
- if stop_at_score is not None and best_score >= stop_at_score:
225
- break
226
-
227
- if on_candidate_evaluated is not None:
228
- with contextlib.suppress(Exception):
229
- on_candidate_evaluated(idx + 1, res.score, res, intervention)
230
-
231
- # Attach candidates for inspection
232
- if hasattr(best_program, "candidate_programs"):
233
- # If user object supports attribute assignment
234
- with contextlib.suppress(Exception):
235
- best_program.candidate_programs = sorted(
236
- candidates, key=lambda c: c.score, reverse=True
237
- ) # type: ignore[attr-defined]
238
-
239
- return (best_program or getattr(student, "deepcopy", student)(), records)
240
-
241
-
242
- __all__ = [
243
- "random_search_compile",
244
- "LabeledFewShot",
245
- "BootstrapFewShot",
246
- ]
--- synth_ai/learning/prompts/run_mipro_banking77.py
+++ /dev/null
@@ -1,172 +0,0 @@
1
- """
2
- Example: MIPROv2-style optimizer on Banking77 using Groq gpt-oss-20b.
3
-
4
- Requires:
5
- - .env with GROQ_API_KEY
6
- - datasets
7
-
8
- Run:
9
- - uv run -q python -m synth_ai.learning.prompts.run_mipro_banking77
10
- """
11
-
12
- from __future__ import annotations
13
-
14
- import asyncio
15
- import json
16
- import os
17
- import random
18
- import time
19
- from collections.abc import Sequence
20
- from pathlib import Path
21
- from typing import Any
22
-
23
- from datasets import load_dataset
24
- from dotenv import load_dotenv
25
- from synth_ai.learning.prompts.mipro import ProgramAdapter, evaluate_program, mipro_v2_compile
26
- from synth_ai.lm.core.main_v3 import LM, build_messages
27
-
28
-
29
- def choose_label(pred: str, label_names: list[str]) -> str:
30
- norm = (pred or "").strip().lower()
31
- d = {ln.lower(): ln for ln in label_names}
32
- if norm in d:
33
- return d[norm]
34
-
35
- def score(cand: str) -> int:
36
- c = cand.lower()
37
- return sum(1 for w in c.split() if w in norm)
38
-
39
- return max(label_names, key=score)
40
-
41
-
42
- def accuracy(pred: str, gold: str, labels: list[str]) -> float:
43
- return 1.0 if choose_label(pred, labels) == gold else 0.0
44
-
45
-
46
- class NaivePromptModel:
47
- """Toy prompt model that returns simple instruction variants."""
48
-
49
- def generate_instructions(self, ctx: dict[str, Any], k: int = 8) -> list[str]:
50
- base = "Classify the Banking77 intent and return exactly one label."
51
- variants = [
52
- base,
53
- base + " Be concise.",
54
- base + " Use examples to guide your reasoning.",
55
- base + " Return only the label text.",
56
- base + " Follow the label names strictly.",
57
- base + " Do not include explanations.",
58
- base + " Think about similar intents before answering.",
59
- base + " Carefully consider the user's message.",
60
- ]
61
- random.shuffle(variants)
62
- return variants[:k]
63
-
64
-
65
- def build_run_fn(lm: LM, label_names: list[str]):
66
- def run_fn(x: str, _model: Any | None = None) -> str:
67
- # Use instructions and demos from adapter state (set by set_instructions/set_demos)
68
- # The adapter passes state via closure; we rebuild messages here
69
- instructions = state_ref.get("instructions", {}).get(
70
- "main", "You are an intent classifier for Banking77."
71
- )
72
- examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in state_ref.get("demos", []))
73
- sys = instructions
74
- user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
75
- messages = build_messages(sys, user, images_bytes=None, model_name=lm.model)
76
-
77
- async def _call():
78
- resp = await lm.respond_async(messages=messages)
79
- return (resp.raw_response or "").strip()
80
-
81
- return asyncio.run(_call())
82
-
83
- return run_fn
84
-
85
-
86
- def set_instructions(new_instr: dict[str, str], state: dict[str, Any]) -> dict[str, Any]:
87
- state["instructions"] = {**state.get("instructions", {}), **new_instr}
88
- return state
89
-
90
-
91
- def set_demos(demos: list[tuple[str, str]], state: dict[str, Any]) -> dict[str, Any]:
92
- state["demos"] = list(demos)
93
- return state
94
-
95
-
96
- def main():
97
- load_dotenv()
98
- random.seed(0)
99
-
100
- model = os.getenv("MODEL", "openai/gpt-oss-20b")
101
- vendor = os.getenv("VENDOR", "groq")
102
- lm = LM(model=model, vendor=vendor, temperature=0.0)
103
-
104
- print("Loading Banking77 dataset (train/dev split of test for demo)...")
105
- ds = load_dataset("banking77")
106
- label_names: list[str] = ds["test"].features["label"].names # type: ignore
107
-
108
- all_items = [(r["text"], label_names[int(r["label"])]) for r in ds["test"]]
109
- random.shuffle(all_items)
110
- trainset: Sequence[tuple[str, str]] = all_items[:80]
111
- valset: Sequence[tuple[str, str]] = all_items[80:160]
112
-
113
- global state_ref
114
- state_ref = {
115
- "instructions": {"main": "You are an intent classifier for Banking77."},
116
- "demos": [],
117
- }
118
- adapter = ProgramAdapter(
119
- run_fn=build_run_fn(lm, label_names),
120
- state=state_ref,
121
- _predictors=["main"],
122
- set_instructions=set_instructions,
123
- set_demos=set_demos,
124
- )
125
-
126
- def metric(yhat: str, y: str) -> float:
127
- return accuracy(yhat, y, label_names)
128
-
129
- prompt_model = NaivePromptModel()
130
- task_model = None # not used in this minimal example
131
-
132
- print("Running MIPROv2-style optimizer...")
133
- best, records = mipro_v2_compile(
134
- student=adapter,
135
- trainset=trainset,
136
- valset=valset,
137
- metric=metric,
138
- prompt_model=prompt_model,
139
- task_model=task_model,
140
- max_bootstrapped_demos=6,
141
- max_labeled_demos=4,
142
- num_candidates=6,
143
- num_trials=12,
144
- minibatch=True,
145
- minibatch_size=16,
146
- minibatch_full_eval_steps=3,
147
- seed=0,
148
- )
149
-
150
- res = evaluate_program(best, valset, metric)
151
- print(
152
- f"Best program accuracy on val: {res.score:.2%} ({sum(res.subscores)}/{len(res.subscores)})"
153
- )
154
-
155
- out = {
156
- "context": {
157
- "model": model,
158
- "vendor": vendor,
159
- "train_size": len(trainset),
160
- "val_size": len(valset),
161
- },
162
- "trials": records,
163
- }
164
- out_dir = Path(__file__).parent
165
- fname = str(out_dir / f"mipro_banking77_{int(time.time())}.json")
166
- with open(fname, "w") as f:
167
- json.dump(out, f, indent=2)
168
- print(f"Saved trial records to {fname}")
169
-
170
-
171
- if __name__ == "__main__":
172
- main()