synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (327) hide show
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +8 -11
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/run_eval.py +36 -37
  19. examples/rl/run_rl_and_save.py +5 -5
  20. examples/rl/task_app/math_single_step.py +65 -43
  21. examples/rl/task_app/math_task_app.py +3 -3
  22. examples/sft/README.md +139 -0
  23. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  24. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  25. examples/sft/evaluate.py +117 -0
  26. examples/sft/export_dataset.py +117 -0
  27. examples/sft/generate_traces.py +162 -0
  28. examples/swe/__init__.py +12 -0
  29. examples/swe/task_app/README.md +105 -0
  30. examples/swe/task_app/__init__.py +2 -0
  31. examples/swe/task_app/grpo_swe_mini.py +571 -0
  32. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  33. examples/swe/task_app/hosted/README.md +173 -0
  34. examples/swe/task_app/hosted/__init__.py +5 -0
  35. examples/swe/task_app/hosted/branching.py +143 -0
  36. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  37. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  38. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  39. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  40. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  41. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  42. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  43. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  44. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  45. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  46. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  47. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  50. examples/swe/task_app/hosted/hosted_app.py +204 -0
  51. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  52. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  53. examples/swe/task_app/hosted/main.py +100 -0
  54. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  55. examples/swe/task_app/hosted/registry.py +195 -0
  56. examples/swe/task_app/hosted/rollout.py +1869 -0
  57. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  58. examples/swe/task_app/hosted/storage/volume.py +211 -0
  59. examples/swe/task_app/hosted/test_agents.py +161 -0
  60. examples/swe/task_app/hosted/test_service.py +137 -0
  61. examples/swe/task_app/hosted/utils.py +62 -0
  62. examples/vlm/README.md +68 -0
  63. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  64. examples/vlm/crafter_image_only_agent.py +207 -0
  65. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  66. examples/vlm/filter_image_rows.py +63 -0
  67. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  68. examples/warming_up_to_rl/analyze_trace_db.py +5 -5
  69. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  70. examples/warming_up_to_rl/export_trace_sft.py +78 -21
  71. examples/warming_up_to_rl/groq_test.py +4 -4
  72. examples/warming_up_to_rl/manage_secrets.py +13 -18
  73. examples/warming_up_to_rl/run_eval.py +42 -44
  74. examples/warming_up_to_rl/run_fft_and_save.py +11 -16
  75. examples/warming_up_to_rl/run_local_rollout.py +1 -3
  76. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
  77. examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
  78. examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
  79. examples/warming_up_to_rl/run_rl_and_save.py +5 -6
  80. examples/warming_up_to_rl/run_rollout_remote.py +8 -10
  81. examples/warming_up_to_rl/task_app/README.md +6 -2
  82. examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
  83. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  101. synth/__init__.py +14 -0
  102. synth_ai/__init__.py +26 -4
  103. synth_ai/api/models/supported.py +376 -0
  104. synth_ai/api/train/builders.py +128 -21
  105. synth_ai/api/train/cli.py +80 -64
  106. synth_ai/api/train/config_finder.py +7 -2
  107. synth_ai/api/train/env_resolver.py +1 -1
  108. synth_ai/api/train/pollers.py +2 -1
  109. synth_ai/api/train/supported_algos.py +139 -0
  110. synth_ai/api/train/task_app.py +1 -2
  111. synth_ai/api/train/utils.py +13 -44
  112. synth_ai/cli/__init__.py +8 -0
  113. synth_ai/cli/_modal_wrapper.py +28 -0
  114. synth_ai/cli/_typer_patch.py +49 -0
  115. synth_ai/cli/balance.py +1 -2
  116. synth_ai/cli/calc.py +1 -1
  117. synth_ai/cli/demo.py +2 -1
  118. synth_ai/cli/recent.py +2 -2
  119. synth_ai/cli/rl_demo.py +2 -1
  120. synth_ai/cli/root.py +11 -13
  121. synth_ai/cli/status.py +2 -2
  122. synth_ai/cli/task_apps.py +529 -179
  123. synth_ai/cli/traces.py +6 -4
  124. synth_ai/cli/watch.py +12 -18
  125. synth_ai/demo_registry.py +1 -1
  126. synth_ai/demos/core/cli.py +36 -43
  127. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  128. synth_ai/demos/demo_task_apps/core.py +17 -25
  129. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
  130. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  131. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
  132. synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
  133. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  134. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  135. synth_ai/environments/reproducibility/tree.py +2 -5
  136. synth_ai/environments/service/app.py +11 -12
  137. synth_ai/environments/service/core_routes.py +4 -7
  138. synth_ai/environments/stateful/engine.py +1 -1
  139. synth_ai/environments/tasks/core.py +1 -0
  140. synth_ai/environments/tasks/filters.py +5 -6
  141. synth_ai/environments/tasks/utils.py +4 -5
  142. synth_ai/handshake.py +9 -9
  143. synth_ai/http.py +1 -1
  144. synth_ai/http_client.py +18 -10
  145. synth_ai/inference/client.py +15 -5
  146. synth_ai/jobs/client.py +78 -83
  147. synth_ai/learning/__init__.py +41 -6
  148. synth_ai/learning/algorithms.py +14 -0
  149. synth_ai/learning/client.py +91 -24
  150. synth_ai/learning/config.py +2 -38
  151. synth_ai/learning/ft_client.py +4 -59
  152. synth_ai/learning/health.py +5 -6
  153. synth_ai/learning/jobs.py +31 -47
  154. synth_ai/{rl → learning/rl}/__init__.py +14 -4
  155. synth_ai/learning/rl/client.py +267 -0
  156. synth_ai/learning/rl/config.py +31 -0
  157. synth_ai/{rl → learning/rl}/contracts.py +5 -8
  158. synth_ai/{rl → learning/rl}/env_keys.py +39 -15
  159. synth_ai/learning/rl/secrets.py +13 -0
  160. synth_ai/learning/rl_client.py +2 -281
  161. synth_ai/learning/sft/__init__.py +29 -0
  162. synth_ai/learning/sft/client.py +68 -0
  163. synth_ai/learning/sft/config.py +270 -0
  164. synth_ai/learning/sft/data.py +295 -0
  165. synth_ai/learning/sse.py +25 -24
  166. synth_ai/learning/validators.py +25 -28
  167. synth_ai/lm/__init__.py +21 -47
  168. synth_ai/main.py +6 -0
  169. synth_ai/task/__init__.py +25 -27
  170. synth_ai/task/apps/__init__.py +7 -8
  171. synth_ai/task/auth.py +8 -8
  172. synth_ai/task/client.py +14 -14
  173. synth_ai/task/contracts.py +36 -35
  174. synth_ai/task/datasets.py +6 -5
  175. synth_ai/task/errors.py +10 -10
  176. synth_ai/task/health.py +17 -9
  177. synth_ai/task/json.py +58 -23
  178. synth_ai/task/proxy.py +13 -9
  179. synth_ai/task/rubrics.py +16 -15
  180. synth_ai/task/server.py +12 -12
  181. synth_ai/task/tracing_utils.py +4 -4
  182. synth_ai/task/vendors.py +5 -6
  183. synth_ai/tracing_v3/__init__.py +2 -0
  184. synth_ai/tracing_v3/abstractions.py +21 -4
  185. synth_ai/tracing_v3/decorators.py +18 -16
  186. synth_ai/tracing_v3/hooks.py +5 -5
  187. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  188. synth_ai/tracing_v3/session_tracer.py +40 -14
  189. synth_ai/tracing_v3/storage/base.py +85 -0
  190. synth_ai/tracing_v3/storage/config.py +21 -8
  191. synth_ai/tracing_v3/storage/factory.py +10 -7
  192. synth_ai/tracing_v3/storage/utils.py +4 -2
  193. synth_ai/tracing_v3/turso/daemon.py +7 -2
  194. synth_ai/tracing_v3/turso/models.py +2 -2
  195. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  196. synth_ai/tracing_v3/utils.py +4 -4
  197. synth_ai/v0/api/__init__.py +8 -0
  198. synth_ai/v0/api/models/__init__.py +8 -0
  199. synth_ai/v0/api/models/supported.py +8 -0
  200. synth_ai/v0/config/__init__.py +15 -0
  201. synth_ai/v0/config/base_url.py +12 -0
  202. synth_ai/v0/lm/__init__.py +51 -0
  203. synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
  204. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  205. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  206. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  207. synth_ai/{lm → v0/lm}/config.py +6 -1
  208. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  209. synth_ai/{lm → v0/lm}/core/main.py +6 -6
  210. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  211. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
  212. synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
  213. synth_ai/{lm → v0/lm}/overrides.py +2 -2
  214. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  215. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  216. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  217. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  218. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
  219. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  220. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  221. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
  222. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
  223. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
  224. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
  225. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  226. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  227. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  228. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  229. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  230. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  231. synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
  232. synth_ai/v0/tracing_v3/__init__.py +10 -0
  233. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  234. synth_ai/v0/tracing_v3/decorators.py +3 -0
  235. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  236. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  237. synth_ai-0.2.9.dev9.dist-info/METADATA +191 -0
  238. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/RECORD +268 -238
  239. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/top_level.txt +1 -0
  240. examples/common_old/backend.py +0 -20
  241. examples/evals_old/README.md +0 -98
  242. examples/evals_old/__init__.py +0 -6
  243. examples/evals_old/compare_models.py +0 -1038
  244. examples/evals_old/example_log.md +0 -145
  245. examples/evals_old/run_demo.sh +0 -126
  246. examples/evals_old/trace_analysis.py +0 -270
  247. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  248. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  249. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  250. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
  251. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  252. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  253. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  254. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  255. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  256. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
  257. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  258. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  259. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
  260. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  261. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  262. examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
  263. examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
  264. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  265. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  266. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
  267. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
  268. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
  269. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  270. examples/finetuning_old/synth_qwen_v1/util.py +0 -152
  271. examples/rl_old/task_app.py +0 -1131
  272. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  273. examples/warming_up_to_rl/old/notes.md +0 -73
  274. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  275. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  276. synth_ai/experimental/synth_oss.py +0 -445
  277. synth_ai/learning/filtering.py +0 -0
  278. synth_ai/learning/offline/dpo.py +0 -0
  279. synth_ai/learning/offline/providers.py +0 -7
  280. synth_ai/learning/offline/sft.py +0 -0
  281. synth_ai/learning/offline/shared.py +0 -0
  282. synth_ai/learning/online/grpo.py +0 -0
  283. synth_ai/learning/online/irft.py +0 -0
  284. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  285. synth_ai/learning/prompts/gepa.py +0 -0
  286. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
  287. synth_ai/learning/prompts/mipro.py +0 -289
  288. synth_ai/learning/prompts/random_search.py +0 -249
  289. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  290. synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
  291. synth_ai/rl/secrets.py +0 -19
  292. synth_ai/scripts/verify_rewards.py +0 -100
  293. synth_ai/tracing/__init__.py +0 -30
  294. synth_ai/tracing_v1/__init__.py +0 -33
  295. synth_ai/tracing_v3/turso/__init__.py +0 -25
  296. synth_ai/tracing_v3/turso/manager.py +0 -838
  297. synth_ai/zyk/__init__.py +0 -30
  298. synth_ai-0.2.9.dev7.dist-info/METADATA +0 -131
  299. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  300. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  301. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  302. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  303. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  304. /synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
  305. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  306. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  307. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  308. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  309. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  310. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  311. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  312. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  313. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  314. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  315. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  316. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  317. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  318. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  319. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  320. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  321. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  322. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  323. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  324. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  325. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/WHEEL +0 -0
  326. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/entry_points.txt +0 -0
  327. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/licenses/LICENSE +0 -0
@@ -1,249 +0,0 @@
1
- """
2
- Random-search prompt optimizer (BootstrapFewShotWithRandomSearch), DSPy-inspired.
3
-
4
- Implements the high-level pseudocode of DSPy's Random Search optimizer in a
5
- provider-agnostic, modular style. You can plug in your own student/program and
6
- metric, and this module will explore baselines and bootstrapped few-shot variants.
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- import contextlib
12
- import random
13
- from collections.abc import Callable, Sequence
14
- from dataclasses import dataclass
15
- from typing import Any
16
-
17
- # ---------------------------
18
- # Protocol-like expectations (duck-typed)
19
- # ---------------------------
20
-
21
-
22
class _ProgramLike:
    """Duck-typed sketch of the program interface the optimizers call.

    The copy and demo hooks are identity no-ops that hand back ``self``;
    ``run`` has no default behaviour and must be supplied by a real program.
    """

    def reset_copy(self) -> "_ProgramLike":
        """Zero-shot copy hook; this base version returns the same object."""
        return self

    def deepcopy(self) -> "_ProgramLike":
        """Deep-copy hook; this base version returns the same object."""
        return self

    def with_demos(self, demos: list[tuple[Any, Any]]) -> "_ProgramLike":
        """Demo-attachment hook; this base version ignores ``demos``."""
        return self

    def run(self, x: Any) -> Any:
        """Execute the program on ``x``; the base version always raises."""
        raise NotImplementedError
34
-
35
-
36
- # ---------------------------
37
- # Helpers and lightweight components
38
- # ---------------------------
39
-
40
-
41
@dataclass
class EvalResult:
    """Outcome of scoring one program over a dataset."""

    # Aggregate score for the dataset (the built-in ``evaluate`` stores the mean).
    score: float
    # Individual metric values, one per evaluated example.
    subscores: list[float]
45
-
46
-
47
def evaluate(
    program: _ProgramLike, dataset: Sequence[tuple[Any, Any]], metric: Callable[[Any, Any], float]
) -> EvalResult:
    """Score ``program`` on every ``(input, target)`` pair in ``dataset``.

    Each input is passed through ``program.run`` and compared to its target
    with ``metric``. The returned ``EvalResult`` carries the mean metric value
    and the per-example values. The divisor is clamped to 1, so an empty
    dataset yields a score of 0 instead of a division error.
    """
    per_example = [metric(program.run(inp), target) for inp, target in dataset]
    mean_score = sum(per_example) / max(1, len(per_example))
    return EvalResult(mean_score, per_example)
54
-
55
-
56
class LabeledFewShot:
    """Few-shot compiler that attaches up to ``k`` labeled training examples."""

    def __init__(self, k: int):
        # Upper bound on the number of demos handed to the program.
        self.k = k

    def compile(
        self, student: _ProgramLike, trainset: Sequence[tuple[Any, Any]], sample: bool = True
    ) -> _ProgramLike:
        """Return a copy of ``student`` carrying at most ``k`` demos.

        The copy comes from ``student.deepcopy`` when that attribute exists,
        otherwise from ``student.reset_copy``. With ``sample=True`` the demo
        pool is shuffled with the module-level ``random`` generator first;
        with ``sample=False`` the leading examples are used in their given
        order. ``trainset`` itself is never reordered.
        """
        make_copy = getattr(student, "deepcopy", student.reset_copy)
        clone = make_copy()
        pool = list(trainset)
        if sample:
            random.shuffle(pool)
        take = min(self.k, len(pool))
        return clone.with_demos(pool[:take])
69
-
70
-
71
class BootstrapFewShot:
    """Build few-shot demos from training examples the program already solves.

    A copy of the student is run on the training examples; every example whose
    metric value passes the acceptance test becomes a bootstrapped demo, up to
    ``max_bootstrapped_demos``. Optionally, ``max_labeled_demos`` randomly
    sampled labeled examples are appended afterwards.

    Args:
        metric: Callable ``(prediction, target) -> float``.
        metric_threshold: When ``None`` an example is accepted only if the
            metric equals 1; otherwise it is accepted when the metric is
            ``>= metric_threshold``.
        max_bootstrapped_demos: Cap on bootstrapped demos; ``<= 0`` disables
            bootstrapping and only labeled demos are attached.
        max_labeled_demos: Number of labeled demos to sample (``<= 0`` for none).
        teacher_settings: Stored on the instance; not consulted by ``compile``.
        max_rounds: Maximum passes over the examples that are still unsolved.
        seed: Optional seed for the internal RNG. ``None`` (the default) keeps
            the historical behaviour of seeding from system entropy.
    """

    def __init__(
        self,
        *,
        metric: Callable[[Any, Any], float],
        metric_threshold: float | None = None,
        max_bootstrapped_demos: int = 8,
        max_labeled_demos: int = 0,
        teacher_settings: dict[str, Any] | None = None,
        max_rounds: int = 1,
        seed: int | None = None,
    ):
        self.metric = metric
        self.metric_threshold = metric_threshold
        self.max_bootstrapped_demos = max_bootstrapped_demos
        self.max_labeled_demos = max_labeled_demos
        self.teacher_settings = teacher_settings or {}
        self.max_rounds = max_rounds
        self.seed = seed

    def _passes(self, score: float) -> bool:
        """Return True when ``score`` qualifies an example as a demo."""
        if self.metric_threshold is None:
            return score == 1
        return score >= self.metric_threshold

    def _labeled_demos(
        self, rng: random.Random, examples: list[tuple[Any, Any]]
    ) -> list[tuple[Any, Any]]:
        """Sample up to ``max_labeled_demos`` labeled examples without replacement."""
        if self.max_labeled_demos <= 0:
            return []
        return rng.sample(examples, k=min(self.max_labeled_demos, len(examples)))

    def compile(
        self,
        student: _ProgramLike,
        teacher: _ProgramLike | None,
        trainset: Sequence[tuple[Any, Any]],
    ) -> _ProgramLike:
        """Return a copy of ``student`` equipped with bootstrapped (+ labeled) demos.

        ``teacher`` is accepted for interface compatibility but is not used;
        the student copy itself produces the predictions. ``trainset`` is
        copied, never mutated.
        """
        p = getattr(student, "deepcopy", student.reset_copy)()
        rng = random.Random(self.seed)
        examples = list(trainset)

        # Bootstrapping disabled: labeled-only few-shot.
        if self.max_bootstrapped_demos <= 0:
            return p.with_demos(self._labeled_demos(rng, examples))

        boot: list[tuple[Any, Any]] = []
        # Track examples by position so later rounds retry only the ones that
        # have not been accepted yet; re-scanning the full set every round
        # would add the same passing example once per round.
        pending = list(range(len(examples)))
        for _ in range(self.max_rounds):
            rng.shuffle(pending)
            unsolved: list[int] = []
            for i in pending:
                x, y = examples[i]
                if self._passes(self.metric(p.run(x), y)):
                    boot.append((x, y))
                    if len(boot) >= self.max_bootstrapped_demos:
                        break
                else:
                    unsolved.append(i)
            if len(boot) >= self.max_bootstrapped_demos or not unsolved:
                break
            pending = unsolved

        return p.with_demos(boot + self._labeled_demos(rng, examples))
125
-
126
-
127
- # ---------------------------
128
- # Random-search compile (BootstrapFewShotWithRandomSearch)
129
- # ---------------------------
130
-
131
-
132
@dataclass
class Candidate:
    """One evaluated program variant produced during the random search."""

    # Aggregate validation score and the per-example values behind it.
    score: float
    subscores: list[float]
    # Seed that selected this variant; random_search_compile reserves
    # -3, -2 and -1 for its fixed baseline candidates.
    seed: int
    # The compiled program that was evaluated.
    program: _ProgramLike
138
-
139
-
140
def random_search_compile(
    student: _ProgramLike,
    trainset: Sequence[tuple[Any, Any]],
    valset: Sequence[tuple[Any, Any]],
    metric: Callable[[Any, Any], float],
    *,
    max_bootstrapped_demos: int = 8,
    max_labeled_demos: int = 4,
    max_rounds: int = 2,
    num_candidate_programs: int = 16,
    stop_at_score: float | None = None,
    evaluate_fn: Callable[
        [_ProgramLike, Sequence[tuple[Any, Any]], Callable[[Any, Any], float]], EvalResult
    ]
    | None = None,
    on_candidate_evaluated: Callable[[int, float, EvalResult, dict[str, Any]], None] | None = None,
) -> tuple[_ProgramLike, list[dict[str, Any]]]:
    """Search over few-shot variants of ``student`` and keep the best on ``valset``.

    Candidates are tried in this order:

    * seed ``-3``: zero-shot copy of the student;
    * seed ``-2``: labeled few-shot with ``max_labeled_demos`` demos;
    * seed ``-1``: bootstrapped few-shot using the full ``max_bootstrapped_demos``;
    * seeds ``0 .. num_candidate_programs - 1``: bootstrapped few-shot on a
      shuffled copy of ``trainset`` with a random demo budget in
      ``[1, max_bootstrapped_demos]``.

    Args:
        student: Program to optimise; it is copied, not modified.
        trainset: ``(input, target)`` pairs used to build demos.
        valset: ``(input, target)`` pairs used to score each candidate.
        metric: Callable ``(prediction, target) -> float``.
        max_bootstrapped_demos: Upper bound on bootstrapped demos per candidate.
        max_labeled_demos: Number of labeled demos per candidate.
        max_rounds: Forwarded to ``BootstrapFewShot``.
        num_candidate_programs: Number of randomised candidates after the
            three fixed baselines.
        stop_at_score: Stop as soon as the best score reaches this value.
        evaluate_fn: Optional replacement for the module's ``evaluate``.
        on_candidate_evaluated: Optional callback receiving
            ``(1-based index, score, result, intervention)`` after every
            candidate. Exceptions it raises are suppressed.

    Returns:
        ``(best_program, records)`` where ``records`` has one dict per evaluated
        candidate with keys ``"score"``, ``"subscores"`` and ``"intervention"``.
    """
    best_program: _ProgramLike | None = None
    best_score = float("-inf")
    candidates: list[Candidate] = []
    records: list[dict[str, Any]] = []

    # Negative seeds are the fixed baselines: zero-shot, labeled few-shot,
    # bootstrapped few-shot.
    seeds = [-3, -2, -1, *range(num_candidate_programs)]

    rng = random.Random(0)
    for idx, seed in enumerate(seeds):
        train_copy = list(trainset)

        if seed == -3:
            program = getattr(student, "reset_copy", student.deepcopy)()
        elif seed == -2:
            program = LabeledFewShot(k=max_labeled_demos).compile(student, train_copy, sample=True)
        else:
            if seed >= 0:
                rng.shuffle(train_copy)
            if max_bootstrapped_demos <= 0:
                size = 0
            elif seed == -1:
                size = max_bootstrapped_demos
            else:
                size = rng.randint(1, max_bootstrapped_demos)
            program = BootstrapFewShot(
                metric=metric,
                metric_threshold=None,
                max_bootstrapped_demos=size,
                max_labeled_demos=max_labeled_demos,
                teacher_settings={},
                max_rounds=max_rounds,
            ).compile(student, teacher=None, trainset=train_copy)

        res = (
            evaluate_fn(program, valset, metric)
            if evaluate_fn
            else evaluate(program, valset, metric)
        )
        cand = Candidate(score=res.score, subscores=res.subscores, seed=seed, program=program)
        candidates.append(cand)

        # Record an intervention summary for reproducibility.
        intervention: dict[str, Any] = {"seed": seed}
        if hasattr(program, "demos"):
            try:
                intervention["demos"] = program.demos  # type: ignore
            except Exception:
                intervention["demos"] = None
        if seed == -3:
            intervention["kind"] = "zero_shot"
            intervention["label"] = "zero-shot"
        elif seed == -2:
            intervention["kind"] = "labeled_few_shot"
            intervention["label"] = f"labeled-{max_labeled_demos}"
        else:
            intervention["kind"] = "bootstrapped_few_shot"
            # NOTE: the label reports the configured maximum, not the demo
            # budget sampled for this particular candidate.
            intervention["label"] = f"boot-b{max_bootstrapped_demos}-l{max_labeled_demos}"
        records.append(
            {
                "score": cand.score,
                "subscores": cand.subscores,
                "intervention": intervention,
            }
        )

        if res.score > best_score:
            best_score, best_program = res.score, program

        # Notify before the early-stop check so the callback also sees the
        # candidate that ends the search.
        if on_candidate_evaluated is not None:
            with contextlib.suppress(Exception):
                on_candidate_evaluated(idx + 1, res.score, res, intervention)

        if stop_at_score is not None and best_score >= stop_at_score:
            break

    # Attach the ranked candidates for inspection, but only on programs that
    # already expose a ``candidate_programs`` attribute.
    if hasattr(best_program, "candidate_programs"):
        with contextlib.suppress(Exception):
            best_program.candidate_programs = sorted(  # type: ignore[union-attr]
                candidates, key=lambda c: c.score, reverse=True
            )

    if best_program is None:
        # No candidate beat -inf (e.g. every score was NaN). Compare against
        # None explicitly: a valid program object may be falsy, and the
        # student itself is not callable.
        clone = getattr(student, "deepcopy", None)
        best_program = clone() if callable(clone) else student

    return best_program, records
243
-
244
-
245
# Names exported by ``from <module> import *``.
__all__ = ["random_search_compile", "LabeledFewShot", "BootstrapFewShot"]
@@ -1,172 +0,0 @@
1
- """
2
- Example: MIPROv2-style optimizer on Banking77 using Groq gpt-oss-20b.
3
-
4
- Requires:
5
- - .env with GROQ_API_KEY
6
- - datasets
7
-
8
- Run:
9
- - uv run -q python -m synth_ai.learning.prompts.run_mipro_banking77
10
- """
11
-
12
- from __future__ import annotations
13
-
14
- import asyncio
15
- import json
16
- import os
17
- import random
18
- import time
19
- from collections.abc import Sequence
20
- from pathlib import Path
21
- from typing import Any
22
-
23
- from datasets import load_dataset
24
- from dotenv import load_dotenv
25
- from synth_ai.learning.prompts.mipro import ProgramAdapter, evaluate_program, mipro_v2_compile
26
- from synth_ai.lm.core.main_v3 import LM, build_messages
27
-
28
-
29
def choose_label(pred: str, label_names: list[str]) -> str:
    """Map a raw model prediction onto one of ``label_names``.

    A case-insensitive exact match (after trimming surrounding whitespace)
    wins outright. Otherwise each label is scored by how many of its
    whitespace-separated words occur as substrings of the normalized
    prediction, and the highest-scoring label is returned; ties go to the
    label listed first. A falsy ``pred`` is treated as an empty string.
    """
    cleaned = (pred or "").strip().lower()
    by_lower = {name.lower(): name for name in label_names}
    exact = by_lower.get(cleaned)
    if exact is not None:
        return exact

    def overlap(label: str) -> int:
        # Substring containment, not token equality: "card" counts for "discard".
        return len([word for word in label.lower().split() if word in cleaned])

    return max(label_names, key=overlap)
40
-
41
-
42
def accuracy(pred: str, gold: str, labels: list[str]) -> float:
    """Return 1.0 when ``pred`` resolves via ``choose_label`` to ``gold``, else 0.0."""
    if choose_label(pred, labels) == gold:
        return 1.0
    return 0.0
44
-
45
-
46
class NaivePromptModel:
    """Toy prompt model that returns simple instruction variants."""

    def generate_instructions(self, ctx: dict[str, Any], k: int = 8) -> list[str]:
        """Return up to ``k`` canned instruction strings in random order.

        ``ctx`` is accepted but not consulted. The eight variants share one
        base sentence and are shuffled with the module-level ``random``
        generator before being truncated to ``k``.
        """
        base = "Classify the Banking77 intent and return exactly one label."
        suffixes = (
            "",
            " Be concise.",
            " Use examples to guide your reasoning.",
            " Return only the label text.",
            " Follow the label names strictly.",
            " Do not include explanations.",
            " Think about similar intents before answering.",
            " Carefully consider the user's message.",
        )
        pool = [base + suffix for suffix in suffixes]
        random.shuffle(pool)
        return pool[:k]
63
-
64
-
65
def build_run_fn(lm: LM, label_names: list[str]):
    """Create the ``run_fn(x, _model=None) -> str`` callable handed to the adapter.

    The returned function reads its instructions and demos from the
    module-level ``state_ref`` dict at call time, so ``state_ref`` must have
    been assigned (``main`` does this) before ``run_fn`` is invoked.
    ``label_names`` is accepted but not used here.
    """

    async def _ask(messages) -> str:
        resp = await lm.respond_async(messages=messages)
        return (resp.raw_response or "").strip()

    def run_fn(x: str, _model: Any | None = None) -> str:
        # Rebuild the prompt on every call from the current shared state.
        system_prompt = state_ref.get("instructions", {}).get(
            "main", "You are an intent classifier for Banking77."
        )
        demo_blocks = [f"Input: {a}\nLabel: {b}" for a, b in state_ref.get("demos", [])]
        examples = "\n".join(demo_blocks)
        prefix = f"Examples:\n{examples}\n\n" if examples else ""
        user_prompt = prefix + f"Message: {x}\nLabel:"
        messages = build_messages(
            system_prompt, user_prompt, images_bytes=None, model_name=lm.model
        )
        # One event loop per call; this must not be invoked from inside a
        # running loop, because asyncio.run refuses to nest.
        return asyncio.run(_ask(messages))

    return run_fn
84
-
85
-
86
def set_instructions(new_instr: dict[str, str], state: dict[str, Any]) -> dict[str, Any]:
    """Merge ``new_instr`` over the instructions stored in ``state``.

    A fresh dict is stored under ``state["instructions"]`` (the previous
    mapping is left untouched); keys in ``new_instr`` win on conflict. The
    same ``state`` object is returned.
    """
    merged = dict(state.get("instructions", {}))
    merged.update(new_instr)
    state["instructions"] = merged
    return state
89
-
90
-
91
def set_demos(demos: list[tuple[str, str]], state: dict[str, Any]) -> dict[str, Any]:
    """Store a shallow copy of ``demos`` under ``state["demos"]`` and return ``state``."""
    snapshot = [*demos]
    state["demos"] = snapshot
    return state
94
-
95
-
96
def main():
    """Run the MIPROv2-style Banking77 demo end to end.

    Loads environment variables, builds the LM from ``MODEL`` / ``VENDOR``
    (defaulting to ``openai/gpt-oss-20b`` on ``groq``), carves 80 training and
    80 validation items out of the shuffled Banking77 test split, runs
    ``mipro_v2_compile``, prints the best program's validation accuracy and
    writes the trial records to a timestamped JSON file next to this module.
    """
    load_dotenv()
    random.seed(0)

    model = os.getenv("MODEL", "openai/gpt-oss-20b")
    vendor = os.getenv("VENDOR", "groq")
    lm = LM(model=model, vendor=vendor, temperature=0.0)

    print("Loading Banking77 dataset (train/dev split of test for demo)...")
    ds = load_dataset("banking77")
    label_names: list[str] = ds["test"].features["label"].names  # type: ignore

    all_items = [(r["text"], label_names[int(r["label"])]) for r in ds["test"]]
    random.shuffle(all_items)
    trainset: Sequence[tuple[str, str]] = all_items[:80]
    valset: Sequence[tuple[str, str]] = all_items[80:160]

    # The function returned by build_run_fn reads this module-level dict at
    # call time, so it has to be published as a global before the optimizer
    # starts invoking the adapter.
    global state_ref
    state_ref = {
        "instructions": {"main": "You are an intent classifier for Banking77."},
        "demos": [],
    }
    adapter = ProgramAdapter(
        run_fn=build_run_fn(lm, label_names),
        state=state_ref,
        _predictors=["main"],
        set_instructions=set_instructions,
        set_demos=set_demos,
    )

    def metric(yhat: str, y: str) -> float:
        return accuracy(yhat, y, label_names)

    prompt_model = NaivePromptModel()
    task_model = None  # not used in this minimal example

    print("Running MIPROv2-style optimizer...")
    best, records = mipro_v2_compile(
        student=adapter,
        trainset=trainset,
        valset=valset,
        metric=metric,
        prompt_model=prompt_model,
        task_model=task_model,
        max_bootstrapped_demos=6,
        max_labeled_demos=4,
        num_candidates=6,
        num_trials=12,
        minibatch=True,
        minibatch_size=16,
        minibatch_full_eval_steps=3,
        seed=0,
    )

    res = evaluate_program(best, valset, metric)
    # Subscores are floats; ":g" prints the correct-count as "45" rather
    # than "45.0".
    correct = sum(res.subscores)
    print(
        f"Best program accuracy on val: {res.score:.2%} ({correct:g}/{len(res.subscores)})"
    )

    out = {
        "context": {
            "model": model,
            "vendor": vendor,
            "train_size": len(trainset),
            "val_size": len(valset),
        },
        "trials": records,
    }
    out_dir = Path(__file__).parent
    fname = str(out_dir / f"mipro_banking77_{int(time.time())}.json")
    # Explicit UTF-8: the default text encoding depends on the platform locale.
    with open(fname, "w", encoding="utf-8") as f:
        json.dump(out, f, indent=2)
    print(f"Saved trial records to {fname}")
169
-
170
-
171
# Run the demo only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()