synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (351)
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +23 -17
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/configs/eval_base_qwen.toml +1 -1
  19. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  20. examples/rl/download_dataset.py +26 -10
  21. examples/rl/run_eval.py +53 -52
  22. examples/rl/run_rl_and_save.py +29 -12
  23. examples/rl/task_app/math_single_step.py +180 -41
  24. examples/rl/task_app/math_task_app.py +14 -6
  25. examples/sft/README.md +139 -0
  26. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  27. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  28. examples/sft/evaluate.py +117 -0
  29. examples/sft/export_dataset.py +117 -0
  30. examples/sft/generate_traces.py +162 -0
  31. examples/swe/__init__.py +12 -0
  32. examples/swe/task_app/README.md +105 -0
  33. examples/swe/task_app/__init__.py +2 -0
  34. examples/swe/task_app/grpo_swe_mini.py +571 -0
  35. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  36. examples/swe/task_app/hosted/README.md +173 -0
  37. examples/swe/task_app/hosted/__init__.py +5 -0
  38. examples/swe/task_app/hosted/branching.py +143 -0
  39. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  40. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  41. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  42. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  43. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  44. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  45. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  46. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  47. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  50. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  51. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  52. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  53. examples/swe/task_app/hosted/hosted_app.py +204 -0
  54. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  55. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  56. examples/swe/task_app/hosted/main.py +100 -0
  57. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  58. examples/swe/task_app/hosted/registry.py +195 -0
  59. examples/swe/task_app/hosted/rollout.py +1869 -0
  60. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  61. examples/swe/task_app/hosted/storage/volume.py +211 -0
  62. examples/swe/task_app/hosted/test_agents.py +161 -0
  63. examples/swe/task_app/hosted/test_service.py +137 -0
  64. examples/swe/task_app/hosted/utils.py +62 -0
  65. examples/vlm/README.md +68 -0
  66. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  67. examples/vlm/crafter_image_only_agent.py +207 -0
  68. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  69. examples/vlm/filter_image_rows.py +63 -0
  70. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  71. examples/warming_up_to_rl/analyze_trace_db.py +12 -10
  72. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  73. examples/warming_up_to_rl/export_trace_sft.py +218 -36
  74. examples/warming_up_to_rl/groq_test.py +15 -8
  75. examples/warming_up_to_rl/manage_secrets.py +29 -25
  76. examples/warming_up_to_rl/readme.md +9 -2
  77. examples/warming_up_to_rl/run_eval.py +137 -61
  78. examples/warming_up_to_rl/run_fft_and_save.py +131 -60
  79. examples/warming_up_to_rl/run_local_rollout.py +88 -39
  80. examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
  81. examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
  82. examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
  83. examples/warming_up_to_rl/run_rl_and_save.py +35 -12
  84. examples/warming_up_to_rl/run_rollout_remote.py +44 -19
  85. examples/warming_up_to_rl/task_app/README.md +6 -2
  86. examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
  87. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  111. synth/__init__.py +14 -0
  112. synth_ai/__init__.py +20 -4
  113. synth_ai/api/models/supported.py +376 -0
  114. synth_ai/api/train/builders.py +157 -26
  115. synth_ai/api/train/cli.py +213 -57
  116. synth_ai/api/train/config_finder.py +65 -5
  117. synth_ai/api/train/env_resolver.py +33 -15
  118. synth_ai/api/train/pollers.py +13 -4
  119. synth_ai/api/train/supported_algos.py +139 -0
  120. synth_ai/api/train/task_app.py +5 -3
  121. synth_ai/api/train/utils.py +33 -48
  122. synth_ai/cli/__init__.py +19 -4
  123. synth_ai/cli/_modal_wrapper.py +28 -0
  124. synth_ai/cli/_typer_patch.py +49 -0
  125. synth_ai/cli/balance.py +2 -3
  126. synth_ai/cli/calc.py +1 -1
  127. synth_ai/cli/demo.py +21 -6
  128. synth_ai/cli/recent.py +2 -2
  129. synth_ai/cli/rl_demo.py +77 -17
  130. synth_ai/cli/root.py +116 -39
  131. synth_ai/cli/status.py +2 -2
  132. synth_ai/cli/task_apps.py +1699 -259
  133. synth_ai/cli/traces.py +7 -4
  134. synth_ai/cli/turso.py +73 -0
  135. synth_ai/cli/watch.py +12 -18
  136. synth_ai/core/experiment.py +0 -2
  137. synth_ai/demo_registry.py +68 -31
  138. synth_ai/demos/core/cli.py +516 -194
  139. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  140. synth_ai/demos/demo_task_apps/core.py +64 -28
  141. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  142. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
  143. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  144. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  145. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  146. synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
  147. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  148. synth_ai/environments/examples/bandit/engine.py +12 -4
  149. synth_ai/environments/examples/bandit/taskset.py +4 -4
  150. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  151. synth_ai/environments/reproducibility/tree.py +5 -6
  152. synth_ai/environments/service/app.py +11 -12
  153. synth_ai/environments/service/core_routes.py +10 -9
  154. synth_ai/environments/stateful/engine.py +1 -1
  155. synth_ai/environments/tasks/core.py +1 -0
  156. synth_ai/environments/tasks/filters.py +5 -6
  157. synth_ai/environments/tasks/utils.py +4 -5
  158. synth_ai/evals/base.py +0 -2
  159. synth_ai/handshake.py +11 -9
  160. synth_ai/http.py +1 -1
  161. synth_ai/http_client.py +43 -11
  162. synth_ai/inference/__init__.py +0 -2
  163. synth_ai/inference/client.py +20 -6
  164. synth_ai/jobs/client.py +103 -78
  165. synth_ai/learning/__init__.py +41 -6
  166. synth_ai/learning/algorithms.py +14 -0
  167. synth_ai/learning/client.py +121 -29
  168. synth_ai/learning/config.py +2 -40
  169. synth_ai/learning/constants.py +0 -2
  170. synth_ai/learning/ft_client.py +4 -56
  171. synth_ai/learning/health.py +13 -7
  172. synth_ai/learning/jobs.py +43 -47
  173. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  174. synth_ai/learning/rl/client.py +267 -0
  175. synth_ai/learning/rl/config.py +31 -0
  176. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  177. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  178. synth_ai/learning/rl/secrets.py +13 -0
  179. synth_ai/learning/rl_client.py +2 -253
  180. synth_ai/learning/sft/__init__.py +29 -0
  181. synth_ai/learning/sft/client.py +68 -0
  182. synth_ai/learning/sft/config.py +270 -0
  183. synth_ai/learning/sft/data.py +295 -0
  184. synth_ai/learning/sse.py +25 -26
  185. synth_ai/learning/validators.py +25 -24
  186. synth_ai/lm/__init__.py +21 -47
  187. synth_ai/task/__init__.py +26 -27
  188. synth_ai/task/apps/__init__.py +18 -19
  189. synth_ai/task/auth.py +35 -23
  190. synth_ai/task/client.py +15 -13
  191. synth_ai/task/contracts.py +37 -35
  192. synth_ai/task/datasets.py +9 -6
  193. synth_ai/task/errors.py +11 -10
  194. synth_ai/task/health.py +17 -11
  195. synth_ai/task/json.py +58 -24
  196. synth_ai/task/proxy.py +15 -14
  197. synth_ai/task/rubrics.py +22 -15
  198. synth_ai/task/server.py +43 -17
  199. synth_ai/task/tracing_utils.py +12 -7
  200. synth_ai/task/validators.py +0 -1
  201. synth_ai/task/vendors.py +5 -7
  202. synth_ai/tracing_v3/__init__.py +2 -0
  203. synth_ai/tracing_v3/abstractions.py +21 -4
  204. synth_ai/tracing_v3/db_config.py +26 -1
  205. synth_ai/tracing_v3/decorators.py +18 -15
  206. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  207. synth_ai/tracing_v3/hooks.py +6 -4
  208. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  209. synth_ai/tracing_v3/replica_sync.py +1 -0
  210. synth_ai/tracing_v3/session_tracer.py +63 -16
  211. synth_ai/tracing_v3/storage/base.py +89 -1
  212. synth_ai/tracing_v3/storage/config.py +21 -8
  213. synth_ai/tracing_v3/storage/factory.py +10 -8
  214. synth_ai/tracing_v3/storage/utils.py +4 -2
  215. synth_ai/tracing_v3/turso/daemon.py +7 -2
  216. synth_ai/tracing_v3/turso/models.py +5 -2
  217. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  218. synth_ai/tracing_v3/utils.py +4 -3
  219. synth_ai/v0/api/__init__.py +8 -0
  220. synth_ai/v0/api/models/__init__.py +8 -0
  221. synth_ai/v0/api/models/supported.py +8 -0
  222. synth_ai/v0/config/__init__.py +15 -0
  223. synth_ai/v0/config/base_url.py +12 -0
  224. synth_ai/v0/lm/__init__.py +51 -0
  225. synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
  226. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  227. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  228. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  229. synth_ai/{lm → v0/lm}/config.py +6 -1
  230. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  231. synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
  232. synth_ai/{lm → v0/lm}/core/main.py +19 -7
  233. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  234. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
  235. synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
  236. synth_ai/{lm → v0/lm}/overrides.py +4 -4
  237. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  238. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  239. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  240. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  241. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
  242. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  243. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  244. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
  245. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
  246. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
  247. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
  248. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  249. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  250. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  251. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  252. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  253. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  254. synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
  255. synth_ai/v0/tracing/upload.py +32 -135
  256. synth_ai/v0/tracing_v3/__init__.py +10 -0
  257. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  258. synth_ai/v0/tracing_v3/decorators.py +3 -0
  259. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  260. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  261. synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
  262. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -262
  263. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
  264. examples/common_old/backend.py +0 -21
  265. examples/evals_old/README.md +0 -98
  266. examples/evals_old/__init__.py +0 -6
  267. examples/evals_old/compare_models.py +0 -1037
  268. examples/evals_old/example_log.md +0 -145
  269. examples/evals_old/run_demo.sh +0 -126
  270. examples/evals_old/trace_analysis.py +0 -270
  271. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  272. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  273. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  274. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
  275. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  276. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  277. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  278. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  279. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  280. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
  281. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  282. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  283. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
  284. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  285. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  286. examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
  287. examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
  288. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  289. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  290. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
  291. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
  292. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
  293. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  294. examples/finetuning_old/synth_qwen_v1/util.py +0 -147
  295. examples/rl_old/task_app.py +0 -962
  296. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  297. examples/warming_up_to_rl/old/notes.md +0 -73
  298. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  299. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  300. synth_ai/experimental/synth_oss.py +0 -446
  301. synth_ai/install_sqld.sh +0 -40
  302. synth_ai/learning/filtering.py +0 -0
  303. synth_ai/learning/offline/dpo.py +0 -0
  304. synth_ai/learning/offline/providers.py +0 -7
  305. synth_ai/learning/offline/sft.py +0 -0
  306. synth_ai/learning/offline/shared.py +0 -0
  307. synth_ai/learning/online/grpo.py +0 -0
  308. synth_ai/learning/online/irft.py +0 -0
  309. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  310. synth_ai/learning/prompts/gepa.py +0 -0
  311. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  312. synth_ai/learning/prompts/mipro.py +0 -289
  313. synth_ai/learning/prompts/random_search.py +0 -246
  314. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  315. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  316. synth_ai/rl/secrets.py +0 -19
  317. synth_ai/scripts/verify_rewards.py +0 -100
  318. synth_ai/tracing/__init__.py +0 -30
  319. synth_ai/tracing_v1/__init__.py +0 -33
  320. synth_ai/tracing_v3/turso/__init__.py +0 -25
  321. synth_ai/tracing_v3/turso/manager.py +0 -774
  322. synth_ai/zyk/__init__.py +0 -30
  323. synth_ai-0.2.9.dev5.dist-info/METADATA +0 -131
  324. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  325. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  326. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  327. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  328. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  329. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  330. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  331. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  332. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  333. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  334. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  335. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  336. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  337. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  338. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  339. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  340. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  341. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  342. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  343. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  344. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  345. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  346. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  347. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  348. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  349. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
  350. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
  351. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
synth_ai/learning/prompts/run_random_search_banking77.py DELETED
@@ -1,324 +0,0 @@
1
- """
2
- Example: Random Search optimizer on Banking77 using Groq gpt-oss-20b.
3
-
4
- Requires:
5
- - .env with GROQ_API_KEY
6
- - datasets (`uv add datasets` if needed)
7
-
8
- Run:
9
- - uv run -q python -m synth_ai.learning.prompts.run_random_search_banking77
10
- """
11
-
12
- from __future__ import annotations
13
-
14
- import asyncio
15
- import json
16
- import os
17
- import random
18
- import time
19
- from collections.abc import Sequence
20
- from dataclasses import dataclass, replace
21
- from pathlib import Path
22
- from types import SimpleNamespace
23
- from typing import Any
24
-
25
- from datasets import load_dataset
26
- from dotenv import load_dotenv
27
- from synth_ai.learning.prompts.random_search import random_search_compile
28
- from synth_ai.lm.core.main_v3 import LM, build_messages
29
- from tqdm import tqdm
30
-
31
-
32
- def choose_label(pred: str, label_names: list[str]) -> str:
33
- norm = (pred or "").strip().lower()
34
- d = {ln.lower(): ln for ln in label_names}
35
- if norm in d:
36
- return d[norm]
37
-
38
- def score(cand: str) -> int:
39
- c = cand.lower()
40
- return sum(1 for w in c.split() if w in norm)
41
-
42
- return max(label_names, key=score)
43
-
44
-
45
- def accuracy(pred: str, gold: str, labels: list[str]) -> float:
46
- return 1.0 if choose_label(pred, labels) == gold else 0.0
47
-
48
-
49
- @dataclass
50
- class StudentProgram:
51
- lm: LM
52
- label_names: list[str]
53
- instruction: str
54
- demos: list[tuple[str, str]]
55
-
56
- def reset_copy(self):
57
- return replace(self, instruction=self.instruction, demos=list(self.demos))
58
-
59
- def deepcopy(self):
60
- return replace(self, instruction=str(self.instruction), demos=list(self.demos))
61
-
62
- def with_demos(self, demos: list[tuple[str, str]]):
63
- return replace(self, demos=list(demos))
64
-
65
- def run(self, x: str) -> str:
66
- # Build a prompt with optional demos
67
- examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in self.demos)
68
- sys = self.instruction or "You are an intent classifier for Banking77."
69
- user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
70
- messages = build_messages(sys, user, images_bytes=None, model_name=self.lm.model)
71
-
72
- # Call LM synchronously via asyncio
73
- async def _call():
74
- resp = await self.lm.respond_async(messages=messages)
75
- return (resp.raw_response or "").strip()
76
-
77
- return asyncio.run(_call())
78
-
79
- async def _apredict(self, x: str):
80
- examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in self.demos)
81
- sys = self.instruction or "You are an intent classifier for Banking77."
82
- user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
83
- messages = build_messages(sys, user, images_bytes=None, model_name=self.lm.model)
84
- resp = await self.lm.respond_async(messages=messages)
85
- return (resp.raw_response or "").strip(), (resp.usage or {})
86
-
87
-
88
- def main():
89
- load_dotenv()
90
- random.seed(0)
91
-
92
- model = os.getenv("MODEL", "openai/gpt-oss-20b")
93
- vendor = os.getenv("VENDOR", "groq")
94
- lm = LM(model=model, vendor=vendor, temperature=0.0)
95
-
96
- print("Loading Banking77 dataset (train/dev split of test for demo)...")
97
- ds = load_dataset("banking77")
98
- label_names: list[str] = ds["test"].features["label"].names # type: ignore
99
-
100
- # Create small train/val from the test split for speed
101
- all_items = [(r["text"], label_names[int(r["label"])]) for r in ds["test"]]
102
- random.shuffle(all_items)
103
- trainset: Sequence[tuple[str, str]] = all_items[:40]
104
- valset: Sequence[tuple[str, str]] = all_items[40:60] # 20 examples
105
-
106
- student = StudentProgram(
107
- lm=lm,
108
- label_names=label_names,
109
- instruction="You are an intent classifier for the Banking77 dataset. Return exactly one label.",
110
- demos=[],
111
- )
112
-
113
- def metric(yhat: str, y: str) -> float:
114
- return accuracy(yhat, y, label_names)
115
-
116
- total_candidates = 3 + 3 # zero-shot, labeled few-shot, bootstrapped + 3 random seeds
117
- print(
118
- f"Running Random Search optimizer ({total_candidates} candidates, parallel eval of 20 questions)..."
119
- )
120
-
121
- def eval_parallel(program: StudentProgram, dataset: Sequence[tuple[str, str]], metric_fn):
122
- async def _run():
123
- xs = [x for x, _ in dataset]
124
- ys = [y for _, y in dataset]
125
- preds: list[Optional[str]] = [None] * len(xs)
126
- sem = asyncio.Semaphore(int(os.getenv("CONCURRENCY", "5")))
127
-
128
- async def worker(i: int, x: str, y: str):
129
- import time
130
-
131
- t_start = time.monotonic()
132
- try:
133
- async with sem:
134
- pred, usage = await asyncio.wait_for(
135
- program._apredict(x),
136
- timeout=float(os.getenv("TIMEOUT_S", "45")),
137
- )
138
- t_end = time.monotonic()
139
- return i, y, pred, t_start, t_end, usage or {}
140
- except asyncio.CancelledError:
141
- # Respect cancellation but return a placeholder record so scheduler can proceed
142
- t_end = time.monotonic()
143
- return i, y, "", t_start, t_end, {}
144
- except Exception:
145
- t_end = time.monotonic()
146
- return i, y, "", t_start, t_end, {}
147
-
148
- tasks = [asyncio.create_task(worker(i, x, y)) for i, (x, y) in enumerate(zip(xs, ys, strict=False))]
149
- correct_sum = 0.0
150
- processed = 0
151
- import statistics
152
- import time
153
-
154
- durations: list[float] = []
155
- in_tok_sum = 0
156
- out_tok_sum = 0
157
- in_tok_count = 0
158
- out_tok_count = 0
159
- details: list[dict[str, Any]] = []
160
- t_batch_start = time.monotonic()
161
- deadline = float(os.getenv("BATCH_DEADLINE_S", "20"))
162
- with tqdm(total=len(tasks), desc="Rollouts", leave=False) as pbar:
163
- pending = set(tasks)
164
- # Process completions until all done or deadline reached
165
- while pending:
166
- elapsed = time.monotonic() - t_batch_start
167
- remaining = max(0.0, deadline - elapsed)
168
- if remaining <= 0.0:
169
- # Cancel any remaining
170
- for t in pending:
171
- t.cancel()
172
- done, _ = await asyncio.wait(pending, return_when=asyncio.ALL_COMPLETED)
173
- # Record canceled as zeros
174
- for task in done:
175
- try:
176
- i, y_true, pred, t_start, t_end, usage = task.result()
177
- except Exception:
178
- # Unknown index: we can't recover; skip as it's canceled before start
179
- continue
180
- # Already processed ones shouldn't be in pending; skip
181
- break
182
- # Wait for at least one completion within remaining time (polling granularity <= 1s)
183
- timeout = min(1.0, remaining)
184
- done, pending = await asyncio.wait(
185
- pending, timeout=timeout, return_when=asyncio.FIRST_COMPLETED
186
- )
187
- import contextlib
188
- for task in done:
189
- try:
190
- i, y_true, pred, t_start, t_end, usage = task.result()
191
- except BaseException:
192
- # Treat as failure/cancelled
193
- continue
194
- durations.append(max(0.0, t_end - t_start))
195
- preds[i] = pred
196
- processed += 1
197
- with contextlib.suppress(Exception):
198
- correct_sum += float(metric_fn(pred, y_true))
199
- with contextlib.suppress(Exception):
200
- pt = usage.get("prompt_tokens") or usage.get("input_tokens")
201
- ct = usage.get("completion_tokens") or usage.get("output_tokens")
202
- if isinstance(pt, (int, float)):
203
- in_tok_sum += int(pt)
204
- in_tok_count += 1
205
- if isinstance(ct, (int, float)):
206
- out_tok_sum += int(ct)
207
- out_tok_count += 1
208
- details.append(
209
- {
210
- "index": i,
211
- "seconds": max(0.0, t_end - t_start),
212
- "score": float(metric_fn(pred, y_true)),
213
- "usage": {
214
- "prompt_tokens": usage.get("prompt_tokens")
215
- or usage.get("input_tokens"),
216
- "completion_tokens": usage.get("completion_tokens")
217
- or usage.get("output_tokens"),
218
- },
219
- }
220
- )
221
- pbar.update(1)
222
- med = statistics.median(durations) if durations else 0.0
223
- mx = max(durations) if durations else 0.0
224
- avg_in = (in_tok_sum / in_tok_count) if in_tok_count else 0.0
225
- avg_out = (out_tok_sum / out_tok_count) if out_tok_count else 0.0
226
- pbar.set_postfix(
227
- {
228
- "acc": f"{(correct_sum / processed):.2f}",
229
- "done": f"{processed}/{len(tasks)}",
230
- "med_s": f"{med:.1f}",
231
- "max_s": f"{mx:.1f}",
232
- "tin": f"{avg_in:.1f}",
233
- "tout": f"{avg_out:.1f}",
234
- }
235
- )
236
- # Compute score only from completed/successful rollouts (drop timeouts/cancelled)
237
- subs = [float(d.get("score", 0.0)) for d in details]
238
- result = SimpleNamespace(score=(sum(subs) / max(1, len(subs))), subscores=subs)
239
- result.details = details
240
- result.mean_in = (in_tok_sum / in_tok_count) if in_tok_count else 0.0
241
- result.mean_out = (out_tok_sum / out_tok_count) if out_tok_count else 0.0
242
- return result
243
-
244
- return asyncio.run(_run())
245
-
246
- pbar = tqdm(total=total_candidates, desc="Candidates")
247
- candidate_eval_details: dict[int, Any] = {}
248
-
249
- def on_cand(idx: int, score: float, res, intervention):
250
- pbar.update(1)
251
- pbar.set_postfix({"score": f"{score:.2f}"})
252
- # store per-instance details (for apples-to-apples)
253
- import contextlib
254
- with contextlib.suppress(Exception):
255
- candidate_eval_details[idx] = {
256
- "score": score,
257
- "mean_in": getattr(res, "mean_in", None),
258
- "mean_out": getattr(res, "mean_out", None),
259
- "instances": getattr(res, "details", None),
260
- }
261
- # visible summary line per candidate
262
- kind = (
263
- intervention.get("kind", "candidate") if isinstance(intervention, dict) else "candidate"
264
- )
265
- label = intervention.get("label") if isinstance(intervention, dict) else None
266
- seed = intervention.get("seed") if isinstance(intervention, dict) else None
267
- processed = len(getattr(res, "details", []) or [])
268
- from tqdm import tqdm as _tqdm
269
-
270
- _tqdm.write(
271
- f"Candidate {idx}/{total_candidates} [{kind}{'' if label is None else f', label={label}'}{'' if seed is None else f', seed={seed}'}]: "
272
- f"score={score:.2f} | mean tin/tout={getattr(res, 'mean_in', 0):.1f}/{getattr(res, 'mean_out', 0):.1f} | N={processed}"
273
- )
274
-
275
- best, records = random_search_compile(
276
- student=student,
277
- trainset=trainset,
278
- valset=valset,
279
- metric=metric,
280
- evaluate_fn=eval_parallel,
281
- max_bootstrapped_demos=0,
282
- max_labeled_demos=4,
283
- max_rounds=2,
284
- num_candidate_programs=3,
285
- on_candidate_evaluated=on_cand,
286
- )
287
- pbar.close()
288
-
289
- # Evaluate best on holdout (valset) with parallel rollouts
290
- print("Evaluating best program on val (parallel rollouts)...")
291
- best_res = eval_parallel(best, valset, metric)
292
- correct = int(round(best_res.score * max(1, len(best_res.subscores))))
293
- print(
294
- "Best program accuracy on val: "
295
- f"{correct}/{len(valset)} ({best_res.score:.2%}) "
296
- f"| mean tokens in/out: {getattr(best_res, 'mean_in', 0):.1f}/{getattr(best_res, 'mean_out', 0):.1f}"
297
- )
298
-
299
- # Save per-candidate scores and interventions
300
- out = {
301
- "context": {
302
- "model": model,
303
- "vendor": vendor,
304
- "train_size": len(trainset),
305
- "val_size": len(valset),
306
- },
307
- "candidates": records,
308
- "candidate_eval_details": candidate_eval_details,
309
- "best_eval_details": {
310
- "score": best_res.score,
311
- "mean_in": getattr(best_res, "mean_in", None),
312
- "mean_out": getattr(best_res, "mean_out", None),
313
- "instances": getattr(best_res, "details", None),
314
- },
315
- }
316
- out_dir = Path(__file__).parent
317
- fname = str(out_dir / f"random_search_banking77_{int(time.time())}.json")
318
- with open(fname, "w") as f:
319
- json.dump(out, f, indent=2)
320
- print(f"Saved candidate records to {fname}")
321
-
322
-
323
- if __name__ == "__main__":
324
- main()
synth_ai/rl/secrets.py DELETED
@@ -1,19 +0,0 @@
1
- from __future__ import annotations
2
-
3
- """Helpers for generating RL environment credentials."""
4
-
5
- import secrets
6
-
7
- __all__ = ["mint_environment_api_key"]
8
-
9
-
10
- def mint_environment_api_key() -> str:
11
- """Mint a random ENVIRONMENT_API_KEY value.
12
-
13
- The current format is 64 hexadecimal characters (256 bits of entropy), which
14
- matches the shell helpers used by the RL examples. This keeps the token easy
15
- to copy while remaining suitably strong for authentication.
16
- """
17
-
18
- # secrets.token_hex(32) → 32 random bytes rendered as 64 hex characters.
19
- return secrets.token_hex(32)
@@ -1,100 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Verify reward persistence in a traces database.
4
-
5
- Usage:
6
- uv run python -m synth_ai.scripts.verify_rewards --db /path/to/db.sqlite --min-reward 1
7
- """
8
-
9
- import argparse
10
- import asyncio
11
- import os
12
- from typing import Dict
13
-
14
- from sqlalchemy import text
15
-
16
- from synth_ai.tracing_v3.turso.manager import AsyncSQLTraceManager
17
-
18
-
19
- async def verify(db_path: str, min_reward: int) -> int:
20
- db_url = db_path
21
- if not db_url.startswith("sqlite+aiosqlite:///"):
22
- db_url = f"sqlite+aiosqlite:///{os.path.abspath(db_path)}"
23
-
24
- mgr = AsyncSQLTraceManager(db_url=db_url)
25
- await mgr.initialize()
26
-
27
- try:
28
- async with mgr.session() as session:
29
- # Sessions with outcome_rewards
30
- q_good = text(
31
- """
32
- SELECT session_id, MAX(total_reward) as total_reward
33
- FROM outcome_rewards
34
- GROUP BY session_id
35
- """
36
- )
37
- res = await session.execute(q_good)
38
- outcomes = {row[0]: int(row[1]) for row in res.fetchall()}
39
-
40
- # Sessions without outcome_rewards
41
- q_missing = text(
42
- """
43
- SELECT s.session_id
44
- FROM session_traces s
45
- LEFT JOIN outcome_rewards o ON s.session_id = o.session_id
46
- WHERE o.session_id IS NULL
47
- """
48
- )
49
- res2 = await session.execute(q_missing)
50
- missing = [row[0] for row in res2.fetchall()]
51
-
52
- # Aggregate event_rewards per session (informational)
53
- q_event = text(
54
- """
55
- SELECT session_id, COALESCE(SUM(reward_value), 0.0) as sum_rewards
56
- FROM event_rewards
57
- GROUP BY session_id
58
- """
59
- )
60
- res3 = await session.execute(q_event)
61
- event_sums: Dict[str, float] = {row[0]: float(row[1]) for row in res3.fetchall()}
62
-
63
- print(f"Sessions with outcome_rewards: {len(outcomes)}")
64
- print(f"Sessions missing outcome_rewards: {len(missing)}")
65
- if missing:
66
- print("Missing session_ids:", ", ".join(missing[:10]) + (" ..." if len(missing) > 10 else ""))
67
-
68
- # Threshold check
69
- qualifying = {sid: r for sid, r in outcomes.items() if r >= min_reward}
70
- print(f"Sessions with total_reward >= {min_reward}: {len(qualifying)}")
71
-
72
- # Show a small comparison snapshot
73
- sample = list(qualifying.items())[:5]
74
- for sid, tot in sample:
75
- er = event_sums.get(sid, 0.0)
76
- print(f" {sid}: outcome={tot}, sum(event_rewards)={er:.2f}")
77
-
78
- # Exit non-zero if any sessions are missing outcome rewards
79
- if missing:
80
- return 2
81
- if min_reward > 0 and not qualifying:
82
- return 3
83
- return 0
84
- finally:
85
- await mgr.close()
86
-
87
-
88
- def main() -> int:
89
- ap = argparse.ArgumentParser(description="Verify reward persistence in traces DB")
90
- ap.add_argument("--db", required=True, help="Path to traces SQLite DB (aiosqlite)")
91
- ap.add_argument("--min-reward", type=int, default=0, help="Minimum total_reward to consider qualifying")
92
- args = ap.parse_args()
93
-
94
- return asyncio.run(verify(args.db, args.min_reward))
95
-
96
-
97
- if __name__ == "__main__":
98
- raise SystemExit(main())
99
-
100
-
@@ -1,30 +0,0 @@
1
- import importlib as _importlib
2
- import sys as _sys
3
-
4
- _pkg = _importlib.import_module("synth_ai.v0.tracing")
5
- _sys.modules[__name__] = _pkg
6
-
7
- _SUBMODULES = [
8
- "abstractions",
9
- "base_client",
10
- "client_manager",
11
- "config",
12
- "context",
13
- "decorators",
14
- "immediate_client",
15
- "local",
16
- "log_client_base",
17
- "retry_queue",
18
- "trackers",
19
- "upload",
20
- "utils",
21
- ]
22
- for _m in _SUBMODULES:
23
- _sys.modules[f"{__name__}.{_m}"] = _importlib.import_module(f"synth_ai.v0.tracing.{_m}")
24
-
25
- _events_pkg = _importlib.import_module("synth_ai.v0.tracing.events")
26
- _sys.modules[f"{__name__}.events"] = _events_pkg
27
- for _m in ["manage", "scope", "store"]:
28
- _sys.modules[f"{__name__}.events.{_m}"] = _importlib.import_module(
29
- f"synth_ai.v0.tracing.events.{_m}"
30
- )
@@ -1,33 +0,0 @@
1
- import importlib as _importlib
2
- import sys as _sys
3
-
4
- # Forward top-level package
5
- _pkg = _importlib.import_module("synth_ai.v0.tracing_v1")
6
- _sys.modules[__name__] = _pkg
7
-
8
- # Explicitly forward submodules so `synth_ai.tracing_v1.X` works
9
- _SUBMODULES = [
10
- "abstractions",
11
- "base_client",
12
- "client_manager",
13
- "config",
14
- "context",
15
- "decorators",
16
- "immediate_client",
17
- "local",
18
- "log_client_base",
19
- "retry_queue",
20
- "trackers",
21
- "upload",
22
- "utils",
23
- ]
24
- for _m in _SUBMODULES:
25
- _sys.modules[f"{__name__}.{_m}"] = _importlib.import_module(f"synth_ai.v0.tracing_v1.{_m}")
26
-
27
- # Forward events package and its submodules
28
- _events_pkg = _importlib.import_module("synth_ai.v0.tracing_v1.events")
29
- _sys.modules[f"{__name__}.events"] = _events_pkg
30
- for _m in ["manage", "scope", "store"]:
31
- _sys.modules[f"{__name__}.events.{_m}"] = _importlib.import_module(
32
- f"synth_ai.v0.tracing_v1.events.{_m}"
33
- )
@@ -1,25 +0,0 @@
1
- """Turso/sqld implementation for tracing v3."""
2
-
3
- from .manager import AsyncSQLTraceManager
4
- from .models import (
5
- Base,
6
- Event,
7
- Experiment,
8
- Message,
9
- SessionTimestep,
10
- SessionTrace,
11
- System,
12
- SystemVersion,
13
- )
14
-
15
- __all__ = [
16
- "AsyncSQLTraceManager",
17
- "Base",
18
- "SessionTrace",
19
- "SessionTimestep",
20
- "Event",
21
- "Message",
22
- "Experiment",
23
- "System",
24
- "SystemVersion",
25
- ]