synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (327)
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +8 -11
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/run_eval.py +36 -37
  19. examples/rl/run_rl_and_save.py +5 -5
  20. examples/rl/task_app/math_single_step.py +65 -43
  21. examples/rl/task_app/math_task_app.py +3 -3
  22. examples/sft/README.md +139 -0
  23. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  24. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  25. examples/sft/evaluate.py +117 -0
  26. examples/sft/export_dataset.py +117 -0
  27. examples/sft/generate_traces.py +162 -0
  28. examples/swe/__init__.py +12 -0
  29. examples/swe/task_app/README.md +105 -0
  30. examples/swe/task_app/__init__.py +2 -0
  31. examples/swe/task_app/grpo_swe_mini.py +571 -0
  32. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  33. examples/swe/task_app/hosted/README.md +173 -0
  34. examples/swe/task_app/hosted/__init__.py +5 -0
  35. examples/swe/task_app/hosted/branching.py +143 -0
  36. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  37. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  38. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  39. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  40. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  41. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  42. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  43. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  44. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  45. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  46. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  47. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  50. examples/swe/task_app/hosted/hosted_app.py +204 -0
  51. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  52. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  53. examples/swe/task_app/hosted/main.py +100 -0
  54. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  55. examples/swe/task_app/hosted/registry.py +195 -0
  56. examples/swe/task_app/hosted/rollout.py +1869 -0
  57. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  58. examples/swe/task_app/hosted/storage/volume.py +211 -0
  59. examples/swe/task_app/hosted/test_agents.py +161 -0
  60. examples/swe/task_app/hosted/test_service.py +137 -0
  61. examples/swe/task_app/hosted/utils.py +62 -0
  62. examples/vlm/README.md +68 -0
  63. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  64. examples/vlm/crafter_image_only_agent.py +207 -0
  65. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  66. examples/vlm/filter_image_rows.py +63 -0
  67. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  68. examples/warming_up_to_rl/analyze_trace_db.py +5 -5
  69. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  70. examples/warming_up_to_rl/export_trace_sft.py +78 -21
  71. examples/warming_up_to_rl/groq_test.py +4 -4
  72. examples/warming_up_to_rl/manage_secrets.py +13 -18
  73. examples/warming_up_to_rl/run_eval.py +42 -44
  74. examples/warming_up_to_rl/run_fft_and_save.py +11 -16
  75. examples/warming_up_to_rl/run_local_rollout.py +1 -3
  76. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
  77. examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
  78. examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
  79. examples/warming_up_to_rl/run_rl_and_save.py +5 -6
  80. examples/warming_up_to_rl/run_rollout_remote.py +8 -10
  81. examples/warming_up_to_rl/task_app/README.md +6 -2
  82. examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
  83. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  101. synth/__init__.py +14 -0
  102. synth_ai/__init__.py +26 -4
  103. synth_ai/api/models/supported.py +376 -0
  104. synth_ai/api/train/builders.py +128 -21
  105. synth_ai/api/train/cli.py +80 -64
  106. synth_ai/api/train/config_finder.py +7 -2
  107. synth_ai/api/train/env_resolver.py +1 -1
  108. synth_ai/api/train/pollers.py +2 -1
  109. synth_ai/api/train/supported_algos.py +139 -0
  110. synth_ai/api/train/task_app.py +1 -2
  111. synth_ai/api/train/utils.py +13 -44
  112. synth_ai/cli/__init__.py +8 -0
  113. synth_ai/cli/_modal_wrapper.py +28 -0
  114. synth_ai/cli/_typer_patch.py +49 -0
  115. synth_ai/cli/balance.py +1 -2
  116. synth_ai/cli/calc.py +1 -1
  117. synth_ai/cli/demo.py +2 -1
  118. synth_ai/cli/recent.py +2 -2
  119. synth_ai/cli/rl_demo.py +2 -1
  120. synth_ai/cli/root.py +11 -13
  121. synth_ai/cli/status.py +2 -2
  122. synth_ai/cli/task_apps.py +529 -179
  123. synth_ai/cli/traces.py +6 -4
  124. synth_ai/cli/watch.py +12 -18
  125. synth_ai/demo_registry.py +1 -1
  126. synth_ai/demos/core/cli.py +36 -43
  127. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  128. synth_ai/demos/demo_task_apps/core.py +17 -25
  129. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
  130. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  131. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
  132. synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
  133. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  134. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  135. synth_ai/environments/reproducibility/tree.py +2 -5
  136. synth_ai/environments/service/app.py +11 -12
  137. synth_ai/environments/service/core_routes.py +4 -7
  138. synth_ai/environments/stateful/engine.py +1 -1
  139. synth_ai/environments/tasks/core.py +1 -0
  140. synth_ai/environments/tasks/filters.py +5 -6
  141. synth_ai/environments/tasks/utils.py +4 -5
  142. synth_ai/handshake.py +9 -9
  143. synth_ai/http.py +1 -1
  144. synth_ai/http_client.py +18 -10
  145. synth_ai/inference/client.py +15 -5
  146. synth_ai/jobs/client.py +78 -83
  147. synth_ai/learning/__init__.py +41 -6
  148. synth_ai/learning/algorithms.py +14 -0
  149. synth_ai/learning/client.py +91 -24
  150. synth_ai/learning/config.py +2 -38
  151. synth_ai/learning/ft_client.py +4 -59
  152. synth_ai/learning/health.py +5 -6
  153. synth_ai/learning/jobs.py +31 -47
  154. synth_ai/{rl → learning/rl}/__init__.py +14 -4
  155. synth_ai/learning/rl/client.py +267 -0
  156. synth_ai/learning/rl/config.py +31 -0
  157. synth_ai/{rl → learning/rl}/contracts.py +5 -8
  158. synth_ai/{rl → learning/rl}/env_keys.py +39 -15
  159. synth_ai/learning/rl/secrets.py +13 -0
  160. synth_ai/learning/rl_client.py +2 -281
  161. synth_ai/learning/sft/__init__.py +29 -0
  162. synth_ai/learning/sft/client.py +68 -0
  163. synth_ai/learning/sft/config.py +270 -0
  164. synth_ai/learning/sft/data.py +295 -0
  165. synth_ai/learning/sse.py +25 -24
  166. synth_ai/learning/validators.py +25 -28
  167. synth_ai/lm/__init__.py +21 -47
  168. synth_ai/main.py +6 -0
  169. synth_ai/task/__init__.py +25 -27
  170. synth_ai/task/apps/__init__.py +7 -8
  171. synth_ai/task/auth.py +8 -8
  172. synth_ai/task/client.py +14 -14
  173. synth_ai/task/contracts.py +36 -35
  174. synth_ai/task/datasets.py +6 -5
  175. synth_ai/task/errors.py +10 -10
  176. synth_ai/task/health.py +17 -9
  177. synth_ai/task/json.py +58 -23
  178. synth_ai/task/proxy.py +13 -9
  179. synth_ai/task/rubrics.py +16 -15
  180. synth_ai/task/server.py +12 -12
  181. synth_ai/task/tracing_utils.py +4 -4
  182. synth_ai/task/vendors.py +5 -6
  183. synth_ai/tracing_v3/__init__.py +2 -0
  184. synth_ai/tracing_v3/abstractions.py +21 -4
  185. synth_ai/tracing_v3/decorators.py +18 -16
  186. synth_ai/tracing_v3/hooks.py +5 -5
  187. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  188. synth_ai/tracing_v3/session_tracer.py +40 -14
  189. synth_ai/tracing_v3/storage/base.py +85 -0
  190. synth_ai/tracing_v3/storage/config.py +21 -8
  191. synth_ai/tracing_v3/storage/factory.py +10 -7
  192. synth_ai/tracing_v3/storage/utils.py +4 -2
  193. synth_ai/tracing_v3/turso/daemon.py +7 -2
  194. synth_ai/tracing_v3/turso/models.py +2 -2
  195. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  196. synth_ai/tracing_v3/utils.py +4 -4
  197. synth_ai/v0/api/__init__.py +8 -0
  198. synth_ai/v0/api/models/__init__.py +8 -0
  199. synth_ai/v0/api/models/supported.py +8 -0
  200. synth_ai/v0/config/__init__.py +15 -0
  201. synth_ai/v0/config/base_url.py +12 -0
  202. synth_ai/v0/lm/__init__.py +51 -0
  203. synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
  204. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  205. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  206. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  207. synth_ai/{lm → v0/lm}/config.py +6 -1
  208. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  209. synth_ai/{lm → v0/lm}/core/main.py +6 -6
  210. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  211. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
  212. synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
  213. synth_ai/{lm → v0/lm}/overrides.py +2 -2
  214. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  215. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  216. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  217. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  218. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
  219. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  220. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  221. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
  222. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
  223. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
  224. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
  225. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  226. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  227. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  228. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  229. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  230. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  231. synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
  232. synth_ai/v0/tracing_v3/__init__.py +10 -0
  233. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  234. synth_ai/v0/tracing_v3/decorators.py +3 -0
  235. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  236. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  237. synth_ai-0.2.9.dev9.dist-info/METADATA +191 -0
  238. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/RECORD +268 -238
  239. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/top_level.txt +1 -0
  240. examples/common_old/backend.py +0 -20
  241. examples/evals_old/README.md +0 -98
  242. examples/evals_old/__init__.py +0 -6
  243. examples/evals_old/compare_models.py +0 -1038
  244. examples/evals_old/example_log.md +0 -145
  245. examples/evals_old/run_demo.sh +0 -126
  246. examples/evals_old/trace_analysis.py +0 -270
  247. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  248. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  249. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  250. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
  251. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  252. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  253. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  254. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  255. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  256. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
  257. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  258. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  259. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
  260. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  261. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  262. examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
  263. examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
  264. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  265. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  266. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
  267. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
  268. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
  269. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  270. examples/finetuning_old/synth_qwen_v1/util.py +0 -152
  271. examples/rl_old/task_app.py +0 -1131
  272. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  273. examples/warming_up_to_rl/old/notes.md +0 -73
  274. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  275. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  276. synth_ai/experimental/synth_oss.py +0 -445
  277. synth_ai/learning/filtering.py +0 -0
  278. synth_ai/learning/offline/dpo.py +0 -0
  279. synth_ai/learning/offline/providers.py +0 -7
  280. synth_ai/learning/offline/sft.py +0 -0
  281. synth_ai/learning/offline/shared.py +0 -0
  282. synth_ai/learning/online/grpo.py +0 -0
  283. synth_ai/learning/online/irft.py +0 -0
  284. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  285. synth_ai/learning/prompts/gepa.py +0 -0
  286. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
  287. synth_ai/learning/prompts/mipro.py +0 -289
  288. synth_ai/learning/prompts/random_search.py +0 -249
  289. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  290. synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
  291. synth_ai/rl/secrets.py +0 -19
  292. synth_ai/scripts/verify_rewards.py +0 -100
  293. synth_ai/tracing/__init__.py +0 -30
  294. synth_ai/tracing_v1/__init__.py +0 -33
  295. synth_ai/tracing_v3/turso/__init__.py +0 -25
  296. synth_ai/tracing_v3/turso/manager.py +0 -838
  297. synth_ai/zyk/__init__.py +0 -30
  298. synth_ai-0.2.9.dev7.dist-info/METADATA +0 -131
  299. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  300. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  301. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  302. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  303. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  304. /synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
  305. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  306. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  307. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  308. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  309. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  310. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  311. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  312. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  313. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  314. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  315. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  316. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  317. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  318. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  319. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  320. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  321. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  322. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  323. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  324. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  325. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/WHEEL +0 -0
  326. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/entry_points.txt +0 -0
  327. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/licenses/LICENSE +0 -0
synth_ai/learning/prompts/run_random_search_banking77.py DELETED
@@ -1,329 +0,0 @@
1
- """
2
- Example: Random Search optimizer on Banking77 using Groq gpt-oss-20b.
3
-
4
- Requires:
5
- - .env with GROQ_API_KEY
6
- - datasets (`uv add datasets` if needed)
7
-
8
- Run:
9
- - uv run -q python -m synth_ai.learning.prompts.run_random_search_banking77
10
- """
11
-
12
- from __future__ import annotations
13
-
14
- import asyncio
15
- import json
16
- import os
17
- import random
18
- import time
19
- from collections.abc import Sequence
20
- from dataclasses import dataclass, replace
21
- from pathlib import Path
22
- from types import SimpleNamespace
23
- from typing import Any
24
-
25
- from datasets import load_dataset
26
- from dotenv import load_dotenv
27
- from synth_ai.learning.prompts.random_search import random_search_compile
28
- from synth_ai.lm.core.main_v3 import LM, build_messages
29
- from tqdm import tqdm
30
-
31
-
32
- def choose_label(pred: str, label_names: list[str]) -> str:
33
- norm = (pred or "").strip().lower()
34
- d = {ln.lower(): ln for ln in label_names}
35
- if norm in d:
36
- return d[norm]
37
-
38
- def score(cand: str) -> int:
39
- c = cand.lower()
40
- return sum(1 for w in c.split() if w in norm)
41
-
42
- return max(label_names, key=score)
43
-
44
-
45
- def accuracy(pred: str, gold: str, labels: list[str]) -> float:
46
- return 1.0 if choose_label(pred, labels) == gold else 0.0
47
-
48
-
49
- @dataclass
50
- class StudentProgram:
51
- lm: LM
52
- label_names: list[str]
53
- instruction: str
54
- demos: list[tuple[str, str]]
55
-
56
- def reset_copy(self):
57
- return replace(self, instruction=self.instruction, demos=list(self.demos))
58
-
59
- def deepcopy(self):
60
- return replace(self, instruction=str(self.instruction), demos=list(self.demos))
61
-
62
- def with_demos(self, demos: list[tuple[str, str]]):
63
- return replace(self, demos=list(demos))
64
-
65
- def run(self, x: str) -> str:
66
- # Build a prompt with optional demos
67
- examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in self.demos)
68
- sys = self.instruction or "You are an intent classifier for Banking77."
69
- user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
70
- messages = build_messages(sys, user, images_bytes=None, model_name=self.lm.model)
71
-
72
- # Call LM synchronously via asyncio
73
- async def _call():
74
- resp = await self.lm.respond_async(messages=messages)
75
- return (resp.raw_response or "").strip()
76
-
77
- return asyncio.run(_call())
78
-
79
- async def _apredict(self, x: str):
80
- examples = "\n".join(f"Input: {a}\nLabel: {b}" for a, b in self.demos)
81
- sys = self.instruction or "You are an intent classifier for Banking77."
82
- user = (f"Examples:\n{examples}\n\n" if examples else "") + f"Message: {x}\nLabel:"
83
- messages = build_messages(sys, user, images_bytes=None, model_name=self.lm.model)
84
- resp = await self.lm.respond_async(messages=messages)
85
- return (resp.raw_response or "").strip(), (resp.usage or {})
86
-
87
-
88
- def main():
89
- load_dotenv()
90
- random.seed(0)
91
-
92
- model = os.getenv("MODEL", "openai/gpt-oss-20b")
93
- vendor = os.getenv("VENDOR", "groq")
94
- lm = LM(model=model, vendor=vendor, temperature=0.0)
95
-
96
- print("Loading Banking77 dataset (train/dev split of test for demo)...")
97
- ds = load_dataset("banking77")
98
- label_names: list[str] = ds["test"].features["label"].names # type: ignore
99
-
100
- # Create small train/val from the test split for speed
101
- all_items = [(r["text"], label_names[int(r["label"])]) for r in ds["test"]]
102
- random.shuffle(all_items)
103
- trainset: Sequence[tuple[str, str]] = all_items[:40]
104
- valset: Sequence[tuple[str, str]] = all_items[40:60] # 20 examples
105
-
106
- student = StudentProgram(
107
- lm=lm,
108
- label_names=label_names,
109
- instruction="You are an intent classifier for the Banking77 dataset. Return exactly one label.",
110
- demos=[],
111
- )
112
-
113
- def metric(yhat: str, y: str) -> float:
114
- return accuracy(yhat, y, label_names)
115
-
116
- total_candidates = 3 + 3 # zero-shot, labeled few-shot, bootstrapped + 3 random seeds
117
- print(
118
- f"Running Random Search optimizer ({total_candidates} candidates, parallel eval of 20 questions)..."
119
- )
120
-
121
- def eval_parallel(program: StudentProgram, dataset: Sequence[tuple[str, str]], metric_fn):
122
- async def _run():
123
- xs = [x for x, _ in dataset]
124
- ys = [y for _, y in dataset]
125
- preds: list[Optional[str]] = [None] * len(xs)
126
- sem = asyncio.Semaphore(int(os.getenv("CONCURRENCY", "5")))
127
-
128
- async def worker(i: int, x: str, y: str):
129
- import time
130
-
131
- t_start = time.monotonic()
132
- try:
133
- async with sem:
134
- pred, usage = await asyncio.wait_for(
135
- program._apredict(x),
136
- timeout=float(os.getenv("TIMEOUT_S", "45")),
137
- )
138
- t_end = time.monotonic()
139
- return i, y, pred, t_start, t_end, usage or {}
140
- except asyncio.CancelledError:
141
- # Respect cancellation but return a placeholder record so scheduler can proceed
142
- t_end = time.monotonic()
143
- return i, y, "", t_start, t_end, {}
144
- except Exception:
145
- t_end = time.monotonic()
146
- return i, y, "", t_start, t_end, {}
147
-
148
- tasks = [
149
- asyncio.create_task(worker(i, x, y))
150
- for i, (x, y) in enumerate(zip(xs, ys, strict=False))
151
- ]
152
- correct_sum = 0.0
153
- processed = 0
154
- import statistics
155
- import time
156
-
157
- durations: list[float] = []
158
- in_tok_sum = 0
159
- out_tok_sum = 0
160
- in_tok_count = 0
161
- out_tok_count = 0
162
- details: list[dict[str, Any]] = []
163
- t_batch_start = time.monotonic()
164
- deadline = float(os.getenv("BATCH_DEADLINE_S", "20"))
165
- with tqdm(total=len(tasks), desc="Rollouts", leave=False) as pbar:
166
- pending = set(tasks)
167
- # Process completions until all done or deadline reached
168
- while pending:
169
- elapsed = time.monotonic() - t_batch_start
170
- remaining = max(0.0, deadline - elapsed)
171
- if remaining <= 0.0:
172
- # Cancel any remaining
173
- for t in pending:
174
- t.cancel()
175
- done, _ = await asyncio.wait(pending, return_when=asyncio.ALL_COMPLETED)
176
- # Record canceled as zeros
177
- for task in done:
178
- try:
179
- i, y_true, pred, t_start, t_end, usage = task.result()
180
- except Exception:
181
- # Unknown index: we can't recover; skip as it's canceled before start
182
- continue
183
- # Already processed ones shouldn't be in pending; skip
184
- break
185
- # Wait for at least one completion within remaining time (polling granularity <= 1s)
186
- timeout = min(1.0, remaining)
187
- done, pending = await asyncio.wait(
188
- pending, timeout=timeout, return_when=asyncio.FIRST_COMPLETED
189
- )
190
- import contextlib
191
-
192
- for task in done:
193
- try:
194
- i, y_true, pred, t_start, t_end, usage = task.result()
195
- except BaseException:
196
- # Treat as failure/cancelled
197
- continue
198
- durations.append(max(0.0, t_end - t_start))
199
- preds[i] = pred
200
- processed += 1
201
- with contextlib.suppress(Exception):
202
- correct_sum += float(metric_fn(pred, y_true))
203
- with contextlib.suppress(Exception):
204
- pt = usage.get("prompt_tokens") or usage.get("input_tokens")
205
- ct = usage.get("completion_tokens") or usage.get("output_tokens")
206
- if isinstance(pt, (int, float)):
207
- in_tok_sum += int(pt)
208
- in_tok_count += 1
209
- if isinstance(ct, (int, float)):
210
- out_tok_sum += int(ct)
211
- out_tok_count += 1
212
- details.append(
213
- {
214
- "index": i,
215
- "seconds": max(0.0, t_end - t_start),
216
- "score": float(metric_fn(pred, y_true)),
217
- "usage": {
218
- "prompt_tokens": usage.get("prompt_tokens")
219
- or usage.get("input_tokens"),
220
- "completion_tokens": usage.get("completion_tokens")
221
- or usage.get("output_tokens"),
222
- },
223
- }
224
- )
225
- pbar.update(1)
226
- med = statistics.median(durations) if durations else 0.0
227
- mx = max(durations) if durations else 0.0
228
- avg_in = (in_tok_sum / in_tok_count) if in_tok_count else 0.0
229
- avg_out = (out_tok_sum / out_tok_count) if out_tok_count else 0.0
230
- pbar.set_postfix(
231
- {
232
- "acc": f"{(correct_sum / processed):.2f}",
233
- "done": f"{processed}/{len(tasks)}",
234
- "med_s": f"{med:.1f}",
235
- "max_s": f"{mx:.1f}",
236
- "tin": f"{avg_in:.1f}",
237
- "tout": f"{avg_out:.1f}",
238
- }
239
- )
240
- # Compute score only from completed/successful rollouts (drop timeouts/cancelled)
241
- subs = [float(d.get("score", 0.0)) for d in details]
242
- result = SimpleNamespace(score=(sum(subs) / max(1, len(subs))), subscores=subs)
243
- result.details = details
244
- result.mean_in = (in_tok_sum / in_tok_count) if in_tok_count else 0.0
245
- result.mean_out = (out_tok_sum / out_tok_count) if out_tok_count else 0.0
246
- return result
247
-
248
- return asyncio.run(_run())
249
-
250
- pbar = tqdm(total=total_candidates, desc="Candidates")
251
- candidate_eval_details: dict[int, Any] = {}
252
-
253
- def on_cand(idx: int, score: float, res, intervention):
254
- pbar.update(1)
255
- pbar.set_postfix({"score": f"{score:.2f}"})
256
- # store per-instance details (for apples-to-apples)
257
- import contextlib
258
-
259
- with contextlib.suppress(Exception):
260
- candidate_eval_details[idx] = {
261
- "score": score,
262
- "mean_in": getattr(res, "mean_in", None),
263
- "mean_out": getattr(res, "mean_out", None),
264
- "instances": getattr(res, "details", None),
265
- }
266
- # visible summary line per candidate
267
- kind = (
268
- intervention.get("kind", "candidate") if isinstance(intervention, dict) else "candidate"
269
- )
270
- label = intervention.get("label") if isinstance(intervention, dict) else None
271
- seed = intervention.get("seed") if isinstance(intervention, dict) else None
272
- processed = len(getattr(res, "details", []) or [])
273
- from tqdm import tqdm as _tqdm
274
-
275
- _tqdm.write(
276
- f"Candidate {idx}/{total_candidates} [{kind}{'' if label is None else f', label={label}'}{'' if seed is None else f', seed={seed}'}]: "
277
- f"score={score:.2f} | mean tin/tout={getattr(res, 'mean_in', 0):.1f}/{getattr(res, 'mean_out', 0):.1f} | N={processed}"
278
- )
279
-
280
- best, records = random_search_compile(
281
- student=student,
282
- trainset=trainset,
283
- valset=valset,
284
- metric=metric,
285
- evaluate_fn=eval_parallel,
286
- max_bootstrapped_demos=0,
287
- max_labeled_demos=4,
288
- max_rounds=2,
289
- num_candidate_programs=3,
290
- on_candidate_evaluated=on_cand,
291
- )
292
- pbar.close()
293
-
294
- # Evaluate best on holdout (valset) with parallel rollouts
295
- print("Evaluating best program on val (parallel rollouts)...")
296
- best_res = eval_parallel(best, valset, metric)
297
- correct = int(round(best_res.score * max(1, len(best_res.subscores))))
298
- print(
299
- "Best program accuracy on val: "
300
- f"{correct}/{len(valset)} ({best_res.score:.2%}) "
301
- f"| mean tokens in/out: {getattr(best_res, 'mean_in', 0):.1f}/{getattr(best_res, 'mean_out', 0):.1f}"
302
- )
303
-
304
- # Save per-candidate scores and interventions
305
- out = {
306
- "context": {
307
- "model": model,
308
- "vendor": vendor,
309
- "train_size": len(trainset),
310
- "val_size": len(valset),
311
- },
312
- "candidates": records,
313
- "candidate_eval_details": candidate_eval_details,
314
- "best_eval_details": {
315
- "score": best_res.score,
316
- "mean_in": getattr(best_res, "mean_in", None),
317
- "mean_out": getattr(best_res, "mean_out", None),
318
- "instances": getattr(best_res, "details", None),
319
- },
320
- }
321
- out_dir = Path(__file__).parent
322
- fname = str(out_dir / f"random_search_banking77_{int(time.time())}.json")
323
- with open(fname, "w") as f:
324
- json.dump(out, f, indent=2)
325
- print(f"Saved candidate records to {fname}")
326
-
327
-
328
- if __name__ == "__main__":
329
- main()
synth_ai/rl/secrets.py DELETED
@@ -1,19 +0,0 @@
1
- from __future__ import annotations
2
-
3
- """Helpers for generating RL environment credentials."""
4
-
5
- import secrets
6
-
7
- __all__ = ["mint_environment_api_key"]
8
-
9
-
10
- def mint_environment_api_key() -> str:
11
- """Mint a random ENVIRONMENT_API_KEY value.
12
-
13
- The current format is 64 hexadecimal characters (256 bits of entropy), which
14
- matches the shell helpers used by the RL examples. This keeps the token easy
15
- to copy while remaining suitably strong for authentication.
16
- """
17
-
18
- # secrets.token_hex(32) → 32 random bytes rendered as 64 hex characters.
19
- return secrets.token_hex(32)
@@ -1,100 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Verify reward persistence in a traces database.
4
-
5
- Usage:
6
- uv run python -m synth_ai.scripts.verify_rewards --db /path/to/db.sqlite --min-reward 1
7
- """
8
-
9
- import argparse
10
- import asyncio
11
- import os
12
- from typing import Dict
13
-
14
- from sqlalchemy import text
15
-
16
- from synth_ai.tracing_v3.turso.manager import AsyncSQLTraceManager
17
-
18
-
19
- async def verify(db_path: str, min_reward: int) -> int:
20
- db_url = db_path
21
- if not db_url.startswith("sqlite+aiosqlite:///"):
22
- db_url = f"sqlite+aiosqlite:///{os.path.abspath(db_path)}"
23
-
24
- mgr = AsyncSQLTraceManager(db_url=db_url)
25
- await mgr.initialize()
26
-
27
- try:
28
- async with mgr.session() as session:
29
- # Sessions with outcome_rewards
30
- q_good = text(
31
- """
32
- SELECT session_id, MAX(total_reward) as total_reward
33
- FROM outcome_rewards
34
- GROUP BY session_id
35
- """
36
- )
37
- res = await session.execute(q_good)
38
- outcomes = {row[0]: int(row[1]) for row in res.fetchall()}
39
-
40
- # Sessions without outcome_rewards
41
- q_missing = text(
42
- """
43
- SELECT s.session_id
44
- FROM session_traces s
45
- LEFT JOIN outcome_rewards o ON s.session_id = o.session_id
46
- WHERE o.session_id IS NULL
47
- """
48
- )
49
- res2 = await session.execute(q_missing)
50
- missing = [row[0] for row in res2.fetchall()]
51
-
52
- # Aggregate event_rewards per session (informational)
53
- q_event = text(
54
- """
55
- SELECT session_id, COALESCE(SUM(reward_value), 0.0) as sum_rewards
56
- FROM event_rewards
57
- GROUP BY session_id
58
- """
59
- )
60
- res3 = await session.execute(q_event)
61
- event_sums: Dict[str, float] = {row[0]: float(row[1]) for row in res3.fetchall()}
62
-
63
- print(f"Sessions with outcome_rewards: {len(outcomes)}")
64
- print(f"Sessions missing outcome_rewards: {len(missing)}")
65
- if missing:
66
- print("Missing session_ids:", ", ".join(missing[:10]) + (" ..." if len(missing) > 10 else ""))
67
-
68
- # Threshold check
69
- qualifying = {sid: r for sid, r in outcomes.items() if r >= min_reward}
70
- print(f"Sessions with total_reward >= {min_reward}: {len(qualifying)}")
71
-
72
- # Show a small comparison snapshot
73
- sample = list(qualifying.items())[:5]
74
- for sid, tot in sample:
75
- er = event_sums.get(sid, 0.0)
76
- print(f" {sid}: outcome={tot}, sum(event_rewards)={er:.2f}")
77
-
78
- # Exit non-zero if any sessions are missing outcome rewards
79
- if missing:
80
- return 2
81
- if min_reward > 0 and not qualifying:
82
- return 3
83
- return 0
84
- finally:
85
- await mgr.close()
86
-
87
-
88
- def main() -> int:
89
- ap = argparse.ArgumentParser(description="Verify reward persistence in traces DB")
90
- ap.add_argument("--db", required=True, help="Path to traces SQLite DB (aiosqlite)")
91
- ap.add_argument("--min-reward", type=int, default=0, help="Minimum total_reward to consider qualifying")
92
- args = ap.parse_args()
93
-
94
- return asyncio.run(verify(args.db, args.min_reward))
95
-
96
-
97
- if __name__ == "__main__":
98
- raise SystemExit(main())
99
-
100
-
@@ -1,30 +0,0 @@
1
- import importlib as _importlib
2
- import sys as _sys
3
-
4
- _pkg = _importlib.import_module("synth_ai.v0.tracing")
5
- _sys.modules[__name__] = _pkg
6
-
7
- _SUBMODULES = [
8
- "abstractions",
9
- "base_client",
10
- "client_manager",
11
- "config",
12
- "context",
13
- "decorators",
14
- "immediate_client",
15
- "local",
16
- "log_client_base",
17
- "retry_queue",
18
- "trackers",
19
- "upload",
20
- "utils",
21
- ]
22
- for _m in _SUBMODULES:
23
- _sys.modules[f"{__name__}.{_m}"] = _importlib.import_module(f"synth_ai.v0.tracing.{_m}")
24
-
25
- _events_pkg = _importlib.import_module("synth_ai.v0.tracing.events")
26
- _sys.modules[f"{__name__}.events"] = _events_pkg
27
- for _m in ["manage", "scope", "store"]:
28
- _sys.modules[f"{__name__}.events.{_m}"] = _importlib.import_module(
29
- f"synth_ai.v0.tracing.events.{_m}"
30
- )
@@ -1,33 +0,0 @@
1
- import importlib as _importlib
2
- import sys as _sys
3
-
4
- # Forward top-level package
5
- _pkg = _importlib.import_module("synth_ai.v0.tracing_v1")
6
- _sys.modules[__name__] = _pkg
7
-
8
- # Explicitly forward submodules so `synth_ai.tracing_v1.X` works
9
- _SUBMODULES = [
10
- "abstractions",
11
- "base_client",
12
- "client_manager",
13
- "config",
14
- "context",
15
- "decorators",
16
- "immediate_client",
17
- "local",
18
- "log_client_base",
19
- "retry_queue",
20
- "trackers",
21
- "upload",
22
- "utils",
23
- ]
24
- for _m in _SUBMODULES:
25
- _sys.modules[f"{__name__}.{_m}"] = _importlib.import_module(f"synth_ai.v0.tracing_v1.{_m}")
26
-
27
- # Forward events package and its submodules
28
- _events_pkg = _importlib.import_module("synth_ai.v0.tracing_v1.events")
29
- _sys.modules[f"{__name__}.events"] = _events_pkg
30
- for _m in ["manage", "scope", "store"]:
31
- _sys.modules[f"{__name__}.events.{_m}"] = _importlib.import_module(
32
- f"synth_ai.v0.tracing_v1.events.{_m}"
33
- )
@@ -1,25 +0,0 @@
1
- """Turso/sqld implementation for tracing v3."""
2
-
3
- from .manager import AsyncSQLTraceManager
4
- from .models import (
5
- Base,
6
- Event,
7
- Experiment,
8
- Message,
9
- SessionTimestep,
10
- SessionTrace,
11
- System,
12
- SystemVersion,
13
- )
14
-
15
- __all__ = [
16
- "AsyncSQLTraceManager",
17
- "Base",
18
- "SessionTrace",
19
- "SessionTimestep",
20
- "Event",
21
- "Message",
22
- "Experiment",
23
- "System",
24
- "SystemVersion",
25
- ]