synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (327)
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +8 -11
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/run_eval.py +36 -37
  19. examples/rl/run_rl_and_save.py +5 -5
  20. examples/rl/task_app/math_single_step.py +65 -43
  21. examples/rl/task_app/math_task_app.py +3 -3
  22. examples/sft/README.md +139 -0
  23. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  24. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  25. examples/sft/evaluate.py +117 -0
  26. examples/sft/export_dataset.py +117 -0
  27. examples/sft/generate_traces.py +162 -0
  28. examples/swe/__init__.py +12 -0
  29. examples/swe/task_app/README.md +105 -0
  30. examples/swe/task_app/__init__.py +2 -0
  31. examples/swe/task_app/grpo_swe_mini.py +571 -0
  32. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  33. examples/swe/task_app/hosted/README.md +173 -0
  34. examples/swe/task_app/hosted/__init__.py +5 -0
  35. examples/swe/task_app/hosted/branching.py +143 -0
  36. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  37. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  38. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  39. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  40. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  41. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  42. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  43. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  44. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  45. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  46. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  47. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  50. examples/swe/task_app/hosted/hosted_app.py +204 -0
  51. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  52. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  53. examples/swe/task_app/hosted/main.py +100 -0
  54. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  55. examples/swe/task_app/hosted/registry.py +195 -0
  56. examples/swe/task_app/hosted/rollout.py +1869 -0
  57. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  58. examples/swe/task_app/hosted/storage/volume.py +211 -0
  59. examples/swe/task_app/hosted/test_agents.py +161 -0
  60. examples/swe/task_app/hosted/test_service.py +137 -0
  61. examples/swe/task_app/hosted/utils.py +62 -0
  62. examples/vlm/README.md +68 -0
  63. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  64. examples/vlm/crafter_image_only_agent.py +207 -0
  65. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  66. examples/vlm/filter_image_rows.py +63 -0
  67. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  68. examples/warming_up_to_rl/analyze_trace_db.py +5 -5
  69. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  70. examples/warming_up_to_rl/export_trace_sft.py +78 -21
  71. examples/warming_up_to_rl/groq_test.py +4 -4
  72. examples/warming_up_to_rl/manage_secrets.py +13 -18
  73. examples/warming_up_to_rl/run_eval.py +42 -44
  74. examples/warming_up_to_rl/run_fft_and_save.py +11 -16
  75. examples/warming_up_to_rl/run_local_rollout.py +1 -3
  76. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
  77. examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
  78. examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
  79. examples/warming_up_to_rl/run_rl_and_save.py +5 -6
  80. examples/warming_up_to_rl/run_rollout_remote.py +8 -10
  81. examples/warming_up_to_rl/task_app/README.md +6 -2
  82. examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
  83. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  101. synth/__init__.py +14 -0
  102. synth_ai/__init__.py +26 -4
  103. synth_ai/api/models/supported.py +376 -0
  104. synth_ai/api/train/builders.py +128 -21
  105. synth_ai/api/train/cli.py +80 -64
  106. synth_ai/api/train/config_finder.py +7 -2
  107. synth_ai/api/train/env_resolver.py +1 -1
  108. synth_ai/api/train/pollers.py +2 -1
  109. synth_ai/api/train/supported_algos.py +139 -0
  110. synth_ai/api/train/task_app.py +1 -2
  111. synth_ai/api/train/utils.py +13 -44
  112. synth_ai/cli/__init__.py +8 -0
  113. synth_ai/cli/_modal_wrapper.py +28 -0
  114. synth_ai/cli/_typer_patch.py +49 -0
  115. synth_ai/cli/balance.py +1 -2
  116. synth_ai/cli/calc.py +1 -1
  117. synth_ai/cli/demo.py +2 -1
  118. synth_ai/cli/recent.py +2 -2
  119. synth_ai/cli/rl_demo.py +2 -1
  120. synth_ai/cli/root.py +11 -13
  121. synth_ai/cli/status.py +2 -2
  122. synth_ai/cli/task_apps.py +529 -179
  123. synth_ai/cli/traces.py +6 -4
  124. synth_ai/cli/watch.py +12 -18
  125. synth_ai/demo_registry.py +1 -1
  126. synth_ai/demos/core/cli.py +36 -43
  127. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  128. synth_ai/demos/demo_task_apps/core.py +17 -25
  129. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
  130. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  131. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
  132. synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
  133. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  134. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  135. synth_ai/environments/reproducibility/tree.py +2 -5
  136. synth_ai/environments/service/app.py +11 -12
  137. synth_ai/environments/service/core_routes.py +4 -7
  138. synth_ai/environments/stateful/engine.py +1 -1
  139. synth_ai/environments/tasks/core.py +1 -0
  140. synth_ai/environments/tasks/filters.py +5 -6
  141. synth_ai/environments/tasks/utils.py +4 -5
  142. synth_ai/handshake.py +9 -9
  143. synth_ai/http.py +1 -1
  144. synth_ai/http_client.py +18 -10
  145. synth_ai/inference/client.py +15 -5
  146. synth_ai/jobs/client.py +78 -83
  147. synth_ai/learning/__init__.py +41 -6
  148. synth_ai/learning/algorithms.py +14 -0
  149. synth_ai/learning/client.py +91 -24
  150. synth_ai/learning/config.py +2 -38
  151. synth_ai/learning/ft_client.py +4 -59
  152. synth_ai/learning/health.py +5 -6
  153. synth_ai/learning/jobs.py +31 -47
  154. synth_ai/{rl → learning/rl}/__init__.py +14 -4
  155. synth_ai/learning/rl/client.py +267 -0
  156. synth_ai/learning/rl/config.py +31 -0
  157. synth_ai/{rl → learning/rl}/contracts.py +5 -8
  158. synth_ai/{rl → learning/rl}/env_keys.py +39 -15
  159. synth_ai/learning/rl/secrets.py +13 -0
  160. synth_ai/learning/rl_client.py +2 -281
  161. synth_ai/learning/sft/__init__.py +29 -0
  162. synth_ai/learning/sft/client.py +68 -0
  163. synth_ai/learning/sft/config.py +270 -0
  164. synth_ai/learning/sft/data.py +295 -0
  165. synth_ai/learning/sse.py +25 -24
  166. synth_ai/learning/validators.py +25 -28
  167. synth_ai/lm/__init__.py +21 -47
  168. synth_ai/main.py +4 -0
  169. synth_ai/task/__init__.py +25 -27
  170. synth_ai/task/apps/__init__.py +7 -8
  171. synth_ai/task/auth.py +8 -8
  172. synth_ai/task/client.py +14 -14
  173. synth_ai/task/contracts.py +36 -35
  174. synth_ai/task/datasets.py +6 -5
  175. synth_ai/task/errors.py +10 -10
  176. synth_ai/task/health.py +17 -9
  177. synth_ai/task/json.py +58 -23
  178. synth_ai/task/proxy.py +13 -9
  179. synth_ai/task/rubrics.py +16 -15
  180. synth_ai/task/server.py +12 -12
  181. synth_ai/task/tracing_utils.py +4 -4
  182. synth_ai/task/vendors.py +5 -6
  183. synth_ai/tracing_v3/__init__.py +2 -0
  184. synth_ai/tracing_v3/abstractions.py +21 -4
  185. synth_ai/tracing_v3/decorators.py +18 -16
  186. synth_ai/tracing_v3/hooks.py +5 -5
  187. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  188. synth_ai/tracing_v3/session_tracer.py +40 -14
  189. synth_ai/tracing_v3/storage/base.py +85 -0
  190. synth_ai/tracing_v3/storage/config.py +21 -8
  191. synth_ai/tracing_v3/storage/factory.py +10 -7
  192. synth_ai/tracing_v3/storage/utils.py +4 -2
  193. synth_ai/tracing_v3/turso/daemon.py +7 -2
  194. synth_ai/tracing_v3/turso/models.py +2 -2
  195. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  196. synth_ai/tracing_v3/utils.py +4 -4
  197. synth_ai/v0/api/__init__.py +8 -0
  198. synth_ai/v0/api/models/__init__.py +8 -0
  199. synth_ai/v0/api/models/supported.py +8 -0
  200. synth_ai/v0/config/__init__.py +15 -0
  201. synth_ai/v0/config/base_url.py +12 -0
  202. synth_ai/v0/lm/__init__.py +51 -0
  203. synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
  204. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  205. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  206. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  207. synth_ai/{lm → v0/lm}/config.py +6 -1
  208. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  209. synth_ai/{lm → v0/lm}/core/main.py +6 -6
  210. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  211. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
  212. synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
  213. synth_ai/{lm → v0/lm}/overrides.py +2 -2
  214. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  215. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  216. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  217. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  218. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
  219. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  220. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  221. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
  222. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
  223. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
  224. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
  225. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  226. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  227. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  228. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  229. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  230. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  231. synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
  232. synth_ai/v0/tracing_v3/__init__.py +10 -0
  233. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  234. synth_ai/v0/tracing_v3/decorators.py +3 -0
  235. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  236. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  237. synth_ai-0.2.9.dev8.dist-info/METADATA +191 -0
  238. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/RECORD +268 -238
  239. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/top_level.txt +1 -0
  240. examples/common_old/backend.py +0 -20
  241. examples/evals_old/README.md +0 -98
  242. examples/evals_old/__init__.py +0 -6
  243. examples/evals_old/compare_models.py +0 -1038
  244. examples/evals_old/example_log.md +0 -145
  245. examples/evals_old/run_demo.sh +0 -126
  246. examples/evals_old/trace_analysis.py +0 -270
  247. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  248. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  249. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  250. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
  251. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  252. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  253. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  254. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  255. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  256. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
  257. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  258. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  259. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
  260. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  261. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  262. examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
  263. examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
  264. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  265. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  266. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
  267. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
  268. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
  269. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  270. examples/finetuning_old/synth_qwen_v1/util.py +0 -152
  271. examples/rl_old/task_app.py +0 -1131
  272. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  273. examples/warming_up_to_rl/old/notes.md +0 -73
  274. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  275. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  276. synth_ai/experimental/synth_oss.py +0 -445
  277. synth_ai/learning/filtering.py +0 -0
  278. synth_ai/learning/offline/dpo.py +0 -0
  279. synth_ai/learning/offline/providers.py +0 -7
  280. synth_ai/learning/offline/sft.py +0 -0
  281. synth_ai/learning/offline/shared.py +0 -0
  282. synth_ai/learning/online/grpo.py +0 -0
  283. synth_ai/learning/online/irft.py +0 -0
  284. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  285. synth_ai/learning/prompts/gepa.py +0 -0
  286. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
  287. synth_ai/learning/prompts/mipro.py +0 -289
  288. synth_ai/learning/prompts/random_search.py +0 -249
  289. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  290. synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
  291. synth_ai/rl/secrets.py +0 -19
  292. synth_ai/scripts/verify_rewards.py +0 -100
  293. synth_ai/tracing/__init__.py +0 -30
  294. synth_ai/tracing_v1/__init__.py +0 -33
  295. synth_ai/tracing_v3/turso/__init__.py +0 -25
  296. synth_ai/tracing_v3/turso/manager.py +0 -838
  297. synth_ai/zyk/__init__.py +0 -30
  298. synth_ai-0.2.9.dev7.dist-info/METADATA +0 -131
  299. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  300. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  301. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  302. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  303. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  304. /synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
  305. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  306. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  307. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  308. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  309. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  310. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  311. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  312. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  313. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  314. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  315. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  316. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  317. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  318. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  319. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  320. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  321. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  322. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  323. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  324. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  325. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/WHEEL +0 -0
  326. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/entry_points.txt +0 -0
  327. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/licenses/LICENSE +0 -0
examples/sft/README.md ADDED
@@ -0,0 +1,139 @@
+ ### Supervised Fine-Tuning for Crafter
+
+ This folder provides a minimal, reusable SFT workflow that pulls out the SFT step from `examples/warming_up_to_rl/` and focuses it on LoRA/QLoRA. We've also added guidance for running full finetuning (FFT) so you can compare adapters against end-to-end weight updates.
+
+ It supports distilling Groq (or other vendor) rollouts into JSONL using tracing and then training a small base model like `Qwen/Qwen3-0.6B`.
+
+ ---
+
+ ### 0) Load environment from .env.dev (recommended)
+
+ Use your dev env file so keys/URLs are sourced consistently:
+
+ ```bash
+ # Example path; update to your actual dev env
+ set -a && source /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev && set +a
+ ```
+
+ This ensures `ENVIRONMENT_API_KEY`, `GROQ_API_KEY`, and (optionally) `BACKEND_BASE_URL` are available to the steps below.
+
+ ---
+
+ ### 1) Collect traces and export SFT JSONL
+
+ You can generate traces with the Crafter task app and then export them to SFT JSONL using the existing exporter:
+
+ ```bash
+ # Serve the task app locally with tracing enabled (example)
+ uvx synth-ai serve grpo-crafter \
+   --trace traces/v3 \
+   --trace-db traces/v3/synth_ai.db \
+   --port 8001
+
+ # Or run traced local rollouts to accumulate data
+ uv run python examples/warming_up_to_rl/run_local_rollout_traced.py \
+   --episodes 50 --max-turns 10
+
+ # Export SFT dataset from the trace DB
+ uv run python examples/warming_up_to_rl/export_trace_sft.py \
+   --db traces/v3/synth_ai.db \
+   --min-unique 0 \
+   --output examples/sft/ft_data/crafter_traces.jsonl
+ ```
+
+ Notes:
+ - The exporter uses achievements and event rewards to filter high-signal steps. Combine `--min-unique`, `--min-outcome-reward`, `--event-reward`, and `--require-achievement` to control data quality.
+ - You can restrict to sessions from certain providers/models with `--provider`/`--model`.
+ - Use `--limit` while debugging to reduce dataset size quickly.
+
+ ---
+
+ ### 2a) Train LoRA (QLoRA) on Qwen/Qwen3-0.6B
+
+ Use the standard CLI. Do not use a custom Python finetuning script. Point the CLI at your `.env.dev` so it picks up keys automatically:
+
+ ```bash
+ uvx synth-ai train \
+   --type sft \
+   --config examples/sft/configs/crafter_lora_qwen0p6b.toml \
+   --dataset examples/sft/ft_data/crafter_traces.jsonl \
+   --env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
+ ```
+
+ The config sets `training.use_qlora = true` and `hyperparameters.train_kind = "peft"` to request LoRA adapters.
+
+ Experiment tips:
+ - The backend currently defaults to a LoRA rank of 16. If you need other ranks, generate the payload with `--dry-run`, add `"lora_rank": <value>` (and optional `"lora_alpha"`, `"lora_dropout"`) under `hyperparameters`, and submit it via the API until the CLI exposes these knobs directly.
+ - Duplicate the TOML and adjust `hyperparameters.warmup_ratio`, `learning_rate`, or `gradient_accumulation_steps` to keep the global batch size comparable across datasets.
+
+ ---
+
+ ### 2b) Train Full Finetune (FFT) on Qwen/Qwen3-0.6B
+
+ Full finetuning updates all weights and uses a near-identical CLI flow with the LoRA toggle disabled. The helper config lives alongside the LoRA sample:
+
+ ```bash
+ uvx synth-ai train \
+   --type sft \
+   --config examples/sft/configs/crafter_fft_qwen0p6b.toml \
+   --dataset examples/sft/ft_data/crafter_traces.jsonl \
+   --env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
+ ```
+
+ Key differences vs LoRA:
+ - `training.use_qlora = false` and `hyperparameters.train_kind = "fft"` request a full-weight update.
+ - `per_device_batch` defaults to 1 to keep memory use comfortable on a single H100; raise gradually as you confirm headroom.
+ - FFT runs slower per step. Consider trimming the dataset with `--examples` or the exporter filters for quick baselines.
+
+ If you want the 4B Crafter FFT baseline from the RL examples, reuse `examples/warming_up_to_rl/configs/crafter_fft_4b.toml` with the same CLI command.
+
+ ---
+
+ ### 3) Evaluate the fine-tuned models
+
+ After the job completes, list your fine-tuned models and evaluate them in the Crafter loop:
+
+ ```bash
+ # List models
+ uv run python - <<'PY'
+ import asyncio
+ import os
+ from synth_ai.learning.client import LearningClient
+
+ backend = os.getenv("BACKEND_BASE_URL", "https://agent-learning.onrender.com/api")
+ api_key = os.getenv("SYNTH_API_KEY", "")
+ async def main():
+     client = LearningClient(backend, api_key)
+     models = await client.list_fine_tuned_models()
+     for m in models:
+         print(m)
+ asyncio.run(main())
+ PY
+
+ # Evaluate in the Crafter eval loop (example via warming_up_to_rl)
+ TASK_APP_URL=http://localhost:8001 \
+ uv run python examples/warming_up_to_rl/run_eval.py \
+   --toml examples/warming_up_to_rl/configs/eval_local_vllm.toml \
+   --model ft:YOUR_FT_MODEL_ID \
+   --use-rollout
+ ```
+
+ ---
+
+ ### 4) Plan comparison runs
+
+ Keep runs comparable by adjusting one axis at a time and logging the settings in your experiment tracker (spreadsheet, weights & biases, etc.).
+
+ - **LoRA rank sweeps:** start from `crafter_lora_qwen0p6b.toml`, clone it per rank (e.g., `r=4,8,16,64`). For now add the desired `lora_rank` in the job payload manually (see note above) and include it in the run name.
+ - **Dataset size:** duplicate the exported JSONL and slice with `head -n`, or pass `--examples N` to the CLI for quick subsamples. Track the effective token count using the exporter logs.
+ - **Data quality:** increase `--min-unique`, require specific achievements, or exclude low-reward sessions with `export_trace_sft.py`. Capture the filter tuple in your run metadata so evaluations stay reproducible.
+ - **FFT vs LoRA:** run both configs on the same dataset/cardinality so differences reflect the training method rather than the data.
+
+ For each sweep, use consistent evaluation seeds and write down throughput (tokens/sec) so you can weigh quality vs cost.
+
+ ---
+
+ ### Files
+ - `configs/crafter_lora_qwen0p6b.toml`: LoRA/QLoRA SFT config for `Qwen/Qwen3-0.6B`.
+ - `configs/crafter_fft_qwen0p6b.toml`: Full-finetune SFT config for `Qwen/Qwen3-0.6B`.
+ - `ft_data/`: place your exported JSONL here (ignored by VCS).
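The LoRA-rank tip in section 2a of the README above amounts to a small payload edit before submission. A minimal sketch, assuming the `--dry-run` output has been saved to a hypothetical `payload.json`; only the `hyperparameters` key names come from the README, while the file name, surrounding payload shape, and values are illustrative:

```python
import json
from pathlib import Path

payload_path = Path("payload.json")  # hypothetical dump of the --dry-run payload
payload = json.loads(payload_path.read_text())

# Add the LoRA knobs the README mentions under "hyperparameters".
hp = payload.setdefault("hyperparameters", {})
hp["lora_rank"] = 32        # backend default is 16 per the README
hp["lora_alpha"] = 64       # optional, illustrative value
hp["lora_dropout"] = 0.05   # optional, illustrative value

payload_path.write_text(json.dumps(payload, indent=2))
# Submit the edited payload via the training API, as the README describes.
```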
examples/sft/configs/crafter_fft_qwen0p6b.toml ADDED
@@ -0,0 +1,44 @@
+ [job]
+ model = "Qwen/Qwen3-0.6B"
+ # Prefer passing --dataset at runtime for repeatability
+ # data = "examples/sft/ft_data/crafter_traces.jsonl"
+
+ [compute]
+ gpu_type = "H100"
+ gpu_count = 1
+ nodes = 1
+
+ [data]
+ topology = {}
+ # Optional validation set if you have one locally
+ # validation_path = "examples/sft/ft_data/crafter_traces.val.jsonl"
+
+ [training]
+ mode = "sft_offline"
+ use_qlora = false
+
+ [training.validation]
+ enabled = true
+ evaluation_strategy = "steps"
+ eval_steps = 50
+ save_best_model_at_end = true
+ metric_for_best_model = "val.loss"
+ greater_is_better = false
+
+ [hyperparameters]
+ n_epochs = 1
+ train_kind = "fft"
+ per_device_batch = 1
+ gradient_accumulation_steps = 32
+ sequence_length = 4096
+ learning_rate = 1e-5
+ warmup_ratio = 0.03
+ weight_decay = 0.01
+
+ [hyperparameters.parallelism]
+ use_deepspeed = true
+ deepspeed_stage = 2
+ fsdp = false
+ bf16 = true
+ fp16 = false
+ activation_checkpointing = true
examples/sft/configs/crafter_lora_qwen0p6b.toml ADDED
@@ -0,0 +1,45 @@
+ [job]
+ model = "Qwen/Qwen3-0.6B"
+ # Optionally set here, but prefer passing --dataset at runtime
+ # data = "examples/sft/ft_data/crafter_traces.jsonl"
+
+ [compute]
+ gpu_type = "H100"
+ gpu_count = 1
+ nodes = 1
+
+ [data]
+ # Forwarded into metadata.effective_config
+ topology = {}
+ # Optional validation set if you have one locally
+ # validation_path = "examples/sft/ft_data/crafter_traces.val.jsonl"
+
+ [training]
+ mode = "lora"
+ use_qlora = true
+
+ [training.validation]
+ enabled = true
+ evaluation_strategy = "steps"
+ eval_steps = 50
+ save_best_model_at_end = true
+ metric_for_best_model = "val.loss"
+ greater_is_better = false
+
+ [hyperparameters]
+ n_epochs = 1
+ train_kind = "peft"
+ per_device_batch = 2
+ gradient_accumulation_steps = 32
+ sequence_length = 4096
+ learning_rate = 5e-6
+ warmup_ratio = 0.03
+
+ [hyperparameters.parallelism]
+ use_deepspeed = true
+ deepspeed_stage = 2
+ fsdp = false
+ bf16 = true
+ fp16 = false
+ activation_checkpointing = true
+
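The README's advice to keep the global batch size comparable across these two configs is a product of three knobs. A quick sketch using the values from the TOMLs above; the token figure assumes fully packed 4096-token sequences, so it is an upper bound:

```python
def global_batch(per_device_batch: int, grad_accum: int, gpu_count: int) -> int:
    """Sequences consumed per optimizer step."""
    return per_device_batch * grad_accum * gpu_count

fft = global_batch(per_device_batch=1, grad_accum=32, gpu_count=1)   # 32 sequences/step
lora = global_batch(per_device_batch=2, grad_accum=32, gpu_count=1)  # 64 sequences/step

# Upper-bound tokens per optimizer step at sequence_length = 4096.
print(fft * 4096, lora * 4096)  # 131072 vs 262144
```

To match the FFT config's global batch in the LoRA config, halving `gradient_accumulation_steps` to 16 (or dropping `per_device_batch` to 1) is one option.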
examples/sft/evaluate.py ADDED
@@ -0,0 +1,117 @@
+ #!/usr/bin/env python3
+ """Evaluate a base or fine-tuned model on Crafter via the Task App rollout.
+
+ This mirrors the minimal evaluation loop: call `/rollout` for a set of seeds
+ and report outcome/step metrics. If tracing is enabled server-side, you can
+ use the exported sqlite DB for further analysis.
+ """
+
+ from __future__ import annotations
+
+ import argparse
+ import asyncio
+ import os
+ from dataclasses import dataclass
+ from typing import Any
+
+ from synth_ai.task import (
+     RolloutEnvSpec,
+     RolloutPolicySpec,
+     RolloutRecordConfig,
+     RolloutRequest,
+     TaskAppClient,
+ )
+
+
+ @dataclass(slots=True)
+ class EvalArgs:
+     base_url: str
+     api_key: str
+     model: str
+     inference_url: str
+     inference_api_key: str
+     seeds: list[int]
+     max_llm_calls: int
+     timeout: float
+
+
+ def _ops(n: int) -> list[str]:
+     n = max(1, n)
+     ops: list[str] = []
+     for _ in range(n):
+         ops.extend(["agent", "env"])  # one LLM step followed by one env step
+     return ops
+
+
+ def _request(seed: int, a: EvalArgs) -> RolloutRequest:
+     return RolloutRequest(
+         run_id=f"eval-{seed}",
+         env=RolloutEnvSpec(env_name="crafter", seed=seed, config={}),
+         policy=RolloutPolicySpec(
+             policy_name="crafter-react",
+             config={"model": a.model, "inference_url": a.inference_url, "api_key": a.inference_api_key},
+         ),
+         ops=_ops(a.max_llm_calls),
+         record=RolloutRecordConfig(trajectories=True, return_trace=False, trace_format="compact"),
+     )
+
+
+ async def _eval_seed(client: TaskAppClient, seed: int, a: EvalArgs) -> dict[str, Any]:
+     resp = await client.rollout(_request(seed, a))
+     m = resp.metrics
+     return {"seed": seed, "num_steps": m.num_steps, "episode_returns": m.episode_returns, "outcome_score": m.outcome_score}
+
+
+ async def main() -> None:
+     p = argparse.ArgumentParser(description=__doc__)
+     p.add_argument("--base-url", default=os.getenv("TASK_APP_URL", "http://localhost:8001"))
+     p.add_argument("--api-key", default=os.getenv("ENVIRONMENT_API_KEY"))
+     p.add_argument("--model", required=True, help="Base or ft:<id> to evaluate")
+     p.add_argument("--inference-url", default=os.getenv("INFERENCE_URL", "https://api.groq.com/openai"))
+     p.add_argument("--inference-api-key", default=os.getenv("GROQ_API_KEY"))
+     p.add_argument("--seeds", default="0,1,2,3,4,5,6,7,8,9")
+     p.add_argument("--max-llm-calls", type=int, default=10)
+     p.add_argument("--timeout", type=float, default=60.0)
+     args = p.parse_args()
+
+     seeds = [int(s) for s in str(args.seeds).split(",") if s.strip()]
+     a = EvalArgs(
+         base_url=str(args.base_url).strip(),
+         api_key=str(args.api_key or "").strip(),
+         model=str(args.model).strip(),
+         inference_url=str(args.inference_url).strip(),
+         inference_api_key=str(args.inference_api_key or "").strip(),
+         seeds=seeds,
+         max_llm_calls=int(args.max_llm_calls),
+         timeout=float(args.timeout),
+     )
+     if not a.api_key:
+         raise SystemExit("ENVIRONMENT_API_KEY is required")
+     if not a.inference_api_key:
+         raise SystemExit("Inference API key (e.g., GROQ_API_KEY) is required")
+
+     results: list[dict[str, Any]] = []
+     async with TaskAppClient(a.base_url, api_key=a.api_key, timeout=a.timeout) as client:
+         for seed in a.seeds:
+             r = await _eval_seed(client, seed, a)
+             results.append(r)
+             print(f"seed={seed} return={r.get('episode_returns')}")
+
+     # Simple aggregate
+     flat_returns: list[float] = []
+     for r in results:
+         ers = r.get("episode_returns") or []
+         if isinstance(ers, list) and ers:
+             try:
+                 flat_returns.append(float(ers[0]))
+             except Exception:
+                 pass
+     if flat_returns:
+         mean_ret = sum(flat_returns) / len(flat_returns)
+         print(f"mean_return={mean_ret:.3f} over {len(flat_returns)} episodes")
+
+
+ if __name__ == "__main__":
+     asyncio.run(main())
+
+
examples/sft/export_dataset.py ADDED
@@ -0,0 +1,117 @@
+ #!/usr/bin/env python3
+ """Export SFT JSONL from tracing_v3 sqlite using the shared exporter utilities.
+
+ Thin wrapper over `examples/warming_up_to_rl/export_trace_sft.py` to keep the
+ SFT workflow self-contained in this folder while reusing tested logic.
+ """
+
+ from __future__ import annotations
+
+ import argparse
+ from pathlib import Path
+
+ from examples.warming_up_to_rl.export_trace_sft import (
+     build_sft_dataset,
+     connect,
+     fetch_achievement_data,
+     fetch_event_reward_totals,
+     fetch_outcome_rewards,
+     fetch_session_models,
+     parse_event_filters,
+     write_jsonl,
+ )
+
+
+ def main() -> None:
+     p = argparse.ArgumentParser(description=__doc__)
+     p.add_argument("--db", type=Path, default=Path("traces/v3/synth_ai.db"))
+     p.add_argument("--output", type=Path, default=Path("examples/sft/ft_data/crafter_traces.jsonl"))
+     p.add_argument("--model", action="append", dest="models")
+     p.add_argument("--provider", action="append", dest="providers")
+     p.add_argument("--min-unique", type=int, default=0)
+     p.add_argument("--max-unique", type=int, default=None)
+     p.add_argument("--exclude-achievement", action="append", dest="exclude_achievements")
+     p.add_argument("--require-achievement", action="append", dest="required_achievements")
+     p.add_argument("--min-outcome-reward", type=float, default=None)
+     p.add_argument("--max-outcome-reward", type=float, default=None)
+     p.add_argument("--event-reward", action="append", dest="event_reward_filters")
+     p.add_argument("--limit", type=int, default=None)
+     args = p.parse_args()
+
+     conn = connect(args.db)
+     try:
+         achievements_map, unique_counts, name_counts, size_counts, session_uniques, session_final = (
+             fetch_achievement_data(conn)
+         )
+         session_models = fetch_session_models(conn)
+         outcome_data = fetch_outcome_rewards(conn)
+         event_totals = fetch_event_reward_totals(conn)
+         event_filters = parse_event_filters(args.event_reward_filters)
+
+         allowed_models = set(args.models) if args.models else None
+         allowed_providers = set(args.providers) if args.providers else None
+         required_achievements = set(args.required_achievements or [])
+         excluded_achievements = set(args.exclude_achievements or [])
+
+         eligible: set[str] = set()
+         for session_id, (model_name, provider, _calls) in session_models.items():
+             if allowed_models and model_name not in allowed_models:
+                 continue
+             if allowed_providers and (provider or "unknown") not in allowed_providers:
+                 continue
+
+             session_unique = session_uniques.get(session_id, set())
+             adjusted_uniques = {a for a in session_unique if a not in excluded_achievements}
+             unique_count = len(adjusted_uniques)
+             if args.min_unique is not None and unique_count < args.min_unique:
+                 continue
+             if args.max_unique is not None and unique_count > args.max_unique:
+                 continue
+
+             outcome = outcome_data.get(session_id)
+             total_reward = outcome["total_reward"] if outcome else 0.0
+             final_achievements = (
+                 outcome["achievements"] if outcome else session_final.get(session_id, set())
+             )
+             if args.min_outcome_reward is not None and total_reward < args.min_outcome_reward:
+                 continue
+             if args.max_outcome_reward is not None and total_reward > args.max_outcome_reward:
+                 continue
+             if required_achievements and not required_achievements.issubset(final_achievements):
+                 continue
+
+             totals = event_totals.get(session_id, {})
+             meets_filters = True
+             for reward_type, min_total in event_filters:
+                 total = totals.get(reward_type, {}).get("total", 0.0)
+                 if total < min_total:
+                     meets_filters = False
+                     break
+             if not meets_filters:
+                 continue
+             eligible.add(session_id)
+
+         if not eligible:
+             raise SystemExit("No sessions matched the provided filters.")
+
+         dataset = build_sft_dataset(
+             conn,
+             achievements_map,
+             eligible,
+             allowed_models=allowed_models,
+             limit=args.limit,
+         )
+         if not dataset:
+             raise SystemExit("No rollout steps matched the filters (after session selection).")
+
+         Path(args.output).parent.mkdir(parents=True, exist_ok=True)
+         write_jsonl(args.output, dataset)
+         print(f"Wrote {len(dataset)} examples -> {args.output}")
+     finally:
+         conn.close()
+
+
+ if __name__ == "__main__":
+     main()
+
+
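For the dataset-size sweeps the README suggests (slice with `head -n` or `--examples N`), a tiny Python equivalent that trims the JSONL written by the exporter above; the helper and the output filename are illustrative, not part of the package:

```python
from pathlib import Path


def take_first_n(src: str, dst: str, n: int) -> int:
    """Write the first n JSONL records of src to dst; returns the count written."""
    lines = Path(src).read_text().splitlines(keepends=True)[:n]
    Path(dst).parent.mkdir(parents=True, exist_ok=True)
    Path(dst).write_text("".join(lines))
    return len(lines)


take_first_n(
    "examples/sft/ft_data/crafter_traces.jsonl",
    "examples/sft/ft_data/crafter_traces.first500.jsonl",  # hypothetical output name
    500,
)
```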
examples/sft/generate_traces.py ADDED
@@ -0,0 +1,162 @@
+ #!/usr/bin/env python3
+ """Generate Crafter rollouts and server-side traces via the Task App.
+
+ This script is a slim wrapper around the Task App `/rollout` endpoint to
+ produce trajectories while the server (if configured with TASKAPP_TRACING_ENABLED)
+ persists traces to its sqlite database. Use `export_dataset.py` afterwards
+ to build an SFT JSONL.
+ """
+
+ from __future__ import annotations
+
+ import argparse
+ import asyncio
+ import os
+ import sys
+ import time
+ from dataclasses import dataclass
+ from typing import Any
+
+ from synth_ai.task import (
+     RolloutEnvSpec,
+     RolloutPolicySpec,
+     RolloutRecordConfig,
+     RolloutRequest,
+     TaskAppClient,
+ )
+
+
+ def _ensure_str(val: Any, name: str) -> str:
+     s = str(val or "").strip()
+     if not s:
+         raise SystemExit(f"Missing required {name}")
+     return s
+
+
+ def _build_ops(max_llm_calls: int) -> list[str]:
+     max_llm_calls = max(1, int(max_llm_calls or 1))
+     ops: list[str] = []
+     for _ in range(max_llm_calls):
+         ops.extend(["agent", "env"])  # LLM step then env step
+     return ops
+
+
+ def _build_request(seed: int, run_id: str, model: str, inference_url: str, api_key: str, *, max_llm_calls: int, return_trace: bool) -> RolloutRequest:
+     policy_cfg: dict[str, Any] = {
+         "model": model,
+         "inference_url": inference_url,
+         "api_key": api_key,
+     }
+     record = RolloutRecordConfig(trajectories=True, return_trace=bool(return_trace), trace_format="compact")
+     return RolloutRequest(
+         run_id=run_id,
+         env=RolloutEnvSpec(env_name="crafter", seed=seed, config={}),
+         policy=RolloutPolicySpec(policy_name="crafter-react", config=policy_cfg),
+         ops=_build_ops(max_llm_calls),
+         record=record,
+     )
+
+
+ @dataclass(slots=True)
+ class Args:
+     base_url: str
+     api_key: str
+     inference_url: str
+     inference_api_key: str
+     model: str
+     episodes: int
+     start_seed: int
+     max_llm_calls: int
+     concurrency: int
+     return_trace: bool
+     timeout: float
+
+
+ async def _run_one(client: TaskAppClient, run_id: str, seed: int, a: Args) -> dict[str, Any]:
+     req = _build_request(
+         seed=seed,
+         run_id=f"{run_id}-seed{seed}",
+         model=a.model,
+         inference_url=a.inference_url,
+         api_key=a.inference_api_key,
+         max_llm_calls=a.max_llm_calls,
+         return_trace=a.return_trace,
+     )
+     resp = await client.rollout(req)
+     metrics = resp.metrics.model_dump()
+     return {
+         "seed": seed,
+         "num_steps": metrics.get("num_steps"),
+         "episode_returns": metrics.get("episode_returns"),
+         "outcome_score": metrics.get("outcome_score"),
+     }
+
+
+ async def _bounded_gather(n: int, coros: list[asyncio.Future]):
+     sem = asyncio.Semaphore(n)
+
+     async def _wrap(coro):
+         async with sem:
+             return await coro
+
+     return await asyncio.gather(*[_wrap(c) for c in coros])
+
+
+ async def main() -> None:
+     parser = argparse.ArgumentParser(description=__doc__)
+     parser.add_argument("--base-url", default=os.getenv("TASK_APP_URL", "http://localhost:8001"))
+     parser.add_argument("--api-key", default=os.getenv("ENVIRONMENT_API_KEY"))
+     parser.add_argument("--inference-url", default=os.getenv("INFERENCE_URL", "https://api.groq.com/openai"))
+     parser.add_argument("--inference-api-key", default=os.getenv("GROQ_API_KEY"))
+     parser.add_argument("--model", default=os.getenv("POLICY_MODEL", "llama-3.3-70b-versatile"))
+     parser.add_argument("--episodes", type=int, default=50)
+     parser.add_argument("--start-seed", type=int, default=0)
+     parser.add_argument("--max-llm-calls", type=int, default=10)
+     parser.add_argument("--concurrency", type=int, default=5)
+     parser.add_argument("--return-trace", action="store_true")
+     parser.add_argument("--timeout", type=float, default=60.0)
+     args_ns = parser.parse_args()
+
+     a = Args(
+         base_url=_ensure_str(args_ns.base_url, "--base-url"),
+         api_key=_ensure_str(args_ns.api_key, "--api-key"),
+         inference_url=_ensure_str(args_ns.inference_url, "--inference-url"),
+         inference_api_key=_ensure_str(args_ns.inference_api_key, "--inference-api-key"),
+         model=_ensure_str(args_ns.model, "--model"),
+         episodes=int(args_ns.episodes),
+         start_seed=int(args_ns.start_seed),
+         max_llm_calls=int(args_ns.max_llm_calls),
+         concurrency=max(1, int(args_ns.concurrency)),
+         return_trace=bool(args_ns.return_trace),
+         timeout=float(args_ns.timeout),
+     )
+
+     print(
+         f"[INFO] base={a.base_url} episodes={a.episodes} start_seed={a.start_seed} model={a.model} tp={a.max_llm_calls}"
+     )
+     run_id = f"traces-{int(time.time())}"
+
+     successes = 0
+     failures = 0
+     async with TaskAppClient(a.base_url, api_key=a.api_key, timeout=a.timeout) as client:
+         tasks = [
+             _run_one(client, run_id, seed, a) for seed in range(a.start_seed, a.start_seed + a.episodes)
+         ]
+         for result in await _bounded_gather(a.concurrency, tasks):
+             if isinstance(result, dict):
+                 successes += 1
+                 print(f"[OK] seed={result['seed']} return={result.get('episode_returns')}")
+             else:
+                 failures += 1
+                 print(f"[ERR] seed result not dict: {result}", file=sys.stderr)
+
+     print(f"[DONE] successes={successes} failures={failures}")
+
+
+ if __name__ == "__main__":
+     try:
+         asyncio.run(main())
+     except KeyboardInterrupt:
+         print("Interrupted", file=sys.stderr)
+
+
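One caveat in `generate_traces.py` above: `_bounded_gather` wraps `asyncio.gather` without `return_exceptions`, so a single failed rollout raises instead of being counted in `failures`. A variant that captures per-rollout errors, shown as a sketch rather than the package's implementation:

```python
import asyncio
from collections.abc import Awaitable
from typing import Any


async def bounded_gather_safe(limit: int, coros: list[Awaitable[Any]]) -> list[Any]:
    """Run coroutines with at most `limit` in flight; exceptions are returned, not raised."""
    sem = asyncio.Semaphore(limit)

    async def _wrap(coro: Awaitable[Any]) -> Any:
        async with sem:
            return await coro

    # return_exceptions=True turns per-task exceptions into list entries.
    return await asyncio.gather(*(_wrap(c) for c in coros), return_exceptions=True)
```

Results that are `Exception` instances can then be tallied as failures in the caller's loop.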
examples/swe/__init__.py ADDED
@@ -0,0 +1,12 @@
+ """SWE task app examples package."""
+
+ from importlib import resources as _resources
+
+ __all__ = ["path_for"]
+
+
+ def path_for(package: str, resource: str) -> str:
+     """Return path for packaged SWE example resources."""
+
+     with _resources.as_file(_resources.files(f"examples.swe.{package}") / resource) as path:
+         return str(path)
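For reference, `path_for` above resolves a resource shipped inside the `examples.swe` package tree to a filesystem path. A usage sketch, with the package/resource pair chosen from files added in this release:

```python
from examples.swe import path_for

# Resolve the packaged SWE task app README added in this version.
readme_path = path_for("task_app", "README.md")
print(readme_path)
```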