synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (351)
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +23 -17
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/configs/eval_base_qwen.toml +1 -1
  19. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  20. examples/rl/download_dataset.py +26 -10
  21. examples/rl/run_eval.py +53 -52
  22. examples/rl/run_rl_and_save.py +29 -12
  23. examples/rl/task_app/math_single_step.py +180 -41
  24. examples/rl/task_app/math_task_app.py +14 -6
  25. examples/sft/README.md +139 -0
  26. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  27. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  28. examples/sft/evaluate.py +117 -0
  29. examples/sft/export_dataset.py +117 -0
  30. examples/sft/generate_traces.py +162 -0
  31. examples/swe/__init__.py +12 -0
  32. examples/swe/task_app/README.md +105 -0
  33. examples/swe/task_app/__init__.py +2 -0
  34. examples/swe/task_app/grpo_swe_mini.py +571 -0
  35. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  36. examples/swe/task_app/hosted/README.md +173 -0
  37. examples/swe/task_app/hosted/__init__.py +5 -0
  38. examples/swe/task_app/hosted/branching.py +143 -0
  39. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  40. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  41. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  42. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  43. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  44. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  45. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  46. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  47. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  50. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  51. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  52. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  53. examples/swe/task_app/hosted/hosted_app.py +204 -0
  54. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  55. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  56. examples/swe/task_app/hosted/main.py +100 -0
  57. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  58. examples/swe/task_app/hosted/registry.py +195 -0
  59. examples/swe/task_app/hosted/rollout.py +1869 -0
  60. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  61. examples/swe/task_app/hosted/storage/volume.py +211 -0
  62. examples/swe/task_app/hosted/test_agents.py +161 -0
  63. examples/swe/task_app/hosted/test_service.py +137 -0
  64. examples/swe/task_app/hosted/utils.py +62 -0
  65. examples/vlm/README.md +68 -0
  66. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  67. examples/vlm/crafter_image_only_agent.py +207 -0
  68. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  69. examples/vlm/filter_image_rows.py +63 -0
  70. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  71. examples/warming_up_to_rl/analyze_trace_db.py +12 -10
  72. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  73. examples/warming_up_to_rl/export_trace_sft.py +218 -36
  74. examples/warming_up_to_rl/groq_test.py +15 -8
  75. examples/warming_up_to_rl/manage_secrets.py +29 -25
  76. examples/warming_up_to_rl/readme.md +9 -2
  77. examples/warming_up_to_rl/run_eval.py +137 -61
  78. examples/warming_up_to_rl/run_fft_and_save.py +131 -60
  79. examples/warming_up_to_rl/run_local_rollout.py +88 -39
  80. examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
  81. examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
  82. examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
  83. examples/warming_up_to_rl/run_rl_and_save.py +35 -12
  84. examples/warming_up_to_rl/run_rollout_remote.py +44 -19
  85. examples/warming_up_to_rl/task_app/README.md +6 -2
  86. examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
  87. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  111. synth/__init__.py +14 -0
  112. synth_ai/__init__.py +20 -4
  113. synth_ai/api/models/supported.py +376 -0
  114. synth_ai/api/train/builders.py +157 -26
  115. synth_ai/api/train/cli.py +213 -57
  116. synth_ai/api/train/config_finder.py +65 -5
  117. synth_ai/api/train/env_resolver.py +33 -15
  118. synth_ai/api/train/pollers.py +13 -4
  119. synth_ai/api/train/supported_algos.py +139 -0
  120. synth_ai/api/train/task_app.py +5 -3
  121. synth_ai/api/train/utils.py +33 -48
  122. synth_ai/cli/__init__.py +19 -4
  123. synth_ai/cli/_modal_wrapper.py +28 -0
  124. synth_ai/cli/_typer_patch.py +49 -0
  125. synth_ai/cli/balance.py +2 -3
  126. synth_ai/cli/calc.py +1 -1
  127. synth_ai/cli/demo.py +21 -6
  128. synth_ai/cli/recent.py +2 -2
  129. synth_ai/cli/rl_demo.py +77 -17
  130. synth_ai/cli/root.py +116 -39
  131. synth_ai/cli/status.py +2 -2
  132. synth_ai/cli/task_apps.py +1699 -259
  133. synth_ai/cli/traces.py +7 -4
  134. synth_ai/cli/turso.py +73 -0
  135. synth_ai/cli/watch.py +12 -18
  136. synth_ai/core/experiment.py +0 -2
  137. synth_ai/demo_registry.py +68 -31
  138. synth_ai/demos/core/cli.py +516 -194
  139. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  140. synth_ai/demos/demo_task_apps/core.py +64 -28
  141. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  142. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
  143. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  144. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  145. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  146. synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
  147. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  148. synth_ai/environments/examples/bandit/engine.py +12 -4
  149. synth_ai/environments/examples/bandit/taskset.py +4 -4
  150. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  151. synth_ai/environments/reproducibility/tree.py +5 -6
  152. synth_ai/environments/service/app.py +11 -12
  153. synth_ai/environments/service/core_routes.py +10 -9
  154. synth_ai/environments/stateful/engine.py +1 -1
  155. synth_ai/environments/tasks/core.py +1 -0
  156. synth_ai/environments/tasks/filters.py +5 -6
  157. synth_ai/environments/tasks/utils.py +4 -5
  158. synth_ai/evals/base.py +0 -2
  159. synth_ai/handshake.py +11 -9
  160. synth_ai/http.py +1 -1
  161. synth_ai/http_client.py +43 -11
  162. synth_ai/inference/__init__.py +0 -2
  163. synth_ai/inference/client.py +20 -6
  164. synth_ai/jobs/client.py +103 -78
  165. synth_ai/learning/__init__.py +41 -6
  166. synth_ai/learning/algorithms.py +14 -0
  167. synth_ai/learning/client.py +121 -29
  168. synth_ai/learning/config.py +2 -40
  169. synth_ai/learning/constants.py +0 -2
  170. synth_ai/learning/ft_client.py +4 -56
  171. synth_ai/learning/health.py +13 -7
  172. synth_ai/learning/jobs.py +43 -47
  173. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  174. synth_ai/learning/rl/client.py +267 -0
  175. synth_ai/learning/rl/config.py +31 -0
  176. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  177. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  178. synth_ai/learning/rl/secrets.py +13 -0
  179. synth_ai/learning/rl_client.py +2 -253
  180. synth_ai/learning/sft/__init__.py +29 -0
  181. synth_ai/learning/sft/client.py +68 -0
  182. synth_ai/learning/sft/config.py +270 -0
  183. synth_ai/learning/sft/data.py +295 -0
  184. synth_ai/learning/sse.py +25 -26
  185. synth_ai/learning/validators.py +25 -24
  186. synth_ai/lm/__init__.py +21 -47
  187. synth_ai/task/__init__.py +26 -27
  188. synth_ai/task/apps/__init__.py +18 -19
  189. synth_ai/task/auth.py +35 -23
  190. synth_ai/task/client.py +15 -13
  191. synth_ai/task/contracts.py +37 -35
  192. synth_ai/task/datasets.py +9 -6
  193. synth_ai/task/errors.py +11 -10
  194. synth_ai/task/health.py +17 -11
  195. synth_ai/task/json.py +58 -24
  196. synth_ai/task/proxy.py +15 -14
  197. synth_ai/task/rubrics.py +22 -15
  198. synth_ai/task/server.py +43 -17
  199. synth_ai/task/tracing_utils.py +12 -7
  200. synth_ai/task/validators.py +0 -1
  201. synth_ai/task/vendors.py +5 -7
  202. synth_ai/tracing_v3/__init__.py +2 -0
  203. synth_ai/tracing_v3/abstractions.py +21 -4
  204. synth_ai/tracing_v3/db_config.py +26 -1
  205. synth_ai/tracing_v3/decorators.py +18 -15
  206. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  207. synth_ai/tracing_v3/hooks.py +6 -4
  208. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  209. synth_ai/tracing_v3/replica_sync.py +1 -0
  210. synth_ai/tracing_v3/session_tracer.py +63 -16
  211. synth_ai/tracing_v3/storage/base.py +89 -1
  212. synth_ai/tracing_v3/storage/config.py +21 -8
  213. synth_ai/tracing_v3/storage/factory.py +10 -8
  214. synth_ai/tracing_v3/storage/utils.py +4 -2
  215. synth_ai/tracing_v3/turso/daemon.py +7 -2
  216. synth_ai/tracing_v3/turso/models.py +5 -2
  217. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  218. synth_ai/tracing_v3/utils.py +4 -3
  219. synth_ai/v0/api/__init__.py +8 -0
  220. synth_ai/v0/api/models/__init__.py +8 -0
  221. synth_ai/v0/api/models/supported.py +8 -0
  222. synth_ai/v0/config/__init__.py +15 -0
  223. synth_ai/v0/config/base_url.py +12 -0
  224. synth_ai/v0/lm/__init__.py +51 -0
  225. synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
  226. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  227. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  228. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  229. synth_ai/{lm → v0/lm}/config.py +6 -1
  230. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  231. synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
  232. synth_ai/{lm → v0/lm}/core/main.py +19 -7
  233. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  234. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
  235. synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
  236. synth_ai/{lm → v0/lm}/overrides.py +4 -4
  237. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  238. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  239. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  240. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  241. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
  242. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  243. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  244. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
  245. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
  246. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
  247. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
  248. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  249. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  250. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  251. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  252. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  253. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  254. synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
  255. synth_ai/v0/tracing/upload.py +32 -135
  256. synth_ai/v0/tracing_v3/__init__.py +10 -0
  257. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  258. synth_ai/v0/tracing_v3/decorators.py +3 -0
  259. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  260. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  261. synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
  262. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -262
  263. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
  264. examples/common_old/backend.py +0 -21
  265. examples/evals_old/README.md +0 -98
  266. examples/evals_old/__init__.py +0 -6
  267. examples/evals_old/compare_models.py +0 -1037
  268. examples/evals_old/example_log.md +0 -145
  269. examples/evals_old/run_demo.sh +0 -126
  270. examples/evals_old/trace_analysis.py +0 -270
  271. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  272. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  273. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  274. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
  275. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  276. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  277. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  278. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  279. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  280. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
  281. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  282. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  283. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
  284. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  285. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  286. examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
  287. examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
  288. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  289. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  290. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
  291. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
  292. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
  293. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  294. examples/finetuning_old/synth_qwen_v1/util.py +0 -147
  295. examples/rl_old/task_app.py +0 -962
  296. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  297. examples/warming_up_to_rl/old/notes.md +0 -73
  298. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  299. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  300. synth_ai/experimental/synth_oss.py +0 -446
  301. synth_ai/install_sqld.sh +0 -40
  302. synth_ai/learning/filtering.py +0 -0
  303. synth_ai/learning/offline/dpo.py +0 -0
  304. synth_ai/learning/offline/providers.py +0 -7
  305. synth_ai/learning/offline/sft.py +0 -0
  306. synth_ai/learning/offline/shared.py +0 -0
  307. synth_ai/learning/online/grpo.py +0 -0
  308. synth_ai/learning/online/irft.py +0 -0
  309. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  310. synth_ai/learning/prompts/gepa.py +0 -0
  311. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  312. synth_ai/learning/prompts/mipro.py +0 -289
  313. synth_ai/learning/prompts/random_search.py +0 -246
  314. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  315. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  316. synth_ai/rl/secrets.py +0 -19
  317. synth_ai/scripts/verify_rewards.py +0 -100
  318. synth_ai/tracing/__init__.py +0 -30
  319. synth_ai/tracing_v1/__init__.py +0 -33
  320. synth_ai/tracing_v3/turso/__init__.py +0 -25
  321. synth_ai/tracing_v3/turso/manager.py +0 -774
  322. synth_ai/zyk/__init__.py +0 -30
  323. synth_ai-0.2.9.dev5.dist-info/METADATA +0 -131
  324. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  325. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  326. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  327. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  328. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  329. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  330. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  331. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  332. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  333. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  334. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  335. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  336. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  337. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  338. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  339. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  340. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  341. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  342. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  343. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  344. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  345. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  346. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  347. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  348. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  349. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
  350. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
  351. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
@@ -8,8 +8,9 @@ import json
8
8
  import sqlite3
9
9
  import sys
10
10
  from collections import Counter, defaultdict
11
+ from collections.abc import Iterable
11
12
  from pathlib import Path
12
- from typing import Any, Dict, Iterable, List, Set, Tuple
13
+ from typing import Any
13
14
 
14
15
  Row = sqlite3.Row
15
16
 
@@ -23,7 +24,7 @@ def connect(db_path: Path) -> sqlite3.Connection:
23
24
  def _parse_json(value: Any) -> Any:
24
25
  if value is None:
25
26
  return None
26
- if isinstance(value, (dict, list)):
27
+ if isinstance(value, dict | list):
27
28
  return value
28
29
  try:
29
30
  return json.loads(value)
@@ -31,7 +32,7 @@ def _parse_json(value: Any) -> Any:
31
32
  return None
32
33
 
33
34
 
34
- AchievementMap = dict[Tuple[str, int], dict[str, list[str]]]
35
+ AchievementMap = dict[tuple[str, int], dict[str, list[str]]]
35
36
 
36
37
 
37
38
  def fetch_achievement_data(
@@ -116,7 +117,7 @@ def fetch_achievement_data(
116
117
  achievement_name_counts.update(achievement_set)
117
118
 
118
119
  achievement_size_counts: Counter = Counter()
119
- for session_id, count in unique_counts_per_session.items():
120
+ for _session_id, count in unique_counts_per_session.items():
120
121
  achievement_size_counts[count] += 1
121
122
 
122
123
  return (
@@ -203,25 +204,71 @@ def parse_event_filters(specs: list[str] | None) -> list[tuple[str, float]]:
203
204
  if min_val_str:
204
205
  try:
205
206
  min_val = float(min_val_str)
206
- except ValueError:
207
+ except ValueError as e:
207
208
  print(f"Invalid event reward specification '{spec}'", file=sys.stderr)
208
- raise SystemExit(1)
209
+ raise SystemExit(1) from e
209
210
  filters.append((reward_type, min_val))
210
211
  return filters
211
212
 
212
213
 
213
- def _collect_text(parts: Iterable[dict[str, Any]] | None) -> str:
214
- texts: list[str] = []
214
+ def _collect_content(
215
+ parts: Iterable[dict[str, Any]] | None,
216
+ ) -> tuple[Any, bool]:
217
+ """Normalise multimodal content parts into OpenAI-style segments."""
218
+
215
219
  if not parts:
216
- return ""
220
+ return "", False
221
+
222
+ segments: list[dict[str, Any]] = []
223
+ has_image = False
224
+
217
225
  for part in parts:
218
226
  if not isinstance(part, dict):
219
227
  continue
220
- if part.get("type") == "text":
228
+ ptype = part.get("type")
229
+ if ptype == "text":
221
230
  text = part.get("text")
222
- if isinstance(text, str) and text:
223
- texts.append(text)
224
- return "\n".join(texts)
231
+ if isinstance(text, str):
232
+ segments.append({"type": "text", "text": text})
233
+ elif ptype == "image":
234
+ uri = part.get("uri")
235
+ mime_type = part.get("mime_type") or "image/png"
236
+ data_url = None
237
+ if isinstance(uri, str) and uri.startswith("data:"):
238
+ data_url = uri
239
+ else:
240
+ source = part.get("data") or part.get("source")
241
+ if isinstance(source, dict):
242
+ base64_data = source.get("data")
243
+ media_type = source.get("media_type") or mime_type
244
+ if isinstance(base64_data, str) and base64_data:
245
+ data_url = f"data:{media_type};base64,{base64_data}"
246
+ if data_url:
247
+ has_image = True
248
+ segments.append({"type": "image_url", "image_url": {"url": data_url}})
249
+ elif ptype == "image_url":
250
+ image_url = part.get("image_url", {})
251
+ if isinstance(image_url, dict):
252
+ url = image_url.get("url")
253
+ if isinstance(url, str) and url:
254
+ has_image = True
255
+ segments.append({"type": "image_url", "image_url": {"url": url}})
256
+
257
+ if not segments:
258
+ return "", False
259
+ if not has_image and len(segments) == 1 and segments[0]["type"] == "text":
260
+ return segments[0]["text"], False
261
+ return segments, has_image
262
+
263
+
264
+ def _normalise_output_content(content: Any) -> tuple[Any, bool]:
265
+ if isinstance(content, list):
266
+ return _collect_content(content)
267
+ if isinstance(content, str):
268
+ return content, False
269
+ if content is None:
270
+ return "", False
271
+ return str(content), False
225
272
 
226
273
 
227
274
  def _normalise_tool_calls(tool_calls: list[dict[str, Any]] | None) -> list[dict[str, Any]]:
@@ -233,7 +280,9 @@ def _normalise_tool_calls(tool_calls: list[dict[str, Any]] | None) -> list[dict[
233
280
  continue
234
281
  entry = dict(call)
235
282
 
236
- func_payload: dict[str, Any] | None = entry.get("function") if isinstance(entry.get("function"), dict) else None
283
+ func_payload: dict[str, Any] | None = (
284
+ entry.get("function") if isinstance(entry.get("function"), dict) else None
285
+ )
237
286
  name = entry.get("name") or (func_payload.get("name") if func_payload else None) or "tool"
238
287
 
239
288
  args = None
@@ -249,7 +298,7 @@ def _normalise_tool_calls(tool_calls: list[dict[str, Any]] | None) -> list[dict[
249
298
  except Exception:
250
299
  args = raw
251
300
 
252
- if isinstance(args, (dict, list)):
301
+ if isinstance(args, dict | list):
253
302
  args_str = json.dumps(args, ensure_ascii=False)
254
303
  elif isinstance(args, str):
255
304
  args_str = args
@@ -277,7 +326,7 @@ def _normalise_tool_calls(tool_calls: list[dict[str, Any]] | None) -> list[dict[
277
326
  def build_sft_dataset(
278
327
  conn: sqlite3.Connection,
279
328
  achievements_map: AchievementMap,
280
- sessions_filter: Set[str],
329
+ sessions_filter: set[str],
281
330
  *,
282
331
  allowed_models: set[str] | None = None,
283
332
  limit: int | None = None,
@@ -327,14 +376,18 @@ def build_sft_dataset(
327
376
 
328
377
  for record in call_records:
329
378
  messages: list[dict[str, Any]] = []
379
+ input_has_image = False
330
380
  for message in record.get("input_messages", []):
331
381
  role = message.get("role", "unknown")
332
- content = _collect_text(message.get("parts"))
333
- if not content:
382
+ content, has_image = _collect_content(message.get("parts"))
383
+ if (content == "" or content is None) and not has_image:
334
384
  continue
385
+ if has_image and role == "user":
386
+ input_has_image = True
335
387
  messages.append({"role": role, "content": content})
336
388
 
337
- assistant_content = ""
389
+ assistant_content_value: Any = ""
390
+ assistant_has_image = False
338
391
  assistant_tool_calls: list[dict[str, Any]] = []
339
392
 
340
393
  output_text = record.get("output_text")
@@ -349,17 +402,23 @@ def build_sft_dataset(
349
402
  choices = parsed_response.get("choices") or []
350
403
  if choices:
351
404
  message = choices[0].get("message") or {}
352
- assistant_content = message.get("content") or ""
405
+ assistant_content_value, assistant_has_image = _normalise_output_content(
406
+ message.get("content")
407
+ )
353
408
  assistant_tool_calls = _normalise_tool_calls(message.get("tool_calls"))
354
409
 
355
410
  if not assistant_tool_calls:
356
411
  assistant_tool_calls = _normalise_tool_calls(record.get("output_tool_calls"))
357
412
 
358
- assistant_message: dict[str, Any] = {"role": "assistant", "content": assistant_content or ""}
413
+ assistant_message: dict[str, Any] = {
414
+ "role": "assistant",
415
+ "content": assistant_content_value,
416
+ }
359
417
  if assistant_tool_calls:
360
418
  assistant_message["tool_calls"] = assistant_tool_calls
361
419
 
362
- if assistant_message.get("content") == "" and not assistant_message.get("tool_calls"):
420
+ content_empty = assistant_message.get("content") in ("", None)
421
+ if content_empty and not assistant_message.get("tool_calls"):
363
422
  continue
364
423
 
365
424
  messages.append(assistant_message)
@@ -380,6 +439,9 @@ def build_sft_dataset(
380
439
  "turned_true": achievements.get("all", []),
381
440
  "cumulative_unique": cumulative_unique[session_id],
382
441
  },
442
+ "user_has_image": input_has_image,
443
+ "assistant_has_image": assistant_has_image,
444
+ "has_image": input_has_image or assistant_has_image,
383
445
  }
384
446
 
385
447
  dataset.append({"messages": messages, "metadata": metadata})
@@ -426,27 +488,141 @@ def _validate_dataset(records: list[dict[str, Any]]) -> None:
426
488
  raise SystemExit(f"Validation error while exporting dataset:\n - {summary}")
427
489
 
428
490
 
491
+ def _find_trace_database() -> Path | None:
492
+ """Automatically discover the trace database in common locations."""
493
+
494
+ # Check for demo directory from state
495
+ try:
496
+ state_path = Path.home() / ".synth-ai" / "demo.json"
497
+ if state_path.exists():
498
+ import json
499
+
500
+ with state_path.open() as f:
501
+ data = json.load(f)
502
+ demo_dir = data.get("DEMO_DIR")
503
+ if demo_dir:
504
+ candidate = Path(demo_dir) / "traces" / "v3" / "synth_ai.db"
505
+ if candidate.exists():
506
+ return candidate
507
+ except Exception:
508
+ pass
509
+
510
+ # Search upward from current directory
511
+ cwd = Path.cwd()
512
+ for parent in [cwd] + list(cwd.parents):
513
+ candidate = parent / "traces" / "v3" / "synth_ai.db"
514
+ if candidate.exists():
515
+ return candidate
516
+
517
+ # Check standard locations
518
+ standard_locations = [
519
+ Path("traces/v3/synth_ai.db"),
520
+ Path("../traces/v3/synth_ai.db"),
521
+ Path.home() / "synth-ai" / "traces" / "v3" / "synth_ai.db",
522
+ ]
523
+
524
+ for location in standard_locations:
525
+ try:
526
+ if location.exists():
527
+ return location.resolve()
528
+ except Exception:
529
+ continue
530
+
531
+ return None
532
+
533
+
429
534
  def main() -> None:
430
535
  parser = argparse.ArgumentParser(description=__doc__)
431
- parser.add_argument("--db", type=Path, default=Path("traces/v3/synth_ai.db"), help="Path to tracing_v3 SQLite DB")
432
- parser.add_argument("--output", type=Path, required=True, help="Destination JSONL path for the exported dataset")
433
- parser.add_argument("--model", action="append", dest="models", help="Restrict to sessions whose dominant model matches (repeatable)")
434
- parser.add_argument("--provider", action="append", dest="providers", help="Restrict to sessions whose dominant provider matches (repeatable)")
435
- parser.add_argument("--min-unique", type=int, default=None, help="Minimum unique achievements per session")
436
- parser.add_argument("--max-unique", type=int, default=None, help="Maximum unique achievements per session")
536
+ parser.add_argument("--db", type=Path, default=None, help="Path to tracing_v3 SQLite DB")
537
+ parser.add_argument(
538
+ "--output",
539
+ type=Path,
540
+ required=False,
541
+ help="Destination JSONL path for the exported dataset",
542
+ )
543
+ parser.add_argument(
544
+ "--model",
545
+ action="append",
546
+ dest="models",
547
+ help="Restrict to sessions whose dominant model matches (repeatable)",
548
+ )
549
+ parser.add_argument(
550
+ "--provider",
551
+ action="append",
552
+ dest="providers",
553
+ help="Restrict to sessions whose dominant provider matches (repeatable)",
554
+ )
555
+ parser.add_argument(
556
+ "--min-unique", type=int, default=None, help="Minimum unique achievements per session"
557
+ )
558
+ parser.add_argument(
559
+ "--max-unique", type=int, default=None, help="Maximum unique achievements per session"
560
+ )
437
561
  parser.add_argument(
438
562
  "--exclude-achievement",
439
563
  action="append",
440
564
  dest="exclude_achievements",
441
565
  help="Achievements to ignore when evaluating --min-unique/--max-unique (repeatable)",
442
566
  )
443
- parser.add_argument("--require-achievement", action="append", dest="required_achievements", help="Require these outcome achievements (repeatable)")
444
- parser.add_argument("--min-outcome-reward", type=float, default=None, help="Minimum total outcome reward per session")
445
- parser.add_argument("--max-outcome-reward", type=float, default=None, help="Maximum total outcome reward per session")
446
- parser.add_argument("--event-reward", action="append", dest="event_reward_filters", help="Require reward_type[:min_total] in event_rewards (repeatable)")
447
- parser.add_argument("--limit", type=int, default=None, help="Maximum number of examples to emit")
567
+ parser.add_argument(
568
+ "--require-achievement",
569
+ action="append",
570
+ dest="required_achievements",
571
+ help="Require these outcome achievements (repeatable)",
572
+ )
573
+ parser.add_argument(
574
+ "--min-outcome-reward",
575
+ type=float,
576
+ default=None,
577
+ help="Minimum total outcome reward per session",
578
+ )
579
+ parser.add_argument(
580
+ "--max-outcome-reward",
581
+ type=float,
582
+ default=None,
583
+ help="Maximum total outcome reward per session",
584
+ )
585
+ parser.add_argument(
586
+ "--event-reward",
587
+ action="append",
588
+ dest="event_reward_filters",
589
+ help="Require reward_type[:min_total] in event_rewards (repeatable)",
590
+ )
591
+ parser.add_argument(
592
+ "--limit", type=int, default=None, help="Maximum number of examples to emit"
593
+ )
448
594
  args = parser.parse_args()
449
595
 
596
+ # Auto-discover database if not specified
597
+ db_path = args.db
598
+ if db_path is None:
599
+ db_path = _find_trace_database()
600
+ if db_path:
601
+ print(f"Found trace database: {db_path}")
602
+ else:
603
+ print("\nTrace database configuration:")
604
+ db_input = input("Trace database path [traces/v3/synth_ai.db]: ").strip()
605
+ db_path = Path(db_input) if db_input else Path("traces/v3/synth_ai.db")
606
+
607
+ if not db_path.exists():
608
+ print(f"Database not found: {db_path}", file=sys.stderr)
609
+ raise SystemExit(1)
610
+
611
+ output_path = args.output
612
+ if not output_path:
613
+ output_path = Path("ft_data/crafter_traces.jsonl")
614
+ print(f"Output will be written to: {output_path.resolve()}")
615
+
616
+ min_unique = args.min_unique
617
+ if min_unique is None:
618
+ min_unique = 0 # Default to including all traces
619
+ print(f"Minimum unique achievements filter: {min_unique} (all traces)")
620
+
621
+ # Override args with prompted values
622
+ args.db = db_path
623
+ args.output = output_path
624
+ args.min_unique = min_unique
625
+
450
626
  if not args.db.exists():
451
627
  print(f"Database not found: {args.db}", file=sys.stderr)
452
628
  raise SystemExit(1)
@@ -488,7 +664,11 @@ def main() -> None:
488
664
 
489
665
  outcome = outcome_data.get(session_id)
490
666
  total_reward = outcome["total_reward"] if outcome else 0.0
491
- final_achievements = outcome["achievements"] if outcome else session_final_achievements.get(session_id, set())
667
+ final_achievements = (
668
+ outcome["achievements"]
669
+ if outcome
670
+ else session_final_achievements.get(session_id, set())
671
+ )
492
672
 
493
673
  if args.min_outcome_reward is not None and total_reward < args.min_outcome_reward:
494
674
  continue
@@ -522,7 +702,9 @@ def main() -> None:
522
702
  )
523
703
 
524
704
  if not dataset:
525
- print("No rollout steps matched the filters (after session selection).", file=sys.stderr)
705
+ print(
706
+ "No rollout steps matched the filters (after session selection).", file=sys.stderr
707
+ )
526
708
  raise SystemExit(1)
527
709
 
528
710
  _validate_dataset(dataset)
@@ -530,7 +712,7 @@ def main() -> None:
530
712
  session_ids = {item.get("metadata", {}).get("session_id") for item in dataset}
531
713
  session_ids.discard(None)
532
714
  print(
533
- f"Wrote {len(dataset)} examples from {len(session_ids)} session(s) -> {args.output}",
715
+ f"Wrote {len(dataset)} examples from {len(session_ids)} session(s) -> {args.output.resolve()}",
534
716
  file=sys.stderr,
535
717
  )
536
718
  finally:
@@ -1,7 +1,7 @@
1
- from __future__ import annotations
2
-
3
1
  """Quick smoke test that drives a rollout through the Groq proxy-backed Crafter Task App."""
4
2
 
3
+ from __future__ import annotations
4
+
5
5
  import argparse
6
6
  import asyncio
7
7
  import os
@@ -29,8 +29,8 @@ def _build_policy_payload(seed: int, model: str) -> dict[str, Any]:
29
29
  {
30
30
  "role": "user",
31
31
  "content": (
32
- "Environment seed {seed}. Plan initial survival/crafting steps and then call interact with concrete actions."
33
- ).format(seed=seed),
32
+ f"Environment seed {seed}. Plan initial survival/crafting steps and then call interact with concrete actions."
33
+ ),
34
34
  },
35
35
  ],
36
36
  }
@@ -63,13 +63,21 @@ async def run(args: argparse.Namespace) -> None:
63
63
  response = await client.rollout(request)
64
64
  print("rollout.metrics →", to_jsonable(response.metrics.model_dump()))
65
65
  for idx, step in enumerate(response.trajectories[0].steps, start=1):
66
- print(f"step[{idx}] tool_calls={step.tool_calls} reward={step.reward} info={to_jsonable(step.info)}")
66
+ print(
67
+ f"step[{idx}] tool_calls={step.tool_calls} reward={step.reward} info={to_jsonable(step.info)}"
68
+ )
67
69
 
68
70
 
69
71
  def _parse_args() -> argparse.Namespace:
70
72
  parser = argparse.ArgumentParser(description=__doc__)
71
- parser.add_argument("--base-url", default=os.getenv("TASK_APP_BASE_URL", "http://localhost:8000"))
72
- parser.add_argument("--api-key", default=os.getenv("TASK_APP_API_KEY"), required=os.getenv("TASK_APP_API_KEY") is None)
73
+ parser.add_argument(
74
+ "--base-url", default=os.getenv("TASK_APP_BASE_URL", "http://localhost:8000")
75
+ )
76
+ parser.add_argument(
77
+ "--api-key",
78
+ default=os.getenv("TASK_APP_API_KEY"),
79
+ required=os.getenv("TASK_APP_API_KEY") is None,
80
+ )
73
81
  parser.add_argument("--model", default=os.getenv("GROQ_MODEL", "groq/mixtral-8x7b"))
74
82
  parser.add_argument("--inference-url", default=os.getenv("TASK_APP_INFERENCE_URL"))
75
83
  parser.add_argument("--seed", type=int, default=int(os.getenv("CRAFTER_TEST_SEED", "42")))
@@ -85,4 +93,3 @@ def main() -> None:
85
93
 
86
94
  if __name__ == "__main__":
87
95
  main()
88
-
@@ -8,11 +8,10 @@ import subprocess
8
8
  import sys
9
9
  import tempfile
10
10
  from pathlib import Path
11
- from typing import Dict, Tuple
12
11
 
13
12
 
14
- def load_env_file(path: Path) -> Dict[str, str]:
15
- env: Dict[str, str] = {}
13
+ def load_env_file(path: Path) -> dict[str, str]:
14
+ env: dict[str, str] = {}
16
15
  if not path.exists():
17
16
  raise FileNotFoundError(f".env not found at {path}")
18
17
  for line in path.read_text(encoding="utf-8").splitlines():
@@ -24,7 +23,7 @@ def load_env_file(path: Path) -> Dict[str, str]:
24
23
  return env
25
24
 
26
25
 
27
- def write_temp_env(kv: Dict[str, str]) -> Path:
26
+ def write_temp_env(kv: dict[str, str]) -> Path:
28
27
  fd, p = tempfile.mkstemp(prefix="modal_secret_", suffix=".env")
29
28
  path = Path(p)
30
29
  with os.fdopen(fd, "w", encoding="utf-8") as fh:
@@ -33,22 +32,27 @@ def write_temp_env(kv: Dict[str, str]) -> Path:
33
32
  return path
34
33
 
35
34
 
36
- def run(cmd: str) -> Tuple[int, str]:
37
- proc = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
35
+ def run(cmd: str) -> tuple[int, str]:
36
+ proc = subprocess.run(
37
+ cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
38
+ )
38
39
  return proc.returncode, proc.stdout
39
40
 
40
41
 
41
- def ensure_secret(secret_name: str, kv: Dict[str, str]) -> None:
42
+ def ensure_secret(secret_name: str, kv: dict[str, str]) -> None:
42
43
  if not kv:
43
44
  print(f"[skip] {secret_name}: no values provided")
44
45
  return
45
46
  # Prefer passing KEY=VALUE pairs to avoid Typer --env-file bug under some shells
46
47
  kv_args = " ".join([f"{shlex.quote(k)}={shlex.quote(v)}" for k, v in kv.items()])
48
+
47
49
  # Try plain modal first; fallback to uv run modal
48
- def _create() -> Tuple[int, str]:
50
+ def _create() -> tuple[int, str]:
49
51
  return run(f"modal secret create {shlex.quote(secret_name)} {kv_args}")
50
- def _delete() -> Tuple[int, str]:
52
+
53
+ def _delete() -> tuple[int, str]:
51
54
  return run(f"printf 'y\n' | modal secret delete {shlex.quote(secret_name)}")
55
+
52
56
  rc, out = _create()
53
57
  if rc != 0:
54
58
  # Fallback: use uv run modal
@@ -70,22 +74,17 @@ def ensure_secret(secret_name: str, kv: Dict[str, str]) -> None:
70
74
 
71
75
 
72
76
  def main() -> None:
73
- ap = argparse.ArgumentParser(description="Sync .env keys into Modal secret bundles for the task app")
74
- ap.add_argument("--env-path", default=str(Path(__file__).parent / ".env"), help="Path to .env with keys")
77
+ ap = argparse.ArgumentParser(
78
+ description="Sync .env keys into Modal secret bundles for the task app"
79
+ )
80
+ ap.add_argument(
81
+ "--env-path", default=str(Path(__file__).parent / ".env"), help="Path to .env with keys"
82
+ )
75
83
  args = ap.parse_args()
76
84
 
77
85
  env = load_env_file(Path(args.env_path))
78
86
 
79
87
  # Secrets used by the task app
80
- env_secret = {
81
- k: v
82
- for k, v in {
83
- "ENVIRONMENT_API_KEY": env.get("ENVIRONMENT_API_KEY", ""),
84
- "dev_environment_api_key": env.get("ENVIRONMENT_API_KEY", ""),
85
- }.items()
86
- if v
87
- }
88
-
89
88
  groq_secret = {
90
89
  k: v
91
90
  for k, v in {
@@ -105,9 +104,16 @@ def main() -> None:
105
104
  }
106
105
 
107
106
  # Optional: backend key (not mounted by task app today, but useful to keep consistent)
108
- synth_secret = {"SYNTH_API_KEY": env.get("SYNTH_API_KEY", "")} if env.get("SYNTH_API_KEY") else {}
109
-
110
- ensure_secret("crafter-environment-sdk", env_secret)
107
+ synth_secret = (
108
+ {"SYNTH_API_KEY": env.get("SYNTH_API_KEY", "")} if env.get("SYNTH_API_KEY") else {}
109
+ )
110
+
111
+ env_key = env.get("ENVIRONMENT_API_KEY", "")
112
+ if env_key:
113
+ print(
114
+ "Skipping Modal secret 'crafter-environment-sdk'; the task app now expects "
115
+ "ENVIRONMENT_API_KEY via --env-file so the CLI-minted value stays in sync."
116
+ )
111
117
  ensure_secret("groq-api-key", groq_secret)
112
118
  ensure_secret("openai-api-key", openai_secret)
113
119
  if synth_secret:
@@ -123,5 +129,3 @@ if __name__ == "__main__":
123
129
  except Exception as e:
124
130
  print(f"[error] {type(e).__name__}: {e}")
125
131
  sys.exit(1)
126
-
127
-
@@ -87,9 +87,16 @@ Evaluation scripts auto-load `.env` values. Update TOMLs under `configs/` with t
87
87
 
88
88
  ## 4. Tracing and SFT Dataset Export
89
89
 
90
- 1. Serve the task app with tracing enabled (see Section 2) or run the traced rollout helper:
90
+ 1. Serve the task app with tracing enabled (see Section 2). Optionally, run the traced rollout helper against the running server:
91
91
  ```bash
92
- uv run python examples/warming_up_to_rl/run_local_rollout_traced.py --episodes 10 --difficulty easy
92
+ uv run python examples/warming_up_to_rl/run_local_rollout_traced.py \
93
+ --base-url http://localhost:8001 \
94
+ --api-key "$ENVIRONMENT_API_KEY" \
95
+ --inference-api-key "$GROQ_API_KEY" \
96
+ --model qwen/qwen3-32b \
97
+ --inference-url https://api.groq.com/openai \
98
+ --max-llm-calls 3 \
99
+ --run-id local-trace
93
100
  ```
94
101
  2. Inspect local trace databases:
95
102
  ```bash