synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (349) hide show
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +23 -17
  3. examples/dev/qwen3_32b_qlora_4xh100.toml +40 -0
  4. examples/multi_step/crafter_rl_lora.md +29 -0
  5. examples/qwen_coder/README.md +102 -0
  6. examples/qwen_coder/_shared.py +113 -0
  7. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  8. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  9. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  10. examples/qwen_coder/generate_dataset.py +98 -0
  11. examples/qwen_coder/infer_ft_smoke.py +65 -0
  12. examples/qwen_coder/infer_prod_proxy.py +73 -0
  13. examples/qwen_coder/infer_via_synth.py +87 -0
  14. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  15. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  16. examples/qwen_coder/sft_full_17b.py +103 -0
  17. examples/qwen_coder/sft_lora_30b.py +110 -0
  18. examples/qwen_coder/subset_jsonl.py +39 -0
  19. examples/qwen_coder/todos.md +38 -0
  20. examples/qwen_coder/validate_jsonl.py +60 -0
  21. examples/rl/configs/eval_base_qwen.toml +1 -1
  22. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  23. examples/rl/download_dataset.py +26 -10
  24. examples/rl/run_eval.py +53 -52
  25. examples/rl/run_rl_and_save.py +29 -12
  26. examples/rl/task_app/math_single_step.py +180 -41
  27. examples/rl/task_app/math_task_app.py +14 -6
  28. examples/sft/README.md +139 -0
  29. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  30. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  31. examples/sft/evaluate.py +117 -0
  32. examples/sft/export_dataset.py +117 -0
  33. examples/sft/generate_traces.py +162 -0
  34. examples/swe/__init__.py +12 -0
  35. examples/swe/task_app/README.md +105 -0
  36. examples/swe/task_app/__init__.py +2 -0
  37. examples/swe/task_app/grpo_swe_mini.py +571 -0
  38. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  39. examples/swe/task_app/hosted/README.md +173 -0
  40. examples/swe/task_app/hosted/__init__.py +5 -0
  41. examples/swe/task_app/hosted/branching.py +143 -0
  42. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  43. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  44. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  45. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  46. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  47. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  48. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  49. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  50. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  51. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  52. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  53. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  54. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  55. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  56. examples/swe/task_app/hosted/hosted_app.py +204 -0
  57. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  58. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  59. examples/swe/task_app/hosted/main.py +100 -0
  60. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  61. examples/swe/task_app/hosted/registry.py +195 -0
  62. examples/swe/task_app/hosted/rollout.py +1869 -0
  63. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  64. examples/swe/task_app/hosted/storage/volume.py +211 -0
  65. examples/swe/task_app/hosted/test_agents.py +161 -0
  66. examples/swe/task_app/hosted/test_service.py +137 -0
  67. examples/swe/task_app/hosted/utils.py +62 -0
  68. examples/vlm/PROPOSAL.md +53 -0
  69. examples/vlm/README.md +68 -0
  70. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  71. examples/vlm/crafter_image_only_agent.py +207 -0
  72. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  73. examples/vlm/filter_image_rows.py +63 -0
  74. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  75. examples/warming_up_to_rl/analyze_trace_db.py +12 -10
  76. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  77. examples/warming_up_to_rl/export_trace_sft.py +218 -36
  78. examples/warming_up_to_rl/groq_test.py +15 -8
  79. examples/warming_up_to_rl/manage_secrets.py +29 -25
  80. examples/warming_up_to_rl/readme.md +9 -2
  81. examples/warming_up_to_rl/run_eval.py +137 -61
  82. examples/warming_up_to_rl/run_fft_and_save.py +131 -60
  83. examples/warming_up_to_rl/run_local_rollout.py +88 -39
  84. examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
  85. examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
  86. examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
  87. examples/warming_up_to_rl/run_rl_and_save.py +35 -12
  88. examples/warming_up_to_rl/run_rollout_remote.py +44 -19
  89. examples/warming_up_to_rl/task_app/README.md +6 -2
  90. examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
  91. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  111. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
  112. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  113. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
  114. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  115. synth_ai/__init__.py +1 -0
  116. synth_ai/api/models/supported.py +376 -0
  117. synth_ai/api/train/builders.py +157 -26
  118. synth_ai/api/train/cli.py +213 -57
  119. synth_ai/api/train/config_finder.py +65 -5
  120. synth_ai/api/train/env_resolver.py +33 -15
  121. synth_ai/api/train/pollers.py +13 -4
  122. synth_ai/api/train/supported_algos.py +139 -0
  123. synth_ai/api/train/task_app.py +5 -3
  124. synth_ai/api/train/utils.py +33 -48
  125. synth_ai/cli/__init__.py +19 -4
  126. synth_ai/cli/_modal_wrapper.py +28 -0
  127. synth_ai/cli/_typer_patch.py +49 -0
  128. synth_ai/cli/balance.py +2 -3
  129. synth_ai/cli/calc.py +1 -1
  130. synth_ai/cli/demo.py +21 -6
  131. synth_ai/cli/recent.py +2 -2
  132. synth_ai/cli/rl_demo.py +77 -17
  133. synth_ai/cli/root.py +116 -39
  134. synth_ai/cli/status.py +2 -2
  135. synth_ai/cli/task_apps.py +1699 -259
  136. synth_ai/cli/traces.py +7 -4
  137. synth_ai/cli/turso.py +73 -0
  138. synth_ai/cli/watch.py +12 -18
  139. synth_ai/core/experiment.py +0 -2
  140. synth_ai/demo_registry.py +68 -31
  141. synth_ai/demos/core/cli.py +516 -194
  142. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  143. synth_ai/demos/demo_task_apps/core.py +64 -28
  144. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  145. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
  146. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  147. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  148. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  149. synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
  150. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  151. synth_ai/environments/examples/bandit/engine.py +12 -4
  152. synth_ai/environments/examples/bandit/taskset.py +4 -4
  153. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  154. synth_ai/environments/reproducibility/tree.py +5 -6
  155. synth_ai/environments/service/app.py +11 -12
  156. synth_ai/environments/service/core_routes.py +10 -9
  157. synth_ai/environments/stateful/engine.py +1 -1
  158. synth_ai/environments/tasks/core.py +1 -0
  159. synth_ai/environments/tasks/filters.py +5 -6
  160. synth_ai/environments/tasks/utils.py +4 -5
  161. synth_ai/evals/base.py +0 -2
  162. synth_ai/handshake.py +11 -9
  163. synth_ai/http.py +1 -1
  164. synth_ai/http_client.py +43 -11
  165. synth_ai/inference/__init__.py +0 -2
  166. synth_ai/inference/client.py +20 -6
  167. synth_ai/jobs/client.py +103 -78
  168. synth_ai/learning/__init__.py +41 -6
  169. synth_ai/learning/algorithms.py +14 -0
  170. synth_ai/learning/client.py +121 -29
  171. synth_ai/learning/config.py +2 -40
  172. synth_ai/learning/constants.py +0 -2
  173. synth_ai/learning/ft_client.py +4 -56
  174. synth_ai/learning/health.py +13 -7
  175. synth_ai/learning/jobs.py +43 -47
  176. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  177. synth_ai/learning/rl/client.py +267 -0
  178. synth_ai/learning/rl/config.py +31 -0
  179. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  180. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  181. synth_ai/learning/rl/secrets.py +13 -0
  182. synth_ai/learning/rl_client.py +2 -253
  183. synth_ai/learning/sft/__init__.py +29 -0
  184. synth_ai/learning/sft/client.py +68 -0
  185. synth_ai/learning/sft/config.py +270 -0
  186. synth_ai/learning/sft/data.py +295 -0
  187. synth_ai/learning/sse.py +25 -26
  188. synth_ai/learning/validators.py +25 -24
  189. synth_ai/lm/__init__.py +21 -47
  190. synth_ai/task/__init__.py +26 -27
  191. synth_ai/task/apps/__init__.py +18 -19
  192. synth_ai/task/auth.py +35 -23
  193. synth_ai/task/client.py +15 -13
  194. synth_ai/task/contracts.py +37 -35
  195. synth_ai/task/datasets.py +9 -6
  196. synth_ai/task/errors.py +11 -10
  197. synth_ai/task/health.py +17 -11
  198. synth_ai/task/json.py +58 -24
  199. synth_ai/task/proxy.py +15 -14
  200. synth_ai/task/rubrics.py +22 -15
  201. synth_ai/task/server.py +43 -17
  202. synth_ai/task/tracing_utils.py +12 -7
  203. synth_ai/task/validators.py +0 -1
  204. synth_ai/task/vendors.py +5 -7
  205. synth_ai/tracing_v3/__init__.py +2 -0
  206. synth_ai/tracing_v3/abstractions.py +21 -4
  207. synth_ai/tracing_v3/db_config.py +26 -1
  208. synth_ai/tracing_v3/decorators.py +18 -15
  209. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  210. synth_ai/tracing_v3/hooks.py +6 -4
  211. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  212. synth_ai/tracing_v3/replica_sync.py +1 -0
  213. synth_ai/tracing_v3/session_tracer.py +63 -16
  214. synth_ai/tracing_v3/storage/base.py +89 -1
  215. synth_ai/tracing_v3/storage/config.py +21 -8
  216. synth_ai/tracing_v3/storage/factory.py +10 -8
  217. synth_ai/tracing_v3/storage/utils.py +4 -2
  218. synth_ai/tracing_v3/turso/daemon.py +7 -2
  219. synth_ai/tracing_v3/turso/models.py +5 -2
  220. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  221. synth_ai/tracing_v3/utils.py +4 -3
  222. synth_ai/v0/api/__init__.py +8 -0
  223. synth_ai/v0/api/models/__init__.py +8 -0
  224. synth_ai/v0/api/models/supported.py +8 -0
  225. synth_ai/v0/config/__init__.py +15 -0
  226. synth_ai/v0/config/base_url.py +12 -0
  227. synth_ai/v0/lm/__init__.py +51 -0
  228. synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
  229. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  230. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  231. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  232. synth_ai/{lm → v0/lm}/config.py +6 -1
  233. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  234. synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
  235. synth_ai/{lm → v0/lm}/core/main.py +19 -7
  236. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  237. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
  238. synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
  239. synth_ai/{lm → v0/lm}/overrides.py +4 -4
  240. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  241. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  242. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  243. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  244. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
  245. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  246. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  247. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
  248. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
  249. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
  250. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
  251. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  252. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  253. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  254. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  255. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  256. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  257. synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
  258. synth_ai/v0/tracing/upload.py +32 -135
  259. synth_ai/v0/tracing_v3/__init__.py +10 -0
  260. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  261. synth_ai/v0/tracing_v3/decorators.py +3 -0
  262. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  263. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  264. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/METADATA +10 -7
  265. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/RECORD +294 -258
  266. examples/common_old/backend.py +0 -21
  267. examples/evals_old/README.md +0 -98
  268. examples/evals_old/__init__.py +0 -6
  269. examples/evals_old/compare_models.py +0 -1037
  270. examples/evals_old/example_log.md +0 -145
  271. examples/evals_old/run_demo.sh +0 -126
  272. examples/evals_old/trace_analysis.py +0 -270
  273. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  274. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  275. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  276. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
  277. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  278. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  279. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  280. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  281. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  282. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
  283. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  284. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  285. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
  286. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  287. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  288. examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
  289. examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
  290. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  291. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  292. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
  293. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
  294. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
  295. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  296. examples/finetuning_old/synth_qwen_v1/util.py +0 -147
  297. examples/rl_old/task_app.py +0 -962
  298. synth_ai/experimental/synth_oss.py +0 -446
  299. synth_ai/install_sqld.sh +0 -40
  300. synth_ai/learning/filtering.py +0 -0
  301. synth_ai/learning/offline/dpo.py +0 -0
  302. synth_ai/learning/offline/providers.py +0 -7
  303. synth_ai/learning/offline/sft.py +0 -0
  304. synth_ai/learning/offline/shared.py +0 -0
  305. synth_ai/learning/online/grpo.py +0 -0
  306. synth_ai/learning/online/irft.py +0 -0
  307. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  308. synth_ai/learning/prompts/gepa.py +0 -0
  309. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  310. synth_ai/learning/prompts/mipro.py +0 -289
  311. synth_ai/learning/prompts/random_search.py +0 -246
  312. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  313. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  314. synth_ai/rl/secrets.py +0 -19
  315. synth_ai/scripts/verify_rewards.py +0 -100
  316. synth_ai/tracing/__init__.py +0 -30
  317. synth_ai/tracing_v1/__init__.py +0 -33
  318. synth_ai/tracing_v3/turso/__init__.py +0 -25
  319. synth_ai/tracing_v3/turso/manager.py +0 -774
  320. synth_ai/zyk/__init__.py +0 -30
  321. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  322. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  323. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  324. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  325. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  326. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  327. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  328. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  329. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  330. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  331. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  332. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  333. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  334. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  335. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  336. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  337. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  338. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  339. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  340. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  341. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  342. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  343. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  344. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  345. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  346. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/WHEEL +0 -0
  347. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/entry_points.txt +0 -0
  348. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/licenses/LICENSE +0 -0
  349. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/top_level.txt +0 -0
examples/vlm/README.md ADDED
@@ -0,0 +1,68 @@
1
+ # Crafter VLM Pipeline
2
+
3
+ This folder captures the reference workflow for fine-tuning Crafter policies with
4
+ multimodal (text + image) prompts. It stitches together the new image-aware tracing
5
+ plumbing with lightweight utilities for dataset curation and training.
6
+
7
+ ## Quick Start
8
+
9
+ 1. **Verify image capture**
10
+ ```
11
+ uv run python examples/vlm/crafter_image_only_agent.py --seed 7 --steps 5
12
+ ```
13
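+ This writes PNG frames to `examples/vlm/temp/frames/` by default (pass `--output-dir examples/vlm/output` to write under `examples/vlm/output/frames/` instead) and produces a JSONL preview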
14
+ of OpenAI-style image-only user messages.
15
+
16
+ 2. **Collect traced rollouts**
17
+ Use the Crafter task app (or your existing pipeline) with tracing enabled. The new
18
+ tracing schema automatically records `observation_image_base64` and stores image parts
19
+ in LM call records.
20
+
21
+ 3. **Export multimodal SFT rows**
22
+ ```
23
+ uv run python examples/warming_up_to_rl/export_trace_sft.py \
24
+ --db traces/v3/synth_ai.db \
25
+ --output examples/vlm/output/crafter_traces_full.jsonl
26
+ ```
27
+ The exporter now emits `metadata.has_image`, `metadata.user_has_image`, and
28
+ `metadata.assistant_has_image` flags per turn.
29
+
30
+ 4. **Filter to image-rich turns**
31
+ ```
32
+ uv run python examples/vlm/filter_image_rows.py \
33
+ --input examples/vlm/output/crafter_traces_full.jsonl \
34
+ --output examples/vlm/output/crafter_vlm_dataset.jsonl
35
+ ```
36
+
37
+ 5. **(Optional) Split validation or augment**, then upload using the standard CLI:
38
+ ```
39
+ uv run python examples/warming_up_to_rl/run_fft_and_save.py \
40
+ --toml examples/vlm/configs/crafter_vlm_gpt4o.toml \
41
+ --data examples/vlm/output/crafter_vlm_dataset.jsonl
42
+ ```
43
+
44
+ ## Config & Utilities
45
+
46
+ | File | Purpose |
47
+ | --- | --- |
48
+ | `configs/crafter_vlm_gpt4o.toml` | Sample Synth job targeting an image-capable model (`openai/gpt-4o-mini-2024-07-18`). Set `job.data` or pass `--data` explicitly. |
49
+ | `crafter_image_only_agent.py` | Captures frames and builds image-only prompts for sanity checks. |
50
+ | `filter_image_rows.py` | Extracts rows with image parts from exported JSONL datasets. |
51
+
52
+ ## Notes & Next Steps
53
+
54
+ - The training config assumes full-finetuning (`mode = "sft_offline"`). Adjust the
55
+ model id, hardware, or hyperparameters to match available infrastructure.
56
+ - Dataset rows emitted by `export_trace_sft.py` already contain OpenAI multimodal
57
+ content parts like:
58
+ ```json
59
+ {
60
+ "role": "user",
61
+ "content": [
62
+ {"type": "text", "text": "..."},
63
+ {"type": "image_url", "image_url": {"url": "data:image/png;base64,..." }}
64
+ ]
65
+ }
66
+ ```
67
+ - See `PROPOSAL.md` for a deeper dive into outstanding work (longer rollouts,
68
+ richer multimodal augmentations, evaluation ideas).
@@ -0,0 +1,44 @@
1
+ [job]
2
+ model = "openai/gpt-4o-mini-2024-07-18"
3
+ modalities = ["text", "image"]
4
+ # data = "examples/vlm/output/crafter_vlm_dataset.jsonl"
5
+ description = "Crafter VLM SFT (text + image prompts)"
6
+
7
+ [compute]
8
+ gpu_type = "A100"
9
+ gpu_count = 1
10
+ nodes = 1
11
+
12
+ [data]
13
+ topology = {}
14
+ # validation_path = "examples/vlm/output/crafter_vlm_dataset.val.jsonl"
15
+
16
+ [training]
17
+ mode = "sft_offline"
18
+ use_qlora = false
19
+
20
+ [training.validation]
21
+ enabled = true
22
+ evaluation_strategy = "steps"
23
+ eval_steps = 50
24
+ save_best_model_at_end = true
25
+ metric_for_best_model = "val.loss"
26
+ greater_is_better = false
27
+
28
+ [hyperparameters]
29
+ n_epochs = 1
30
+ train_kind = "fft"
31
+ per_device_batch = 1
32
+ gradient_accumulation_steps = 32
33
+ sequence_length = 4096
34
+ learning_rate = 1e-5
35
+ warmup_ratio = 0.03
36
+ weight_decay = 0.01
37
+
38
+ [hyperparameters.parallelism]
39
+ use_deepspeed = true
40
+ deepspeed_stage = 2
41
+ fsdp = false
42
+ bf16 = true
43
+ fp16 = false
44
+ activation_checkpointing = true
@@ -0,0 +1,207 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Run a minimal Crafter agent that emits image-only prompts and saves rendered frames.
4
+
5
+ This script demonstrates the multimodal observation pipeline by:
6
+ 1. Initialising a `CrafterClassicEnvironment` with a deterministic seed.
7
+ 2. Capturing `observation_image_base64` at each step and writing PNG frames.
8
+ 3. Building OpenAI-style user messages that contain only an image part.
9
+ 4. Emitting a small JSONL preview of the messages so they can be inspected or fed
10
+ directly into the fine-tuning dataset builder.
11
+
12
+ Usage:
13
+ uv run python examples/vlm/crafter_image_only_agent.py --seed 7 --steps 5
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import argparse
19
+ import asyncio
20
+ import base64
21
+ import json
22
+ import random
23
+ from collections.abc import Iterable
24
+ from pathlib import Path
25
+ from typing import Any
26
+ from uuid import uuid4
27
+
28
+ from synth_ai.environments.environment.tools import EnvToolCall
29
+ from synth_ai.environments.examples.crafter_classic.environment import CrafterClassicEnvironment
30
+ from synth_ai.environments.examples.crafter_classic.taskset import (
31
+ CrafterTaskInstance,
32
+ CrafterTaskInstanceMetadata,
33
+ )
34
+ from synth_ai.environments.tasks.core import Impetus, Intent
35
+
36
# Maps each Crafter action name to its integer id; the id is simply the
# position of the name in the tuple below (0-based).
ACTION_NAME_TO_ID = {
    action_name: action_index
    for action_index, action_name in enumerate(
        (
            "noop",
            "move_left",
            "move_right",
            "move_up",
            "move_down",
            "do",
            "sleep",
            "place_stone",
            "place_table",
            "place_furnace",
            "place_plant",
            "make_wood_pickaxe",
            "make_stone_pickaxe",
            "make_iron_pickaxe",
            "make_wood_sword",
            "make_stone_sword",
            "make_iron_sword",
        )
    )
}
55
+
56
+
57
def _build_task_instance(seed: int) -> CrafterTaskInstance:
    """Build a bare-bones Crafter task instance pinned to ``seed``.

    The same seed is stored both in the instance metadata and in the
    ``config`` dict attached to the returned instance.
    """

    instance = CrafterTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions="Explore the world and survive."),
        intent=Intent(
            rubric={"goal": "Unlock achievements and stay alive."},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=CrafterTaskInstanceMetadata(
            difficulty="custom",
            seed=seed,
            num_trees_radius=0,
            num_cows_radius=0,
            num_hostiles_radius=0,
        ),
        is_reproducible=True,
        initial_engine_snapshot=None,
    )
    # Attach environment config expected by the engine
    instance.config = {"seed": seed, "length": 256, "area": [64, 64]}
    return instance
84
+
85
+
86
def _select_actions(action_names: Iterable[str], steps: int) -> list[int]:
    """Resolve ``steps`` action ids by cycling through ``action_names``.

    An empty ``action_names`` falls back to a built-in five-action cycle.

    Raises:
        ValueError: if a name reached while cycling is not a key of
            ``ACTION_NAME_TO_ID``.
    """
    cycle = list(action_names) or ["move_right", "move_down", "move_left", "move_up", "do"]
    action_ids: list[int] = []
    for position in range(steps):
        label = cycle[position % len(cycle)]
        if label not in ACTION_NAME_TO_ID:
            raise ValueError(f"Unknown Crafter action: {label}")
        action_ids.append(ACTION_NAME_TO_ID[label])
    return action_ids
98
+
99
+
100
+ def _save_base64_png(data: str, path: Path) -> None:
101
+ """Decode a base64 string (with or without data URL prefix) and write to disk."""
102
+
103
+ if data.startswith("data:"):
104
+ _, _, encoded = data.partition(",")
105
+ else:
106
+ encoded = data
107
+ path.write_bytes(base64.b64decode(encoded))
108
+
109
+
110
+ def _build_image_only_message(data_url: str) -> dict[str, Any]:
111
+ return {
112
+ "role": "user",
113
+ "content": [{"type": "image_url", "image_url": {"url": data_url}}],
114
+ }
115
+
116
+
117
async def run(args: argparse.Namespace) -> None:
    """Step a Crafter environment, saving frames and an image-only message preview.

    Args:
        args: Parsed CLI namespace; ``output_dir``, ``seed``, ``actions`` and
            ``steps`` are read from it.

    Side effects:
        * Creates ``<output_dir>/frames`` and writes ``step_NNN.png`` for every
          step whose observation carries ``observation_image_base64``.
        * Writes one JSON record per step to
          ``<output_dir>/image_only_messages.jsonl``.
        * Prints the first message and a short summary to stdout.
    """
    output_dir = Path(args.output_dir).resolve()
    frames_dir = output_dir / "frames"
    frames_dir.mkdir(parents=True, exist_ok=True)
    messages_path = output_dir / "image_only_messages.jsonl"

    task_instance = _build_task_instance(args.seed)
    env = CrafterClassicEnvironment(task_instance)

    # Initialise environment
    raw_obs = await env.initialize()
    observation = getattr(raw_obs, "observation", raw_obs)

    # _select_actions returns exactly ``args.steps`` ids, so enumerating it
    # visits the same step indices as ``range(args.steps)``.
    action_ids = _select_actions(args.actions, args.steps)
    records: list[dict[str, Any]] = []
    # Counted separately from ``records``: a step without an image still
    # produces a record but no PNG on disk.
    frames_saved = 0

    for step_idx, action_id in enumerate(action_ids):
        # Non-dict observations are treated as having no keys at all.
        obs_dict = observation if isinstance(observation, dict) else {}
        image_b64 = obs_dict.get("observation_image_base64")
        data_url = obs_dict.get("observation_image_data_url")

        if image_b64:
            frame_path = frames_dir / f"step_{step_idx:03d}.png"
            _save_base64_png(image_b64, frame_path)
            frames_saved += 1

        if data_url:
            message = _build_image_only_message(data_url)
        else:
            # Keep the record stream aligned with steps even when no image exists.
            message = {
                "role": "user",
                "content": [{"type": "text", "text": "Image missing from observation."}],
            }

        records.append(
            {
                "step": step_idx,
                "action_id": action_id,
                "message": message,
                "observation_keys": sorted(obs_dict.keys()),
            }
        )

        # For the very first step, show the message structure
        if step_idx == 0:
            print("=== Image-only message example ===")
            print(json.dumps(message, indent=2))

        tool_call = EnvToolCall(tool="interact", args={"action": int(action_id)})
        env_step = await env.step(tool_call)
        observation = getattr(env_step, "observation", env_step)

    # Wrap up and dump the preview JSONL
    await env.terminate()
    with messages_path.open("w", encoding="utf-8") as fh:
        for record in records:
            fh.write(json.dumps(record, ensure_ascii=False) + "\n")

    # Report the number of PNGs actually written, not the number of records.
    print(f"Saved {frames_saved} frames -> {frames_dir}")
    print(f"Saved image-only message preview -> {messages_path}")
176
+
177
+
178
def parse_args() -> argparse.Namespace:
    """Parse command-line options for the image-only Crafter demo.

    Returns:
        Namespace with ``seed``, ``steps``, ``actions``, ``output_dir`` and
        ``randomise``. When ``--randomise`` is given, ``actions`` is shuffled
        in place before the namespace is returned.
    """
    fallback_dir = Path("examples/vlm/temp")
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--seed", type=int, default=7, help="Crafter environment seed")
    cli.add_argument("--steps", type=int, default=5, help="Number of env steps to capture")
    cli.add_argument(
        "--actions",
        nargs="*",
        default=["move_right", "move_down", "move_left", "move_up", "do"],
        help="Sequence of Crafter action names to cycle through",
    )
    cli.add_argument(
        "--output-dir",
        type=Path,
        default=fallback_dir,
        help=f"Directory for frames and message preview (default: {fallback_dir})",
    )
    cli.add_argument(
        "--randomise",
        action="store_true",
        help="Shuffle the provided action sequence before running",
    )

    namespace = cli.parse_args()
    if namespace.randomise:
        random.shuffle(namespace.actions)
    return namespace
204
+
205
+
206
# Script entry point: parse the CLI first, then drive the async runner.
if __name__ == "__main__":
    cli_args = parse_args()
    asyncio.run(run(cli_args))
@@ -0,0 +1,277 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Crafter agent that calls the OpenAI Chat Completions API with image + text prompts.
4
+
5
+ The harness mirrors the text-based agent workflow from `examples/warming_up_to_rl`:
6
+ * Uses the Crafter policy to build prompts, maintain history, and parse tool calls.
7
+ * Executes actions against the Synth Crafter environment (no HTTP task app required).
8
+ * Persists every rendered frame to `examples/vlm/temp/` so you can inspect exactly
9
+ what the VLM saw.
10
+
11
+ Requirements:
12
+ - `OPENAI_API_KEY` environment variable.
13
+ - `openai` Python package (installed via project dependencies).
14
+
15
+ Usage:
16
+ uv run python examples/vlm/crafter_openai_vlm_agent.py \
17
+ --model gpt-4o-mini-2024-07-18 --seeds 10 --steps 10
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import argparse
23
+ import asyncio
24
+ import base64
25
+ import json
26
+ import os
27
+ from pathlib import Path
28
+ from typing import Any
29
+ from uuid import uuid4
30
+
31
+ from examples.warming_up_to_rl.task_app.synth_envs_hosted.envs.crafter.environment import (
32
+ CrafterEnvironmentWrapper,
33
+ )
34
+ from examples.warming_up_to_rl.task_app.synth_envs_hosted.envs.crafter.policy import CrafterPolicy
35
+ from openai import OpenAI
36
+ from synth_ai.environments.examples.crafter_classic.environment import CrafterClassicEnvironment
37
+ from synth_ai.environments.examples.crafter_classic.taskset import (
38
+ CrafterTaskInstance,
39
+ CrafterTaskInstanceMetadata,
40
+ )
41
+ from synth_ai.environments.tasks.core import Impetus, Intent
42
+
43
+ DEFAULT_OUTPUT = Path("examples/vlm/temp")
44
+ FRAME_SUBDIR = "openai_agent_frames"
45
+
46
+
47
class EpisodeResult:
    """Running tally of what happened during a single Crafter episode."""

    def __init__(self, seed: int) -> None:
        """Start an empty tally for the episode identified by ``seed``."""
        self.seed = seed
        self.steps_taken: int = 0
        self.achievements: set[str] = set()
        self.total_reward: float = 0.0
        self.tool_calls: int = 0

    def record_observation(self, observation: dict[str, Any]) -> None:
        """Fold one observation packet into the tally.

        Packets that are not dicts, or whose ``"observation"`` entry is not
        a dict, are ignored. Truthy entries of ``achievements_status`` are
        added (as strings) to ``achievements``; a numeric
        ``reward_last_step`` is added to ``total_reward``.
        """
        if not isinstance(observation, dict):
            return
        inner = observation.get("observation")
        if not isinstance(inner, dict):
            return

        statuses = inner.get("achievements_status")
        if isinstance(statuses, dict):
            self.achievements.update(str(key) for key, flag in statuses.items() if flag)

        step_reward = inner.get("reward_last_step")
        if isinstance(step_reward, (int, float)):
            self.total_reward += float(step_reward)
67
+
68
+
69
+ def _ensure_client() -> OpenAI:
70
+ api_key = os.getenv("OPENAI_API_KEY")
71
+ if not api_key:
72
+ raise RuntimeError("OPENAI_API_KEY must be set for OpenAI calls")
73
+ return OpenAI(api_key=api_key)
74
+
75
+
76
def _build_task_instance(seed: int) -> CrafterTaskInstance:
    """Assemble the Crafter task instance used for the episode with ``seed``.

    The instance gets a fresh random id, is flagged reproducible, and has a
    ``config`` attribute attached carrying the seed, length and area.
    """
    fields = {
        "impetus": Impetus(instructions="Explore, survive, and unlock achievements."),
        "intent": Intent(
            rubric={"goal": "Maximise Crafter achievements."},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        "metadata": CrafterTaskInstanceMetadata(
            difficulty="custom",
            seed=seed,
            num_trees_radius=0,
            num_cows_radius=0,
            num_hostiles_radius=0,
        ),
        "id": uuid4(),
        "is_reproducible": True,
        "initial_engine_snapshot": None,
    }
    task = CrafterTaskInstance(**fields)
    # The config is set as a plain attribute after construction, not passed
    # to the constructor.
    task.config = {"seed": seed, "length": 256, "area": [64, 64]}
    return task
100
+
101
+
102
+ def _decode_and_save_image(observation: dict[str, Any], path: Path) -> None:
103
+ obs = observation.get("observation") if isinstance(observation, dict) else None
104
+ if not isinstance(obs, dict):
105
+ return
106
+ base64_data = obs.get("observation_image_base64")
107
+ if not isinstance(base64_data, str) or not base64_data:
108
+ return
109
+ path.parent.mkdir(parents=True, exist_ok=True)
110
+ try:
111
+ path.write_bytes(base64.b64decode(base64_data))
112
+ except Exception:
113
+ # Best-effort; corrupted frames should not halt rollout
114
+ pass
115
+
116
+
117
+ def _normalise_openai_request(payload: dict[str, Any], model: str, temperature: float) -> dict[str, Any]:
118
+ request = dict(payload)
119
+ request["model"] = model
120
+ request.setdefault("temperature", temperature)
121
+ request.setdefault("max_tokens", 512)
122
+ # Remove vendor-specific knobs unsupported by OpenAI
123
+ request.pop("stop_after_tool_calls", None)
124
+ request.pop("thinking_mode", None)
125
+ request.pop("thinking_budget", None)
126
+ max_completion = request.pop("max_completion_tokens", None)
127
+ if max_completion is not None:
128
+ request.setdefault("max_tokens", max_completion)
129
+ return request
130
+
131
+
132
async def _run_episode(
    *,
    seed: int,
    client: OpenAI,
    model: str,
    max_steps: int,
    output_dir: Path,
    temperature: float,
) -> EpisodeResult:
    """Play one Crafter episode, asking the OpenAI model for tool calls each step.

    Args:
        seed: Seed for the task instance and environment wrapper; also names
            the per-seed frame directory.
        client: OpenAI client used for the chat-completions call (the call is
            made synchronously, it is not awaited).
        model: Model id written into every inference request.
        max_steps: Upper bound on the number of loop iterations.
        output_dir: Root directory; frames are saved under
            ``output_dir / FRAME_SUBDIR / seed_XXXX``.
        temperature: Sampling temperature used when the policy's request does
            not already carry one.

    Returns:
        The ``EpisodeResult`` accumulated over the episode.

    Raises:
        RuntimeError: If the environment wrapper returns a non-dict response.
    """
    task_instance = _build_task_instance(seed)
    env = CrafterClassicEnvironment(task_instance)
    wrapper = CrafterEnvironmentWrapper(env, seed=seed)
    policy = CrafterPolicy(inference_url="openai://chat-completions", model=model)
    await policy.initialize({"use_tools": True, "model": model})

    episode_result = EpisodeResult(seed=seed)

    observation_packet = await wrapper.initialize()
    # From here on the wrapper is live: terminate it even when a step raises
    # (API failure, unexpected environment response, ...).
    try:
        episode_result.record_observation(observation_packet)

        frames_root = output_dir / FRAME_SUBDIR / f"seed_{seed:04d}"
        _decode_and_save_image(observation_packet, frames_root / "step_000.png")

        for step_idx in range(max_steps):
            obs_dict = observation_packet.get("observation")
            if not isinstance(obs_dict, dict):
                break

            obs_text = policy._format_observation_for_llm(observation_packet)  # noqa: SLF001
            # Only the request payload in `meta` is used; the tool calls
            # returned by the policy step itself are ignored.
            _, meta = await policy.step(
                observation_text=obs_text,
                metadata={"raw_observation": observation_packet},
            )
            if "inference_request" not in meta:
                break

            episode_result.steps_taken += 1
            inference_request = _normalise_openai_request(
                meta["inference_request"],
                model=model,
                temperature=temperature,
            )

            response = client.chat.completions.create(**inference_request)
            response_dict = response.model_dump()

            assistant_tool_calls = CrafterPolicy.parse_response_to_tool_calls(
                response_dict,
                use_tools=policy.use_tools,
            )
            if not assistant_tool_calls:
                print(
                    f"Seed {seed}: no tool calls returned by model; ending episode early at step {step_idx}."
                )
                break

            episode_result.tool_calls += len(assistant_tool_calls)

            assistant_message = response_dict["choices"][0].get("message") or {}
            assistant_text = assistant_message.get("content")

            env_response = await wrapper.step(assistant_tool_calls)
            if not isinstance(env_response, dict):
                raise RuntimeError(
                    f"Unexpected environment response type: {type(env_response)!r}"
                )
            episode_result.record_observation(env_response)

            # Hand the assistant turn and the environment response back to
            # the policy before the next step.
            policy._append_assistant_turn(  # noqa: SLF001
                assistant_text,
                assistant_tool_calls,
                env_response,
            )

            # step_000 is the initial frame, so the frame after step N is N + 1.
            frame_path = frames_root / f"step_{step_idx + 1:03d}.png"
            _decode_and_save_image(env_response, frame_path)

            if env_response.get("done"):
                break
            observation_packet = env_response
    finally:
        await wrapper.terminate()

    return episode_result
215
+
216
+
217
async def main() -> None:
    """Run the requested number of Crafter episodes and write a JSON summary.

    Seeds ``0 .. --seeds - 1`` are played one after another. Per-seed stats
    are printed as each episode finishes, and the aggregate summary is both
    printed and saved to ``<output-dir>/openai_agent_summary.json``.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--model", default="gpt-4o-mini-2024-07-18", help="OpenAI model id")
    cli.add_argument("--seeds", type=int, default=10, help="Number of random seeds to evaluate")
    cli.add_argument("--steps", type=int, default=10, help="Max steps per seed")
    cli.add_argument("--temperature", type=float, default=0.6, help="Sampling temperature")
    cli.add_argument(
        "--output-dir",
        type=Path,
        default=DEFAULT_OUTPUT,
        help=f"Directory for saved frames and summaries (default: {DEFAULT_OUTPUT})",
    )
    options = cli.parse_args()

    openai_client = _ensure_client()
    episodes: list[EpisodeResult] = []

    seed_values = range(options.seeds)
    print(f"Running {len(seed_values)} Crafter episodes with model={options.model}")

    for seed in seed_values:
        outcome = await _run_episode(
            seed=seed,
            client=openai_client,
            model=options.model,
            max_steps=options.steps,
            output_dir=options.output_dir,
            temperature=options.temperature,
        )
        episodes.append(outcome)
        print(
            f"Seed {seed:02d}: steps={outcome.steps_taken}, "
            f"achievements={len(outcome.achievements)}, "
            f"tool_calls={outcome.tool_calls}, reward≈{outcome.total_reward:.3f}"
        )

    # Guard the averages against an empty run (e.g. --seeds 0).
    divisor = max(len(episodes), 1)
    step_total = sum(ep.steps_taken for ep in episodes)
    achievement_total = sum(len(ep.achievements) for ep in episodes)
    summary = {
        "model": options.model,
        "episodes": len(episodes),
        "mean_steps": round(step_total / divisor, 2),
        "mean_achievements": round(achievement_total / divisor, 2),
        "total_tool_calls": sum(ep.tool_calls for ep in episodes),
        "output_dir": str(options.output_dir / FRAME_SUBDIR),
    }
    rendered = json.dumps(summary, indent=2)

    options.output_dir.mkdir(parents=True, exist_ok=True)
    (options.output_dir / "openai_agent_summary.json").write_text(rendered, encoding="utf-8")

    print("\nSummary")
    print("-------")
    print(rendered)
    print(f"\nFrames saved in: {summary['output_dir']}")
274
+
275
+
276
# Script entry point: run the async `main` coroutine to completion.
if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Filter SFT JSONL rows to those that contain image content.
4
+
5
+ This is a convenience wrapper around `examples/warming_up_to_rl/export_trace_sft.py`
6
+ output now that each record's metadata includes `has_image`, `user_has_image`, and
7
+ `assistant_has_image`.
8
+
9
+ Usage:
10
+ uv run python examples/vlm/filter_image_rows.py \
11
+ --input examples/sft/ft_data/crafter_traces.jsonl \
12
+ --output examples/vlm/output/crafter_vlm_dataset.jsonl
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import argparse
18
+ import json
19
+ from pathlib import Path
20
+
21
+
22
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the image-row filter.

    ``--input`` and ``--output`` are both required paths;
    ``--include-assistant`` is an optional flag.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    required_paths = (
        ("--input", "Source JSONL dataset"),
        ("--output", "Filtered JSONL path"),
    )
    for flag, help_text in required_paths:
        cli.add_argument(flag, type=Path, required=True, help=help_text)
    cli.add_argument(
        "--include-assistant",
        action="store_true",
        help="Require the assistant message to include an image as well",
    )
    namespace = cli.parse_args()
    return namespace
32
+
33
+
34
def main() -> None:
    """Copy the rows whose metadata flags a user image from input to output.

    A row is kept when ``metadata["user_has_image"]`` is truthy and, with
    ``--include-assistant``, ``metadata["assistant_has_image"]`` is truthy
    too. Rows that are not valid JSON, are not JSON objects, or have no
    dict ``metadata`` are skipped instead of aborting the run. Blank lines
    are ignored and not counted as rows.

    Raises:
        SystemExit: If ``--input`` and ``--output`` resolve to the same file,
            because opening it for writing would truncate the source before
            it is read.
    """
    args = parse_args()
    src = args.input
    dst = args.output
    if src.resolve() == dst.resolve():
        raise SystemExit(f"--input and --output must be different files: {src}")
    dst.parent.mkdir(parents=True, exist_ok=True)

    kept = 0
    total = 0
    with src.open("r", encoding="utf-8") as reader, dst.open("w", encoding="utf-8") as writer:
        for line in reader:
            if not line.strip():
                continue
            total += 1
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            # Valid JSON is not necessarily an object (`null`, lists, ...),
            # and such rows have no `.get`.
            if not isinstance(record, dict):
                continue
            metadata = record.get("metadata")
            if not isinstance(metadata, dict):
                continue
            if not metadata.get("user_has_image"):
                continue
            if args.include_assistant and not metadata.get("assistant_has_image"):
                continue
            writer.write(json.dumps(record, ensure_ascii=False) + "\n")
            kept += 1

    print(f"Filtered {kept} / {total} rows with user images -> {dst}")
60
+
61
+
62
# Script entry point: run the filter when the file is executed directly.
if __name__ == "__main__":
    main()