synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (327) hide show
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +8 -11
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/run_eval.py +36 -37
  19. examples/rl/run_rl_and_save.py +5 -5
  20. examples/rl/task_app/math_single_step.py +65 -43
  21. examples/rl/task_app/math_task_app.py +3 -3
  22. examples/sft/README.md +139 -0
  23. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  24. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  25. examples/sft/evaluate.py +117 -0
  26. examples/sft/export_dataset.py +117 -0
  27. examples/sft/generate_traces.py +162 -0
  28. examples/swe/__init__.py +12 -0
  29. examples/swe/task_app/README.md +105 -0
  30. examples/swe/task_app/__init__.py +2 -0
  31. examples/swe/task_app/grpo_swe_mini.py +571 -0
  32. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  33. examples/swe/task_app/hosted/README.md +173 -0
  34. examples/swe/task_app/hosted/__init__.py +5 -0
  35. examples/swe/task_app/hosted/branching.py +143 -0
  36. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  37. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  38. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  39. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  40. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  41. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  42. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  43. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  44. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  45. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  46. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  47. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  50. examples/swe/task_app/hosted/hosted_app.py +204 -0
  51. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  52. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  53. examples/swe/task_app/hosted/main.py +100 -0
  54. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  55. examples/swe/task_app/hosted/registry.py +195 -0
  56. examples/swe/task_app/hosted/rollout.py +1869 -0
  57. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  58. examples/swe/task_app/hosted/storage/volume.py +211 -0
  59. examples/swe/task_app/hosted/test_agents.py +161 -0
  60. examples/swe/task_app/hosted/test_service.py +137 -0
  61. examples/swe/task_app/hosted/utils.py +62 -0
  62. examples/vlm/README.md +68 -0
  63. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  64. examples/vlm/crafter_image_only_agent.py +207 -0
  65. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  66. examples/vlm/filter_image_rows.py +63 -0
  67. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  68. examples/warming_up_to_rl/analyze_trace_db.py +5 -5
  69. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  70. examples/warming_up_to_rl/export_trace_sft.py +78 -21
  71. examples/warming_up_to_rl/groq_test.py +4 -4
  72. examples/warming_up_to_rl/manage_secrets.py +13 -18
  73. examples/warming_up_to_rl/run_eval.py +42 -44
  74. examples/warming_up_to_rl/run_fft_and_save.py +11 -16
  75. examples/warming_up_to_rl/run_local_rollout.py +1 -3
  76. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
  77. examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
  78. examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
  79. examples/warming_up_to_rl/run_rl_and_save.py +5 -6
  80. examples/warming_up_to_rl/run_rollout_remote.py +8 -10
  81. examples/warming_up_to_rl/task_app/README.md +6 -2
  82. examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
  83. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  101. synth/__init__.py +14 -0
  102. synth_ai/__init__.py +26 -4
  103. synth_ai/api/models/supported.py +376 -0
  104. synth_ai/api/train/builders.py +128 -21
  105. synth_ai/api/train/cli.py +80 -64
  106. synth_ai/api/train/config_finder.py +7 -2
  107. synth_ai/api/train/env_resolver.py +1 -1
  108. synth_ai/api/train/pollers.py +2 -1
  109. synth_ai/api/train/supported_algos.py +139 -0
  110. synth_ai/api/train/task_app.py +1 -2
  111. synth_ai/api/train/utils.py +13 -44
  112. synth_ai/cli/__init__.py +8 -0
  113. synth_ai/cli/_modal_wrapper.py +28 -0
  114. synth_ai/cli/_typer_patch.py +49 -0
  115. synth_ai/cli/balance.py +1 -2
  116. synth_ai/cli/calc.py +1 -1
  117. synth_ai/cli/demo.py +2 -1
  118. synth_ai/cli/recent.py +2 -2
  119. synth_ai/cli/rl_demo.py +2 -1
  120. synth_ai/cli/root.py +11 -13
  121. synth_ai/cli/status.py +2 -2
  122. synth_ai/cli/task_apps.py +529 -179
  123. synth_ai/cli/traces.py +6 -4
  124. synth_ai/cli/watch.py +12 -18
  125. synth_ai/demo_registry.py +1 -1
  126. synth_ai/demos/core/cli.py +36 -43
  127. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  128. synth_ai/demos/demo_task_apps/core.py +17 -25
  129. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
  130. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  131. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
  132. synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
  133. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  134. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  135. synth_ai/environments/reproducibility/tree.py +2 -5
  136. synth_ai/environments/service/app.py +11 -12
  137. synth_ai/environments/service/core_routes.py +4 -7
  138. synth_ai/environments/stateful/engine.py +1 -1
  139. synth_ai/environments/tasks/core.py +1 -0
  140. synth_ai/environments/tasks/filters.py +5 -6
  141. synth_ai/environments/tasks/utils.py +4 -5
  142. synth_ai/handshake.py +9 -9
  143. synth_ai/http.py +1 -1
  144. synth_ai/http_client.py +18 -10
  145. synth_ai/inference/client.py +15 -5
  146. synth_ai/jobs/client.py +78 -83
  147. synth_ai/learning/__init__.py +41 -6
  148. synth_ai/learning/algorithms.py +14 -0
  149. synth_ai/learning/client.py +91 -24
  150. synth_ai/learning/config.py +2 -38
  151. synth_ai/learning/ft_client.py +4 -59
  152. synth_ai/learning/health.py +5 -6
  153. synth_ai/learning/jobs.py +31 -47
  154. synth_ai/{rl → learning/rl}/__init__.py +14 -4
  155. synth_ai/learning/rl/client.py +267 -0
  156. synth_ai/learning/rl/config.py +31 -0
  157. synth_ai/{rl → learning/rl}/contracts.py +5 -8
  158. synth_ai/{rl → learning/rl}/env_keys.py +39 -15
  159. synth_ai/learning/rl/secrets.py +13 -0
  160. synth_ai/learning/rl_client.py +2 -281
  161. synth_ai/learning/sft/__init__.py +29 -0
  162. synth_ai/learning/sft/client.py +68 -0
  163. synth_ai/learning/sft/config.py +270 -0
  164. synth_ai/learning/sft/data.py +295 -0
  165. synth_ai/learning/sse.py +25 -24
  166. synth_ai/learning/validators.py +25 -28
  167. synth_ai/lm/__init__.py +21 -47
  168. synth_ai/main.py +4 -0
  169. synth_ai/task/__init__.py +25 -27
  170. synth_ai/task/apps/__init__.py +7 -8
  171. synth_ai/task/auth.py +8 -8
  172. synth_ai/task/client.py +14 -14
  173. synth_ai/task/contracts.py +36 -35
  174. synth_ai/task/datasets.py +6 -5
  175. synth_ai/task/errors.py +10 -10
  176. synth_ai/task/health.py +17 -9
  177. synth_ai/task/json.py +58 -23
  178. synth_ai/task/proxy.py +13 -9
  179. synth_ai/task/rubrics.py +16 -15
  180. synth_ai/task/server.py +12 -12
  181. synth_ai/task/tracing_utils.py +4 -4
  182. synth_ai/task/vendors.py +5 -6
  183. synth_ai/tracing_v3/__init__.py +2 -0
  184. synth_ai/tracing_v3/abstractions.py +21 -4
  185. synth_ai/tracing_v3/decorators.py +18 -16
  186. synth_ai/tracing_v3/hooks.py +5 -5
  187. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  188. synth_ai/tracing_v3/session_tracer.py +40 -14
  189. synth_ai/tracing_v3/storage/base.py +85 -0
  190. synth_ai/tracing_v3/storage/config.py +21 -8
  191. synth_ai/tracing_v3/storage/factory.py +10 -7
  192. synth_ai/tracing_v3/storage/utils.py +4 -2
  193. synth_ai/tracing_v3/turso/daemon.py +7 -2
  194. synth_ai/tracing_v3/turso/models.py +2 -2
  195. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  196. synth_ai/tracing_v3/utils.py +4 -4
  197. synth_ai/v0/api/__init__.py +8 -0
  198. synth_ai/v0/api/models/__init__.py +8 -0
  199. synth_ai/v0/api/models/supported.py +8 -0
  200. synth_ai/v0/config/__init__.py +15 -0
  201. synth_ai/v0/config/base_url.py +12 -0
  202. synth_ai/v0/lm/__init__.py +51 -0
  203. synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
  204. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  205. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  206. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  207. synth_ai/{lm → v0/lm}/config.py +6 -1
  208. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  209. synth_ai/{lm → v0/lm}/core/main.py +6 -6
  210. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  211. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
  212. synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
  213. synth_ai/{lm → v0/lm}/overrides.py +2 -2
  214. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  215. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  216. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  217. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  218. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
  219. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  220. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  221. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
  222. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
  223. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
  224. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
  225. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  226. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  227. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  228. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  229. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  230. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  231. synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
  232. synth_ai/v0/tracing_v3/__init__.py +10 -0
  233. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  234. synth_ai/v0/tracing_v3/decorators.py +3 -0
  235. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  236. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  237. synth_ai-0.2.9.dev8.dist-info/METADATA +191 -0
  238. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/RECORD +268 -238
  239. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/top_level.txt +1 -0
  240. examples/common_old/backend.py +0 -20
  241. examples/evals_old/README.md +0 -98
  242. examples/evals_old/__init__.py +0 -6
  243. examples/evals_old/compare_models.py +0 -1038
  244. examples/evals_old/example_log.md +0 -145
  245. examples/evals_old/run_demo.sh +0 -126
  246. examples/evals_old/trace_analysis.py +0 -270
  247. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  248. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  249. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  250. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
  251. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  252. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  253. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  254. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  255. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  256. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
  257. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  258. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  259. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
  260. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  261. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  262. examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
  263. examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
  264. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  265. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  266. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
  267. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
  268. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
  269. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  270. examples/finetuning_old/synth_qwen_v1/util.py +0 -152
  271. examples/rl_old/task_app.py +0 -1131
  272. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  273. examples/warming_up_to_rl/old/notes.md +0 -73
  274. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  275. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  276. synth_ai/experimental/synth_oss.py +0 -445
  277. synth_ai/learning/filtering.py +0 -0
  278. synth_ai/learning/offline/dpo.py +0 -0
  279. synth_ai/learning/offline/providers.py +0 -7
  280. synth_ai/learning/offline/sft.py +0 -0
  281. synth_ai/learning/offline/shared.py +0 -0
  282. synth_ai/learning/online/grpo.py +0 -0
  283. synth_ai/learning/online/irft.py +0 -0
  284. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  285. synth_ai/learning/prompts/gepa.py +0 -0
  286. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
  287. synth_ai/learning/prompts/mipro.py +0 -289
  288. synth_ai/learning/prompts/random_search.py +0 -249
  289. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  290. synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
  291. synth_ai/rl/secrets.py +0 -19
  292. synth_ai/scripts/verify_rewards.py +0 -100
  293. synth_ai/tracing/__init__.py +0 -30
  294. synth_ai/tracing_v1/__init__.py +0 -33
  295. synth_ai/tracing_v3/turso/__init__.py +0 -25
  296. synth_ai/tracing_v3/turso/manager.py +0 -838
  297. synth_ai/zyk/__init__.py +0 -30
  298. synth_ai-0.2.9.dev7.dist-info/METADATA +0 -131
  299. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  300. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  301. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  302. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  303. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  304. /synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
  305. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  306. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  307. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  308. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  309. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  310. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  311. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  312. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  313. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  314. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  315. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  316. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  317. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  318. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  319. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  320. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  321. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  322. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  323. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  324. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  325. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/WHEEL +0 -0
  326. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/entry_points.txt +0 -0
  327. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,5 @@
1
+ """Storage module for Modal Volume operations."""
2
+
3
+ from .volume import VolumeStorage, storage
4
+
5
+ __all__ = ["VolumeStorage", "storage"]
@@ -0,0 +1,211 @@
1
+ from __future__ import annotations
2
+
3
+ import gzip
4
+ import hashlib
5
+ import json
6
+ import os
7
+ import tarfile
8
+ import tempfile
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+
14
class VolumeStorage:
    """Helpers for Modal Volume storage operations.

    Snapshots are gzip-compressed tar archives holding ``state.json`` and
    ``meta.json``. They live under ``<base_path>/runs/<rl_run_id>/<kind>/`` in
    two levels of shard directories; each run also has a JSON-lines index at
    ``<base_path>/runs/<rl_run_id>/index/meta.jsonl``.
    """

    def __init__(self, base_path: str = "/data/state") -> None:
        """Remember the root directory under which all runs are stored."""
        self.base_path = Path(base_path)

    def get_snapshot_path(
        self,
        rl_run_id: str,
        kind: str,
        snapshot_id: str,
    ) -> Path:
        """Build the path for a snapshot file.

        The first two and the next two characters of ``snapshot_id`` are used
        as shard directories; IDs too short to supply a shard fall back to
        ``"00"`` for that level.
        """
        shard1 = snapshot_id[:2] if len(snapshot_id) >= 2 else "00"
        shard2 = snapshot_id[2:4] if len(snapshot_id) >= 4 else "00"

        return (
            self.base_path / "runs" / rl_run_id / kind / shard1 / shard2 / f"{snapshot_id}.tar.gz"
        )

    def get_index_path(self, rl_run_id: str) -> Path:
        """Get the index file path for a run."""
        return self.base_path / "runs" / rl_run_id / "index" / "meta.jsonl"

    def write_snapshot_atomic(
        self,
        path: Path,
        archive_bytes: bytes,
    ) -> None:
        """Atomically write a snapshot archive to disk.

        The bytes are written and fsynced to a sibling ``.tmp`` file which is
        then renamed over ``path``. If anything fails along the way the
        partial temp file is removed and the original exception propagates.
        """
        # Ensure parent directory exists
        path.parent.mkdir(parents=True, exist_ok=True)

        tmp_path = path.with_suffix(".tmp")
        try:
            with open(tmp_path, "wb") as f:
                f.write(archive_bytes)
                f.flush()
                os.fsync(f.fileno())

            # Atomic rename
            os.replace(tmp_path, path)
        except BaseException:
            # Best-effort cleanup so a failed write does not leave a stray
            # partial file behind; the original error is what callers see.
            try:
                tmp_path.unlink(missing_ok=True)
            except OSError:
                pass
            raise

    def create_archive(
        self,
        state_dict: dict[str, Any],
        meta: dict[str, Any],
    ) -> bytes:
        """Create a tar.gz archive with state and metadata.

        Both dicts are serialized as key-sorted, indented JSON into
        ``state.json`` and ``meta.json``, tarred, and gzip-compressed
        (level 6). Returns the compressed bytes.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            tmppath = Path(tmpdir)

            # Write state.json
            state_path = tmppath / "state.json"
            with open(state_path, "w", encoding="utf-8") as f:
                json.dump(state_dict, f, sort_keys=True, indent=2)

            # Write meta.json
            meta_path = tmppath / "meta.json"
            with open(meta_path, "w", encoding="utf-8") as f:
                json.dump(meta, f, sort_keys=True, indent=2)

            # Create tar archive
            tar_path = tmppath / "archive.tar"
            with tarfile.open(tar_path, "w") as tar:
                tar.add(state_path, arcname="state.json")
                tar.add(meta_path, arcname="meta.json")

            # Compress with gzip
            with open(tar_path, "rb") as f:
                tar_bytes = f.read()

            compressed = gzip.compress(tar_bytes, compresslevel=6)

        return compressed

    @staticmethod
    def _read_json_member(tar: tarfile.TarFile, name: str) -> Any:
        """Parse the JSON stored in archive member ``name``.

        Raises:
            FileNotFoundError: if the member is absent or is not a regular file.
        """
        try:
            member = tar.getmember(name)
        except KeyError:
            raise FileNotFoundError(f"Archive member not found: {name}") from None

        handle = tar.extractfile(member)
        if handle is None:  # directory, device node, etc.
            raise FileNotFoundError(f"Archive member is not a regular file: {name}")
        with handle:
            return json.loads(handle.read())

    def extract_archive(self, archive_bytes: bytes) -> tuple[dict[str, Any], dict[str, Any]]:
        """Extract state and metadata from a tar.gz archive.

        Only ``state.json`` and ``meta.json`` are read, directly from memory.
        Nothing is unpacked to disk, so member names in the archive (absolute
        paths, ``..`` components) cannot cause writes outside a scratch
        directory.

        Raises:
            FileNotFoundError: if either expected member is missing.
        """
        import io

        tar_bytes = gzip.decompress(archive_bytes)

        with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode="r") as tar:
            state = self._read_json_member(tar, "state.json")
            meta = self._read_json_member(tar, "meta.json")

        return state, meta

    def compute_snapshot_id(self, archive_bytes: bytes) -> str:
        """Return the SHA-256 hex digest of ``archive_bytes``."""
        return hashlib.sha256(archive_bytes).hexdigest()

    def save_snapshot(
        self,
        rl_run_id: str,
        kind: str,
        state_dict: dict[str, Any],
        config: dict[str, Any] | None = None,
        parent_snapshot_id: str | None = None,
    ) -> tuple[str, str, int]:
        """Save a snapshot and return (snapshot_id, path, size).

        The snapshot ID is the SHA-256 of a preliminary archive built *before*
        the ID is added to the metadata; the archive actually written embeds
        the ID, so hashing the stored file does not reproduce ``snapshot_id``.
        ``size`` is the byte length of the stored archive. The final metadata
        is also appended to the run's index.
        """
        from datetime import timezone

        # Build metadata. The timestamp is UTC rendered without an offset,
        # matching the format datetime.utcnow().isoformat() produced.
        meta: dict[str, Any] = {
            "kind": kind,
            "rl_run_id": rl_run_id,
            "schema_version": "1.0",
            "created_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        }

        if parent_snapshot_id:
            meta["parent_snapshot_id"] = parent_snapshot_id

        if config:
            config_str = json.dumps(config, sort_keys=True)
            meta["config_hash"] = hashlib.sha256(config_str.encode()).hexdigest()

        # Create archive
        archive_bytes = self.create_archive(state_dict, meta)

        # Compute snapshot ID
        snapshot_id = self.compute_snapshot_id(archive_bytes)
        meta["snapshot_id"] = snapshot_id

        # Recreate archive with snapshot_id in metadata
        archive_bytes = self.create_archive(state_dict, meta)

        # Get path and write
        path = self.get_snapshot_path(rl_run_id, kind, snapshot_id)
        self.write_snapshot_atomic(path, archive_bytes)

        # Append to index
        self.append_to_index(rl_run_id, meta)

        return snapshot_id, str(path), len(archive_bytes)

    def load_snapshot(
        self,
        rl_run_id: str,
        kind: str,
        snapshot_id: str,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Load a snapshot and return (state_dict, meta).

        Raises:
            FileNotFoundError: if no archive exists at the computed path.
        """
        path = self.get_snapshot_path(rl_run_id, kind, snapshot_id)

        if not path.exists():
            raise FileNotFoundError(f"Snapshot not found: {path}")

        with open(path, "rb") as f:
            archive_bytes = f.read()

        state, meta = self.extract_archive(archive_bytes)
        return state, meta

    def append_to_index(
        self,
        rl_run_id: str,
        meta: dict[str, Any],
    ) -> None:
        """Append metadata to the run's index file as one JSON line."""
        index_path = self.get_index_path(rl_run_id)
        index_path.parent.mkdir(parents=True, exist_ok=True)

        with open(index_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(meta) + "\n")

    def read_index(self, rl_run_id: str) -> list[dict[str, Any]]:
        """Read all entries from a run's index file.

        Returns an empty list when the index does not exist; blank lines are
        skipped.
        """
        index_path = self.get_index_path(rl_run_id)

        if not index_path.exists():
            return []

        with open(index_path, encoding="utf-8") as f:
            return [json.loads(line) for line in f if line.strip()]
208
+
209
+
210
+ # Global storage instance
211
+ storage = VolumeStorage()
@@ -0,0 +1,161 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Smoke test for Wordle and Sokoban ReAct agents using the hosted service.
4
+
5
+ Prereqs:
6
+ - Run the service: python examples/swe/task_app/hosted/main.py
7
+ - Run an OpenAI-compatible inference server (e.g., Flash/vLLM) at VLLM_BASE_URL
8
+ that serves model "gpt-5-nano" or adjust MODEL below.
9
+
10
+ This script will:
11
+ - Create a Wordle/Sokoban env
12
+ - Create corresponding *-react policy with tools
13
+ - Ask the policy for tool_calls via /policy/step (which calls the model)
14
+ - Apply tool_calls to the env via /env/step
15
+ """
16
+
17
+ import asyncio
18
+ import os
19
+
20
+ import httpx
21
+
22
+ BASE_URL = os.environ.get("SYNTH_ENVS_HOSTED_URL", "http://localhost:8000")
23
+ INFER_URL = os.environ.get("VLLM_BASE_URL", "http://localhost:8001")
24
+ MODEL = os.environ.get("MODEL", "gpt-5-nano")
25
+
26
+
27
async def run_wordle(rounds: int = 3) -> None:
    """Exercise the ``wordle-react`` policy against a Wordle env.

    Creates the env and a policy bound to it through the hosted service, then
    for up to ``rounds`` turns asks /policy/step for tool calls and applies
    them via /env/step. Stops early when the policy returns no tool calls or
    the env reports ``done``. Every response is checked with
    ``raise_for_status()``, so an error response raises.
    """
    env_request = {
        "env_name": "Wordle",
        "config": {"word_length": 5, "max_guesses": 6},
        "seed": 0,
        "rl_run_id": "agents-smoke",
    }

    async with httpx.AsyncClient() as http:
        # Environment setup
        created_env = await http.post(f"{BASE_URL}/env/create", json=env_request)
        created_env.raise_for_status()
        env_info = created_env.json()
        env_id = env_info["env_id"]
        observation = env_info["observation"]
        print("Wordle env created:", env_id)

        # Policy setup, bound to the env created above
        policy_request = {
            "policy_name": "wordle-react",
            "config": {
                "inference_url": INFER_URL,
                "model": MODEL,
                "use_tools": True,
                "word_length": 5,
                "max_guesses": 6,
            },
            "rl_run_id": "agents-smoke",
            "bound_env_id": env_id,
        }
        created_policy = await http.post(f"{BASE_URL}/policy/create", json=policy_request)
        created_policy.raise_for_status()
        policy_id = created_policy.json()["policy_id"]
        print("Wordle policy:", policy_id)

        # Alternate policy and env steps
        for round_no in range(1, rounds + 1):
            print(f"[Wordle] Round {round_no}")

            policy_reply = await http.post(
                f"{BASE_URL}/policy/step",
                json={"policy_id": policy_id, "observation": observation, "dry_run": False},
            )
            policy_reply.raise_for_status()
            tool_calls = policy_reply.json().get("tool_calls", [])
            print(" tool_calls:", tool_calls)
            if not tool_calls:
                break

            env_reply = await http.post(
                f"{BASE_URL}/env/step",
                json={"env_id": env_id, "tool_calls": tool_calls},
            )
            env_reply.raise_for_status()
            outcome = env_reply.json()
            observation = outcome["observation"]
            print(" done:", outcome.get("done"), "reward:", outcome.get("reward"))
            if outcome.get("done"):
                break
86
+
87
+
88
async def run_sokoban(rounds: int = 3) -> None:
    """Exercise the ``sokoban-react`` policy against a Sokoban env.

    Follows the same create-env / create-policy / step loop as the Wordle
    run, but reports failures instead of raising: any response whose status
    is not 200 is printed and ends the run (setup) or the loop (stepping).
    """
    env_request = {
        "env_name": "Sokoban",
        "config": {"difficulty": "easy"},
        "seed": 0,
        "rl_run_id": "agents-smoke",
    }

    async with httpx.AsyncClient() as http:
        # No initial_state is sent; the env's own default is relied upon.
        created_env = await http.post(f"{BASE_URL}/env/create", json=env_request)
        if created_env.status_code != 200:
            print("Sokoban create failed:", created_env.status_code, created_env.text)
            return
        env_info = created_env.json()
        env_id = env_info["env_id"]
        observation = env_info["observation"]
        print("Sokoban env created:", env_id)

        policy_request = {
            "policy_name": "sokoban-react",
            "config": {
                "inference_url": INFER_URL,
                "model": MODEL,
                "use_tools": True,
            },
            "rl_run_id": "agents-smoke",
            "bound_env_id": env_id,
        }
        created_policy = await http.post(f"{BASE_URL}/policy/create", json=policy_request)
        if created_policy.status_code != 200:
            print(
                "Sokoban policy create failed:",
                created_policy.status_code,
                created_policy.text,
            )
            return
        policy_id = created_policy.json()["policy_id"]
        print("Sokoban policy:", policy_id)

        for round_no in range(1, rounds + 1):
            print(f"[Sokoban] Round {round_no}")

            policy_reply = await http.post(
                f"{BASE_URL}/policy/step",
                json={"policy_id": policy_id, "observation": observation, "dry_run": False},
            )
            if policy_reply.status_code != 200:
                print(" policy step failed:", policy_reply.status_code, policy_reply.text)
                break
            tool_calls = policy_reply.json().get("tool_calls", [])
            print(" tool_calls:", tool_calls)
            if not tool_calls:
                break

            env_reply = await http.post(
                f"{BASE_URL}/env/step",
                json={"env_id": env_id, "tool_calls": tool_calls},
            )
            if env_reply.status_code != 200:
                print(" env step failed:", env_reply.status_code, env_reply.text)
                break
            outcome = env_reply.json()
            observation = outcome["observation"]
            print(" done:", outcome.get("done"), "reward:", outcome.get("reward"))
            if outcome.get("done"):
                break
151
+
152
+
153
async def main():
    """Run the Wordle smoke test, then the Sokoban one, three rounds each."""
    suites = (
        ("Testing Wordle agent with model:", run_wordle),
        ("\nTesting Sokoban agent with model:", run_sokoban),
    )
    for banner, runner in suites:
        print(banner, MODEL)
        await runner(rounds=3)
158
+
159
+
160
+ if __name__ == "__main__":
161
+ asyncio.run(main())
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Simple test script for the GRPO Synth Envs Hosted Service.
4
+
5
+ Run this after starting the service with:
6
+ python main.py
7
+ """
8
+
9
+ import asyncio
10
+ import json
11
+
12
+ import httpx
13
+
14
+
15
def _require_ok(response, endpoint):
    """Raise AssertionError unless *response* has HTTP status 200.

    An explicit raise is used instead of an ``assert`` statement so the check
    still runs under ``python -O`` and the failure names the endpoint.
    """
    if response.status_code != 200:
        raise AssertionError(f"{endpoint} returned HTTP {response.status_code}")


def _print_if_failed(response) -> bool:
    """Print the standard error line for a non-200 response.

    Returns:
        True when the response status is not 200 (and the error was printed),
        False otherwise.
    """
    if response.status_code == 200:
        return False
    print(f" Error: {response.status_code} - {response.text}")
    return True


async def test_service():
    """Test basic service functionality.

    Talks to the service at ``http://localhost:8000`` and exercises, in order:
    /info, /health, environment creation, policy creation, one environment
    step, environment and policy snapshots, run status, and finally
    environment and policy termination. Progress and errors are printed.

    A failed environment or policy creation stops the run early; failures of
    the later steps are printed and the run continues.

    Raises:
        AssertionError: if /info or /health does not answer with HTTP 200.
    """
    base_url = "http://localhost:8000"

    async with httpx.AsyncClient() as client:
        # Test 1: Service info
        print("1. Testing /info endpoint...")
        response = await client.get(f"{base_url}/info")
        _require_ok(response, "/info")
        info = response.json()
        print(f" Service info: {json.dumps(info, indent=2)}")

        # Test 2: Health check
        print("\n2. Testing /health endpoint...")
        response = await client.get(f"{base_url}/health")
        _require_ok(response, "/health")
        print(f" Health: {response.json()}")

        # Test 3: Create environment
        print("\n3. Creating environment...")
        response = await client.post(
            f"{base_url}/env/create",
            json={
                "env_name": "crafter",
                "config": {},
                "seed": 42,
                "rl_run_id": "test-run-001",
            },
        )
        if _print_if_failed(response):
            return
        env_data = response.json()
        env_id = env_data["env_id"]
        print(f" Created env: {env_id}")
        print(f" Initial observation keys: {list(env_data['observation'].keys())}")

        # Test 4: Create policy
        print("\n4. Creating policy...")
        response = await client.post(
            f"{base_url}/policy/create",
            json={
                "policy_name": "crafter-react",
                "config": {
                    "inference_url": "http://localhost:8001",
                    "model": "test-model",
                },
                "rl_run_id": "test-run-001",
                "bound_env_id": env_id,
            },
        )
        if _print_if_failed(response):
            # The environment from step 3 already exists on the service;
            # terminate it before bailing out so it is not left behind.
            await client.post(f"{base_url}/env/terminate", json={"env_id": env_id})
            return
        policy_data = response.json()
        policy_id = policy_data["policy_id"]
        print(f" Created policy: {policy_id}")

        # Test 5: Environment step with dummy tool calls
        print("\n5. Testing environment step...")
        response = await client.post(
            f"{base_url}/env/step",
            json={
                "env_id": env_id,
                "tool_calls": [{"tool": "interact", "args": {"action": "move_left"}}],
            },
        )
        if not _print_if_failed(response):
            step_data = response.json()
            print(f" Step result - done: {step_data['done']}, reward: {step_data.get('reward')}")

        # Test 6: Environment snapshot
        print("\n6. Creating environment snapshot...")
        response = await client.post(f"{base_url}/env/snapshot", json={"env_id": env_id})
        if not _print_if_failed(response):
            snapshot_data = response.json()
            print(f" Snapshot ID: {snapshot_data['snapshot_id']}")
            print(f" Size: {snapshot_data['size']} bytes")

        # Test 7: Policy snapshot
        print("\n7. Creating policy snapshot...")
        response = await client.post(f"{base_url}/policy/snapshot", json={"policy_id": policy_id})
        if not _print_if_failed(response):
            snapshot_data = response.json()
            print(f" Snapshot ID: {snapshot_data['snapshot_id']}")
            print(f" Size: {snapshot_data['size']} bytes")

        # Test 8: Run status
        print("\n8. Testing run status...")
        response = await client.get(f"{base_url}/run/status/test-run-001")
        if not _print_if_failed(response):
            status_data = response.json()
            print(f" Run status: {status_data['status']}")

        # Test 9: Terminate environment
        print("\n9. Terminating environment...")
        response = await client.post(f"{base_url}/env/terminate", json={"env_id": env_id})
        if not _print_if_failed(response):
            print(f" Environment terminated: {response.json()['ok']}")

        # Test 10: Terminate policy
        print("\n10. Terminating policy...")
        response = await client.post(f"{base_url}/policy/terminate", json={"policy_id": policy_id})
        if not _print_if_failed(response):
            print(f" Policy terminated: {response.json()['ok']}")

        print("\n✅ All basic tests completed!")


if __name__ == "__main__":
    asyncio.run(test_service())
@@ -0,0 +1,62 @@
1
+ """Utility functions for the task service."""
2
+
3
+ from typing import Any
4
+
5
+ import numpy as np
6
+
7
+
8
def convert_numpy_to_python(obj: Any) -> Any:
    """
    Recursively convert numpy types to Python native types for JSON serialization.

    Args:
        obj: Object that may contain numpy types

    Returns:
        Object with numpy types converted to Python native types. NumPy
        booleans, integers and floats become ``bool``, ``int`` and ``float``;
        arrays become (nested) lists; dict values and list/tuple items are
        converted recursively, with tuples returned as lists. Anything else
        is returned unchanged.
    """
    # np.bool_ is not a subclass of np.integer, so it needs its own branch;
    # without it NumPy booleans leak through and json.dumps rejects them.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        # Only values are converted; keys are kept as they are.
        return {key: convert_numpy_to_python(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [convert_numpy_to_python(item) for item in obj]
    return obj
30
+
31
+
32
def sanitize_observation(observation: dict[str, Any]) -> dict[str, Any]:
    """
    Build a JSON-friendly copy of an environment observation.

    Map/image entries are left out and every remaining value is passed
    through convert_numpy_to_python.

    Args:
        observation: Raw observation from environment

    Returns:
        A new dict without the "semantic_map", "world_material_map" and
        "observation_image" entries and with all other values converted.
        A non-dict argument is returned untouched.
    """
    if not isinstance(observation, dict):
        return observation

    # NOTE(review): these entries are dropped whatever their value type, not
    # only when they hold numpy arrays - confirm that this is intended.
    dropped_keys = ("semantic_map", "world_material_map", "observation_image")

    # A tuple-valued "player_position" needs no special case:
    # convert_numpy_to_python already turns tuples into lists item by item.
    return {
        name: convert_numpy_to_python(entry)
        for name, entry in observation.items()
        if name not in dropped_keys
    }
examples/vlm/README.md ADDED
@@ -0,0 +1,68 @@
1
+ # Crafter VLM Pipeline
2
+
3
+ This folder captures the reference workflow for fine-tuning Crafter policies with
4
+ multimodal (text + image) prompts. It stitches together the new image-aware tracing
5
+ plumbing with lightweight utilities for dataset curation and training.
6
+
7
+ ## Quick Start
8
+
9
+ 1. **Verify image capture**
10
+ ```
11
+ uv run python examples/vlm/crafter_image_only_agent.py --seed 7 --steps 5
12
+ ```
13
+ This writes PNG frames to `examples/vlm/output/frames/` and produces a JSONL preview
14
+ of OpenAI-style image-only user messages.
15
+
16
+ 2. **Collect traced rollouts**
17
+ Use the Crafter task app (or your existing pipeline) with tracing enabled. The new
18
+ tracing schema automatically records `observation_image_base64` and stores image parts
19
+ in LM call records.
20
+
21
+ 3. **Export multimodal SFT rows**
22
+ ```
23
+ uv run python examples/warming_up_to_rl/export_trace_sft.py \
24
+ --db traces/v3/synth_ai.db \
25
+ --output examples/vlm/output/crafter_traces_full.jsonl
26
+ ```
27
+ The exporter now emits `metadata.has_image`, `metadata.user_has_image`, and
28
+ `metadata.assistant_has_image` flags per turn.
29
+
30
+ 4. **Filter to image-rich turns**
31
+ ```
32
+ uv run python examples/vlm/filter_image_rows.py \
33
+ --input examples/vlm/output/crafter_traces_full.jsonl \
34
+ --output examples/vlm/output/crafter_vlm_dataset.jsonl
35
+ ```
36
+
37
+ 5. **(Optional) Split validation or augment**, then upload using the standard CLI:
38
+ ```
39
+ uv run python examples/warming_up_to_rl/run_fft_and_save.py \
40
+ --toml examples/vlm/configs/crafter_vlm_gpt4o.toml \
41
+ --data examples/vlm/output/crafter_vlm_dataset.jsonl
42
+ ```
43
+
44
+ ## Config & Utilities
45
+
46
+ | File | Purpose |
47
+ | --- | --- |
48
+ | `configs/crafter_vlm_gpt4o.toml` | Sample Synth job targeting an image-capable model (`openai/gpt-4o-mini`). Set `job.data` or pass `--data` explicitly. |
49
+ | `crafter_image_only_agent.py` | Captures frames and builds image-only prompts for sanity checks. |
50
+ | `filter_image_rows.py` | Extracts rows with image parts from exported JSONL datasets. |
51
+
52
+ ## Notes & Next Steps
53
+
54
+ - The training config assumes full fine-tuning (`mode = "sft_offline"`, `train_kind = "fft"`, `use_qlora = false`). Adjust the
55
+ model id, hardware, or hyperparameters to match available infrastructure.
56
+ - Dataset rows emitted by `export_trace_sft.py` already contain OpenAI multimodal
57
+ content parts like:
58
+ ```json
59
+ {
60
+ "role": "user",
61
+ "content": [
62
+ {"type": "text", "text": "..."},
63
+ {"type": "image_url", "image_url": {"url": "data:image/png;base64,..." }}
64
+ ]
65
+ }
66
+ ```
67
+ - See `PROPOSAL.md` for a deeper dive into outstanding work (longer rollouts,
68
+ richer multimodal augmentations, evaluation ideas).
@@ -0,0 +1,44 @@
1
+ [job]
2
+ model = "openai/gpt-4o-mini-2024-07-18"
3
+ modalities = ["text", "image"]
4
+ # data = "examples/vlm/output/crafter_vlm_dataset.jsonl"
5
+ description = "Crafter VLM SFT (text + image prompts)"
6
+
7
+ [compute]
8
+ gpu_type = "A100"
9
+ gpu_count = 1
10
+ nodes = 1
11
+
12
+ [data]
13
+ topology = {}
14
+ # validation_path = "examples/vlm/output/crafter_vlm_dataset.val.jsonl"
15
+
16
+ [training]
17
+ mode = "sft_offline"
18
+ use_qlora = false
19
+
20
+ [training.validation]
21
+ enabled = true
22
+ evaluation_strategy = "steps"
23
+ eval_steps = 50
24
+ save_best_model_at_end = true
25
+ metric_for_best_model = "val.loss"
26
+ greater_is_better = false
27
+
28
+ [hyperparameters]
29
+ n_epochs = 1
30
+ train_kind = "fft"
31
+ per_device_batch = 1
32
+ gradient_accumulation_steps = 32
33
+ sequence_length = 4096
34
+ learning_rate = 1e-5
35
+ warmup_ratio = 0.03
36
+ weight_decay = 0.01
37
+
38
+ [hyperparameters.parallelism]
39
+ use_deepspeed = true
40
+ deepspeed_stage = 2
41
+ fsdp = false
42
+ bf16 = true
43
+ fp16 = false
44
+ activation_checkpointing = true