synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (327) hide show
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +8 -11
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/run_eval.py +36 -37
  19. examples/rl/run_rl_and_save.py +5 -5
  20. examples/rl/task_app/math_single_step.py +65 -43
  21. examples/rl/task_app/math_task_app.py +3 -3
  22. examples/sft/README.md +139 -0
  23. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  24. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  25. examples/sft/evaluate.py +117 -0
  26. examples/sft/export_dataset.py +117 -0
  27. examples/sft/generate_traces.py +162 -0
  28. examples/swe/__init__.py +12 -0
  29. examples/swe/task_app/README.md +105 -0
  30. examples/swe/task_app/__init__.py +2 -0
  31. examples/swe/task_app/grpo_swe_mini.py +571 -0
  32. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  33. examples/swe/task_app/hosted/README.md +173 -0
  34. examples/swe/task_app/hosted/__init__.py +5 -0
  35. examples/swe/task_app/hosted/branching.py +143 -0
  36. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  37. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  38. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  39. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  40. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  41. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  42. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  43. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  44. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  45. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  46. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  47. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  50. examples/swe/task_app/hosted/hosted_app.py +204 -0
  51. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  52. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  53. examples/swe/task_app/hosted/main.py +100 -0
  54. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  55. examples/swe/task_app/hosted/registry.py +195 -0
  56. examples/swe/task_app/hosted/rollout.py +1869 -0
  57. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  58. examples/swe/task_app/hosted/storage/volume.py +211 -0
  59. examples/swe/task_app/hosted/test_agents.py +161 -0
  60. examples/swe/task_app/hosted/test_service.py +137 -0
  61. examples/swe/task_app/hosted/utils.py +62 -0
  62. examples/vlm/README.md +68 -0
  63. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  64. examples/vlm/crafter_image_only_agent.py +207 -0
  65. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  66. examples/vlm/filter_image_rows.py +63 -0
  67. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  68. examples/warming_up_to_rl/analyze_trace_db.py +5 -5
  69. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  70. examples/warming_up_to_rl/export_trace_sft.py +78 -21
  71. examples/warming_up_to_rl/groq_test.py +4 -4
  72. examples/warming_up_to_rl/manage_secrets.py +13 -18
  73. examples/warming_up_to_rl/run_eval.py +42 -44
  74. examples/warming_up_to_rl/run_fft_and_save.py +11 -16
  75. examples/warming_up_to_rl/run_local_rollout.py +1 -3
  76. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
  77. examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
  78. examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
  79. examples/warming_up_to_rl/run_rl_and_save.py +5 -6
  80. examples/warming_up_to_rl/run_rollout_remote.py +8 -10
  81. examples/warming_up_to_rl/task_app/README.md +6 -2
  82. examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
  83. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  101. synth/__init__.py +14 -0
  102. synth_ai/__init__.py +26 -4
  103. synth_ai/api/models/supported.py +376 -0
  104. synth_ai/api/train/builders.py +128 -21
  105. synth_ai/api/train/cli.py +80 -64
  106. synth_ai/api/train/config_finder.py +7 -2
  107. synth_ai/api/train/env_resolver.py +1 -1
  108. synth_ai/api/train/pollers.py +2 -1
  109. synth_ai/api/train/supported_algos.py +139 -0
  110. synth_ai/api/train/task_app.py +1 -2
  111. synth_ai/api/train/utils.py +13 -44
  112. synth_ai/cli/__init__.py +8 -0
  113. synth_ai/cli/_modal_wrapper.py +28 -0
  114. synth_ai/cli/_typer_patch.py +49 -0
  115. synth_ai/cli/balance.py +1 -2
  116. synth_ai/cli/calc.py +1 -1
  117. synth_ai/cli/demo.py +2 -1
  118. synth_ai/cli/recent.py +2 -2
  119. synth_ai/cli/rl_demo.py +2 -1
  120. synth_ai/cli/root.py +11 -13
  121. synth_ai/cli/status.py +2 -2
  122. synth_ai/cli/task_apps.py +529 -179
  123. synth_ai/cli/traces.py +6 -4
  124. synth_ai/cli/watch.py +12 -18
  125. synth_ai/demo_registry.py +1 -1
  126. synth_ai/demos/core/cli.py +36 -43
  127. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  128. synth_ai/demos/demo_task_apps/core.py +17 -25
  129. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
  130. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  131. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
  132. synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
  133. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  134. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  135. synth_ai/environments/reproducibility/tree.py +2 -5
  136. synth_ai/environments/service/app.py +11 -12
  137. synth_ai/environments/service/core_routes.py +4 -7
  138. synth_ai/environments/stateful/engine.py +1 -1
  139. synth_ai/environments/tasks/core.py +1 -0
  140. synth_ai/environments/tasks/filters.py +5 -6
  141. synth_ai/environments/tasks/utils.py +4 -5
  142. synth_ai/handshake.py +9 -9
  143. synth_ai/http.py +1 -1
  144. synth_ai/http_client.py +18 -10
  145. synth_ai/inference/client.py +15 -5
  146. synth_ai/jobs/client.py +78 -83
  147. synth_ai/learning/__init__.py +41 -6
  148. synth_ai/learning/algorithms.py +14 -0
  149. synth_ai/learning/client.py +91 -24
  150. synth_ai/learning/config.py +2 -38
  151. synth_ai/learning/ft_client.py +4 -59
  152. synth_ai/learning/health.py +5 -6
  153. synth_ai/learning/jobs.py +31 -47
  154. synth_ai/{rl → learning/rl}/__init__.py +14 -4
  155. synth_ai/learning/rl/client.py +267 -0
  156. synth_ai/learning/rl/config.py +31 -0
  157. synth_ai/{rl → learning/rl}/contracts.py +5 -8
  158. synth_ai/{rl → learning/rl}/env_keys.py +39 -15
  159. synth_ai/learning/rl/secrets.py +13 -0
  160. synth_ai/learning/rl_client.py +2 -281
  161. synth_ai/learning/sft/__init__.py +29 -0
  162. synth_ai/learning/sft/client.py +68 -0
  163. synth_ai/learning/sft/config.py +270 -0
  164. synth_ai/learning/sft/data.py +295 -0
  165. synth_ai/learning/sse.py +25 -24
  166. synth_ai/learning/validators.py +25 -28
  167. synth_ai/lm/__init__.py +21 -47
  168. synth_ai/main.py +4 -0
  169. synth_ai/task/__init__.py +25 -27
  170. synth_ai/task/apps/__init__.py +7 -8
  171. synth_ai/task/auth.py +8 -8
  172. synth_ai/task/client.py +14 -14
  173. synth_ai/task/contracts.py +36 -35
  174. synth_ai/task/datasets.py +6 -5
  175. synth_ai/task/errors.py +10 -10
  176. synth_ai/task/health.py +17 -9
  177. synth_ai/task/json.py +58 -23
  178. synth_ai/task/proxy.py +13 -9
  179. synth_ai/task/rubrics.py +16 -15
  180. synth_ai/task/server.py +12 -12
  181. synth_ai/task/tracing_utils.py +4 -4
  182. synth_ai/task/vendors.py +5 -6
  183. synth_ai/tracing_v3/__init__.py +2 -0
  184. synth_ai/tracing_v3/abstractions.py +21 -4
  185. synth_ai/tracing_v3/decorators.py +18 -16
  186. synth_ai/tracing_v3/hooks.py +5 -5
  187. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  188. synth_ai/tracing_v3/session_tracer.py +40 -14
  189. synth_ai/tracing_v3/storage/base.py +85 -0
  190. synth_ai/tracing_v3/storage/config.py +21 -8
  191. synth_ai/tracing_v3/storage/factory.py +10 -7
  192. synth_ai/tracing_v3/storage/utils.py +4 -2
  193. synth_ai/tracing_v3/turso/daemon.py +7 -2
  194. synth_ai/tracing_v3/turso/models.py +2 -2
  195. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  196. synth_ai/tracing_v3/utils.py +4 -4
  197. synth_ai/v0/api/__init__.py +8 -0
  198. synth_ai/v0/api/models/__init__.py +8 -0
  199. synth_ai/v0/api/models/supported.py +8 -0
  200. synth_ai/v0/config/__init__.py +15 -0
  201. synth_ai/v0/config/base_url.py +12 -0
  202. synth_ai/v0/lm/__init__.py +51 -0
  203. synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
  204. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  205. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  206. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  207. synth_ai/{lm → v0/lm}/config.py +6 -1
  208. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  209. synth_ai/{lm → v0/lm}/core/main.py +6 -6
  210. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  211. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
  212. synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
  213. synth_ai/{lm → v0/lm}/overrides.py +2 -2
  214. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  215. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  216. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  217. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  218. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
  219. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  220. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  221. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
  222. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
  223. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
  224. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
  225. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  226. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  227. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  228. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  229. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  230. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  231. synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
  232. synth_ai/v0/tracing_v3/__init__.py +10 -0
  233. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  234. synth_ai/v0/tracing_v3/decorators.py +3 -0
  235. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  236. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  237. synth_ai-0.2.9.dev8.dist-info/METADATA +191 -0
  238. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/RECORD +268 -238
  239. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/top_level.txt +1 -0
  240. examples/common_old/backend.py +0 -20
  241. examples/evals_old/README.md +0 -98
  242. examples/evals_old/__init__.py +0 -6
  243. examples/evals_old/compare_models.py +0 -1038
  244. examples/evals_old/example_log.md +0 -145
  245. examples/evals_old/run_demo.sh +0 -126
  246. examples/evals_old/trace_analysis.py +0 -270
  247. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  248. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  249. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  250. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
  251. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  252. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  253. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  254. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  255. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  256. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
  257. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  258. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  259. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
  260. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  261. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  262. examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
  263. examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
  264. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  265. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  266. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
  267. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
  268. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
  269. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  270. examples/finetuning_old/synth_qwen_v1/util.py +0 -152
  271. examples/rl_old/task_app.py +0 -1131
  272. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  273. examples/warming_up_to_rl/old/notes.md +0 -73
  274. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  275. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  276. synth_ai/experimental/synth_oss.py +0 -445
  277. synth_ai/learning/filtering.py +0 -0
  278. synth_ai/learning/offline/dpo.py +0 -0
  279. synth_ai/learning/offline/providers.py +0 -7
  280. synth_ai/learning/offline/sft.py +0 -0
  281. synth_ai/learning/offline/shared.py +0 -0
  282. synth_ai/learning/online/grpo.py +0 -0
  283. synth_ai/learning/online/irft.py +0 -0
  284. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  285. synth_ai/learning/prompts/gepa.py +0 -0
  286. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
  287. synth_ai/learning/prompts/mipro.py +0 -289
  288. synth_ai/learning/prompts/random_search.py +0 -249
  289. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  290. synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
  291. synth_ai/rl/secrets.py +0 -19
  292. synth_ai/scripts/verify_rewards.py +0 -100
  293. synth_ai/tracing/__init__.py +0 -30
  294. synth_ai/tracing_v1/__init__.py +0 -33
  295. synth_ai/tracing_v3/turso/__init__.py +0 -25
  296. synth_ai/tracing_v3/turso/manager.py +0 -838
  297. synth_ai/zyk/__init__.py +0 -30
  298. synth_ai-0.2.9.dev7.dist-info/METADATA +0 -131
  299. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  300. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  301. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  302. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  303. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  304. /synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
  305. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  306. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  307. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  308. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  309. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  310. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  311. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  312. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  313. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  314. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  315. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  316. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  317. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  318. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  319. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  320. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  321. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  322. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  323. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  324. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  325. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/WHEEL +0 -0
  326. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/entry_points.txt +0 -0
  327. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,305 @@
1
+ """Shared utilities for Crafter environment and policy.
2
+
3
+ This module formats Crafter observations for the LLM and parses actions.
4
+ It now mirrors the ludic_private implementation for semantic map rendering
5
+ by dynamically deriving the id->name mapping from the actual Crafter env
6
+ when available, with a sensible fallback. This fixes the issue where the
7
+ rendered surroundings appeared only as iron/stone due to a mismatched
8
+ hardcoded mapping.
9
+ """
10
+
11
+ import itertools
12
+ import re
13
+ from typing import Any
14
+
15
+ import numpy as np
16
+
17
# Default view size for the map (match eval_rollout_table)
VIEW_SIZE = 5

# Action mappings from the game: each action name maps to its integer id,
# which is simply its position in the ordered tuple below.
CRAFTER_ACTIONS = {
    action_name: action_id
    for action_id, action_name in enumerate(
        (
            "noop",
            "move_left",
            "move_right",
            "move_up",
            "move_down",
            "do",
            "sleep",
            "place_stone",
            "place_table",
            "place_furnace",
            "place_plant",
            "make_wood_pickaxe",
            "make_stone_pickaxe",
            "make_iron_pickaxe",
            "make_wood_sword",
            "make_stone_sword",
            "make_iron_sword",
        )
    )
}

# Common action aliases: alternative spelling -> canonical action name.
ACTION_ALIASES = dict(
    [
        # Movement aliases
        ("left", "move_left"),
        ("right", "move_right"),
        ("up", "move_up"),
        ("down", "move_down"),
        # Interaction aliases
        ("interact", "do"),
        ("use", "do"),
        ("action", "do"),
        # Sleep
        ("rest", "sleep"),
        # Crafting
        ("craft_wood_pickaxe", "make_wood_pickaxe"),
        ("craft_stone_pickaxe", "make_stone_pickaxe"),
        ("craft_iron_pickaxe", "make_iron_pickaxe"),
        ("craft_wood_sword", "make_wood_sword"),
        ("craft_stone_sword", "make_stone_sword"),
        ("craft_iron_sword", "make_iron_sword"),
    ]
)

# Lookup sets used when validating action strings.
VALID_PRIMARY_ACTIONS: set[str] = set(CRAFTER_ACTIONS)
VALID_ACTION_ALIASES: set[str] = set(ACTION_ALIASES)
ALL_VALID_ACTION_STRINGS: set[str] = VALID_PRIMARY_ACTIONS.union(VALID_ACTION_ALIASES)
65
+
66
+
67
def validate_action(action: str) -> bool:
    """Return True when *action* names a known Crafter action or alias.

    The comparison is done on a normalized form of the text: surrounding
    whitespace trimmed, lowercased, and every space turned into an underscore.
    """
    candidate = "_".join(action.strip().lower().split(" "))
    return candidate in ALL_VALID_ACTION_STRINGS
71
+
72
+
73
def parse_actions(action_text: str) -> list[str]:
    """Extract valid Crafter actions from a model response.

    Strategies are tried in order and the first one that finds anything wins:

    1. ``<action>...</action>`` tags (original format).
    2. ``[action]...[/action]`` or ``[action]...`` up to end of line.
    3. The whole text being a single valid action.
    4. Line-by-line parsing: a leading label or bullet (``ACTION:``,
       ``**ACTION:**``, ``-``, ``*``, ``•``) and a leading ``N.`` list number
       are removed, then the line is split on ``,``, ``;``, ``and`` and
       ``then``; surrounding quotes are dropped from each piece.

    Args:
        action_text: Raw response text.

    Returns:
        The action strings that pass ``validate_action``, in order of
        appearance. Strings are returned as written (whitespace-stripped but
        not lowercased or underscore-normalized). When tags are present but
        none of their contents is valid, the result is an empty list; the
        plain-text strategies are not attempted in that case.
    """
    tag_patterns = (
        r"<action>(.*?)</action>",
        r"\[action\](.*?)(?:\[/action\]|\n|$)",
    )
    for pattern in tag_patterns:
        matches = re.findall(pattern, action_text, re.IGNORECASE)
        if matches:
            return [m.strip() for m in matches if validate_action(m.strip())]

    # If no tags found, try to parse plain text
    text = action_text.strip()

    # Check if the entire text is a valid action
    if validate_action(text):
        return [text]

    # Longer prefixes must come before the shorter ones they start with:
    # "**ACTION:**" begins with "*", so testing "*" first would consume a
    # single asterisk and leave "*ACTION:** ..." behind, which never validates.
    prefixes = ("**ACTION:**", "ACTION:", "Action:", "action:", "ACTION", "-", "*", "•")

    actions: list[str] = []
    for line in text.split("\n"):
        line = line.strip()

        # Remove at most one leading label/bullet
        for prefix in prefixes:
            if line.startswith(prefix):
                line = line[len(prefix) :].strip()
                break

        # Also handle numbered lists ("1. move_left")
        line = re.sub(r"^\d+\.\s*", "", line)

        # Split by common separators to handle multiple actions on one line
        for part in re.split(r"[,;]|\s+and\s+|\s+then\s+", line):
            part = part.strip()
            # Remove quotes if present
            if part.startswith('"') and part.endswith('"'):
                part = part[1:-1]
            if part.startswith("'") and part.endswith("'"):
                part = part[1:-1]

            if part and validate_action(part):
                actions.append(part)

    return actions
133
+
134
+
135
def format_observation(obs_data: dict[str, Any], step_count: int = 0, max_steps: int = 100) -> str:
    """Format a Crafter observation dictionary into a human-readable string.

    This is critical for preventing massive token counts when observations
    contain large numpy arrays or deeply nested structures.

    Args:
        obs_data: Observation mapping. Keys read here: ``health``,
            ``inventory``, ``player_position``, ``player_direction``,
            ``achievements_status``, ``steps`` / ``num_steps_taken``,
            ``max_steps_episode`` / ``max_steps``; the map section is
            produced by ``_format_semantic_map_view`` from the same mapping.
        step_count: Step number to show when the observation carries none.
        max_steps: Episode length to show when the observation carries none.

    Returns:
        The formatted state text, or ``""`` when ``obs_data`` is empty/None.
    """
    if not obs_data:
        return ""

    # `dict.get(key, {})` only falls back when the key is missing; `or {}`
    # also covers a key that is present but set to None, which would
    # otherwise fail on `.get()` / `.items()` below.
    inventory_dict = obs_data.get("inventory") or {}
    achievements = obs_data.get("achievements_status") or {}

    # Top-level health wins; otherwise fall back to the inventory entry.
    health = obs_data.get("health") or inventory_dict.get("health", 0)
    pos = obs_data.get("player_position", [0, 0])
    direction = obs_data.get("player_direction", [0, 1])

    # Prefer step/max from observation if provided by the env
    step_from_obs = obs_data.get("steps")
    if step_from_obs is None:
        step_from_obs = obs_data.get("num_steps_taken")
    if isinstance(step_from_obs, int | float) and step_from_obs >= 0:
        step_count = int(step_from_obs)

    max_steps_from_obs = obs_data.get("max_steps_episode") or obs_data.get("max_steps")
    if isinstance(max_steps_from_obs, int | float) and max_steps_from_obs > 0:
        max_steps = int(max_steps_from_obs)

    # Format inventory (skip health as it's shown separately)
    inv_items = [f"{k}:{v}" for k, v in inventory_dict.items() if v > 0 and k != "health"]
    inventory_str = ", ".join(inv_items) if inv_items else "empty"

    # Format achievements: list only the unlocked ones
    achieved_list = [k for k, v in achievements.items() if v]
    achievements_str = ", ".join(achieved_list) if achieved_list else "none"

    # Format semantic map view (simplified version)
    map_view = _format_semantic_map_view(obs_data, VIEW_SIZE)

    return (
        f"=== CRAFTER GAME STATE ===\n"
        f"Step: {step_count}/{max_steps}\n"
        f"Health: {health}\n"
        f"Position: {pos}\n"
        f"Facing: {direction}\n"
        f"Inventory: {inventory_str}\n"
        f"Achievements: {achievements_str}\n"
        f"{map_view}\n\n"
        f"Choose your next actions.\n"
    )
186
+
187
+
188
def _try_build_dynamic_mapping():
    """Derive the semantic id -> name table from a live Crafter env.

    Returns a list indexed by semantic id whose values are lowercase names
    (slots with no known name hold "void"). Returns None when crafter cannot
    be imported, when the private attributes read here are missing or not
    dicts, or when anything else goes wrong while building the table.
    """
    try:
        import crafter  # type: ignore
    except Exception:
        return None

    env = None
    try:
        env = crafter.Env()
        # Material ids live on the world object, object ids on the semantic view.
        world = getattr(env, "_world", None)
        view = getattr(env, "_sem_view", None)
        if world is None or view is None:
            return None

        materials = getattr(world, "_mat_ids", None)
        objects = getattr(view, "_obj_ids", None)
        if not (isinstance(materials, dict) and isinstance(objects, dict)):
            return None

        table_size = max(max(materials.values()), max(objects.values())) + 1
        names = ["void"] * table_size
        for key, slot in itertools.chain(materials.items(), objects.items()):
            # Keys may be None, objects exposing __name__, or anything else.
            label = (
                "none"
                if key is None
                else key.__name__.lower()
                if hasattr(key, "__name__")
                else str(key).lower()
            )
            if 0 <= slot < len(names):
                names[slot] = label
        return names
    except Exception:
        return None
    finally:
        # Best-effort cleanup; a failing close() must not mask the result.
        if env is not None:
            try:
                env.close()
            except Exception:
                pass
231
+
232
+
233
# Build dynamic mapping if possible (evaluated once, at import time);
# otherwise the basic map below is used as the fallback.
_ID_TO_NAME = _try_build_dynamic_mapping()

# Fallback table: semantic id -> name, where the id is the position in the tuple.
_FALLBACK_ID_TO_NAME = dict(
    enumerate(
        (
            "none",  # None from materials
            "water",
            "grass",
            "stone",
            "path",
            "sand",
            "tree",
            "lava",
            "coal",
            "iron",
            "diamond",
            "table",
            "furnace",
            "player",
            "cow",
            "zombie",
            "skeleton",
            "arrow",
            "plant",
        )
    )
)
256
+
257
+
258
def _format_semantic_map_view(obs_data: dict[str, Any], view_size: int = VIEW_SIZE) -> str:
    """Render the tiles around the player as a grid of names.

    Reads ``semantic_map`` and ``player_position`` from ``obs_data``. Ids are
    translated with the dynamically built table when one is available and
    with the fallback table otherwise; unknown ids are printed as numbers.
    Returns "Map view unavailable" when the observation has no semantic map.
    """
    semantic_map = obs_data.get("semantic_map")
    if semantic_map is None:
        return "Map view unavailable"
    player_position = obs_data.get("player_position", [0, 0])

    # Convert to numpy array if needed; a flat array is folded into a square.
    grid = np.asarray(semantic_map)
    if grid.ndim == 1:
        side = int(len(grid) ** 0.5)
        grid = grid.reshape(side, side)

    center_x, center_y = map(int, player_position)
    radius = view_size // 2
    offsets = range(-radius, radius + 1)

    # Choose mapping source
    have_dynamic_names = isinstance(_ID_TO_NAME, list) and len(_ID_TO_NAME) > 0

    def token_at(dx: int, dy: int) -> str:
        """Name of the tile at the given offset from the player."""
        x = center_x + dx
        y = center_y + dy
        if x < 0 or x >= grid.shape[0] or y < 0 or y >= grid.shape[1]:
            return "void"
        if dx == 0 and dy == 0:
            return "player"
        obj_id = int(grid[x, y])
        if have_dynamic_names and 0 <= obj_id < len(_ID_TO_NAME):
            return _ID_TO_NAME[obj_id]  # type: ignore[index]
        return _FALLBACK_ID_TO_NAME.get(obj_id, str(obj_id))

    # One output row per x offset, one column per y offset; this is the same
    # layout as building rows per y offset and then transposing.
    grid_rows: list[str] = [" ".join(token_at(dx, dy) for dy in offsets) for dx in offsets]
    return f"\nLocal Map View ({view_size}x{view_size}):\n" + "\n".join(grid_rows)
@@ -0,0 +1,47 @@
1
+ """OpenAI tools schema for Crafter, defined in Python."""
2
+
3
# Pass this list directly to OpenAI/vLLM `tools=`.
# It declares a single function tool, `interact_many`, whose only argument is
# an array of 1-8 action names drawn from the enum below.
TOOLS_SCHEMA = [
    dict(
        type="function",
        function=dict(
            name="interact_many",
            description="Execute a short sequence of Crafter actions in order (1-8).",
            parameters=dict(
                type="object",
                properties=dict(
                    actions=dict(
                        type="array",
                        description="List of Crafter actions to execute sequentially.",
                        items=dict(
                            type="string",
                            enum=[
                                "noop",
                                "move_left",
                                "move_right",
                                "move_up",
                                "move_down",
                                "do",
                                "sleep",
                                "place_stone",
                                "place_table",
                                "place_furnace",
                                "place_plant",
                                "make_wood_pickaxe",
                                "make_stone_pickaxe",
                                "make_iron_pickaxe",
                                "make_wood_sword",
                                "make_stone_sword",
                                "make_iron_sword",
                            ],
                        ),
                        minItems=1,
                        maxItems=8,
                    ),
                ),
                required=["actions"],
                additionalProperties=False,
            ),
        ),
    ),
]
@@ -0,0 +1,8 @@
1
+ """Mini-SWE environment and policy adapters."""
2
+
3
+ from .environment import MiniSweEnvironmentWrapper
4
+ from .policy import MiniSwePolicy
5
+ from .tools import TOOLS_SCHEMA
6
+
7
+ __all__ = ["MiniSweEnvironmentWrapper", "MiniSwePolicy", "TOOLS_SCHEMA"]
8
+