synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (353)
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +23 -17
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/configs/eval_base_qwen.toml +1 -1
  19. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  20. examples/rl/download_dataset.py +26 -10
  21. examples/rl/run_eval.py +53 -52
  22. examples/rl/run_rl_and_save.py +29 -12
  23. examples/rl/task_app/math_single_step.py +180 -41
  24. examples/rl/task_app/math_task_app.py +14 -6
  25. examples/sft/README.md +139 -0
  26. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  27. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  28. examples/sft/evaluate.py +117 -0
  29. examples/sft/export_dataset.py +117 -0
  30. examples/sft/generate_traces.py +162 -0
  31. examples/swe/__init__.py +12 -0
  32. examples/swe/task_app/README.md +105 -0
  33. examples/swe/task_app/__init__.py +2 -0
  34. examples/swe/task_app/grpo_swe_mini.py +571 -0
  35. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  36. examples/swe/task_app/hosted/README.md +173 -0
  37. examples/swe/task_app/hosted/__init__.py +5 -0
  38. examples/swe/task_app/hosted/branching.py +143 -0
  39. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  40. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  41. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  42. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  43. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  44. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  45. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  46. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  47. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  50. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  51. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  52. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  53. examples/swe/task_app/hosted/hosted_app.py +204 -0
  54. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  55. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  56. examples/swe/task_app/hosted/main.py +100 -0
  57. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  58. examples/swe/task_app/hosted/registry.py +195 -0
  59. examples/swe/task_app/hosted/rollout.py +1869 -0
  60. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  61. examples/swe/task_app/hosted/storage/volume.py +211 -0
  62. examples/swe/task_app/hosted/test_agents.py +161 -0
  63. examples/swe/task_app/hosted/test_service.py +137 -0
  64. examples/swe/task_app/hosted/utils.py +62 -0
  65. examples/vlm/README.md +68 -0
  66. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  67. examples/vlm/crafter_image_only_agent.py +207 -0
  68. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  69. examples/vlm/filter_image_rows.py +63 -0
  70. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  71. examples/warming_up_to_rl/analyze_trace_db.py +12 -10
  72. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  73. examples/warming_up_to_rl/export_trace_sft.py +218 -36
  74. examples/warming_up_to_rl/groq_test.py +15 -8
  75. examples/warming_up_to_rl/manage_secrets.py +29 -25
  76. examples/warming_up_to_rl/readme.md +9 -2
  77. examples/warming_up_to_rl/run_eval.py +137 -61
  78. examples/warming_up_to_rl/run_fft_and_save.py +131 -60
  79. examples/warming_up_to_rl/run_local_rollout.py +88 -39
  80. examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
  81. examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
  82. examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
  83. examples/warming_up_to_rl/run_rl_and_save.py +35 -12
  84. examples/warming_up_to_rl/run_rollout_remote.py +44 -19
  85. examples/warming_up_to_rl/task_app/README.md +6 -2
  86. examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
  87. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  111. synth/__init__.py +14 -0
  112. synth_ai/__init__.py +20 -4
  113. synth_ai/api/models/supported.py +376 -0
  114. synth_ai/api/train/builders.py +157 -26
  115. synth_ai/api/train/cli.py +213 -57
  116. synth_ai/api/train/config_finder.py +65 -5
  117. synth_ai/api/train/env_resolver.py +33 -15
  118. synth_ai/api/train/pollers.py +13 -4
  119. synth_ai/api/train/supported_algos.py +139 -0
  120. synth_ai/api/train/task_app.py +5 -3
  121. synth_ai/api/train/utils.py +33 -48
  122. synth_ai/cli/__init__.py +19 -4
  123. synth_ai/cli/_modal_wrapper.py +28 -0
  124. synth_ai/cli/_typer_patch.py +49 -0
  125. synth_ai/cli/balance.py +2 -3
  126. synth_ai/cli/calc.py +1 -1
  127. synth_ai/cli/demo.py +21 -6
  128. synth_ai/cli/recent.py +2 -2
  129. synth_ai/cli/rl_demo.py +77 -17
  130. synth_ai/cli/root.py +116 -39
  131. synth_ai/cli/status.py +2 -2
  132. synth_ai/cli/task_apps.py +1709 -243
  133. synth_ai/cli/traces.py +7 -4
  134. synth_ai/cli/turso.py +73 -0
  135. synth_ai/cli/watch.py +12 -18
  136. synth_ai/core/experiment.py +0 -2
  137. synth_ai/demo_registry.py +68 -31
  138. synth_ai/demos/core/cli.py +516 -194
  139. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  140. synth_ai/demos/demo_task_apps/core.py +64 -28
  141. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  142. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
  143. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  144. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  145. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  146. synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
  147. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  148. synth_ai/environments/examples/bandit/engine.py +12 -4
  149. synth_ai/environments/examples/bandit/taskset.py +4 -4
  150. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  151. synth_ai/environments/reproducibility/tree.py +5 -6
  152. synth_ai/environments/service/app.py +11 -12
  153. synth_ai/environments/service/core_routes.py +10 -9
  154. synth_ai/environments/stateful/engine.py +1 -1
  155. synth_ai/environments/tasks/core.py +1 -0
  156. synth_ai/environments/tasks/filters.py +5 -6
  157. synth_ai/environments/tasks/utils.py +4 -5
  158. synth_ai/evals/base.py +0 -2
  159. synth_ai/handshake.py +11 -9
  160. synth_ai/http.py +1 -1
  161. synth_ai/http_client.py +43 -11
  162. synth_ai/inference/__init__.py +0 -2
  163. synth_ai/inference/client.py +20 -6
  164. synth_ai/jobs/client.py +103 -78
  165. synth_ai/learning/__init__.py +41 -6
  166. synth_ai/learning/algorithms.py +14 -0
  167. synth_ai/learning/client.py +121 -29
  168. synth_ai/learning/config.py +2 -40
  169. synth_ai/learning/constants.py +0 -2
  170. synth_ai/learning/ft_client.py +4 -56
  171. synth_ai/learning/health.py +13 -7
  172. synth_ai/learning/jobs.py +43 -47
  173. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  174. synth_ai/learning/rl/client.py +267 -0
  175. synth_ai/learning/rl/config.py +31 -0
  176. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  177. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  178. synth_ai/learning/rl/secrets.py +13 -0
  179. synth_ai/learning/rl_client.py +2 -253
  180. synth_ai/learning/sft/__init__.py +29 -0
  181. synth_ai/learning/sft/client.py +68 -0
  182. synth_ai/learning/sft/config.py +270 -0
  183. synth_ai/learning/sft/data.py +295 -0
  184. synth_ai/learning/sse.py +25 -26
  185. synth_ai/learning/validators.py +25 -24
  186. synth_ai/lm/__init__.py +21 -47
  187. synth_ai/task/__init__.py +26 -27
  188. synth_ai/task/apps/__init__.py +18 -19
  189. synth_ai/task/auth.py +35 -23
  190. synth_ai/task/client.py +15 -13
  191. synth_ai/task/contracts.py +37 -35
  192. synth_ai/task/datasets.py +9 -6
  193. synth_ai/task/errors.py +11 -10
  194. synth_ai/task/health.py +17 -11
  195. synth_ai/task/json.py +58 -24
  196. synth_ai/task/proxy.py +15 -14
  197. synth_ai/task/rubrics.py +22 -15
  198. synth_ai/task/server.py +43 -17
  199. synth_ai/task/tracing_utils.py +12 -7
  200. synth_ai/task/validators.py +0 -1
  201. synth_ai/task/vendors.py +5 -7
  202. synth_ai/tracing_v3/__init__.py +2 -0
  203. synth_ai/tracing_v3/abstractions.py +21 -4
  204. synth_ai/tracing_v3/db_config.py +26 -1
  205. synth_ai/tracing_v3/decorators.py +18 -15
  206. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  207. synth_ai/tracing_v3/hooks.py +6 -4
  208. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  209. synth_ai/tracing_v3/replica_sync.py +1 -0
  210. synth_ai/tracing_v3/session_tracer.py +63 -16
  211. synth_ai/tracing_v3/storage/base.py +89 -1
  212. synth_ai/tracing_v3/storage/config.py +21 -8
  213. synth_ai/tracing_v3/storage/factory.py +10 -8
  214. synth_ai/tracing_v3/storage/utils.py +4 -2
  215. synth_ai/tracing_v3/turso/daemon.py +7 -2
  216. synth_ai/tracing_v3/turso/models.py +5 -2
  217. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  218. synth_ai/tracing_v3/utils.py +4 -3
  219. synth_ai/v0/api/__init__.py +8 -0
  220. synth_ai/v0/api/models/__init__.py +8 -0
  221. synth_ai/v0/api/models/supported.py +8 -0
  222. synth_ai/v0/config/__init__.py +15 -0
  223. synth_ai/v0/config/base_url.py +12 -0
  224. synth_ai/v0/lm/__init__.py +51 -0
  225. synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
  226. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  227. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  228. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  229. synth_ai/{lm → v0/lm}/config.py +6 -1
  230. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  231. synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
  232. synth_ai/{lm → v0/lm}/core/main.py +19 -7
  233. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  234. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
  235. synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
  236. synth_ai/{lm → v0/lm}/overrides.py +4 -4
  237. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  238. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  239. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  240. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  241. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
  242. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  243. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  244. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
  245. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
  246. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
  247. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
  248. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  249. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  250. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  251. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  252. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  253. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  254. synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
  255. synth_ai/v0/tracing/upload.py +32 -135
  256. synth_ai/v0/tracing_v3/__init__.py +10 -0
  257. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  258. synth_ai/v0/tracing_v3/decorators.py +3 -0
  259. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  260. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  261. synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
  262. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -264
  263. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
  264. examples/common_old/backend.py +0 -21
  265. examples/evals_old/README.md +0 -98
  266. examples/evals_old/__init__.py +0 -6
  267. examples/evals_old/compare_models.py +0 -1037
  268. examples/evals_old/example_log.md +0 -145
  269. examples/evals_old/run_demo.sh +0 -126
  270. examples/evals_old/trace_analysis.py +0 -270
  271. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  272. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  273. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  274. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
  275. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  276. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  277. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  278. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  279. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  280. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
  281. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  282. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  283. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
  284. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  285. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  286. examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
  287. examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
  288. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  289. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  290. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
  291. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
  292. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
  293. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  294. examples/finetuning_old/synth_qwen_v1/util.py +0 -147
  295. examples/rl_old/task_app.py +0 -962
  296. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  297. examples/warming_up_to_rl/old/notes.md +0 -73
  298. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
  299. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  300. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  301. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  302. synth_ai/experimental/synth_oss.py +0 -446
  303. synth_ai/install_sqld.sh +0 -40
  304. synth_ai/learning/filtering.py +0 -0
  305. synth_ai/learning/offline/dpo.py +0 -0
  306. synth_ai/learning/offline/providers.py +0 -7
  307. synth_ai/learning/offline/sft.py +0 -0
  308. synth_ai/learning/offline/shared.py +0 -0
  309. synth_ai/learning/online/grpo.py +0 -0
  310. synth_ai/learning/online/irft.py +0 -0
  311. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  312. synth_ai/learning/prompts/gepa.py +0 -0
  313. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  314. synth_ai/learning/prompts/mipro.py +0 -289
  315. synth_ai/learning/prompts/random_search.py +0 -246
  316. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  317. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  318. synth_ai/rl/secrets.py +0 -19
  319. synth_ai/scripts/verify_rewards.py +0 -100
  320. synth_ai/tracing/__init__.py +0 -30
  321. synth_ai/tracing_v1/__init__.py +0 -33
  322. synth_ai/tracing_v3/turso/__init__.py +0 -25
  323. synth_ai/tracing_v3/turso/manager.py +0 -774
  324. synth_ai/zyk/__init__.py +0 -30
  325. synth_ai-0.2.9.dev4.dist-info/METADATA +0 -131
  326. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  327. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  328. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  329. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  330. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  331. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  332. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  333. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  334. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  335. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  336. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  337. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  338. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  339. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  340. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  341. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  342. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  343. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  344. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  345. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  346. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  347. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  348. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  349. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  350. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  351. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
@@ -1,28 +1,31 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Dict, List, Optional, Tuple
4
3
  from abc import ABC, abstractmethod
4
+ from typing import Any
5
+
5
6
  from .react_agent import CrafterReActAgent
6
7
  from .tools import TOOLS_SCHEMA
7
8
 
9
+
8
10
  # Define Policy base class here to avoid circular import
9
11
  class Policy(ABC):
10
12
  """Base class for environment-specific policies."""
11
-
13
+
12
14
  @abstractmethod
13
15
  def prepare_inference_request(
14
- self, observation: Dict[str, Any], history: List[Dict[str, Any]] = None
15
- ) -> Tuple[List[Dict[str, Any]], Optional[List[Dict[str, Any]]]]:
16
+ self, observation: dict[str, Any], history: list[dict[str, Any]] = None
17
+ ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
16
18
  """Prepare an inference request."""
17
19
  pass
18
-
20
+
19
21
  @abstractmethod
20
22
  def parse_model_response(
21
- self, response: str, observation: Dict[str, Any]
22
- ) -> List[Dict[str, Any]]:
23
+ self, response: str, observation: dict[str, Any]
24
+ ) -> list[dict[str, Any]]:
23
25
  """Parse model response into tool calls."""
24
26
  pass
25
27
 
28
+
26
29
  # (imports moved to top of file to satisfy linter)
27
30
 
28
31
 
@@ -37,23 +40,23 @@ class CrafterPolicy(Policy):
37
40
 
38
41
  name: str = "crafter-react"
39
42
 
40
- def __init__(self, inference_url: str, model: Optional[str] = None) -> None:
43
+ def __init__(self, inference_url: str, model: str | None = None) -> None:
41
44
  self.inference_url = inference_url
42
45
  self.model = model
43
46
  self.use_tools = True
44
47
  # Sampling parameters (populated via initialize(config))
45
- self.temperature: Optional[float] = None
46
- self.top_p: Optional[float] = None
47
- self.max_tokens: Optional[int] = None
48
+ self.temperature: float | None = None
49
+ self.top_p: float | None = None
50
+ self.max_tokens: int | None = None
48
51
  # Thinking controls (populated via initialize(config))
49
- self.thinking_mode: Optional[str] = None
50
- self.thinking_budget: Optional[int] = None
52
+ self.thinking_mode: str | None = None
53
+ self.thinking_budget: int | None = None
51
54
  # Rolling conversation and action history for non-Markov policies
52
- self.history_messages: List[Dict[str, str]] = [] # chat-style without system
55
+ self.history_messages: list[dict[str, str]] = [] # chat-style without system
53
56
  self.turn_index: int = 0
54
- self.trajectory_history: List[Dict[str, Any]] = [] # env/policy step records
57
+ self.trajectory_history: list[dict[str, Any]] = [] # env/policy step records
55
58
 
56
- async def initialize(self, config: Dict[str, Any]) -> None:
59
+ async def initialize(self, config: dict[str, Any]) -> None:
57
60
  if "inference_url" in config:
58
61
  self.inference_url = config["inference_url"]
59
62
  if "model" in config:
@@ -89,15 +92,15 @@ class CrafterPolicy(Policy):
89
92
 
90
93
  def _append_assistant_turn(
91
94
  self,
92
- assistant_text: Optional[str],
93
- tool_calls: Optional[List[Dict[str, Any]]],
94
- env_result: Optional[Dict[str, Any]],
95
+ assistant_text: str | None,
96
+ tool_calls: list[dict[str, Any]] | None,
97
+ env_result: dict[str, Any] | None,
95
98
  ) -> None:
96
99
  # Record assistant content (if any)
97
100
  if assistant_text is not None:
98
101
  self.history_messages.append({"role": "assistant", "content": assistant_text})
99
102
  # Keep structured step record for training/analysis
100
- record: Dict[str, Any] = {"turn": self.turn_index}
103
+ record: dict[str, Any] = {"turn": self.turn_index}
101
104
  if tool_calls is not None:
102
105
  record["tool_calls"] = tool_calls
103
106
  if env_result is not None:
@@ -107,13 +110,17 @@ class CrafterPolicy(Policy):
107
110
  def build_inference_request(
108
111
  self,
109
112
  observation_text: str,
110
- history: Optional[List[Dict[str, str]]] = None,
111
- turn: Optional[int] = None,
112
- ) -> Dict[str, Any]:
113
+ history: list[dict[str, Any]] | None = None,
114
+ turn: int | None = None,
115
+ image_parts: list[dict[str, Any]] | None = None,
116
+ ) -> dict[str, Any]:
113
117
  messages = CrafterReActAgent.build_messages(
114
- observation=observation_text, history=history, turn=turn
118
+ observation=observation_text,
119
+ history=history,
120
+ turn=turn,
121
+ image_parts=image_parts,
115
122
  )
116
- payload: Dict[str, Any] = {
123
+ payload: dict[str, Any] = {
117
124
  "messages": messages,
118
125
  }
119
126
  if self.model is not None:
@@ -148,9 +155,9 @@ class CrafterPolicy(Policy):
148
155
 
149
156
  @staticmethod
150
157
  def parse_response_to_tool_calls(
151
- response: Dict[str, Any],
158
+ response: dict[str, Any],
152
159
  use_tools: bool = True,
153
- ) -> List[Dict[str, Any]]:
160
+ ) -> list[dict[str, Any]]:
154
161
  """Turn an inference response into environment tool calls.
155
162
 
156
163
  - If tools were used, expect tool_calls-compatible output and forward as-is
@@ -160,8 +167,8 @@ class CrafterPolicy(Policy):
160
167
  """
161
168
  # First check if we got actual tool calls
162
169
  choices = response.get("choices", [])
163
- tool_calls: List[Dict[str, Any]] = []
164
-
170
+ tool_calls: list[dict[str, Any]] = []
171
+
165
172
  for choice in choices:
166
173
  msg = choice.get("message", {})
167
174
  if "tool_calls" in msg and msg["tool_calls"] is not None:
@@ -185,18 +192,19 @@ class CrafterPolicy(Policy):
185
192
  "arguments": tc["arguments"],
186
193
  }
187
194
  )
188
-
195
+
189
196
  # If we got tool calls, return them
190
197
  if tool_calls:
191
198
  # Normalize common degenerate pattern ["move_right", "do"] when nothing is nearby.
192
199
  # If previous env_result indicates no interaction target, drop trailing 'do'.
193
- normalized: List[Dict[str, Any]] = []
200
+ normalized: list[dict[str, Any]] = []
194
201
  for tc in tool_calls:
195
202
  if tc and isinstance(tc, dict) and tc.get("tool_name") == "interact_many":
196
203
  args = tc.get("arguments")
197
204
  if isinstance(args, str):
198
205
  try:
199
206
  import json
207
+
200
208
  args = json.loads(args)
201
209
  except (json.JSONDecodeError, ValueError):
202
210
  args = {}
@@ -208,11 +216,13 @@ class CrafterPolicy(Policy):
208
216
  # Simple heuristic: avoid repeating same pair; avoid 'do' with no context
209
217
  if len(actions) == 2 and actions[0] == "move_right" and actions[1] == "do":
210
218
  actions = ["move_right"]
211
- normalized.append({"tool_name": "interact_many", "arguments": {"actions": actions or []}})
219
+ normalized.append(
220
+ {"tool_name": "interact_many", "arguments": {"actions": actions or []}}
221
+ )
212
222
  else:
213
223
  normalized.append(tc)
214
224
  return normalized
215
-
225
+
216
226
  # Otherwise, parse plain text content for actions
217
227
  text = ""
218
228
  for choice in choices:
@@ -221,24 +231,25 @@ class CrafterPolicy(Policy):
221
231
  if content:
222
232
  text = content
223
233
  break
224
-
234
+
225
235
  if text:
226
236
  # Try to parse actions from the text
227
237
  from .shared import parse_actions
238
+
228
239
  actions = parse_actions(text)
229
240
  if actions:
230
241
  # Wrap actions in interact_many tool call
231
242
  return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
232
-
243
+
233
244
  # No actions found
234
245
  return []
235
246
 
236
247
  async def step(
237
248
  self,
238
249
  observation_text: str,
239
- state: Optional[Dict[str, Any]] = None,
240
- metadata: Optional[Dict[str, Any]] = None,
241
- ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
250
+ state: dict[str, Any] | None = None,
251
+ metadata: dict[str, Any] | None = None,
252
+ ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
242
253
  """Stateful step: update policy history and prepare inference request.
243
254
 
244
255
  Inputs (via metadata, optional):
@@ -255,16 +266,20 @@ class CrafterPolicy(Policy):
255
266
  """
256
267
  # If caller provided results from previous cycle, record them first
257
268
  if metadata is not None:
258
- prev_assistant_text: Optional[str] = None
259
- prev_tool_calls: Optional[List[Dict[str, Any]]] = None
260
- prev_env_result: Optional[Dict[str, Any]] = None
269
+ prev_assistant_text: str | None = None
270
+ prev_tool_calls: list[dict[str, Any]] | None = None
271
+ prev_env_result: dict[str, Any] | None = None
261
272
  if "prev_assistant_text" in metadata:
262
273
  prev_assistant_text = metadata["prev_assistant_text"]
263
274
  if "prev_tool_calls" in metadata:
264
275
  prev_tool_calls = metadata["prev_tool_calls"]
265
276
  if "prev_env_result" in metadata:
266
277
  prev_env_result = metadata["prev_env_result"]
267
- if prev_assistant_text is not None or prev_tool_calls is not None or prev_env_result is not None:
278
+ if (
279
+ prev_assistant_text is not None
280
+ or prev_tool_calls is not None
281
+ or prev_env_result is not None
282
+ ):
268
283
  self._append_assistant_turn(prev_assistant_text, prev_tool_calls, prev_env_result)
269
284
 
270
285
  # Append current observation as the next user message (internal history only)
@@ -273,11 +288,15 @@ class CrafterPolicy(Policy):
273
288
  # Build user message by combining the current observation text
274
289
  # (formatted surroundings/inventory) with the previous 3 tool calls as context.
275
290
  # Most recent first.
276
- lines: List[str] = []
277
- def _format_tool_call_line_for_context(tool_name: str, arguments: Any, max_chars: int = 500) -> str:
291
+ lines: list[str] = []
292
+
293
+ def _format_tool_call_line_for_context(
294
+ tool_name: str, arguments: Any, max_chars: int = 500
295
+ ) -> str:
278
296
  import json as _json
297
+
279
298
  # Render arguments compactly, then clip to max_chars
280
- if isinstance(arguments, (dict, list)):
299
+ if isinstance(arguments, dict | list):
281
300
  try:
282
301
  rendered = _json.dumps(arguments, ensure_ascii=False, separators=(",", ":"))
283
302
  except Exception:
@@ -289,6 +308,7 @@ class CrafterPolicy(Policy):
289
308
  if isinstance(rendered, str) and len(rendered) > max_chars:
290
309
  rendered = rendered[:max_chars]
291
310
  return f"- {tool_name}: {rendered}"
311
+
292
312
  # Prefer pulling from trajectory_history (accumulates over turns)
293
313
  for record in reversed(self.trajectory_history):
294
314
  if len(lines) >= 3:
@@ -306,7 +326,7 @@ class CrafterPolicy(Policy):
306
326
 
307
327
  # If trajectory history is empty (first few turns), fall back to metadata once
308
328
  if not lines and metadata is not None and metadata.get("prev_tool_calls"):
309
- calls: List[Dict[str, Any]] = metadata["prev_tool_calls"]
329
+ calls: list[dict[str, Any]] = metadata["prev_tool_calls"]
310
330
  for call in reversed(calls):
311
331
  if len(lines) >= 3:
312
332
  break
@@ -316,17 +336,27 @@ class CrafterPolicy(Policy):
316
336
  args = call.get("arguments")
317
337
  lines.append(_format_tool_call_line_for_context(name, args))
318
338
 
319
- context_text = "Previous tool calls (most recent first):\n" + ("\n".join(lines) if lines else "- none")
339
+ context_text = "Previous tool calls (most recent first):\n" + (
340
+ "\n".join(lines) if lines else "- none"
341
+ )
320
342
 
321
343
  # Combine observation with context so the model always sees surroundings/inventory
322
344
  combined_text = f"{observation_text}\n\n{context_text}"
323
345
 
346
+ raw_observation: dict[str, Any] | None = None
347
+ if metadata is not None:
348
+ raw_candidate = metadata.get("raw_observation")
349
+ if isinstance(raw_candidate, dict):
350
+ raw_observation = raw_candidate
351
+ image_parts = self._extract_image_parts(raw_observation)
352
+
324
353
  payload = self.build_inference_request(
325
354
  combined_text,
326
355
  history=[], # no prior user/assistant history
327
356
  turn=self.turn_index,
357
+ image_parts=image_parts,
328
358
  )
329
- #print("Debugging only:; ", payload)
359
+ # print("Debugging only:; ", payload)
330
360
  meta_out = {
331
361
  "inference_url": self.inference_url,
332
362
  "inference_request": payload,
@@ -335,19 +365,19 @@ class CrafterPolicy(Policy):
335
365
  }
336
366
  return [], meta_out
337
367
 
338
- def state_dict(self) -> Dict[str, Any]:
368
+ def state_dict(self) -> dict[str, Any]:
339
369
  return {
340
370
  "turn_index": self.turn_index,
341
371
  "history_messages": self.history_messages,
342
372
  "trajectory_history": self.trajectory_history,
343
373
  }
344
374
 
345
- def load_state_dict(self, state: Dict[str, Any]) -> None:
375
+ def load_state_dict(self, state: dict[str, Any]) -> None:
346
376
  self.turn_index = int(state["turn_index"])
347
377
  self.history_messages = state["history_messages"]
348
378
  self.trajectory_history = state["trajectory_history"]
349
379
 
350
- async def serialize(self) -> Dict[str, Any]:
380
+ async def serialize(self) -> dict[str, Any]:
351
381
  return {
352
382
  "name": self.name,
353
383
  "config": {
@@ -359,7 +389,7 @@ class CrafterPolicy(Policy):
359
389
  }
360
390
 
361
391
  @classmethod
362
- async def deserialize(cls, payload: Dict[str, Any]) -> "CrafterPolicy":
392
+ async def deserialize(cls, payload: dict[str, Any]) -> CrafterPolicy:
363
393
  config = payload["config"]
364
394
  state = payload["state"]
365
395
  policy = cls(
@@ -372,26 +402,28 @@ class CrafterPolicy(Policy):
372
402
 
373
403
  async def terminate(self) -> None:
374
404
  return None
375
-
405
+
376
406
  def prepare_inference_request(
377
- self, observation: Dict[str, Any], history: List[Dict[str, Any]] = None
378
- ) -> Tuple[List[Dict[str, Any]], Optional[List[Dict[str, Any]]]]:
407
+ self, observation: dict[str, Any], history: list[dict[str, Any]] = None
408
+ ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
379
409
  """Prepare an inference request (implementing abstract method)."""
380
410
  # Format observation with rich contextual information
381
411
  observation_text = self._format_observation_for_llm(observation)
412
+ image_parts = self._extract_image_parts(observation)
382
413
 
383
414
  # Build messages (observation_text already formatted; no raw matrices)
384
415
  messages = CrafterReActAgent.build_messages(
385
416
  observation=observation_text,
386
417
  history=history,
387
- turn=self.turn_index
418
+ turn=self.turn_index,
419
+ image_parts=image_parts,
388
420
  )
389
421
 
390
422
  # Return messages and tools schema
391
423
  tools = TOOLS_SCHEMA if self.use_tools else None
392
424
  return messages, tools
393
425
 
394
- def _format_observation_for_llm(self, observation: Dict[str, Any]) -> str:
426
+ def _format_observation_for_llm(self, observation: dict[str, Any]) -> str:
395
427
  """Format observation with rich contextual information for the LLM using the shared formatter."""
396
428
  from .shared import format_observation
397
429
 
@@ -402,39 +434,43 @@ class CrafterPolicy(Policy):
402
434
  if not isinstance(obs_data, dict):
403
435
  return f"Observation: {str(observation)}"
404
436
 
405
-
406
437
  # Use the shared format_observation function with step information
407
438
  step_idx = observation.get("step_idx", 0)
408
439
  max_steps = 100 # Default max steps, could be made configurable
409
440
 
410
441
  # Get additional info from the observation wrapper
411
442
  info = observation.get("info", {})
412
- if isinstance(info, dict):
413
- # Merge health from info into obs_data for the formatter
414
- if "health" in info and "health" not in obs_data:
415
- obs_data = dict(obs_data) # Make a copy
416
- obs_data["health"] = info["health"]
443
+ if isinstance(info, dict) and "health" in info and "health" not in obs_data:
444
+ obs_data = dict(obs_data) # Make a copy
445
+ obs_data["health"] = info["health"]
417
446
 
418
447
  return format_observation(obs_data, step_count=step_idx, max_steps=max_steps)
419
-
448
+
449
+ def _extract_image_parts(
450
+ self, observation: dict[str, Any] | None
451
+ ) -> list[dict[str, Any]]:
452
+ """Crafter policy uses text-only prompts; do not attach image parts."""
453
+
454
+ return []
455
+
420
456
  def parse_model_response(
421
- self, response: str, observation: Dict[str, Any]
422
- ) -> List[Dict[str, Any]]:
457
+ self, response: str, observation: dict[str, Any]
458
+ ) -> list[dict[str, Any]]:
423
459
  """Parse model response into tool calls (implementing abstract method).
424
-
460
+
425
461
  Note: Despite the type hint, vLLM actually returns a dict response,
426
462
  not a string. We handle both cases.
427
463
  """
428
464
  # Handle dict response from vLLM (the actual case)
429
465
  if isinstance(response, dict):
430
466
  return self.parse_response_to_tool_calls(response, self.use_tools)
431
-
467
+
432
468
  # Handle string response (fallback case for raw text)
433
469
  if isinstance(response, str):
434
470
  actions = CrafterReActAgent.parse_actions_from_response(response)
435
471
  if actions:
436
472
  return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
437
-
473
+
438
474
  # Default empty response
439
475
  return []
440
476
 
@@ -7,7 +7,7 @@ utilities to keep a single parser.
7
7
 
8
8
  from __future__ import annotations
9
9
 
10
- from typing import Dict, List, Optional
10
+ from typing import Any
11
11
 
12
12
  from .shared import parse_actions
13
13
 
@@ -51,7 +51,7 @@ class CrafterReActAgent:
51
51
  "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
52
52
  "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
53
53
  )
54
-
54
+
55
55
  @staticmethod
56
56
  def get_system_prompt_with_tools() -> str:
57
57
  """System prompt for tool-based interaction (e.g., Qwen3 models)."""
@@ -80,17 +80,29 @@ class CrafterReActAgent:
80
80
  )
81
81
 
82
82
  @staticmethod
83
- def build_messages(observation: str, history: Optional[List[Dict[str, str]]] = None, turn: Optional[int] = None) -> List[Dict[str, str]]:
83
+ def build_messages(
84
+ observation: str,
85
+ history: list[dict[str, Any]] | None = None,
86
+ turn: int | None = None,
87
+ image_parts: list[dict[str, Any]] | None = None,
88
+ ) -> list[dict[str, Any]]:
84
89
  """Construct OpenAI-style messages list for vLLM generation."""
85
- msgs: List[Dict[str, str]] = [{"role": "system", "content": CrafterReActAgent.get_system_prompt()}]
90
+ msgs: list[dict[str, Any]] = [
91
+ {"role": "system", "content": CrafterReActAgent.get_system_prompt()}
92
+ ]
86
93
  if history:
87
94
  msgs.extend(history)
88
- msgs.append({"role": "user", "content": observation})
95
+ user_content: Any
96
+ if image_parts:
97
+ user_content = [{"type": "text", "text": observation}] + list(image_parts)
98
+ else:
99
+ user_content = observation
100
+ msgs.append({"role": "user", "content": user_content})
89
101
  return msgs
90
102
 
91
103
  @staticmethod
92
- def parse_actions_from_response(response_text: str) -> List[str]:
104
+ def parse_actions_from_response(response_text: str) -> list[str]:
93
105
  return parse_actions(response_text)
94
106
 
95
107
 
96
- __all__ = ["CrafterReActAgent"]
108
+ __all__ = ["CrafterReActAgent"]