synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (327) hide show
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +8 -11
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/run_eval.py +36 -37
  19. examples/rl/run_rl_and_save.py +5 -5
  20. examples/rl/task_app/math_single_step.py +65 -43
  21. examples/rl/task_app/math_task_app.py +3 -3
  22. examples/sft/README.md +139 -0
  23. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  24. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  25. examples/sft/evaluate.py +117 -0
  26. examples/sft/export_dataset.py +117 -0
  27. examples/sft/generate_traces.py +162 -0
  28. examples/swe/__init__.py +12 -0
  29. examples/swe/task_app/README.md +105 -0
  30. examples/swe/task_app/__init__.py +2 -0
  31. examples/swe/task_app/grpo_swe_mini.py +571 -0
  32. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  33. examples/swe/task_app/hosted/README.md +173 -0
  34. examples/swe/task_app/hosted/__init__.py +5 -0
  35. examples/swe/task_app/hosted/branching.py +143 -0
  36. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  37. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  38. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  39. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  40. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  41. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  42. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  43. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  44. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  45. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  46. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  47. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  50. examples/swe/task_app/hosted/hosted_app.py +204 -0
  51. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  52. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  53. examples/swe/task_app/hosted/main.py +100 -0
  54. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  55. examples/swe/task_app/hosted/registry.py +195 -0
  56. examples/swe/task_app/hosted/rollout.py +1869 -0
  57. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  58. examples/swe/task_app/hosted/storage/volume.py +211 -0
  59. examples/swe/task_app/hosted/test_agents.py +161 -0
  60. examples/swe/task_app/hosted/test_service.py +137 -0
  61. examples/swe/task_app/hosted/utils.py +62 -0
  62. examples/vlm/README.md +68 -0
  63. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  64. examples/vlm/crafter_image_only_agent.py +207 -0
  65. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  66. examples/vlm/filter_image_rows.py +63 -0
  67. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  68. examples/warming_up_to_rl/analyze_trace_db.py +5 -5
  69. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  70. examples/warming_up_to_rl/export_trace_sft.py +78 -21
  71. examples/warming_up_to_rl/groq_test.py +4 -4
  72. examples/warming_up_to_rl/manage_secrets.py +13 -18
  73. examples/warming_up_to_rl/run_eval.py +42 -44
  74. examples/warming_up_to_rl/run_fft_and_save.py +11 -16
  75. examples/warming_up_to_rl/run_local_rollout.py +1 -3
  76. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
  77. examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
  78. examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
  79. examples/warming_up_to_rl/run_rl_and_save.py +5 -6
  80. examples/warming_up_to_rl/run_rollout_remote.py +8 -10
  81. examples/warming_up_to_rl/task_app/README.md +6 -2
  82. examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
  83. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  101. synth/__init__.py +14 -0
  102. synth_ai/__init__.py +26 -4
  103. synth_ai/api/models/supported.py +376 -0
  104. synth_ai/api/train/builders.py +128 -21
  105. synth_ai/api/train/cli.py +80 -64
  106. synth_ai/api/train/config_finder.py +7 -2
  107. synth_ai/api/train/env_resolver.py +1 -1
  108. synth_ai/api/train/pollers.py +2 -1
  109. synth_ai/api/train/supported_algos.py +139 -0
  110. synth_ai/api/train/task_app.py +1 -2
  111. synth_ai/api/train/utils.py +13 -44
  112. synth_ai/cli/__init__.py +8 -0
  113. synth_ai/cli/_modal_wrapper.py +28 -0
  114. synth_ai/cli/_typer_patch.py +49 -0
  115. synth_ai/cli/balance.py +1 -2
  116. synth_ai/cli/calc.py +1 -1
  117. synth_ai/cli/demo.py +2 -1
  118. synth_ai/cli/recent.py +2 -2
  119. synth_ai/cli/rl_demo.py +2 -1
  120. synth_ai/cli/root.py +11 -13
  121. synth_ai/cli/status.py +2 -2
  122. synth_ai/cli/task_apps.py +529 -179
  123. synth_ai/cli/traces.py +6 -4
  124. synth_ai/cli/watch.py +12 -18
  125. synth_ai/demo_registry.py +1 -1
  126. synth_ai/demos/core/cli.py +36 -43
  127. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  128. synth_ai/demos/demo_task_apps/core.py +17 -25
  129. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
  130. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  131. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
  132. synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
  133. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  134. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  135. synth_ai/environments/reproducibility/tree.py +2 -5
  136. synth_ai/environments/service/app.py +11 -12
  137. synth_ai/environments/service/core_routes.py +4 -7
  138. synth_ai/environments/stateful/engine.py +1 -1
  139. synth_ai/environments/tasks/core.py +1 -0
  140. synth_ai/environments/tasks/filters.py +5 -6
  141. synth_ai/environments/tasks/utils.py +4 -5
  142. synth_ai/handshake.py +9 -9
  143. synth_ai/http.py +1 -1
  144. synth_ai/http_client.py +18 -10
  145. synth_ai/inference/client.py +15 -5
  146. synth_ai/jobs/client.py +78 -83
  147. synth_ai/learning/__init__.py +41 -6
  148. synth_ai/learning/algorithms.py +14 -0
  149. synth_ai/learning/client.py +91 -24
  150. synth_ai/learning/config.py +2 -38
  151. synth_ai/learning/ft_client.py +4 -59
  152. synth_ai/learning/health.py +5 -6
  153. synth_ai/learning/jobs.py +31 -47
  154. synth_ai/{rl → learning/rl}/__init__.py +14 -4
  155. synth_ai/learning/rl/client.py +267 -0
  156. synth_ai/learning/rl/config.py +31 -0
  157. synth_ai/{rl → learning/rl}/contracts.py +5 -8
  158. synth_ai/{rl → learning/rl}/env_keys.py +39 -15
  159. synth_ai/learning/rl/secrets.py +13 -0
  160. synth_ai/learning/rl_client.py +2 -281
  161. synth_ai/learning/sft/__init__.py +29 -0
  162. synth_ai/learning/sft/client.py +68 -0
  163. synth_ai/learning/sft/config.py +270 -0
  164. synth_ai/learning/sft/data.py +295 -0
  165. synth_ai/learning/sse.py +25 -24
  166. synth_ai/learning/validators.py +25 -28
  167. synth_ai/lm/__init__.py +21 -47
  168. synth_ai/main.py +6 -0
  169. synth_ai/task/__init__.py +25 -27
  170. synth_ai/task/apps/__init__.py +7 -8
  171. synth_ai/task/auth.py +8 -8
  172. synth_ai/task/client.py +14 -14
  173. synth_ai/task/contracts.py +36 -35
  174. synth_ai/task/datasets.py +6 -5
  175. synth_ai/task/errors.py +10 -10
  176. synth_ai/task/health.py +17 -9
  177. synth_ai/task/json.py +58 -23
  178. synth_ai/task/proxy.py +13 -9
  179. synth_ai/task/rubrics.py +16 -15
  180. synth_ai/task/server.py +12 -12
  181. synth_ai/task/tracing_utils.py +4 -4
  182. synth_ai/task/vendors.py +5 -6
  183. synth_ai/tracing_v3/__init__.py +2 -0
  184. synth_ai/tracing_v3/abstractions.py +21 -4
  185. synth_ai/tracing_v3/decorators.py +18 -16
  186. synth_ai/tracing_v3/hooks.py +5 -5
  187. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  188. synth_ai/tracing_v3/session_tracer.py +40 -14
  189. synth_ai/tracing_v3/storage/base.py +85 -0
  190. synth_ai/tracing_v3/storage/config.py +21 -8
  191. synth_ai/tracing_v3/storage/factory.py +10 -7
  192. synth_ai/tracing_v3/storage/utils.py +4 -2
  193. synth_ai/tracing_v3/turso/daemon.py +7 -2
  194. synth_ai/tracing_v3/turso/models.py +2 -2
  195. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  196. synth_ai/tracing_v3/utils.py +4 -4
  197. synth_ai/v0/api/__init__.py +8 -0
  198. synth_ai/v0/api/models/__init__.py +8 -0
  199. synth_ai/v0/api/models/supported.py +8 -0
  200. synth_ai/v0/config/__init__.py +15 -0
  201. synth_ai/v0/config/base_url.py +12 -0
  202. synth_ai/v0/lm/__init__.py +51 -0
  203. synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
  204. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  205. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  206. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  207. synth_ai/{lm → v0/lm}/config.py +6 -1
  208. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  209. synth_ai/{lm → v0/lm}/core/main.py +6 -6
  210. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  211. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
  212. synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
  213. synth_ai/{lm → v0/lm}/overrides.py +2 -2
  214. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  215. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  216. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  217. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  218. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
  219. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  220. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  221. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
  222. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
  223. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
  224. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
  225. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  226. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  227. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  228. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  229. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  230. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  231. synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
  232. synth_ai/v0/tracing_v3/__init__.py +10 -0
  233. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  234. synth_ai/v0/tracing_v3/decorators.py +3 -0
  235. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  236. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  237. synth_ai-0.2.9.dev9.dist-info/METADATA +191 -0
  238. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/RECORD +268 -238
  239. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/top_level.txt +1 -0
  240. examples/common_old/backend.py +0 -20
  241. examples/evals_old/README.md +0 -98
  242. examples/evals_old/__init__.py +0 -6
  243. examples/evals_old/compare_models.py +0 -1038
  244. examples/evals_old/example_log.md +0 -145
  245. examples/evals_old/run_demo.sh +0 -126
  246. examples/evals_old/trace_analysis.py +0 -270
  247. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  248. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  249. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  250. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
  251. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  252. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  253. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  254. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  255. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  256. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
  257. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  258. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  259. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
  260. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  261. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  262. examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
  263. examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
  264. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  265. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  266. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
  267. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
  268. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
  269. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  270. examples/finetuning_old/synth_qwen_v1/util.py +0 -152
  271. examples/rl_old/task_app.py +0 -1131
  272. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  273. examples/warming_up_to_rl/old/notes.md +0 -73
  274. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  275. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  276. synth_ai/experimental/synth_oss.py +0 -445
  277. synth_ai/learning/filtering.py +0 -0
  278. synth_ai/learning/offline/dpo.py +0 -0
  279. synth_ai/learning/offline/providers.py +0 -7
  280. synth_ai/learning/offline/sft.py +0 -0
  281. synth_ai/learning/offline/shared.py +0 -0
  282. synth_ai/learning/online/grpo.py +0 -0
  283. synth_ai/learning/online/irft.py +0 -0
  284. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  285. synth_ai/learning/prompts/gepa.py +0 -0
  286. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
  287. synth_ai/learning/prompts/mipro.py +0 -289
  288. synth_ai/learning/prompts/random_search.py +0 -249
  289. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  290. synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
  291. synth_ai/rl/secrets.py +0 -19
  292. synth_ai/scripts/verify_rewards.py +0 -100
  293. synth_ai/tracing/__init__.py +0 -30
  294. synth_ai/tracing_v1/__init__.py +0 -33
  295. synth_ai/tracing_v3/turso/__init__.py +0 -25
  296. synth_ai/tracing_v3/turso/manager.py +0 -838
  297. synth_ai/zyk/__init__.py +0 -30
  298. synth_ai-0.2.9.dev7.dist-info/METADATA +0 -131
  299. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  300. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  301. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  302. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  303. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  304. /synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
  305. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  306. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  307. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  308. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  309. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  310. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  311. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  312. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  313. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  314. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  315. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  316. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  317. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  318. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  319. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  320. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  321. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  322. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  323. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  324. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  325. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/WHEEL +0 -0
  326. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/entry_points.txt +0 -0
  327. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/licenses/LICENSE +0 -0
@@ -7,16 +7,19 @@ Baseline evaluation script (public-friendly skeleton)
7
7
  """
8
8
 
9
9
  from __future__ import annotations
10
- import os
10
+
11
+ import argparse
12
+ import asyncio
13
+ import contextlib
11
14
  import json
15
+ import os
12
16
  import re
13
- from typing import Any, Dict, List, Optional
14
- from collections import Counter
15
- import asyncio
16
- import httpx
17
- import argparse
18
17
  import tomllib
18
+ from collections import Counter
19
19
  from pathlib import Path
20
+ from typing import Any
21
+
22
+ import httpx
20
23
 
21
24
 
22
25
  class TaskAppClient:
@@ -25,12 +28,12 @@ class TaskAppClient:
25
28
  This is a public-friendly shim for examples, pending SDK surface consolidation.
26
29
  """
27
30
 
28
- def __init__(self, base_url: str, api_key: Optional[str] = None) -> None:
31
+ def __init__(self, base_url: str, api_key: str | None = None) -> None:
29
32
  self.base_url = base_url.rstrip("/")
30
33
  self.api_key = api_key
31
- self._client: Optional[httpx.AsyncClient] = None
34
+ self._client: httpx.AsyncClient | None = None
32
35
 
33
- async def __aenter__(self) -> "TaskAppClient":
36
+ async def __aenter__(self) -> TaskAppClient:
34
37
  headers = {}
35
38
  if self.api_key:
36
39
  headers["X-API-Key"] = self.api_key
@@ -56,9 +59,9 @@ class TaskAppClient:
56
59
  )
57
60
  return self._client
58
61
 
59
- async def initialize(self, env_name: str, config: Dict[str, Any]) -> Dict[str, Any]:
62
+ async def initialize(self, env_name: str, config: dict[str, Any]) -> dict[str, Any]:
60
63
  """POST /env/{env_name}/initialize (compat route supported in task app)."""
61
- payload: Dict[str, Any] = {
64
+ payload: dict[str, Any] = {
62
65
  "seed": config.get("seed"),
63
66
  }
64
67
  # Allow both world_config and config inputs; env routes will normalize difficulty
@@ -71,30 +74,30 @@ class TaskAppClient:
71
74
  return resp.json()
72
75
 
73
76
  async def step(
74
- self, env_name: str, env_id: str, tool_calls: List[Dict[str, Any]]
75
- ) -> Dict[str, Any]:
77
+ self, env_name: str, env_id: str, tool_calls: list[dict[str, Any]]
78
+ ) -> dict[str, Any]:
76
79
  """POST /env/{env_name}/step with wrapped tool_calls in action."""
77
80
  payload = {"env_id": env_id, "action": {"tool_calls": tool_calls}}
78
81
  resp = await self.client.post(f"/env/{env_name}/step", json=payload)
79
82
  resp.raise_for_status()
80
83
  return resp.json()
81
84
 
82
- async def terminate(self, env_name: str, env_id: str) -> Dict[str, Any]:
85
+ async def terminate(self, env_name: str, env_id: str) -> dict[str, Any]:
83
86
  resp = await self.client.post(f"/env/{env_name}/terminate", json={"env_id": env_id})
84
87
  resp.raise_for_status()
85
88
  return resp.json()
86
89
 
87
- async def get_info(self) -> Dict[str, Any]:
90
+ async def get_info(self) -> dict[str, Any]:
88
91
  resp = await self.client.get("/info")
89
92
  resp.raise_for_status()
90
93
  return resp.json()
91
94
 
92
- async def proxy_groq_chat(self, payload: Dict[str, Any]) -> Dict[str, Any]:
95
+ async def proxy_groq_chat(self, payload: dict[str, Any]) -> dict[str, Any]:
93
96
  resp = await self.client.post("/proxy/groq/v1/chat/completions", json=payload)
94
97
  resp.raise_for_status()
95
98
  return resp.json()
96
99
 
97
- async def vllm_chat(self, vllm_base_url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
100
+ async def vllm_chat(self, vllm_base_url: str, payload: dict[str, Any]) -> dict[str, Any]:
98
101
  async with httpx.AsyncClient(base_url=vllm_base_url.rstrip("/"), timeout=60.0) as c:
99
102
  resp = await c.post("/v1/chat/completions", json=payload)
100
103
  # Do not raise for status to surface body in errors
@@ -114,13 +117,13 @@ class TaskAppClient:
114
117
  seed: int,
115
118
  difficulty: str,
116
119
  policy_name: str,
117
- policy_config: Dict[str, Any],
120
+ policy_config: dict[str, Any],
118
121
  max_turns: int,
119
- ) -> Dict[str, Any]:
120
- ops: List[str] = []
122
+ ) -> dict[str, Any]:
123
+ ops: list[str] = []
121
124
  for _ in range(max_turns):
122
125
  ops.extend(["agent", "env"])
123
- payload: Dict[str, Any] = {
126
+ payload: dict[str, Any] = {
124
127
  "run_id": run_id,
125
128
  "env": {
126
129
  "env_name": env_name,
@@ -150,7 +153,7 @@ MAX_TURNS = int(os.getenv("MAX_TURNS", "10"))
150
153
  CONCURRENCY = int(os.getenv("CONCURRENCY", "1"))
151
154
 
152
155
 
153
- def _interact_tool_schema() -> List[Dict[str, Any]]:
156
+ def _interact_tool_schema() -> list[dict[str, Any]]:
154
157
  return [
155
158
  {
156
159
  "type": "function",
@@ -171,13 +174,12 @@ def _interact_tool_schema() -> List[Dict[str, Any]]:
171
174
 
172
175
 
173
176
  def _build_messages_from_observation(
174
- observation: Dict[str, Any], history: List[Dict[str, Any]]
175
- ) -> List[Dict[str, Any]]:
177
+ observation: dict[str, Any], history: list[dict[str, Any]]
178
+ ) -> list[dict[str, Any]]:
176
179
  inv = observation.get("inventory") or {}
177
180
  pos = observation.get("player_position") or []
178
181
  ach = observation.get("achievements_status") or {}
179
- turns_taken = observation.get("num_steps_taken") or 0
180
- user_lines: List[str] = []
182
+ user_lines: list[str] = []
181
183
  user_lines.append("Environment: CrafterClassic")
182
184
  user_lines.append(f"Player position: {pos}")
183
185
  user_lines.append(f"Inventory: {json.dumps(inv, ensure_ascii=False)}")
@@ -193,7 +195,7 @@ def _build_messages_from_observation(
193
195
  return [{"role": "user", "content": content}]
194
196
 
195
197
 
196
- def _parse_tool_calls_from_openai_response(data: Dict[str, Any]) -> List[str]:
198
+ def _parse_tool_calls_from_openai_response(data: dict[str, Any]) -> list[str]:
197
199
  try:
198
200
  choices = data.get("choices")
199
201
  if isinstance(choices, list) and choices:
@@ -248,11 +250,11 @@ async def _choose_actions_via_llm(
248
250
  client: TaskAppClient,
249
251
  provider: str,
250
252
  model: str,
251
- observation: Dict[str, Any],
252
- history: List[Dict[str, Any]],
253
- ) -> List[str]:
253
+ observation: dict[str, Any],
254
+ history: list[dict[str, Any]],
255
+ ) -> list[str]:
254
256
  messages = _build_messages_from_observation(observation, history)
255
- payload: Dict[str, Any] = {
257
+ payload: dict[str, Any] = {
256
258
  "model": model,
257
259
  "messages": messages,
258
260
  "tools": _interact_tool_schema(),
@@ -279,8 +281,8 @@ async def _choose_actions_via_llm(
279
281
  return actions or []
280
282
 
281
283
 
282
- def _expand_actions_to_tool_calls(actions: List[str]) -> List[Dict[str, Any]]:
283
- out: List[Dict[str, Any]] = []
284
+ def _expand_actions_to_tool_calls(actions: list[str]) -> list[dict[str, Any]]:
285
+ out: list[dict[str, Any]] = []
284
286
  for a in actions[:5]:
285
287
  out.append({"tool": "interact", "args": {"action": a}})
286
288
  return out
@@ -293,9 +295,7 @@ def _detect_provider(model: str) -> str:
293
295
  return "vllm"
294
296
 
295
297
 
296
- def _rollout_inference_url_from_cfg(
297
- cfg: Dict[str, Any], default_vllm: Optional[str]
298
- ) -> Optional[str]:
298
+ def _rollout_inference_url_from_cfg(cfg: dict[str, Any], default_vllm: str | None) -> str | None:
299
299
  # Prefer explicit inference_url in TOML; else fall back to discovered vLLM base
300
300
  url = cfg.get("inference_url")
301
301
  if isinstance(url, str) and url:
@@ -303,14 +303,14 @@ def _rollout_inference_url_from_cfg(
303
303
  return default_vllm
304
304
 
305
305
 
306
- async def eval_episode(client: TaskAppClient, seed: int) -> Dict[str, Any]:
306
+ async def eval_episode(client: TaskAppClient, seed: int) -> dict[str, Any]:
307
307
  env_name = "CrafterClassic"
308
- history: List[Dict[str, Any]] = []
308
+ history: list[dict[str, Any]] = []
309
309
  achievements: set[str] = set()
310
310
  turns = 0
311
311
 
312
312
  # Initialize environment
313
- init_cfg: Dict[str, Any] = {
313
+ init_cfg: dict[str, Any] = {
314
314
  "seed": seed,
315
315
  "world_config": {"difficulty": os.getenv("DIFFICULTY", "easy")},
316
316
  }
@@ -343,10 +343,8 @@ async def eval_episode(client: TaskAppClient, seed: int) -> Dict[str, Any]:
343
343
  if isinstance(nxt, dict):
344
344
  observation = nxt
345
345
  finally:
346
- try:
346
+ with contextlib.suppress(Exception):
347
347
  await client.terminate(env_name, env_id)
348
- except Exception:
349
- pass
350
348
 
351
349
  return {"seed": seed, "turns": turns, "achievements": sorted(achievements)}
352
350
 
@@ -377,7 +375,7 @@ async def main() -> None:
377
375
  args = parser.parse_args()
378
376
 
379
377
  global TASK_APP_URL, MODEL, NUM_EPISODES, MAX_TURNS, CONCURRENCY
380
- cfg: Dict[str, Any] = {}
378
+ cfg: dict[str, Any] = {}
381
379
  if args.toml:
382
380
  with open(args.toml, "rb") as f:
383
381
  cfg = tomllib.load(f)
@@ -418,7 +416,7 @@ async def main() -> None:
418
416
  try:
419
417
  run_id = f"eval-{seed}"
420
418
  # Build policy config from TOML (explicit control; no server-side guessing)
421
- policy_cfg: Dict[str, Any] = {
419
+ policy_cfg: dict[str, Any] = {
422
420
  "model": cfg.get("model", MODEL),
423
421
  "inference_url": inf_url,
424
422
  }
@@ -6,14 +6,12 @@ import json
6
6
  import os
7
7
  import sys
8
8
  import time
9
+ import tomllib
9
10
  from pathlib import Path
10
- from typing import Any, Dict, Tuple, List
11
+ from typing import Any
11
12
 
12
- import tomllib
13
- import re
14
13
  import requests
15
14
  from dotenv import load_dotenv
16
-
17
15
  from synth_ai.config.base_url import PROD_BASE_URL_DEFAULT
18
16
 
19
17
 
@@ -25,7 +23,7 @@ def mask(val: str) -> str:
25
23
 
26
24
  def post_multipart(
27
25
  base: str, api_key: str, path: str, file_field: str, filepath: Path
28
- ) -> Dict[str, Any]:
26
+ ) -> dict[str, Any]:
29
27
  """Upload a file, trying backend-specific endpoints with fallbacks.
30
28
 
31
29
  Priority:
@@ -40,7 +38,7 @@ def post_multipart(
40
38
  f"{base.rstrip('/')}/{path.lstrip('/')}", # e.g., /learning/files
41
39
  f"{base.rstrip('/')}/files", # OpenAI-style
42
40
  ]
43
- last_err: Dict[str, Any] | None = None
41
+ last_err: dict[str, Any] | None = None
44
42
  for ep in endpoints:
45
43
  try:
46
44
  r = requests.post(ep, headers=headers, files=files, data=data, timeout=300)
@@ -72,7 +70,7 @@ def post_multipart(
72
70
  return last_err or {"error": True, "detail": "upload_failed_all_endpoints"}
73
71
 
74
72
 
75
- def post_json(base: str, api_key: str, path: str, body: Dict[str, Any]) -> Dict[str, Any]:
73
+ def post_json(base: str, api_key: str, path: str, body: dict[str, Any]) -> dict[str, Any]:
76
74
  url = f"{base.rstrip('/')}/{path.lstrip('/')}"
77
75
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
78
76
  r = requests.post(url, headers=headers, data=json.dumps(body), timeout=120)
@@ -82,7 +80,7 @@ def post_json(base: str, api_key: str, path: str, body: Dict[str, Any]) -> Dict[
82
80
  return {"status": r.status_code, "text": r.text[:400]}
83
81
 
84
82
 
85
- def get_json(base: str, api_key: str, path: str) -> Dict[str, Any]:
83
+ def get_json(base: str, api_key: str, path: str) -> dict[str, Any]:
86
84
  url = f"{base.rstrip('/')}/{path.lstrip('/')}"
87
85
  headers = {"Authorization": f"Bearer {api_key}"}
88
86
  r = requests.get(url, headers=headers, timeout=30)
@@ -92,9 +90,9 @@ def get_json(base: str, api_key: str, path: str) -> Dict[str, Any]:
92
90
  return {"status": r.status_code, "text": r.text[:400]}
93
91
 
94
92
 
95
- def _find_fft_configs() -> List[Path]:
93
+ def _find_fft_configs() -> list[Path]:
96
94
  """Find FFT TOML configs in standard locations."""
97
- candidates: List[Path] = []
95
+ candidates: list[Path] = []
98
96
 
99
97
  # Check current directory configs/
100
98
  cwd = Path.cwd()
@@ -206,10 +204,7 @@ def main() -> None:
206
204
  # Try relative to cwd first, then relative to config directory
207
205
  cwd_relative = Path.cwd() / p
208
206
  config_relative = config_path.parent / p
209
- if cwd_relative.exists():
210
- p = cwd_relative.resolve()
211
- else:
212
- p = config_relative.resolve()
207
+ p = cwd_relative.resolve() if cwd_relative.exists() else config_relative.resolve()
213
208
  data_file = p
214
209
  if data_file is None:
215
210
  print("Missing dataset path in --data or [job].data", file=sys.stderr)
@@ -274,7 +269,7 @@ def main() -> None:
274
269
  )
275
270
 
276
271
  # 2) Build job payload
277
- hp_block: Dict[str, Any] = {
272
+ hp_block: dict[str, Any] = {
278
273
  "n_epochs": int(hp_cfg.get("n_epochs") or 1),
279
274
  }
280
275
  # Optional extras if present
@@ -295,7 +290,7 @@ def main() -> None:
295
290
  if parallel:
296
291
  hp_block["parallelism"] = parallel
297
292
 
298
- compute_block: Dict[str, Any] = {}
293
+ compute_block: dict[str, Any] = {}
299
294
  for k in ("gpu_type", "gpu_count", "nodes"):
300
295
  if k in compute_cfg:
301
296
  compute_block[k] = compute_cfg[k]
@@ -7,14 +7,12 @@ import argparse
7
7
  import asyncio
8
8
  import json
9
9
  import os
10
+ import sys
10
11
  from pathlib import Path
11
12
  from typing import Any
12
13
 
13
- import sys
14
-
15
14
  import httpx
16
15
  from dotenv import load_dotenv
17
-
18
16
  from synth_ai.task import (
19
17
  RolloutEnvSpec,
20
18
  RolloutPolicySpec,
@@ -7,14 +7,12 @@ import argparse
7
7
  import asyncio
8
8
  import json
9
9
  import os
10
+ import sys
10
11
  from pathlib import Path
11
12
  from typing import Any
12
13
 
13
- import sys
14
-
15
14
  import httpx
16
15
  from dotenv import load_dotenv
17
-
18
16
  from synth_ai.task import (
19
17
  RolloutEnvSpec,
20
18
  RolloutPolicySpec,
@@ -122,7 +120,7 @@ async def main() -> None:
122
120
  base_url = args.base_url
123
121
  if args.base_url == "http://localhost:8010":
124
122
  print("\nTask app configuration:")
125
- base_url_input = input(f"Task app base URL [http://localhost:8001]: ").strip()
123
+ base_url_input = input("Task app base URL [http://localhost:8001]: ").strip()
126
124
  base_url = base_url_input if base_url_input else "http://localhost:8001"
127
125
 
128
126
  model = args.model
@@ -5,7 +5,6 @@ from __future__ import annotations
5
5
 
6
6
  import argparse
7
7
  import asyncio
8
- import json
9
8
  import os
10
9
  from collections import Counter
11
10
  from pathlib import Path
@@ -13,15 +12,13 @@ from statistics import mean, median
13
12
  from typing import Any
14
13
 
15
14
  from dotenv import load_dotenv
16
-
17
- from synth_ai.task import TaskAppClient
18
-
19
15
  from synth_ai.task import (
20
16
  RolloutEnvSpec,
21
17
  RolloutPolicySpec,
22
18
  RolloutRecordConfig,
23
19
  RolloutRequest,
24
20
  RolloutSafetyConfig,
21
+ TaskAppClient,
25
22
  )
26
23
 
27
24
 
@@ -7,13 +7,11 @@ import argparse
7
7
  import asyncio
8
8
  import json
9
9
  import os
10
+ import sys
10
11
  from pathlib import Path
11
12
  from typing import Any
12
13
 
13
- import sys
14
-
15
14
  import httpx
16
-
17
15
  from synth_ai.task import (
18
16
  RolloutEnvSpec,
19
17
  RolloutPolicySpec,
@@ -331,7 +329,7 @@ async def main() -> None:
331
329
  base_url = args.base_url
332
330
  if args.base_url == "http://localhost:8001":
333
331
  print("\nTask app configuration:")
334
- base_url_input = input(f"Task app base URL [http://localhost:8001]: ").strip()
332
+ base_url_input = input("Task app base URL [http://localhost:8001]: ").strip()
335
333
  base_url = base_url_input if base_url_input else "http://localhost:8001"
336
334
 
337
335
  api_key = args.api_key or os.getenv("ENVIRONMENT_API_KEY")
@@ -379,7 +377,7 @@ async def main() -> None:
379
377
  print("\nRollout configuration:")
380
378
  max_llm_calls = args.max_llm_calls
381
379
  if args.max_llm_calls == 1:
382
- max_llm_calls_input = input(f"Max LLM calls [10]: ").strip()
380
+ max_llm_calls_input = input("Max LLM calls [10]: ").strip()
383
381
  max_llm_calls = int(max_llm_calls_input) if max_llm_calls_input else 10
384
382
 
385
383
  # Override args with prompted values
@@ -5,16 +5,15 @@ import argparse
5
5
  import json
6
6
  import os
7
7
  import sys
8
+ import tomllib
8
9
  from pathlib import Path
9
- from typing import Any, Dict
10
+ from typing import Any
10
11
 
11
- import tomllib
12
12
  import requests
13
-
14
13
  from synth_ai.config.base_url import PROD_BASE_URL_DEFAULT
15
14
 
16
15
 
17
- def _load_toml(path: Path) -> Dict[str, Any]:
16
+ def _load_toml(path: Path) -> dict[str, Any]:
18
17
  if not path.exists():
19
18
  print(f"config not found: {path}", file=sys.stderr)
20
19
  sys.exit(2)
@@ -74,7 +73,7 @@ def main() -> None:
74
73
  sys.exit(2)
75
74
 
76
75
  # Build create-job payload. Send full TOML under data.config, plus endpoint_base_url.
77
- payload: Dict[str, Any] = {
76
+ payload: dict[str, Any] = {
78
77
  "job_type": "rl",
79
78
  # Optional: compute pass-through
80
79
  "compute": cfg.get("compute", {}) if isinstance(cfg.get("compute"), dict) else {},
@@ -87,7 +86,7 @@ def main() -> None:
87
86
 
88
87
  backend = str(args.backend).rstrip("/")
89
88
  url = f"{backend}/rl/jobs"
90
- api_key = (os.getenv("SYNTH_API_KEY") or os.getenv("synth_key") or "").strip()
89
+ api_key = (os.getenv("SYNTH_API_KEY") or os.getenv("SYNTH_KEY") or "").strip()
91
90
  if not api_key:
92
91
  print("Missing SYNTH_API_KEY in env", file=sys.stderr)
93
92
  sys.exit(2)
@@ -10,6 +10,14 @@ import os
10
10
  import sys
11
11
 
12
12
  import httpx
13
+ from synth_ai.task import (
14
+ RolloutEnvSpec,
15
+ RolloutPolicySpec,
16
+ RolloutRecordConfig,
17
+ RolloutRequest,
18
+ RolloutSafetyConfig,
19
+ TaskAppClient,
20
+ )
13
21
 
14
22
 
15
23
  def check_health(base_url: str, api_key: str) -> None:
@@ -30,16 +38,6 @@ def check_health(base_url: str, api_key: str) -> None:
30
38
  print(f"warning: failed to call /health: {exc}")
31
39
 
32
40
 
33
- from synth_ai.task import (
34
- RolloutEnvSpec,
35
- RolloutPolicySpec,
36
- RolloutRecordConfig,
37
- RolloutRequest,
38
- RolloutSafetyConfig,
39
- TaskAppClient,
40
- )
41
-
42
-
43
41
  def build_request(
44
42
  *,
45
43
  run_id: str,
@@ -25,8 +25,12 @@ uvx synth-ai deploy grpo-crafter --name grpo-crafter-task-app
25
25
 
26
26
  Requirements:
27
27
  - Modal CLI installed and authenticated (`modal token new`).
28
- - Secrets `crafter-environment-sdk`, `groq-api-key`, and `openai-api-key`
29
- available in your Modal account.
28
+ - Either provide an `.env` with `ENVIRONMENT_API_KEY`, `GROQ_API_KEY`, and `OPENAI_API_KEY`
29
+ (recommended; pass via `--env-file`). The deploy command injects these values via an inline
30
+ Modal secret plus `Secret.from_dotenv`, so the minted environment key stays in sync with
31
+ what the CLI sends.
32
+ - Or ensure Modal secrets `groq-api-key` and `openai-api-key` exist and continue to supply
33
+ model vendor credentials that way.
30
34
 
31
35
  The CLI generates a Modal entrypoint on the fly using the shared
32
36
  `TaskAppConfig`, ensuring the container matches the local FastAPI behavior.