synth-ai: synth_ai-0.2.16-py3-none-any.whl → synth_ai-0.2.19-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (299)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,46 @@
1
+ """Lightweight Modal deploy wrapper for Banking77 task app (web)."""
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ from pathlib import Path
6
+
7
+ try:
8
+ import modal # type: ignore
9
+ except Exception as exc: # pragma: no cover
10
+ raise SystemExit(f"Modal is required to deploy: {exc}")
11
+
12
+ _here = Path(__file__).resolve()
13
+ _parents = list(_here.parents)
14
+ REPO_ROOT = _parents[3] if len(_parents) > 3 else Path.cwd()
15
+
16
+ app = modal.App("synth-banking77-web")
17
+
18
+ _image = (
19
+ modal.Image.debian_slim(python_version="3.11")
20
+ .pip_install(
21
+ "synth-ai",
22
+ "datasets>=2.14.0",
23
+ "fastapi>=0.115.0",
24
+ "pydantic>=2.0.0",
25
+ "httpx>=0.26.0",
26
+ "python-dotenv>=1.0.0",
27
+ )
28
+ .env({"PYTHONPATH": "/opt/synth_ai_repo"})
29
+ .add_local_dir(str(REPO_ROOT / "synth_ai"), "/opt/synth_ai_repo/synth_ai", copy=True)
30
+ .add_local_dir(str(REPO_ROOT / "examples"), "/opt/synth_ai_repo/examples", copy=True)
31
+ )
32
+ _env_file = REPO_ROOT / ".env"
33
+ if _env_file.exists():
34
+ _image = _image.add_local_file(str(_env_file), "/opt/synth_ai_repo/.env")
35
+
36
+
37
+ @app.function(image=_image, timeout=600)
38
+ @modal.asgi_app()
39
+ def web():
40
+ # Lazy import the task app to avoid local heavy deps
41
+ import contextlib
42
+ with contextlib.suppress(Exception):
43
+ from dotenv import load_dotenv # type: ignore
44
+ load_dotenv(str(REPO_ROOT / ".env"), override=False)
45
+ from examples.task_apps.banking77.banking77_task_app import fastapi_app # type: ignore
46
+ return fastapi_app()
@@ -271,3 +271,7 @@ min_official_score = 0.01 # Filter by outcome_rewards
271
271
  - `QUERY_EXAMPLES.md` - SQL queries for trace analysis
272
272
 
273
273
 
274
+
275
+
276
+
277
+
@@ -172,3 +172,7 @@ cat ft_data/crafter_image_only_sft.jsonl | jq .
172
172
  **Action Required**: Debug why messages aren't being saved to the database despite correct code path.
173
173
 
174
174
 
175
+
176
+
177
+
178
+
@@ -266,3 +266,7 @@ sqlite3 traces/v3/crafter_eval.db \
266
266
  **Status**: 🎉 **WORKING END-TO-END!**
267
267
 
268
268
 
269
+
270
+
271
+
272
+
@@ -6,7 +6,7 @@ underlying FastAPI plumbing.
6
6
 
7
7
  ## Local development
8
8
  ```bash
9
- uvx synth-ai serve grpo-crafter --port 8001
9
+ uvx synth-ai deploy --runtime uvicorn grpo-crafter --port 8001
10
10
  # Optional extras:
11
11
  # --env-file path/to/.env # load additional environment variables
12
12
  # --reload # enable uvicorn auto-reload
@@ -6,12 +6,17 @@ import json
6
6
  import logging
7
7
  import os
8
8
  import sys
9
+ from urllib.parse import parse_qs, urlparse
9
10
  from collections.abc import Iterable, Sequence
10
11
  from contextlib import suppress
11
12
  from dataclasses import dataclass
13
+ from datetime import UTC, datetime
12
14
  from pathlib import Path
13
15
  from typing import Any
14
16
 
17
+ from fastapi import HTTPException
18
+ from pydantic import BaseModel
19
+
15
20
  from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
16
21
  from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
17
22
  from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
@@ -37,7 +42,16 @@ except Exception: # pragma: no cover - utils unavailable if optional deps missi
37
42
  """Fallback to shared utility for URL normalization."""
38
43
  return normalize_inference_url(raw_url) if raw_url else raw_url
39
44
 
40
- def extract_trace_correlation_id(_raw_url):
45
+ def extract_trace_correlation_id(_raw_url, mode=None):
46
+ if not isinstance(_raw_url, str):
47
+ return None
48
+ parsed = urlparse(_raw_url)
49
+ query_params = parse_qs(parsed.query or "")
50
+ for key in ("cid", "trace", "trace_correlation_id"):
51
+ values = query_params.get(key) or []
52
+ for value in values:
53
+ if isinstance(value, str) and value.strip():
54
+ return value.strip()
41
55
  return None
42
56
  logger = logging.getLogger(__name__)
43
57
 
@@ -651,12 +665,20 @@ def _resolve_trace_correlation_id(policy_cfg: dict[str, Any], mode: Any = None)
651
665
  if stripped:
652
666
  return stripped
653
667
 
654
- return extract_trace_correlation_id(policy_cfg.get("inference_url"))
668
+ return extract_trace_correlation_id(policy_cfg.get("inference_url"), mode=mode)
655
669
 
656
670
 
657
671
  async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
658
672
  request = _coerce_math_to_crafter(request)
659
673
 
674
+ record_cfg = request.record.model_copy(
675
+ update={
676
+ "return_trace": True,
677
+ "trace_format": "structured",
678
+ }
679
+ )
680
+ request = request.model_copy(update={"record": record_cfg})
681
+
660
682
  policy_cfg = dict(request.policy.config or {})
661
683
  logger.info(
662
684
  "ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
@@ -800,11 +822,49 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
800
822
  trace_correlation_id,
801
823
  )
802
824
  data = legacy_response.model_dump()
825
+ legacy_trace = getattr(legacy_response, "trace", None)
826
+ if legacy_trace is not None:
827
+ if isinstance(legacy_trace, dict):
828
+ legacy_trace_preview = list(legacy_trace.keys())[:5]
829
+ else:
830
+ legacy_trace_preview = type(legacy_trace)
831
+ logger.info(
832
+ "ROLLOUT_EXEC: legacy response trace present type=%s preview=%s",
833
+ type(legacy_trace),
834
+ legacy_trace_preview,
835
+ )
836
+ logger.debug(
837
+ "ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
838
+ sorted(data.keys()),
839
+ bool(data.get("trace")),
840
+ )
803
841
  metrics = data.get("metrics", {}) or {}
804
842
  metrics.setdefault("outcome_score", None)
805
843
  metrics.setdefault("events_score", None)
806
844
  metrics.setdefault("details", {})
807
845
  data["metrics"] = metrics
846
+
847
+ if data.get("trace") is None:
848
+ legacy_trace = getattr(legacy_response, "trace", None)
849
+ if legacy_trace is not None:
850
+ data["trace"] = legacy_trace
851
+ else:
852
+ tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
853
+ if callable(tracer_factory):
854
+ tracer = tracer_factory()
855
+ logger.debug("ROLLOUT_EXEC: trace backfill factory=%s", type(tracer))
856
+ if isinstance(tracer, SessionTracer):
857
+ try:
858
+ await tracer.initialize()
859
+ if tracer.db is not None:
860
+ trace_row = await tracer.db.get_session_trace(request.run_id)
861
+ if trace_row is not None:
862
+ data["trace"] = trace_row
863
+ except Exception as exc:
864
+ logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
865
+ finally:
866
+ with suppress(Exception):
867
+ await tracer.close()
808
868
 
809
869
  # Add trace_correlation_id at TOP-LEVEL (REQUIRED for RL training pipeline)
810
870
  # Use fallback if somehow missing
@@ -820,12 +880,30 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
820
880
  if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
821
881
  existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
822
882
  data["pipeline_metadata"] = existing_meta
823
-
883
+
824
884
  # Add trace_correlation_id to each trajectory (required for RL training pipeline)
825
885
  if "trajectories" in data:
886
+ normalized_trajs: list[dict[str, Any]] = []
826
887
  for traj in data.get("trajectories", []):
827
- if isinstance(traj, dict):
828
- traj["trace_correlation_id"] = final_cid
888
+ if isinstance(traj, BaseModel):
889
+ traj_dict = traj.model_dump()
890
+ elif isinstance(traj, dict):
891
+ traj_dict = dict(traj)
892
+ else:
893
+ continue
894
+ traj_dict["trace_correlation_id"] = final_cid
895
+ if not traj_dict.get("inference_url"):
896
+ inferred_url = policy_cfg.get("inference_url")
897
+ if inferred_url:
898
+ traj_dict["inference_url"] = inferred_url
899
+ normalized_trajs.append(traj_dict)
900
+ if normalized_trajs:
901
+ data["trajectories"] = normalized_trajs
902
+ logger.info(
903
+ "ROLLOUT_EXEC: normalized trajectory sample run_id=%s inference_url=%s",
904
+ request.run_id,
905
+ normalized_trajs[0].get("inference_url") if normalized_trajs else None,
906
+ )
829
907
  logger.info(
830
908
  "ROLLOUT_EXEC: final pipeline metadata run_id=%s metadata=%s",
831
909
  request.run_id,
@@ -844,6 +922,12 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
844
922
  request.run_id,
845
923
  existing_meta,
846
924
  )
925
+
926
+ if data.get("trace") is None:
927
+ raise HTTPException(
928
+ status_code=500,
929
+ detail="trace_payload_missing: task app did not emit a SessionTrace",
930
+ )
847
931
 
848
932
  # ASSERTION: Verify trace_correlation_id is present in response at all required levels
849
933
  assert "trace_correlation_id" in data, (
@@ -962,6 +1046,7 @@ register_task_app(
962
1046
  (str(RUBRICS_ROOT), "/opt/synth_ai_repo/examples/multi_step/rubrics"),
963
1047
  ),
964
1048
  secret_names=("groq-api-key", "openai-api-key"),
1049
+ env_vars={"SERVICE": "MODAL"},
965
1050
  memory=16384,
966
1051
  cpu=4.0,
967
1052
  max_containers=10,
@@ -3,7 +3,7 @@
3
3
  This module now delegates to the TaskAppConfig defined in the colocated example at
4
4
  `examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
5
5
  (running the file directly or targeting `fastapi_app` from external tooling). Prefer using
6
- `uvx synth-ai serve grpo-crafter` for local development and testing.
6
+ `uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
7
7
  """
8
8
 
9
9
  from __future__ import annotations
@@ -197,6 +197,8 @@ class CrafterPolicy(Policy):
197
197
  if self.use_tools:
198
198
  payload["tools"] = TOOLS_SCHEMA
199
199
  payload["tool_choice"] = "required"
200
+ payload["function_call"] = {"name": "interact_many"}
201
+ payload["parallel_tool_calls"] = False
200
202
  # Ensure the inference server injects family-specific stop sequences
201
203
  # to terminate immediately after the first tool call for compliance.
202
204
  payload["stop_after_tool_calls"] = 1
@@ -207,13 +209,7 @@ class CrafterPolicy(Policy):
207
209
  response: dict[str, Any],
208
210
  use_tools: bool = True,
209
211
  ) -> list[dict[str, Any]]:
210
- """Turn an inference response into environment tool calls.
211
-
212
- - If tools were used, expect tool_calls-compatible output and forward as-is
213
- in our simple JSON format: {"tool_name": str, "arguments": {...}}.
214
- - If no tools, parse plain-text actions using CrafterReActAgent parser and
215
- wrap them into a single interact_many tool call.
216
- """
212
+ """Turn an inference response into environment tool calls."""
217
213
  # First check if we got actual tool calls
218
214
  choices = response.get("choices", [])
219
215
  tool_calls: list[dict[str, Any]] = []
@@ -272,24 +268,6 @@ class CrafterPolicy(Policy):
272
268
  normalized.append(tc)
273
269
  return normalized
274
270
 
275
- # Otherwise, parse plain text content for actions
276
- text = ""
277
- for choice in choices:
278
- msg = choice.get("message", {})
279
- content = msg.get("content", "")
280
- if content:
281
- text = content
282
- break
283
-
284
- if text:
285
- # Try to parse actions from the text
286
- from .shared import parse_actions
287
-
288
- actions = parse_actions(text)
289
- if actions:
290
- # Wrap actions in interact_many tool call
291
- return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
292
-
293
271
  # No actions found
294
272
  return []
295
273
 
@@ -542,7 +520,7 @@ class CrafterPolicy(Policy):
542
520
  "claude-3", # All Claude 3 models support vision
543
521
  "gemini", # Gemini models
544
522
  "qwen-vl", # Qwen Vision-Language models
545
- "qwen2-vl", # Qwen2 VL
523
+ "qwen3-vl", # Qwen3 VL
546
524
  "pixtral", # Mistral's vision model
547
525
  "llava", # LLaVA models
548
526
  "phi-3-vision", # Microsoft Phi-3 Vision
@@ -45,8 +45,7 @@ class CrafterReActAgent:
45
45
  "Action policy:\n"
46
46
  "- Always return a single tool call: interact_many({actions: [...]})\n"
47
47
  "- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
48
- "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
49
- "- Do not spam the same exact sequence twice in a row—explore in varied directions.\n\n"
48
+ "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n\n"
50
49
  "Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
51
50
  "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
52
51
  "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import contextlib
4
+ import logging
4
5
  import os
5
6
 
6
7
  from fastapi import FastAPI
@@ -9,6 +10,52 @@ from fastapi.responses import JSONResponse
9
10
  from pydantic import BaseModel
10
11
  from starlette.requests import Request
11
12
 
13
+ logger = logging.getLogger(__name__)
14
+
15
+ _VERSION_LOGGED = False
16
+
17
+
18
+ def _resolve_task_app_version() -> str:
19
+ env_version = os.getenv("TASK_APP_VERSION")
20
+ if isinstance(env_version, str) and env_version.strip():
21
+ return env_version.strip()
22
+
23
+ try:
24
+ import importlib.metadata as importlib_metadata # python 3.11 stdlib
25
+
26
+ pkg_version = importlib_metadata.version("synth-ai")
27
+ if isinstance(pkg_version, str) and pkg_version.strip():
28
+ return pkg_version.strip()
29
+ except Exception:
30
+ pass
31
+
32
+ try:
33
+ import synth_ai
34
+
35
+ attr_version = getattr(synth_ai, "__version__", None)
36
+ if isinstance(attr_version, str) and attr_version.strip():
37
+ return attr_version.strip()
38
+ except Exception:
39
+ pass
40
+
41
+ return "unknown"
42
+
43
+
44
+ def _log_task_app_version_once() -> None:
45
+ global _VERSION_LOGGED
46
+ if _VERSION_LOGGED:
47
+ return
48
+
49
+ version = _resolve_task_app_version()
50
+ build_id = os.getenv("TASK_APP_BUILD_ID")
51
+
52
+ if build_id:
53
+ logger.info("TASK_APP_VERSION: %s (build=%s)", version, build_id)
54
+ else:
55
+ logger.info("TASK_APP_VERSION: %s", version)
56
+
57
+ _VERSION_LOGGED = True
58
+
12
59
 
13
60
  class TaskApp:
14
61
  """Holds service configuration and shared state."""
@@ -56,6 +103,8 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
56
103
  allow_headers=["*"],
57
104
  )
58
105
 
106
+ _log_task_app_version_once()
107
+
59
108
  # Initialize task app configuration
60
109
  task_app = TaskApp()
61
110
  app.state.task_app = task_app