synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (236) hide show
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +9 -9
  4. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  5. examples/multi_step/convert_traces_to_sft.py +84 -0
  6. examples/multi_step/run_sft_qwen30b.sh +45 -0
  7. examples/qwen_coder/configs/coder_lora_30b.toml +2 -1
  8. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  9. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  10. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  11. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  12. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  13. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  14. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  15. examples/qwen_vl/QUICKSTART.md +327 -0
  16. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  17. examples/qwen_vl/README.md +154 -0
  18. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  19. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  20. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  21. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  22. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  23. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  24. examples/qwen_vl/__init__.py +2 -0
  25. examples/qwen_vl/collect_data_via_cli.md +423 -0
  26. examples/qwen_vl/collect_vision_traces.py +368 -0
  27. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  28. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  29. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  30. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  31. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  32. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  33. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  34. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  35. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  36. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  37. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  38. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  39. examples/qwen_vl/run_vision_comparison.sh +62 -0
  40. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  41. examples/qwen_vl/test_image_validation.py +201 -0
  42. examples/qwen_vl/test_sft_vision_data.py +110 -0
  43. examples/rl/README.md +1 -1
  44. examples/rl/configs/eval_base_qwen.toml +17 -0
  45. examples/rl/configs/eval_rl_qwen.toml +13 -0
  46. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  47. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  48. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  49. examples/rl/run_eval.py +436 -0
  50. examples/rl/run_rl_and_save.py +111 -0
  51. examples/rl/task_app/README.md +22 -0
  52. examples/rl/task_app/math_single_step.py +990 -0
  53. examples/rl/task_app/math_task_app.py +111 -0
  54. examples/sft/README.md +5 -5
  55. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  56. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  57. examples/sft/evaluate.py +2 -4
  58. examples/sft/export_dataset.py +7 -4
  59. examples/swe/task_app/README.md +1 -1
  60. examples/swe/task_app/grpo_swe_mini.py +0 -1
  61. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  62. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  63. examples/swe/task_app/hosted/policy_routes.py +0 -2
  64. examples/swe/task_app/hosted/rollout.py +0 -8
  65. examples/task_apps/crafter/task_app/grpo_crafter.py +4 -7
  66. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +59 -1
  67. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +30 -0
  68. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +62 -31
  69. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +16 -14
  70. examples/task_apps/enron/__init__.py +1 -0
  71. examples/vlm/README.md +3 -3
  72. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  73. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  74. examples/vlm/filter_image_rows.py +1 -1
  75. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  76. examples/warming_up_to_rl/_utils.py +92 -0
  77. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  78. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  79. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  80. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  81. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  82. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  83. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  84. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  85. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  86. examples/warming_up_to_rl/readme.md +63 -132
  87. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  88. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  89. examples/warming_up_to_rl/task_app/README.md +42 -0
  90. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  91. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  111. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  112. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  113. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  114. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  115. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  116. synth_ai/__init__.py +44 -30
  117. synth_ai/_utils/__init__.py +47 -0
  118. synth_ai/_utils/base_url.py +10 -0
  119. synth_ai/_utils/http.py +10 -0
  120. synth_ai/_utils/prompts.py +10 -0
  121. synth_ai/_utils/task_app_state.py +12 -0
  122. synth_ai/_utils/user_config.py +10 -0
  123. synth_ai/api/models/supported.py +144 -7
  124. synth_ai/api/train/__init__.py +13 -1
  125. synth_ai/api/train/cli.py +30 -7
  126. synth_ai/api/train/config_finder.py +18 -11
  127. synth_ai/api/train/env_resolver.py +13 -10
  128. synth_ai/cli/__init__.py +62 -78
  129. synth_ai/cli/_modal_wrapper.py +7 -5
  130. synth_ai/cli/_typer_patch.py +0 -2
  131. synth_ai/cli/_validate_task_app.py +22 -4
  132. synth_ai/cli/legacy_root_backup.py +3 -1
  133. synth_ai/cli/lib/__init__.py +10 -0
  134. synth_ai/cli/lib/task_app_discovery.py +7 -0
  135. synth_ai/cli/lib/task_app_env.py +518 -0
  136. synth_ai/cli/recent.py +2 -1
  137. synth_ai/cli/setup.py +266 -0
  138. synth_ai/cli/status.py +1 -1
  139. synth_ai/cli/task_app_deploy.py +16 -0
  140. synth_ai/cli/task_app_list.py +25 -0
  141. synth_ai/cli/task_app_modal_serve.py +16 -0
  142. synth_ai/cli/task_app_serve.py +18 -0
  143. synth_ai/cli/task_apps.py +71 -31
  144. synth_ai/cli/traces.py +1 -1
  145. synth_ai/cli/train.py +18 -0
  146. synth_ai/cli/tui.py +7 -2
  147. synth_ai/cli/turso.py +1 -1
  148. synth_ai/cli/watch.py +1 -1
  149. synth_ai/demos/__init__.py +10 -0
  150. synth_ai/demos/core/__init__.py +28 -1
  151. synth_ai/demos/crafter/__init__.py +1 -0
  152. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  153. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  154. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  155. synth_ai/demos/demo_registry.py +176 -0
  156. synth_ai/demos/math/__init__.py +1 -0
  157. synth_ai/demos/math/_common.py +16 -0
  158. synth_ai/demos/math/app.py +38 -0
  159. synth_ai/demos/math/config.toml +76 -0
  160. synth_ai/demos/math/deploy_modal.py +54 -0
  161. synth_ai/demos/math/modal_task_app.py +702 -0
  162. synth_ai/demos/math/task_app_entry.py +51 -0
  163. synth_ai/environments/environment/core.py +7 -1
  164. synth_ai/environments/examples/bandit/engine.py +0 -1
  165. synth_ai/environments/examples/bandit/environment.py +0 -1
  166. synth_ai/environments/examples/wordle/environment.py +0 -1
  167. synth_ai/evals/base.py +16 -5
  168. synth_ai/evals/client.py +1 -1
  169. synth_ai/inference/client.py +1 -1
  170. synth_ai/judge_schemas.py +8 -8
  171. synth_ai/learning/client.py +1 -1
  172. synth_ai/learning/health.py +1 -1
  173. synth_ai/learning/jobs.py +1 -1
  174. synth_ai/learning/rl/client.py +1 -1
  175. synth_ai/learning/rl/env_keys.py +1 -1
  176. synth_ai/learning/rl/secrets.py +1 -1
  177. synth_ai/learning/sft/client.py +1 -1
  178. synth_ai/learning/sft/data.py +407 -4
  179. synth_ai/learning/validators.py +4 -1
  180. synth_ai/task/apps/__init__.py +4 -2
  181. synth_ai/task/config.py +6 -4
  182. synth_ai/task/rubrics/__init__.py +1 -2
  183. synth_ai/task/rubrics/loaders.py +14 -10
  184. synth_ai/task/rubrics.py +219 -0
  185. synth_ai/task/trace_correlation_helpers.py +24 -11
  186. synth_ai/task/tracing_utils.py +14 -3
  187. synth_ai/task/validators.py +2 -3
  188. synth_ai/tracing_v3/abstractions.py +3 -3
  189. synth_ai/tracing_v3/config.py +15 -13
  190. synth_ai/tracing_v3/constants.py +21 -0
  191. synth_ai/tracing_v3/db_config.py +3 -1
  192. synth_ai/tracing_v3/decorators.py +10 -7
  193. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  194. synth_ai/tracing_v3/session_tracer.py +7 -7
  195. synth_ai/tracing_v3/storage/base.py +29 -29
  196. synth_ai/tracing_v3/storage/config.py +3 -3
  197. synth_ai/tracing_v3/turso/daemon.py +8 -9
  198. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  199. synth_ai/tracing_v3/utils.py +2 -2
  200. synth_ai/tui/cli/query_experiments.py +4 -4
  201. synth_ai/tui/cli/query_experiments_v3.py +4 -4
  202. synth_ai/tui/dashboard.py +14 -9
  203. synth_ai/utils/__init__.py +101 -0
  204. synth_ai/utils/base_url.py +94 -0
  205. synth_ai/utils/cli.py +131 -0
  206. synth_ai/utils/env.py +287 -0
  207. synth_ai/utils/http.py +169 -0
  208. synth_ai/utils/modal.py +308 -0
  209. synth_ai/utils/process.py +212 -0
  210. synth_ai/utils/prompts.py +39 -0
  211. synth_ai/utils/sqld.py +122 -0
  212. synth_ai/utils/task_app_discovery.py +882 -0
  213. synth_ai/utils/task_app_env.py +186 -0
  214. synth_ai/utils/task_app_state.py +318 -0
  215. synth_ai/utils/user_config.py +137 -0
  216. synth_ai/v0/config/__init__.py +1 -5
  217. synth_ai/v0/config/base_url.py +1 -7
  218. synth_ai/v0/tracing/config.py +1 -1
  219. synth_ai/v0/tracing/decorators.py +1 -1
  220. synth_ai/v0/tracing/upload.py +1 -1
  221. synth_ai/v0/tracing_v1/config.py +1 -1
  222. synth_ai/v0/tracing_v1/decorators.py +1 -1
  223. synth_ai/v0/tracing_v1/upload.py +1 -1
  224. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  225. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/RECORD +229 -117
  226. synth_ai/cli/man.py +0 -106
  227. synth_ai/compound/cais.py +0 -0
  228. synth_ai/core/experiment.py +0 -13
  229. synth_ai/core/system.py +0 -15
  230. synth_ai/demo_registry.py +0 -295
  231. synth_ai/handshake.py +0 -109
  232. synth_ai/http.py +0 -26
  233. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  234. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  235. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  236. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
@@ -417,8 +417,6 @@ async def step_policy(
417
417
  inf_req = meta["inference_request"]
418
418
  msgs = inf_req["messages"]
419
419
  model_name = inf_req.get("model") or getattr(policy, "model", None) or ""
420
- system_messages: list[str] = []
421
- user_messages: list[str] = []
422
420
  if msgs and len(msgs) > 0 and msgs[0]["role"] == "system":
423
421
  sys_text = msgs[0]["content"]
424
422
  policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
@@ -901,38 +899,71 @@ async def step_policy(
901
899
  req_body["temperature"] = 0.1
902
900
  meta["inference_request"] = req_body
903
901
 
904
- # Strip image parts: Crafter policy currently only uses text prompts.
905
- # Some providers reject image_url payloads entirely, so always flatten to plain text.
906
- req_body2 = meta.get("inference_request", {})
907
- if isinstance(req_body2, dict):
908
- msgs = req_body2.get("messages")
909
- if isinstance(msgs, list):
910
- new_msgs = []
911
- changed = False
912
- for m in msgs:
913
- try:
914
- if isinstance(m, dict):
915
- content = m.get("content")
916
- if isinstance(content, list):
917
- parts: list[str] = []
918
- for seg in content:
919
- if isinstance(seg, dict):
920
- txt = seg.get("text") or seg.get("content")
921
- if isinstance(txt, str) and txt:
922
- parts.append(txt)
923
- m2 = dict(m)
924
- m2["content"] = "\n".join(parts)
925
- new_msgs.append(m2)
926
- changed = True
902
+ # Message flattening: Convert multimodal content to text-only for non-vision models.
903
+ # SKIP message flattening for vision models to preserve image_url parts!
904
+ # The old code here was flattening multimodal content (list) to text-only (str),
905
+ # which strips out image_url parts. This breaks vision models.
906
+ # Only flatten for non-vision models that can't handle multimodal format.
907
+ is_vision_model = False
908
+ try:
909
+ # Check if the policy is a vision-capable policy
910
+ if isinstance(policy, CrafterPolicy):
911
+ is_vision_model = getattr(policy, "use_vision", False)
912
+ except Exception:
913
+ pass
914
+
915
+ logger.debug(f"🔊 [POLICY_ROUTES] is_vision_model={is_vision_model}, will_flatten={not is_vision_model}")
916
+
917
+ if not is_vision_model:
918
+ # Only flatten for non-vision models (backward compatibility)
919
+ req_body2 = meta.get("inference_request", {})
920
+ if isinstance(req_body2, dict):
921
+ msgs = req_body2.get("messages")
922
+ if isinstance(msgs, list):
923
+ new_msgs = []
924
+ changed = False
925
+ for m in msgs:
926
+ try:
927
+ if isinstance(m, dict):
928
+ content = m.get("content")
929
+ if isinstance(content, list):
930
+ parts: list[str] = []
931
+ for seg in content:
932
+ if isinstance(seg, dict):
933
+ txt = seg.get("text") or seg.get("content")
934
+ if isinstance(txt, str) and txt:
935
+ parts.append(txt)
936
+ m2 = dict(m)
937
+ m2["content"] = "\n".join(parts)
938
+ new_msgs.append(m2)
939
+ changed = True
940
+ else:
941
+ new_msgs.append(m)
927
942
  else:
928
943
  new_msgs.append(m)
929
- else:
944
+ except Exception:
930
945
  new_msgs.append(m)
931
- except Exception:
932
- new_msgs.append(m)
933
- if changed:
934
- req_body2["messages"] = new_msgs
935
- meta["inference_request"] = req_body2
946
+ if changed:
947
+ req_body2["messages"] = new_msgs
948
+ meta["inference_request"] = req_body2
949
+ logger.debug(f"🔊 [POLICY_ROUTES] Flattened messages for non-vision model")
950
+ else:
951
+ logger.debug(f"🔊 [POLICY_ROUTES] Preserving multimodal content for vision model")
952
+
953
+ # DEBUG: Log final message structure before calling inference
954
+ final_req = meta.get("inference_request", {})
955
+ if isinstance(final_req, dict):
956
+ final_msgs = final_req.get("messages", [])
957
+ logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Sending {len(final_msgs)} messages to inference")
958
+ for idx, msg in enumerate(final_msgs):
959
+ if isinstance(msg, dict):
960
+ content = msg.get("content")
961
+ logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Message[{idx}]: type={type(content).__name__}, is_list={isinstance(content, list)}")
962
+ if isinstance(content, list):
963
+ logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Content list has {len(content)} items")
964
+ for part_idx, part in enumerate(content[:3]): # Show first 3 items
965
+ if isinstance(part, dict):
966
+ logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Part[{part_idx}]: type={part.get('type')}")
936
967
 
937
968
  _t_start = _t.time()
938
969
  call_started_at = datetime.utcnow()
@@ -985,10 +985,15 @@ class RolloutTracingContext:
985
985
  def build_trace_payload(self, session_trace: Any) -> dict[str, Any] | None:
986
986
  if not self.return_trace or session_trace is None:
987
987
  return None
988
- if self.trace_format == "full":
988
+
989
+ # For both "full" and "structured" formats, return the complete session trace
990
+ # The CLI (synth-ai eval) expects this for proper trace storage
991
+ if self.trace_format in ("full", "structured"):
989
992
  payload = session_trace.to_dict()
990
993
  payload.setdefault("metadata", {}).update(self.metadata_updates)
991
994
  return payload
995
+
996
+ # For "compact" format, return only summary stats
992
997
  metadata = dict(session_trace.metadata)
993
998
  metadata.update(self.metadata_updates)
994
999
  return {
@@ -1173,14 +1178,6 @@ async def execute_rollout(
1173
1178
  logger.debug(f"TRACER_FACTORY_FAIL: {exc}")
1174
1179
  tracing_context = RolloutTracingContext(tracer_instance, request, req)
1175
1180
  await tracing_context.start_session()
1176
- # Print whether tracing is active for this rollout
1177
- try:
1178
- print(
1179
- f"[rollout] tracing enabled={bool(tracing_context.enabled)} run_id={request.run_id}",
1180
- flush=True,
1181
- )
1182
- except Exception:
1183
- pass
1184
1181
 
1185
1182
  # Register run
1186
1183
  registry.register_run(request.run_id)
@@ -1625,16 +1622,21 @@ async def execute_rollout(
1625
1622
 
1626
1623
  elif op == "env":
1627
1624
  if not pending_tool_calls:
1625
+ # Instead of failing, inject a no-op action to keep the rollout going
1628
1626
  with contextlib.suppress(Exception):
1629
1627
  logger.warning(
1630
- "POLICY_STEP_FAIL: missing tool_calls; failing rollout run_id=%s op_idx=%s",
1628
+ "POLICY_STEP_NOOP: missing tool_calls; injecting noop action run_id=%s op_idx=%s",
1631
1629
  request.run_id,
1632
1630
  str(op_idx),
1633
1631
  )
1634
- raise HTTPException(
1635
- status_code=500,
1636
- detail="policy_step_failed: missing tool_calls (no_tool_calls)",
1637
- )
1632
+ # Create a noop tool call in the format expected by the environment
1633
+ pending_tool_calls = [
1634
+ {
1635
+ "id": f"noop_{op_idx}",
1636
+ "tool": "interact",
1637
+ "arguments": {"action": "noop"},
1638
+ }
1639
+ ]
1638
1640
 
1639
1641
  # Environment step
1640
1642
  from .environment_routes import EnvStepRequest, step_environment
@@ -1 +1,2 @@
1
1
 
2
+
examples/vlm/README.md CHANGED
@@ -21,8 +21,8 @@ plumbing with lightweight utilities for dataset curation and training.
21
21
  3. **Export multimodal SFT rows**
22
22
  ```
23
23
  uv run python examples/warming_up_to_rl/export_trace_sft.py \
24
- --db traces/v3/synth_ai.db \
25
- --output examples/vlm/output/crafter_traces_full.jsonl
24
+ --db traces/v3/task_app_traces_<timestamp>.db \
25
+ --output examples/vlm/output/crafter_sft_full.jsonl
26
26
  ```
27
27
  The exporter now emits `metadata.has_image`, `metadata.user_has_image`, and
28
28
  `metadata.assistant_has_image` flags per turn.
@@ -30,7 +30,7 @@ plumbing with lightweight utilities for dataset curation and training.
30
30
  4. **Filter to image-rich turns**
31
31
  ```
32
32
  uv run python examples/vlm/filter_image_rows.py \
33
- --input examples/vlm/output/crafter_traces_full.jsonl \
33
+ --input examples/vlm/output/crafter_sft_full.jsonl \
34
34
  --output examples/vlm/output/crafter_vlm_dataset.jsonl
35
35
  ```
36
36
 
@@ -1,3 +1,5 @@
1
+ type = "sft"
2
+
1
3
  [job]
2
4
  model = "openai/gpt-4o-mini-2024-07-18"
3
5
  modalities = ["text", "image"]
@@ -24,6 +24,7 @@ import asyncio
24
24
  import base64
25
25
  import json
26
26
  import os
27
+ from contextlib import suppress
27
28
  from pathlib import Path
28
29
  from typing import Any
29
30
  from uuid import uuid4
@@ -62,7 +63,7 @@ class EpisodeResult:
62
63
  if unlocked:
63
64
  self.achievements.add(str(name))
64
65
  reward = obs.get("reward_last_step")
65
- if isinstance(reward, (int, float)):
66
+ if isinstance(reward, int | float):
66
67
  self.total_reward += float(reward)
67
68
 
68
69
 
@@ -107,11 +108,8 @@ def _decode_and_save_image(observation: dict[str, Any], path: Path) -> None:
107
108
  if not isinstance(base64_data, str) or not base64_data:
108
109
  return
109
110
  path.parent.mkdir(parents=True, exist_ok=True)
110
- try:
111
+ with suppress(Exception):
111
112
  path.write_bytes(base64.b64decode(base64_data))
112
- except Exception:
113
- # Best-effort; corrupted frames should not halt rollout
114
- pass
115
113
 
116
114
 
117
115
  def _normalise_openai_request(payload: dict[str, Any], model: str, temperature: float) -> dict[str, Any]:
@@ -8,7 +8,7 @@ output now that each record's metadata includes `has_image`, `user_has_image`, a
8
8
 
9
9
  Usage:
10
10
  uv run python examples/vlm/filter_image_rows.py \
11
- --input examples/sft/ft_data/crafter_traces.jsonl \
11
+ --input examples/sft/ft_data/crafter_sft.jsonl \
12
12
  --output examples/vlm/output/crafter_vlm_dataset.jsonl
13
13
  """
14
14
 
@@ -224,7 +224,7 @@ async def _run_episode(
224
224
  if unlocked:
225
225
  achievements.add(str(name))
226
226
  reward = obs.get("reward_last_step")
227
- if isinstance(reward, (int, float)):
227
+ if isinstance(reward, int | float):
228
228
  total_reward += float(reward)
229
229
 
230
230
  _save_observation_frame(env_response, frames_dir / f"step_{step_idx + 1:03d}.png")
@@ -263,7 +263,7 @@ def _summarise(results: list[EpisodeResult]) -> dict[str, Any]:
263
263
  "mean_steps": round(mean_steps, 2),
264
264
  "mean_achievements": round(mean_achievements, 2),
265
265
  "total_tool_calls": sum(r.tool_calls for r in mode_results),
266
- "achievements": {name: count for name, count in sorted(achievement_counts.items())},
266
+ "achievements": dict(sorted(achievement_counts.items())),
267
267
  }
268
268
  return summary
269
269
 
@@ -0,0 +1,92 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterable, Sequence
4
+
5
+ from synth_ai.task import (
6
+ RolloutEnvSpec,
7
+ RolloutPolicySpec,
8
+ RolloutRecordConfig,
9
+ RolloutRequest,
10
+ RolloutSafetyConfig,
11
+ )
12
+
13
+ DEFAULT_POLICY_NAME = "crafter-react"
14
+ DEFAULT_ENV_NAME = "crafter"
15
+
16
+
17
+ def parse_ops(spec: str | None) -> list[str] | None:
18
+ """Parse a comma-separated operations string into a list."""
19
+
20
+ if spec is None:
21
+ return None
22
+ ops = [op.strip() for op in spec.split(",") if op.strip()]
23
+ if not ops:
24
+ raise ValueError("Ops must contain at least one entry")
25
+ return ops
26
+
27
+
28
+ def ops_from_pairs(max_llm_calls: int, *, cap: int | None = None) -> list[str]:
29
+ """Return alternating agent/env ops for the requested number of LLM calls."""
30
+
31
+ pairs = max(1, int(max_llm_calls or 0))
32
+ if cap is not None:
33
+ pairs = min(pairs, cap)
34
+ ops: list[str] = []
35
+ for _ in range(pairs):
36
+ ops.extend(["agent", "env"])
37
+ return ops
38
+
39
+
40
+ def build_rollout_request(
41
+ *,
42
+ seed: int,
43
+ run_id: str,
44
+ model: str,
45
+ inference_url: str,
46
+ ops: Sequence[str] | Iterable[str],
47
+ inference_api_key: str | None = None,
48
+ extra_headers: dict[str, str] | None = None,
49
+ trace_format: str = "compact",
50
+ return_trace: bool = False,
51
+ policy_name: str = DEFAULT_POLICY_NAME,
52
+ env_name: str = DEFAULT_ENV_NAME,
53
+ max_policy_tokens: int | None = None,
54
+ record_trajectories: bool = True,
55
+ ) -> RolloutRequest:
56
+ """Construct a RolloutRequest shared across local rollout utilities."""
57
+
58
+ policy_config: dict[str, object] = {
59
+ "model": model,
60
+ "inference_url": inference_url,
61
+ }
62
+ if inference_api_key is not None:
63
+ policy_config["api_key"] = inference_api_key
64
+ if extra_headers:
65
+ policy_config["extra_headers"] = extra_headers
66
+ if max_policy_tokens is not None:
67
+ policy_config["max_completion_tokens"] = max_policy_tokens
68
+ policy_config["max_tokens"] = max_policy_tokens
69
+
70
+ record_cfg = RolloutRecordConfig(
71
+ trajectories=record_trajectories,
72
+ trace_format=trace_format,
73
+ return_trace=return_trace,
74
+ )
75
+ return RolloutRequest(
76
+ run_id=run_id,
77
+ env=RolloutEnvSpec(env_name=env_name, seed=seed, config={}),
78
+ policy=RolloutPolicySpec(policy_name=policy_name, config=policy_config),
79
+ ops=list(ops),
80
+ record=record_cfg,
81
+ on_done="reset",
82
+ safety=RolloutSafetyConfig(),
83
+ )
84
+
85
+
86
+ __all__ = [
87
+ "DEFAULT_POLICY_NAME",
88
+ "DEFAULT_ENV_NAME",
89
+ "build_rollout_request",
90
+ "ops_from_pairs",
91
+ "parse_ops",
92
+ ]
@@ -383,7 +383,7 @@ def main() -> None:
383
383
  parser.add_argument(
384
384
  "--db",
385
385
  type=Path,
386
- default=Path("traces/v3/synth_ai.db"),
386
+ default=Path("traces/task_app_traces.db"),
387
387
  help="Path to the tracing_v3 SQLite database",
388
388
  )
389
389
  args = parser.parse_args()
@@ -1,6 +1,8 @@
1
1
  # Crafter Full Finetune (FFT) example on H100
2
2
  # Adjust paths and hyperparameters to your environment before running.
3
3
 
4
+ type = "sft"
5
+
4
6
  [job]
5
7
  model = "Qwen/Qwen3-4B" # base model to finetune
6
8
  # Path to your SFT JSONL dataset
@@ -1,5 +1,7 @@
1
1
  # FFT job config for Qwen/Qwen3-4B on Crafter SFT dataset
2
2
 
3
+ type = "sft"
4
+
3
5
  [algorithm]
4
6
  type = "offline"
5
7
  method = "supervised_finetune"
@@ -1,5 +1,7 @@
1
1
  # Eval config for finetuned Qwen/Qwen3-4B (FFT) via task app rollout
2
2
 
3
+ type = "sft"
4
+
3
5
  # Required
4
6
  task_app_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
5
7
  # Replace with your finished job id if different
@@ -1,6 +1,8 @@
1
1
  # Eval config for Groq Qwen3-32B
2
2
  # Fields mirror run_eval.py expectations
3
3
 
4
+ type = "rl"
5
+
4
6
  # Required
5
7
  task_app_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
6
8
  model = "qwen/qwen3-32b"
@@ -1,5 +1,7 @@
1
1
  # Eval config for Synth Modal inference Qwen/Qwen3-4B via task app rollout
2
2
 
3
+ type = "rl"
4
+
3
5
  # Required
4
6
  task_app_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
5
7
  model = "Qwen/Qwen3-4B"
@@ -20,4 +22,3 @@ concurrency = 10
20
22
  # fetch the vLLM base from the task app /info to use as inference_url.
21
23
  # - Ensure the task app mounts the openai-api-key secret if your vLLM gateway
22
24
  # requires a bearer token (OPENAI_API_KEY). Otherwise it will call unauthenticated.
23
-
@@ -1,11 +1,12 @@
1
1
  # RL training starting from base Qwen/Qwen3-4B (TOML-only model selection)
2
2
 
3
+ type = "rl"
4
+
3
5
  [algorithm]
4
6
  type = "online"
5
7
  method = "policy_gradient"
6
8
  variety = "gspo"
7
9
 
8
-
9
10
  [services]
10
11
  task_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
11
12
 
@@ -1,5 +1,7 @@
1
1
  # RL training starting from a finetuned model id (TOML-only model selection)
2
2
 
3
+ type = "rl"
4
+
3
5
  [services]
4
6
  # Task app base URL used by the RL job for rollouts
5
7
  # task_url = "https://YOUR-TASK-APP.modal.run"