synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/README.md +1 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +9 -9
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +2 -1
- examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
- examples/qwen_coder/configs/coder_lora_small.toml +2 -1
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +154 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +275 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +423 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
- examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +62 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +1 -1
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +37 -0
- examples/rl/configs/rl_from_base_qwen17.toml +76 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +22 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/sft/README.md +5 -5
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
- examples/sft/evaluate.py +2 -4
- examples/sft/export_dataset.py +7 -4
- examples/swe/task_app/README.md +1 -1
- examples/swe/task_app/grpo_swe_mini.py +0 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +0 -8
- examples/task_apps/crafter/task_app/grpo_crafter.py +4 -7
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +59 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +30 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +62 -31
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +16 -14
- examples/task_apps/enron/__init__.py +1 -0
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +144 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/cli.py +30 -7
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/cli/__init__.py +62 -78
- synth_ai/cli/_modal_wrapper.py +7 -5
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/recent.py +2 -1
- synth_ai/cli/setup.py +266 -0
- synth_ai/cli/status.py +1 -1
- synth_ai/cli/task_app_deploy.py +16 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +16 -0
- synth_ai/cli/task_app_serve.py +18 -0
- synth_ai/cli/task_apps.py +71 -31
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/train.py +18 -0
- synth_ai/cli/tui.py +7 -2
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/inference/client.py +1 -1
- synth_ai/judge_schemas.py +8 -8
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +1 -1
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/task/apps/__init__.py +4 -2
- synth_ai/task/config.py +6 -4
- synth_ai/task/rubrics/__init__.py +1 -2
- synth_ai/task/rubrics/loaders.py +14 -10
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +24 -11
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +2 -3
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/session_tracer.py +7 -7
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -9
- synth_ai/tracing_v3/turso/native_manager.py +80 -72
- synth_ai/tracing_v3/utils.py +2 -2
- synth_ai/tui/cli/query_experiments.py +4 -4
- synth_ai/tui/cli/query_experiments_v3.py +4 -4
- synth_ai/tui/dashboard.py +14 -9
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +287 -0
- synth_ai/utils/http.py +169 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/RECORD +229 -117
- synth_ai/cli/man.py +0 -106
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/http.py +0 -26
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
|
@@ -417,8 +417,6 @@ async def step_policy(
|
|
|
417
417
|
inf_req = meta["inference_request"]
|
|
418
418
|
msgs = inf_req["messages"]
|
|
419
419
|
model_name = inf_req.get("model") or getattr(policy, "model", None) or ""
|
|
420
|
-
system_messages: list[str] = []
|
|
421
|
-
user_messages: list[str] = []
|
|
422
420
|
if msgs and len(msgs) > 0 and msgs[0]["role"] == "system":
|
|
423
421
|
sys_text = msgs[0]["content"]
|
|
424
422
|
policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
|
|
@@ -901,38 +899,71 @@ async def step_policy(
|
|
|
901
899
|
req_body["temperature"] = 0.1
|
|
902
900
|
meta["inference_request"] = req_body
|
|
903
901
|
|
|
904
|
-
#
|
|
905
|
-
#
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
902
|
+
# Message flattening: Convert multimodal content to text-only for non-vision models.
|
|
903
|
+
# SKIP message flattening for vision models to preserve image_url parts!
|
|
904
|
+
# The old code here was flattening multimodal content (list) to text-only (str),
|
|
905
|
+
# which strips out image_url parts. This breaks vision models.
|
|
906
|
+
# Only flatten for non-vision models that can't handle multimodal format.
|
|
907
|
+
is_vision_model = False
|
|
908
|
+
try:
|
|
909
|
+
# Check if the policy is a vision-capable policy
|
|
910
|
+
if isinstance(policy, CrafterPolicy):
|
|
911
|
+
is_vision_model = getattr(policy, "use_vision", False)
|
|
912
|
+
except Exception:
|
|
913
|
+
pass
|
|
914
|
+
|
|
915
|
+
logger.debug(f"🔊 [POLICY_ROUTES] is_vision_model={is_vision_model}, will_flatten={not is_vision_model}")
|
|
916
|
+
|
|
917
|
+
if not is_vision_model:
|
|
918
|
+
# Only flatten for non-vision models (backward compatibility)
|
|
919
|
+
req_body2 = meta.get("inference_request", {})
|
|
920
|
+
if isinstance(req_body2, dict):
|
|
921
|
+
msgs = req_body2.get("messages")
|
|
922
|
+
if isinstance(msgs, list):
|
|
923
|
+
new_msgs = []
|
|
924
|
+
changed = False
|
|
925
|
+
for m in msgs:
|
|
926
|
+
try:
|
|
927
|
+
if isinstance(m, dict):
|
|
928
|
+
content = m.get("content")
|
|
929
|
+
if isinstance(content, list):
|
|
930
|
+
parts: list[str] = []
|
|
931
|
+
for seg in content:
|
|
932
|
+
if isinstance(seg, dict):
|
|
933
|
+
txt = seg.get("text") or seg.get("content")
|
|
934
|
+
if isinstance(txt, str) and txt:
|
|
935
|
+
parts.append(txt)
|
|
936
|
+
m2 = dict(m)
|
|
937
|
+
m2["content"] = "\n".join(parts)
|
|
938
|
+
new_msgs.append(m2)
|
|
939
|
+
changed = True
|
|
940
|
+
else:
|
|
941
|
+
new_msgs.append(m)
|
|
927
942
|
else:
|
|
928
943
|
new_msgs.append(m)
|
|
929
|
-
|
|
944
|
+
except Exception:
|
|
930
945
|
new_msgs.append(m)
|
|
931
|
-
|
|
932
|
-
new_msgs
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
946
|
+
if changed:
|
|
947
|
+
req_body2["messages"] = new_msgs
|
|
948
|
+
meta["inference_request"] = req_body2
|
|
949
|
+
logger.debug(f"🔊 [POLICY_ROUTES] Flattened messages for non-vision model")
|
|
950
|
+
else:
|
|
951
|
+
logger.debug(f"🔊 [POLICY_ROUTES] Preserving multimodal content for vision model")
|
|
952
|
+
|
|
953
|
+
# DEBUG: Log final message structure before calling inference
|
|
954
|
+
final_req = meta.get("inference_request", {})
|
|
955
|
+
if isinstance(final_req, dict):
|
|
956
|
+
final_msgs = final_req.get("messages", [])
|
|
957
|
+
logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Sending {len(final_msgs)} messages to inference")
|
|
958
|
+
for idx, msg in enumerate(final_msgs):
|
|
959
|
+
if isinstance(msg, dict):
|
|
960
|
+
content = msg.get("content")
|
|
961
|
+
logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Message[{idx}]: type={type(content).__name__}, is_list={isinstance(content, list)}")
|
|
962
|
+
if isinstance(content, list):
|
|
963
|
+
logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Content list has {len(content)} items")
|
|
964
|
+
for part_idx, part in enumerate(content[:3]): # Show first 3 items
|
|
965
|
+
if isinstance(part, dict):
|
|
966
|
+
logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Part[{part_idx}]: type={part.get('type')}")
|
|
936
967
|
|
|
937
968
|
_t_start = _t.time()
|
|
938
969
|
call_started_at = datetime.utcnow()
|
|
@@ -985,10 +985,15 @@ class RolloutTracingContext:
|
|
|
985
985
|
def build_trace_payload(self, session_trace: Any) -> dict[str, Any] | None:
|
|
986
986
|
if not self.return_trace or session_trace is None:
|
|
987
987
|
return None
|
|
988
|
-
|
|
988
|
+
|
|
989
|
+
# For both "full" and "structured" formats, return the complete session trace
|
|
990
|
+
# The CLI (synth-ai eval) expects this for proper trace storage
|
|
991
|
+
if self.trace_format in ("full", "structured"):
|
|
989
992
|
payload = session_trace.to_dict()
|
|
990
993
|
payload.setdefault("metadata", {}).update(self.metadata_updates)
|
|
991
994
|
return payload
|
|
995
|
+
|
|
996
|
+
# For "compact" format, return only summary stats
|
|
992
997
|
metadata = dict(session_trace.metadata)
|
|
993
998
|
metadata.update(self.metadata_updates)
|
|
994
999
|
return {
|
|
@@ -1173,14 +1178,6 @@ async def execute_rollout(
|
|
|
1173
1178
|
logger.debug(f"TRACER_FACTORY_FAIL: {exc}")
|
|
1174
1179
|
tracing_context = RolloutTracingContext(tracer_instance, request, req)
|
|
1175
1180
|
await tracing_context.start_session()
|
|
1176
|
-
# Print whether tracing is active for this rollout
|
|
1177
|
-
try:
|
|
1178
|
-
print(
|
|
1179
|
-
f"[rollout] tracing enabled={bool(tracing_context.enabled)} run_id={request.run_id}",
|
|
1180
|
-
flush=True,
|
|
1181
|
-
)
|
|
1182
|
-
except Exception:
|
|
1183
|
-
pass
|
|
1184
1181
|
|
|
1185
1182
|
# Register run
|
|
1186
1183
|
registry.register_run(request.run_id)
|
|
@@ -1625,16 +1622,21 @@ async def execute_rollout(
|
|
|
1625
1622
|
|
|
1626
1623
|
elif op == "env":
|
|
1627
1624
|
if not pending_tool_calls:
|
|
1625
|
+
# Instead of failing, inject a no-op action to keep the rollout going
|
|
1628
1626
|
with contextlib.suppress(Exception):
|
|
1629
1627
|
logger.warning(
|
|
1630
|
-
"
|
|
1628
|
+
"POLICY_STEP_NOOP: missing tool_calls; injecting noop action run_id=%s op_idx=%s",
|
|
1631
1629
|
request.run_id,
|
|
1632
1630
|
str(op_idx),
|
|
1633
1631
|
)
|
|
1634
|
-
|
|
1635
|
-
|
|
1636
|
-
|
|
1637
|
-
|
|
1632
|
+
# Create a noop tool call in the format expected by the environment
|
|
1633
|
+
pending_tool_calls = [
|
|
1634
|
+
{
|
|
1635
|
+
"id": f"noop_{op_idx}",
|
|
1636
|
+
"tool": "interact",
|
|
1637
|
+
"arguments": {"action": "noop"},
|
|
1638
|
+
}
|
|
1639
|
+
]
|
|
1638
1640
|
|
|
1639
1641
|
# Environment step
|
|
1640
1642
|
from .environment_routes import EnvStepRequest, step_environment
|
examples/vlm/README.md
CHANGED
|
@@ -21,8 +21,8 @@ plumbing with lightweight utilities for dataset curation and training.
|
|
|
21
21
|
3. **Export multimodal SFT rows**
|
|
22
22
|
```
|
|
23
23
|
uv run python examples/warming_up_to_rl/export_trace_sft.py \
|
|
24
|
-
|
|
25
|
-
--output examples/vlm/output/
|
|
24
|
+
--db traces/v3/task_app_traces_<timestamp>.db \
|
|
25
|
+
--output examples/vlm/output/crafter_sft_full.jsonl
|
|
26
26
|
```
|
|
27
27
|
The exporter now emits `metadata.has_image`, `metadata.user_has_image`, and
|
|
28
28
|
`metadata.assistant_has_image` flags per turn.
|
|
@@ -30,7 +30,7 @@ plumbing with lightweight utilities for dataset curation and training.
|
|
|
30
30
|
4. **Filter to image-rich turns**
|
|
31
31
|
```
|
|
32
32
|
uv run python examples/vlm/filter_image_rows.py \
|
|
33
|
-
--input examples/vlm/output/
|
|
33
|
+
--input examples/vlm/output/crafter_sft_full.jsonl \
|
|
34
34
|
--output examples/vlm/output/crafter_vlm_dataset.jsonl
|
|
35
35
|
```
|
|
36
36
|
|
|
@@ -24,6 +24,7 @@ import asyncio
|
|
|
24
24
|
import base64
|
|
25
25
|
import json
|
|
26
26
|
import os
|
|
27
|
+
from contextlib import suppress
|
|
27
28
|
from pathlib import Path
|
|
28
29
|
from typing import Any
|
|
29
30
|
from uuid import uuid4
|
|
@@ -62,7 +63,7 @@ class EpisodeResult:
|
|
|
62
63
|
if unlocked:
|
|
63
64
|
self.achievements.add(str(name))
|
|
64
65
|
reward = obs.get("reward_last_step")
|
|
65
|
-
if isinstance(reward,
|
|
66
|
+
if isinstance(reward, int | float):
|
|
66
67
|
self.total_reward += float(reward)
|
|
67
68
|
|
|
68
69
|
|
|
@@ -107,11 +108,8 @@ def _decode_and_save_image(observation: dict[str, Any], path: Path) -> None:
|
|
|
107
108
|
if not isinstance(base64_data, str) or not base64_data:
|
|
108
109
|
return
|
|
109
110
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
110
|
-
|
|
111
|
+
with suppress(Exception):
|
|
111
112
|
path.write_bytes(base64.b64decode(base64_data))
|
|
112
|
-
except Exception:
|
|
113
|
-
# Best-effort; corrupted frames should not halt rollout
|
|
114
|
-
pass
|
|
115
113
|
|
|
116
114
|
|
|
117
115
|
def _normalise_openai_request(payload: dict[str, Any], model: str, temperature: float) -> dict[str, Any]:
|
|
@@ -8,7 +8,7 @@ output now that each record's metadata includes `has_image`, `user_has_image`, a
|
|
|
8
8
|
|
|
9
9
|
Usage:
|
|
10
10
|
uv run python examples/vlm/filter_image_rows.py \
|
|
11
|
-
--input examples/sft/ft_data/
|
|
11
|
+
--input examples/sft/ft_data/crafter_sft.jsonl \
|
|
12
12
|
--output examples/vlm/output/crafter_vlm_dataset.jsonl
|
|
13
13
|
"""
|
|
14
14
|
|
|
@@ -224,7 +224,7 @@ async def _run_episode(
|
|
|
224
224
|
if unlocked:
|
|
225
225
|
achievements.add(str(name))
|
|
226
226
|
reward = obs.get("reward_last_step")
|
|
227
|
-
if isinstance(reward,
|
|
227
|
+
if isinstance(reward, int | float):
|
|
228
228
|
total_reward += float(reward)
|
|
229
229
|
|
|
230
230
|
_save_observation_frame(env_response, frames_dir / f"step_{step_idx + 1:03d}.png")
|
|
@@ -263,7 +263,7 @@ def _summarise(results: list[EpisodeResult]) -> dict[str, Any]:
|
|
|
263
263
|
"mean_steps": round(mean_steps, 2),
|
|
264
264
|
"mean_achievements": round(mean_achievements, 2),
|
|
265
265
|
"total_tool_calls": sum(r.tool_calls for r in mode_results),
|
|
266
|
-
"achievements":
|
|
266
|
+
"achievements": dict(sorted(achievement_counts.items())),
|
|
267
267
|
}
|
|
268
268
|
return summary
|
|
269
269
|
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterable, Sequence
|
|
4
|
+
|
|
5
|
+
from synth_ai.task import (
|
|
6
|
+
RolloutEnvSpec,
|
|
7
|
+
RolloutPolicySpec,
|
|
8
|
+
RolloutRecordConfig,
|
|
9
|
+
RolloutRequest,
|
|
10
|
+
RolloutSafetyConfig,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
DEFAULT_POLICY_NAME = "crafter-react"
|
|
14
|
+
DEFAULT_ENV_NAME = "crafter"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def parse_ops(spec: str | None) -> list[str] | None:
|
|
18
|
+
"""Parse a comma-separated operations string into a list."""
|
|
19
|
+
|
|
20
|
+
if spec is None:
|
|
21
|
+
return None
|
|
22
|
+
ops = [op.strip() for op in spec.split(",") if op.strip()]
|
|
23
|
+
if not ops:
|
|
24
|
+
raise ValueError("Ops must contain at least one entry")
|
|
25
|
+
return ops
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def ops_from_pairs(max_llm_calls: int, *, cap: int | None = None) -> list[str]:
|
|
29
|
+
"""Return alternating agent/env ops for the requested number of LLM calls."""
|
|
30
|
+
|
|
31
|
+
pairs = max(1, int(max_llm_calls or 0))
|
|
32
|
+
if cap is not None:
|
|
33
|
+
pairs = min(pairs, cap)
|
|
34
|
+
ops: list[str] = []
|
|
35
|
+
for _ in range(pairs):
|
|
36
|
+
ops.extend(["agent", "env"])
|
|
37
|
+
return ops
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def build_rollout_request(
|
|
41
|
+
*,
|
|
42
|
+
seed: int,
|
|
43
|
+
run_id: str,
|
|
44
|
+
model: str,
|
|
45
|
+
inference_url: str,
|
|
46
|
+
ops: Sequence[str] | Iterable[str],
|
|
47
|
+
inference_api_key: str | None = None,
|
|
48
|
+
extra_headers: dict[str, str] | None = None,
|
|
49
|
+
trace_format: str = "compact",
|
|
50
|
+
return_trace: bool = False,
|
|
51
|
+
policy_name: str = DEFAULT_POLICY_NAME,
|
|
52
|
+
env_name: str = DEFAULT_ENV_NAME,
|
|
53
|
+
max_policy_tokens: int | None = None,
|
|
54
|
+
record_trajectories: bool = True,
|
|
55
|
+
) -> RolloutRequest:
|
|
56
|
+
"""Construct a RolloutRequest shared across local rollout utilities."""
|
|
57
|
+
|
|
58
|
+
policy_config: dict[str, object] = {
|
|
59
|
+
"model": model,
|
|
60
|
+
"inference_url": inference_url,
|
|
61
|
+
}
|
|
62
|
+
if inference_api_key is not None:
|
|
63
|
+
policy_config["api_key"] = inference_api_key
|
|
64
|
+
if extra_headers:
|
|
65
|
+
policy_config["extra_headers"] = extra_headers
|
|
66
|
+
if max_policy_tokens is not None:
|
|
67
|
+
policy_config["max_completion_tokens"] = max_policy_tokens
|
|
68
|
+
policy_config["max_tokens"] = max_policy_tokens
|
|
69
|
+
|
|
70
|
+
record_cfg = RolloutRecordConfig(
|
|
71
|
+
trajectories=record_trajectories,
|
|
72
|
+
trace_format=trace_format,
|
|
73
|
+
return_trace=return_trace,
|
|
74
|
+
)
|
|
75
|
+
return RolloutRequest(
|
|
76
|
+
run_id=run_id,
|
|
77
|
+
env=RolloutEnvSpec(env_name=env_name, seed=seed, config={}),
|
|
78
|
+
policy=RolloutPolicySpec(policy_name=policy_name, config=policy_config),
|
|
79
|
+
ops=list(ops),
|
|
80
|
+
record=record_cfg,
|
|
81
|
+
on_done="reset",
|
|
82
|
+
safety=RolloutSafetyConfig(),
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
__all__ = [
|
|
87
|
+
"DEFAULT_POLICY_NAME",
|
|
88
|
+
"DEFAULT_ENV_NAME",
|
|
89
|
+
"build_rollout_request",
|
|
90
|
+
"ops_from_pairs",
|
|
91
|
+
"parse_ops",
|
|
92
|
+
]
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# Eval config for Synth Modal inference Qwen/Qwen3-4B via task app rollout
|
|
2
2
|
|
|
3
|
+
type = "rl"
|
|
4
|
+
|
|
3
5
|
# Required
|
|
4
6
|
task_app_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
|
|
5
7
|
model = "Qwen/Qwen3-4B"
|
|
@@ -20,4 +22,3 @@ concurrency = 10
|
|
|
20
22
|
# fetch the vLLM base from the task app /info to use as inference_url.
|
|
21
23
|
# - Ensure the task app mounts the openai-api-key secret if your vLLM gateway
|
|
22
24
|
# requires a bearer token (OPENAI_API_KEY). Otherwise it will call unauthenticated.
|
|
23
|
-
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
# RL training starting from base Qwen/Qwen3-4B (TOML-only model selection)
|
|
2
2
|
|
|
3
|
+
type = "rl"
|
|
4
|
+
|
|
3
5
|
[algorithm]
|
|
4
6
|
type = "online"
|
|
5
7
|
method = "policy_gradient"
|
|
6
8
|
variety = "gspo"
|
|
7
9
|
|
|
8
|
-
|
|
9
10
|
[services]
|
|
10
11
|
task_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
|
|
11
12
|
|