synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of synth-ai might be problematic.
- examples/README.md +1 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +190 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
- examples/qwen_coder/configs/coder_lora_small.toml +2 -1
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +154 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +275 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +423 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
- examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +62 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +1 -1
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +37 -0
- examples/rl/configs/rl_from_base_qwen17.toml +76 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +22 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/sft/README.md +5 -5
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
- examples/sft/evaluate.py +4 -4
- examples/sft/export_dataset.py +7 -4
- examples/sft/generate_traces.py +2 -0
- examples/swe/task_app/README.md +1 -1
- examples/swe/task_app/grpo_swe_mini.py +1 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +2 -8
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/__init__.py +3 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/tests/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
- examples/task_apps/enron/tests/unit/__init__.py +2 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
- examples/task_apps/pokemon_red/task_app.py +199 -6
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/tests/__init__.py +2 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
- examples/task_apps/verilog/tests/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
- examples/task_apps/verilog/tests/unit/__init__.py +2 -0
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/groq_test.py +2 -0
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/run_rollout_remote.py +2 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +145 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/cli.py +30 -7
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/cli/__init__.py +66 -49
- synth_ai/cli/_modal_wrapper.py +9 -6
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/recent.py +1 -0
- synth_ai/cli/setup.py +266 -0
- synth_ai/cli/task_app_deploy.py +16 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +16 -0
- synth_ai/cli/task_app_serve.py +18 -0
- synth_ai/cli/task_apps.py +392 -141
- synth_ai/cli/train.py +18 -0
- synth_ai/cli/tui.py +62 -0
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/verilog/engine.py +76 -10
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/inference/client.py +1 -1
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +1 -1
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/task/__init__.py +11 -1
- synth_ai/task/apps/__init__.py +5 -2
- synth_ai/task/config.py +259 -0
- synth_ai/task/contracts.py +15 -2
- synth_ai/task/rubrics/__init__.py +4 -2
- synth_ai/task/rubrics/loaders.py +27 -4
- synth_ai/task/rubrics/scoring.py +3 -0
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +145 -2
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/session_tracer.py +10 -0
- synth_ai/tracing_v3/turso/daemon.py +2 -2
- synth_ai/tracing_v3/turso/native_manager.py +108 -77
- synth_ai/tracing_v3/utils.py +1 -1
- synth_ai/tui/__init__.py +5 -0
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/cli/__init__.py +1 -0
- synth_ai/tui/cli/query_experiments.py +164 -0
- synth_ai/tui/cli/query_experiments_v3.py +164 -0
- synth_ai/tui/dashboard.py +911 -0
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +287 -0
- synth_ai/utils/http.py +169 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
- synth_ai/cli/man.py +0 -106
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/http.py +0 -26
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py

@@ -149,7 +149,11 @@ class OpenAIClient:
             OpenAI-compatible chat completion response
         """
         base = (base_url or self.base_url).rstrip("/")
-        url = base + "/v1/chat/completions"
+        # Don't append /v1/chat/completions if the URL already contains it
+        if "/v1/chat/completions" in base:
+            url = base
+        else:
+            url = base + "/v1/chat/completions"
         timeout = timeout_s or self.timeout_s

         # Merge headers
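The hunk above makes the chat-completions join idempotent: the path segment is appended only when the configured base URL does not already target it. A minimal standalone sketch of the same idea, assuming nothing beyond the hunk itself (the function name is ours, not part of the package):

```python
def join_chat_completions(base_url: str) -> str:
    """Append /v1/chat/completions unless the base URL already targets it."""
    base = base_url.rstrip("/")
    if "/v1/chat/completions" in base:
        return base
    return base + "/v1/chat/completions"


# Both bare hosts and fully-qualified proxy URLs resolve to the same endpoint.
assert join_chat_completions("https://api.openai.com") == "https://api.openai.com/v1/chat/completions"
assert join_chat_completions("https://proxy.example/v1/chat/completions") == "https://proxy.example/v1/chat/completions"
```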
@@ -164,10 +168,28 @@ class OpenAIClient:
         except Exception:
             pass

-        #
+        # Set Authorization header based on the target URL
         try:
            low_url = (url or "").lower()
-
+
+            # If calling OpenAI directly (api.openai.com)
+            if "api.openai.com" in low_url:
+                openai_key = os.getenv("OPENAI_API_KEY")
+                if openai_key and isinstance(openai_key, str):
+                    headers["Authorization"] = f"Bearer {openai_key}"
+
+            # If target is Synth backend (any deployment), use SYNTH_API_KEY
+            # Matches: synth-backend-*, agent-learning*, localhost:8000, 127.0.0.1:8000
+            elif any(pattern in low_url for pattern in [
+                "synth-backend", "synth.run", "agent-learning",
+                "localhost:8000", "127.0.0.1:8000"
+            ]):
+                synth_key = os.getenv("SYNTH_API_KEY")
+                if synth_key and isinstance(synth_key, str):
+                    headers["Authorization"] = f"Bearer {synth_key}"
+
+            # If target is Groq, use GROQ_API_KEY
+            elif "/proxy/groq" in low_url or "api.groq.com" in low_url:
                 gk = os.getenv("GROQ_API_KEY")
                 if gk and isinstance(gk, str):
                     headers["Authorization"] = f"Bearer {gk}"
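The new header logic routes credentials by destination: direct OpenAI calls get OPENAI_API_KEY, Synth backends (including localhost deployments) get SYNTH_API_KEY, and Groq targets get GROQ_API_KEY. A condensed sketch of that dispatch, assuming only the same environment variable names; `select_bearer_token` is an illustrative helper, not a function in the package:

```python
import os


def select_bearer_token(url: str) -> str | None:
    """Pick the API key env var that matches the inference target URL."""
    low = url.lower()
    if "api.openai.com" in low:
        return os.getenv("OPENAI_API_KEY")
    if any(p in low for p in ("synth-backend", "synth.run", "agent-learning",
                              "localhost:8000", "127.0.0.1:8000")):
        return os.getenv("SYNTH_API_KEY")
    if "/proxy/groq" in low or "api.groq.com" in low:
        return os.getenv("GROQ_API_KEY")
    return None


headers: dict[str, str] = {}
token = select_bearer_token("https://api.groq.com/openai/v1/chat/completions")
if token:
    headers["Authorization"] = f"Bearer {token}"
```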
@@ -196,8 +218,20 @@ class OpenAIClient:
             # Do NOT fall back silently; surface the error so callers fail fast
             raise

+        # DEBUG: Log request BEFORE _fix_model_parameters
+        logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Request message[1] content type: {type(request.get('messages', [])[1].get('content') if len(request.get('messages', [])) > 1 else None)}")
+        if len(request.get("messages", [])) > 1:
+            msg1_content = request["messages"][1].get("content")
+            logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Message[1] content value: {msg1_content if not isinstance(msg1_content, list) else f'list[{len(msg1_content)}]'}")
+
         # Fix parameter compatibility for newer models
         processed_request = self._fix_model_parameters(request, target_url=url)
+
+        # DEBUG: Log request AFTER _fix_model_parameters
+        logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Processed message[1] content type: {type(processed_request.get('messages', [])[1].get('content') if len(processed_request.get('messages', [])) > 1 else None)}")
+        if len(processed_request.get("messages", [])) > 1:
+            msg1_content_post = processed_request["messages"][1].get("content")
+            logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Message[1] content value: {msg1_content_post if not isinstance(msg1_content_post, list) else f'list[{len(msg1_content_post)}]'}")

         # Log request (redact messages in production)
         logger.info(f"Inference POST target: {url}")
|
|
|
206
240
|
with contextlib.suppress(Exception):
|
|
207
241
|
keys_preview = sorted(processed_request.keys())
|
|
208
242
|
logger.info(f"Request keys: {keys_preview}")
|
|
243
|
+
# DEBUG: Log message structure for vision debugging
|
|
244
|
+
if "messages" in processed_request:
|
|
245
|
+
msgs = processed_request["messages"]
|
|
246
|
+
if isinstance(msgs, list):
|
|
247
|
+
logger.debug(f"🔊 [OPENAI_CLIENT] Request has {len(msgs)} messages")
|
|
248
|
+
for idx, msg in enumerate(msgs):
|
|
249
|
+
if isinstance(msg, dict):
|
|
250
|
+
role = msg.get("role")
|
|
251
|
+
content = msg.get("content")
|
|
252
|
+
if isinstance(content, list):
|
|
253
|
+
logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content=list[{len(content)}]")
|
|
254
|
+
for part_idx, part in enumerate(content):
|
|
255
|
+
if isinstance(part, dict):
|
|
256
|
+
part_type = part.get("type")
|
|
257
|
+
logger.debug(f"🔊 [OPENAI_CLIENT] Part[{part_idx}]: type={part_type}")
|
|
258
|
+
else:
|
|
259
|
+
content_len = len(str(content)) if content else 0
|
|
260
|
+
logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content_type={type(content).__name__}, len={content_len}")
|
|
209
261
|
|
|
210
262
|
# Final hard-guard for OpenAI: ensure unsupported field is not present
|
|
211
263
|
try:
|
|
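The debug walk over `processed_request["messages"]` distinguishes plain string content from the list-of-parts form used for vision requests. For reference, an OpenAI-style multimodal user message of the kind this logging inspects looks roughly like the sketch below; the image URL is a placeholder:

```python
multimodal_message = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What is in this screenshot?"},
        {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,<BASE64_PIXELS>"},
        },
    ],
}

# The logging above only reads part["type"], so both text and image_url
# parts are visible without dumping base64 payloads into the logs.
for part in multimodal_message["content"]:
    print(part["type"])
```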
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py

@@ -10,11 +10,13 @@ from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel

 from synth_ai.task.auth import allowed_environment_api_keys, normalize_environment_api_key
+from synth_ai.task.contracts import RolloutMode

 from .envs.crafter.policy import CrafterPolicy
 from .inference.openai_client import create_inference_client
 from .registry import registry
 from .storage.volume import storage
+from .utils import ensure_chat_completions_url

 # Token budgeting (shared logic with inference server)
 try:
@@ -40,6 +42,7 @@ class PolicyCreateRequest(BaseModel):
     parent_policy_id: str | None = None
     rl_run_id: str
     bound_env_id: str | None = None
+    mode: RolloutMode


 class PolicyCreateResponse(BaseModel):
@@ -119,6 +122,14 @@ async def create_policy(
         config.setdefault("inference_url", f"{base_url}/proxy")
         config["provider"] = "openai"

+    received_url = config.get("inference_url")
+    logger.info(
+        "POLICY_CREATE: policy=%s provider=%s raw_inference_url=%s",
+        request.policy_name,
+        provider,
+        received_url,
+    )
+
     if "inference_url" not in config and task_app is not None:
         task_base_url = getattr(task_app, "vllm_base_url", None)
         if task_base_url:
@@ -133,6 +144,31 @@ async def create_policy(
             detail="Policy configuration must include 'inference_url' and 'model'.",
         )

+    # Get mode from PolicyCreateRequest (defaults to "rl" for backward compatibility)
+    mode = request.mode
+    logger.info("POLICY_CREATE: Using mode=%s for URL processing", mode)
+
+    sanitized_url = ensure_chat_completions_url(config.get("inference_url"), mode=mode)
+    if isinstance(sanitized_url, str) and sanitized_url:
+        if sanitized_url != config.get("inference_url"):
+            logger.warning(
+                "POLICY_CREATE: normalized inference_url for policy=%s provider=%s mode=%s from %s to %s",
+                request.policy_name,
+                provider,
+                mode,
+                config.get("inference_url"),
+                sanitized_url,
+            )
+        config["inference_url"] = sanitized_url
+    else:
+        logger.warning(
+            "POLICY_CREATE: unable to normalize inference_url for policy=%s provider=%s mode=%s raw=%s",
+            request.policy_name,
+            mode,
+            provider,
+            config.get("inference_url"),
+        )
+
     # Create policy instance based on name
     pname = request.policy_name.lower()
     if pname in ["crafter-react", "crafter"]:
@@ -381,8 +417,6 @@ async def step_policy(
         inf_req = meta["inference_request"]
         msgs = inf_req["messages"]
         model_name = inf_req.get("model") or getattr(policy, "model", None) or ""
-        system_messages: list[str] = []
-        user_messages: list[str] = []
         if msgs and len(msgs) > 0 and msgs[0]["role"] == "system":
             sys_text = msgs[0]["content"]
             policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
@@ -507,7 +541,22 @@ async def step_policy(

         # Ensure meta carries the final target URL for downstream logging/clients
         with contextlib.suppress(Exception):
-
+            sanitized_target = ensure_chat_completions_url(target_url)
+            if sanitized_target and sanitized_target != target_url:
+                logger.warning(
+                    "POLICY_STEP: normalized inference_url mid-flight policy=%s from %s to %s",
+                    policy_name,
+                    target_url,
+                    sanitized_target,
+                )
+            elif not sanitized_target:
+                logger.info(
+                    "POLICY_STEP: inference_url unchanged policy=%s target=%s",
+                    policy_name,
+                    target_url,
+                )
+            meta["inference_url"] = sanitized_target if sanitized_target else target_url
+            target_url = sanitized_target or target_url

         # Select API key based on resolved target URL
         api_key_override = None
@@ -850,38 +899,71 @@ async def step_policy(
            req_body["temperature"] = 0.1
         meta["inference_request"] = req_body

-        #
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Message flattening: Convert multimodal content to text-only for non-vision models.
+        # SKIP message flattening for vision models to preserve image_url parts!
+        # The old code here was flattening multimodal content (list) to text-only (str),
+        # which strips out image_url parts. This breaks vision models.
+        # Only flatten for non-vision models that can't handle multimodal format.
+        is_vision_model = False
+        try:
+            # Check if the policy is a vision-capable policy
+            if isinstance(policy, CrafterPolicy):
+                is_vision_model = getattr(policy, "use_vision", False)
+        except Exception:
+            pass
+
+        logger.debug(f"🔊 [POLICY_ROUTES] is_vision_model={is_vision_model}, will_flatten={not is_vision_model}")
+
+        if not is_vision_model:
+            # Only flatten for non-vision models (backward compatibility)
+            req_body2 = meta.get("inference_request", {})
+            if isinstance(req_body2, dict):
+                msgs = req_body2.get("messages")
+                if isinstance(msgs, list):
+                    new_msgs = []
+                    changed = False
+                    for m in msgs:
+                        try:
+                            if isinstance(m, dict):
+                                content = m.get("content")
+                                if isinstance(content, list):
+                                    parts: list[str] = []
+                                    for seg in content:
+                                        if isinstance(seg, dict):
+                                            txt = seg.get("text") or seg.get("content")
+                                            if isinstance(txt, str) and txt:
+                                                parts.append(txt)
+                                    m2 = dict(m)
+                                    m2["content"] = "\n".join(parts)
+                                    new_msgs.append(m2)
+                                    changed = True
+                                else:
+                                    new_msgs.append(m)
                             else:
                                 new_msgs.append(m)
-
+                        except Exception:
                             new_msgs.append(m)
-
-                    new_msgs
-
-
-
+                    if changed:
+                        req_body2["messages"] = new_msgs
+                        meta["inference_request"] = req_body2
+                        logger.debug(f"🔊 [POLICY_ROUTES] Flattened messages for non-vision model")
+        else:
+            logger.debug(f"🔊 [POLICY_ROUTES] Preserving multimodal content for vision model")
+
+        # DEBUG: Log final message structure before calling inference
+        final_req = meta.get("inference_request", {})
+        if isinstance(final_req, dict):
+            final_msgs = final_req.get("messages", [])
+            logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Sending {len(final_msgs)} messages to inference")
+            for idx, msg in enumerate(final_msgs):
+                if isinstance(msg, dict):
+                    content = msg.get("content")
+                    logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Message[{idx}]: type={type(content).__name__}, is_list={isinstance(content, list)}")
+                    if isinstance(content, list):
+                        logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Content list has {len(content)} items")
+                        for part_idx, part in enumerate(content[:3]):  # Show first 3 items
+                            if isinstance(part, dict):
+                                logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Part[{part_idx}]: type={part.get('type')}")

         _t_start = _t.time()
         call_started_at = datetime.utcnow()
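The flattening branch above collapses list-form content into a single newline-joined string, but only when the policy is not vision-capable, so `image_url` parts survive for VLM rollouts. The core transform, pulled out as a hedged standalone helper (`flatten_message` and `prepare_messages` are illustrative names, not package functions):

```python
from typing import Any


def flatten_message(msg: dict[str, Any]) -> dict[str, Any]:
    """Collapse list-of-parts content to plain text (drops image_url parts)."""
    content = msg.get("content")
    if not isinstance(content, list):
        return msg
    texts = [
        seg.get("text") or seg.get("content")
        for seg in content
        if isinstance(seg, dict)
    ]
    flattened = dict(msg)
    flattened["content"] = "\n".join(t for t in texts if isinstance(t, str) and t)
    return flattened


def prepare_messages(messages: list[dict[str, Any]], use_vision: bool) -> list[dict[str, Any]]:
    # Vision models keep the multimodal structure untouched.
    return messages if use_vision else [flatten_message(m) for m in messages]
```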
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py

@@ -13,6 +13,7 @@ from pydantic import BaseModel, Field
 from synth_ai.lm.vendors.base import BaseLMResponse
 from synth_ai.task.tracing_utils import unique_sft_path
 from synth_ai.tracing_v3.abstractions import EnvironmentEvent, LMCAISEvent, TimeRecord
+from synth_ai.task.contracts import RolloutMode
 from synth_ai.tracing_v3.llm_call_record_helpers import create_llm_call_record_from_response
 from synth_ai.tracing_v3.session_tracer import SessionTracer

@@ -120,6 +121,8 @@ class RolloutRequest(BaseModel):
     # Optional run/session context
     training_session_id: str | None = None
     synth_base_url: str | None = None
+    # Mode controls URL transformation: REQUIRED to make intent explicit
+    mode: RolloutMode


 class RolloutStep(BaseModel):
@@ -140,6 +143,7 @@ class RolloutTrajectory(BaseModel):
     final: dict[str, Any] | None = None
     length: int
     decision_samples: list[dict[str, Any]] | None = None
+    inference_url: str | None = None


 def _normalize_step_strategy(raw_strategy: Any) -> str:
@@ -452,11 +456,12 @@ class RolloutMetrics(BaseModel):
 class RolloutResponse(BaseModel):
     run_id: str
     trajectories: list[RolloutTrajectory]
-    branches: dict[str, list[str]] =
+    branches: dict[str, list[str]] = Field(default_factory=dict)
     metrics: RolloutMetrics
     aborted: bool = False
     ops_executed: int = 0
     trace: dict[str, Any] | None = None
+    pipeline_metadata: dict[str, Any] = Field(default_factory=dict)


 class RolloutTracingContext:
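Both new response fields use `Field(default_factory=dict)`, the usual Pydantic idiom for giving every model instance its own fresh mutable default. A minimal illustration with a stand-in model (`RolloutResponseSketch` is not from the package):

```python
from typing import Any

from pydantic import BaseModel, Field


class RolloutResponseSketch(BaseModel):
    branches: dict[str, list[str]] = Field(default_factory=dict)
    pipeline_metadata: dict[str, Any] = Field(default_factory=dict)


a = RolloutResponseSketch()
b = RolloutResponseSketch()
a.branches["run-1"] = ["child-a"]
assert b.branches == {}  # each instance gets its own empty dict
```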
@@ -567,7 +572,7 @@ class RolloutTracingContext:
             try:
                 await self.tracer.record_message(
                     content=self._prompt_payload(entry, role="system"),
-                    message_type="
+                    message_type="system",  # Use standard message type
                     metadata=self._message_metadata(),
                 )
             except Exception as exc:
@@ -576,11 +581,16 @@ class RolloutTracingContext:
             try:
                 await self.tracer.record_message(
                     content=self._prompt_payload(entry, role="user"),
-                    message_type="
+                    message_type="user",  # Use standard message type
                     metadata=self._message_metadata(),
                 )
             except Exception as exc:
                 logger.debug("TRACING_USER_MSG_FAIL: %s", exc)
+
+        # Debug: Check message count
+        if self.tracer and self.tracer._current_trace:
+            msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
+            logger.info(f"[TRACE_DEBUG] After record_policy_prompts: {msg_count} messages in trace")

     def _content_to_text(self, content: Any) -> str:
         if isinstance(content, str):
@@ -656,8 +666,8 @@ class RolloutTracingContext:
         try:
             await self.tracer.record_message(
                 content=self._safe_json(tool_calls),
-                message_type="
-                metadata=self._message_metadata(),
+                message_type="assistant",  # Map to standard assistant message type
+                metadata={**self._message_metadata(), "is_tool_call": True},
             )
         except Exception as exc:
             logger.debug("TRACING_TOOL_MSG_FAIL: %s", exc)
@@ -928,11 +938,22 @@ class RolloutTracingContext:
         except Exception as exc:
             logger.debug("TRACING_OUTCOME_FAIL: %s", exc)
         try:
+            # Debug: Check message count before end_session
+            if self.tracer._current_trace:
+                msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
+                logger.info(f"[TRACE_DEBUG] Before end_session: {msg_count} messages in trace")
+
             self.session_trace = await self.tracer.end_session()
-
+
+            # Debug: Check if session was saved
+            if self.session_trace:
+                logger.info(f"[TRACE_DEBUG] Session ended successfully, session_id={self.session_trace.session_id}")
                 self.session_trace.metadata.update(self.metadata_updates)
+                logger.info(f"[TRACE_DEBUG] session_trace.metadata keys: {list(self.session_trace.metadata.keys())}")
+            else:
+                logger.warning("[TRACE_DEBUG] end_session returned None!")
         except Exception as exc:
-            logger.
+            logger.warning(f"TRACING_END_SESSION_FAIL: {exc}", exc_info=True)
             self.session_trace = None
         with contextlib.suppress(Exception):
             await self.tracer.close()
@@ -964,10 +985,15 @@ class RolloutTracingContext:
     def build_trace_payload(self, session_trace: Any) -> dict[str, Any] | None:
         if not self.return_trace or session_trace is None:
             return None
-
+
+        # For both "full" and "structured" formats, return the complete session trace
+        # The CLI (synth-ai eval) expects this for proper trace storage
+        if self.trace_format in ("full", "structured"):
             payload = session_trace.to_dict()
             payload.setdefault("metadata", {}).update(self.metadata_updates)
             return payload
+
+        # For "compact" format, return only summary stats
         metadata = dict(session_trace.metadata)
         metadata.update(self.metadata_updates)
         return {
@@ -1056,12 +1082,14 @@ async def execute_rollout(
     req: Request,
 ) -> RolloutResponse:
     """Execute a rollout with coordinated environment and policy steps."""
+    logger.info("ROLLOUT: mode = %s", request.mode)
+
     # Emit rollout identifier early for correlation
     with contextlib.suppress(Exception):
         _rid = getattr(request, "run_id", None)
         _pol = getattr(request.policy, "policy_name", None) or getattr(request.policy, "policy_id", None)
         _env = getattr(request.env, "env_name", None) or getattr(request.env, "env_id", None)
-        logger.info("ROLLOUT_BEGIN: run_id=%s policy=%s env=%s", _rid, _pol, _env)
+        logger.info("ROLLOUT_BEGIN: run_id=%s policy=%s env=%s mode=%s", _rid, _pol, _env, request.mode)
         print(f"[rollout] begin run_id={_rid} policy={_pol} env={_env}", flush=True)
     # Enforce per-episode step cap via env-specific parameters; default to 20 if omitted
     try:
@@ -1150,14 +1178,6 @@ async def execute_rollout(
             logger.debug(f"TRACER_FACTORY_FAIL: {exc}")
     tracing_context = RolloutTracingContext(tracer_instance, request, req)
     await tracing_context.start_session()
-    # Print whether tracing is active for this rollout
-    try:
-        print(
-            f"[rollout] tracing enabled={bool(tracing_context.enabled)} run_id={request.run_id}",
-            flush=True,
-        )
-    except Exception:
-        pass

     # Register run
     registry.register_run(request.run_id)
@@ -1271,6 +1291,7 @@ async def execute_rollout(
             config=_policy_config,
             rl_run_id=request.run_id,
             bound_env_id=env_id,
+            mode=request.mode,  # Pass through mode for URL transformation control
         ),
         req,
     )
@@ -1601,16 +1622,21 @@ async def execute_rollout(

         elif op == "env":
             if not pending_tool_calls:
+                # Instead of failing, inject a no-op action to keep the rollout going
                 with contextlib.suppress(Exception):
                     logger.warning(
-                        "
+                        "POLICY_STEP_NOOP: missing tool_calls; injecting noop action run_id=%s op_idx=%s",
                         request.run_id,
                         str(op_idx),
                     )
-
-
-
-
+                # Create a noop tool call in the format expected by the environment
+                pending_tool_calls = [
+                    {
+                        "id": f"noop_{op_idx}",
+                        "tool": "interact",
+                        "arguments": {"action": "noop"},
+                    }
+                ]

             # Environment step
             from .environment_routes import EnvStepRequest, step_environment
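When the policy returns no tool calls, the rollout now substitutes a `noop` interact call instead of aborting the episode. A small sketch of that guard, with the tool-call shape copied from the hunk (`ensure_tool_calls` is an illustrative helper, not a package function):

```python
from typing import Any


def ensure_tool_calls(pending: list[dict[str, Any]] | None, op_idx: int) -> list[dict[str, Any]]:
    """Fall back to a no-op action so the environment step can still run."""
    if pending:
        return pending
    return [
        {
            "id": f"noop_{op_idx}",
            "tool": "interact",
            "arguments": {"action": "noop"},
        }
    ]


assert ensure_tool_calls(None, 3)[0]["arguments"] == {"action": "noop"}
```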
@@ -1843,14 +1869,73 @@ async def execute_rollout(
         timing_final.setdefault("overhead_ms", 0.0)

         # Build trajectory
-        # Extract inference_url from policy
+        # Extract inference_url from policy config (REQUIRED for trace correlation)
+        # The trainer sets this in policy config with ?cid=... parameter
         inference_url = None
-
+
+        # Try policy config from request first (most reliable source)
+        try:
+            policy_config_snapshot = (
+                request.policy.config if isinstance(request.policy.config, dict) else {}
+            )
+            inference_url = policy_config_snapshot.get("inference_url")
+            if inference_url:
+                logger.info(
+                    "ROLLOUT_TRAJECTORY: extracted inference_url from request.policy.config run_id=%s url=%s",
+                    request.run_id,
+                    inference_url,
+                )
+        except Exception as exc:
+            logger.warning(
+                "ROLLOUT_TRAJECTORY: failed to get inference_url from request.policy.config run_id=%s: %s",
+                request.run_id,
+                exc,
+            )
+
+        # Fallback: Try policy handle snapshot (if request.policy.config failed)
+        if not inference_url and policy_handle is not None:
             try:
                 policy_snapshot = policy_handle.snapshot()
                 inference_url = policy_snapshot.get("config", {}).get("inference_url")
-
-
+                if inference_url:
+                    logger.info(
+                        "ROLLOUT_TRAJECTORY: extracted inference_url from policy_handle.snapshot run_id=%s url=%s",
+                        request.run_id,
+                        inference_url,
+                    )
+            except Exception as exc:
+                logger.warning(
+                    "ROLLOUT_TRAJECTORY: failed to snapshot policy for run_id=%s policy_id=%s: %s",
+                    request.run_id,
+                    policy_id,
+                    exc,
+                )
+
+        # ASSERTION: inference_url MUST be present (required by RolloutTrajectory schema)
+        if not inference_url:
+            raise ValueError(
+                f"FATAL: inference_url is required but not found!\n"
+                f"\n"
+                f"run_id: {request.run_id}\n"
+                f"policy_id: {policy_id}\n"
+                f"policy_config_keys: {list(policy_config_snapshot.keys()) if 'policy_config_snapshot' in locals() else 'N/A'}\n"
+                f"\n"
+                f"The trainer MUST set inference_url in policy config with ?cid=... parameter.\n"
+                f"This is required for trace correlation and hydration.\n"
+            )
+
+        # policy_config_snapshot already set above in try block (line 1876-1878)
+        # Ensure it exists for logging below
+        if 'policy_config_snapshot' not in locals():
+            policy_config_snapshot = {}
+
+        logger.info(
+            "ROLLOUT_TRAJECTORY: run_id=%s policy_id=%s inference_url=%s trace_id=%s",
+            request.run_id,
+            policy_id,
+            inference_url,
+            policy_config_snapshot.get("trace_correlation_id"),
+        )

         trajectory = RolloutTrajectory(
             env_id=env_id,
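The trajectory now hard-requires an `inference_url`, because (per the comments in this hunk) the trainer encodes a correlation id in its query string as `?cid=...`. A hedged sketch of how such an id could be read back out of the URL; the parameter name comes from the hunk, while `extract_correlation_id` and the example host are illustrative, not part of the package:

```python
from urllib.parse import parse_qs, urlparse


def extract_correlation_id(inference_url: str) -> str | None:
    """Return the ?cid=... value used to correlate rollout traces, if present."""
    query = parse_qs(urlparse(inference_url).query)
    values = query.get("cid")
    return values[0] if values else None


url = "https://synth-backend.example/api/inference/v1/chat/completions?cid=trace-abc123"
assert extract_correlation_id(url) == "trace-abc123"
```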
@@ -1948,12 +2033,17 @@ async def execute_rollout(
             )
             finalized = True
         trace_payload = tracing_context.build_trace_payload(session_trace)
+
+        # Debug: Check trace payload
+        logger.info(f"[TRACE_DEBUG] trace_payload is None: {trace_payload is None}, return_trace={tracing_context.return_trace}")
+        if trace_payload:
+            logger.info(f"[TRACE_DEBUG] trace_payload keys: {list(trace_payload.keys())}")

         # Hard-fail if no steps executed (avg_turns == 0 scenario)
         if metrics.num_steps <= 0:
             raise HTTPException(status_code=500, detail="no_steps_executed: avg_turns == 0")

-
+        response = RolloutResponse(
             run_id=request.run_id,
             trajectories=[trajectory],
             branches={},
|
|
|
1962
2052
|
ops_executed=ops_executed,
|
|
1963
2053
|
trace=trace_payload,
|
|
1964
2054
|
)
|
|
2055
|
+
logger.info(
|
|
2056
|
+
"ROLLOUT_RESPONSE: run_id=%s aborted=%s ops_executed=%s metrics_steps=%s trace_present=%s pipeline_metadata=%s",
|
|
2057
|
+
request.run_id,
|
|
2058
|
+
aborted,
|
|
2059
|
+
ops_executed,
|
|
2060
|
+
metrics.num_steps,
|
|
2061
|
+
bool(trace_payload),
|
|
2062
|
+
response.pipeline_metadata,
|
|
2063
|
+
)
|
|
2064
|
+
return response
|
|
1965
2065
|
|
|
1966
2066
|
except Exception as e:
|
|
1967
2067
|
logger.error(f"Rollout failed for run {request.run_id}: {e}")
|