synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/analyze_semantic_words.sh +2 -2
- examples/baseline/banking77_baseline.py +204 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
- examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -3
- examples/qwen_vl/README.md +10 -12
- examples/qwen_vl/SETUP_COMPLETE.md +7 -8
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
- examples/qwen_vl/collect_data_via_cli.md +76 -84
- examples/qwen_vl/collect_vision_traces.py +4 -4
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
- examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
- examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
- examples/qwen_vl/run_vision_comparison.sh +6 -7
- examples/rl/README.md +5 -5
- examples/rl/configs/rl_from_base_qwen.toml +26 -1
- examples/rl/configs/rl_from_base_qwen17.toml +6 -2
- examples/rl/task_app/README.md +1 -2
- examples/rl/task_app/math_single_step.py +2 -2
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +1 -1
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
- examples/swe/task_app/README.md +32 -2
- examples/swe/task_app/grpo_swe_mini.py +4 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
- examples/swe/task_app/hosted/inference/openai_client.py +4 -38
- examples/swe/task_app/hosted/policy_routes.py +17 -0
- examples/swe/task_app/hosted/rollout.py +4 -2
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +841 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +288 -39
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +1 -1
- examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
- synth_ai/api/train/builders.py +99 -4
- synth_ai/api/train/cli.py +516 -26
- synth_ai/api/train/config_finder.py +13 -2
- synth_ai/api/train/configs/__init__.py +23 -2
- synth_ai/api/train/configs/prompt_learning.py +442 -0
- synth_ai/api/train/configs/rl.py +61 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/task_app.py +1 -1
- synth_ai/api/train/validators.py +277 -0
- synth_ai/auth/credentials.py +119 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cli/__init__.py +94 -18
- synth_ai/cli/__main__.py +0 -0
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +84 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1436 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +30 -158
- synth_ai/cli/deploy/__init__.py +43 -0
- synth_ai/cli/deploy.py +162 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +14 -8
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +107 -0
- synth_ai/cli/root.py +9 -5
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +20 -265
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +1 -10
- synth_ai/cli/task_app_modal_serve.py +4 -9
- synth_ai/cli/task_app_serve.py +4 -11
- synth_ai/cli/task_apps.py +51 -1480
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +1 -14
- synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/http.py +12 -0
- synth_ai/judge_schemas.py +10 -10
- synth_ai/learning/__init__.py +10 -0
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +184 -0
- synth_ai/learning/rl/client.py +3 -1
- synth_ai/pricing/__init__.py +2 -0
- synth_ai/pricing/model_pricing.py +57 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +518 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +2 -0
- synth_ai/task/tracing_utils.py +25 -25
- synth_ai/task/validators.py +45 -9
- synth_ai/task_app_cfgs.py +21 -0
- synth_ai/tracing_v3/config.py +162 -19
- synth_ai/tracing_v3/constants.py +1 -1
- synth_ai/tracing_v3/db_config.py +24 -38
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/storage/config.py +47 -13
- synth_ai/tracing_v3/storage/factory.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +113 -11
- synth_ai/tracing_v3/turso/native_manager.py +92 -16
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +30 -1
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/cli.py +149 -5
- synth_ai/utils/env.py +40 -33
- synth_ai/utils/http.py +4 -1
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/modal.py +285 -3
- synth_ai/utils/paths.py +48 -0
- synth_ai/utils/uvicorn.py +113 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
- synth_ai/cli/tui.py +0 -62
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -911
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
|
@@ -4,6 +4,7 @@ import contextlib
|
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
6
|
from datetime import datetime
|
|
7
|
+
import asyncio
|
|
7
8
|
from typing import Any
|
|
8
9
|
|
|
9
10
|
from fastapi import APIRouter, HTTPException, Request
|
|
@@ -35,6 +36,13 @@ logger = logging.getLogger(__name__)
|
|
|
35
36
|
|
|
36
37
|
router = APIRouter()
|
|
37
38
|
|
|
39
|
+
# Global concurrency limit for outbound inference to avoid backend overload/timeouts
|
|
40
|
+
try:
|
|
41
|
+
_INFERENCE_CONCURRENCY = int(os.getenv("INFERENCE_CONCURRENCY", "2") or "2")
|
|
42
|
+
except Exception: # pragma: no cover
|
|
43
|
+
_INFERENCE_CONCURRENCY = 2
|
|
44
|
+
_inference_sem = asyncio.Semaphore(max(1, _INFERENCE_CONCURRENCY))
|
|
45
|
+
|
|
38
46
|
|
|
39
47
|
class PolicyCreateRequest(BaseModel):
|
|
40
48
|
policy_name: str
|
|
@@ -250,6 +258,11 @@ async def step_policy(
|
|
|
250
258
|
task_app = req.app.state.task_app
|
|
251
259
|
policy = handle.policy
|
|
252
260
|
tracing_context = getattr(req.state, "rollout_tracing", None)
|
|
261
|
+
if tracing_context is None:
|
|
262
|
+
print(
|
|
263
|
+
f"[TRACE_DEBUG] Missing tracing context on policy step; policy_id={request.policy_id}",
|
|
264
|
+
flush=True,
|
|
265
|
+
)
|
|
253
266
|
|
|
254
267
|
obs_text = request.observation
|
|
255
268
|
if isinstance(request.observation, dict):
|
|
@@ -462,6 +475,8 @@ async def step_policy(
|
|
|
462
475
|
)
|
|
463
476
|
|
|
464
477
|
# Emit full system/user prompts for observability (no secrets included)
|
|
478
|
+
system_prompt_records: list[dict[str, Any]] = []
|
|
479
|
+
user_prompt_records: list[dict[str, Any]] = []
|
|
465
480
|
try:
|
|
466
481
|
|
|
467
482
|
def _as_text(content: object) -> str:
|
|
@@ -481,8 +496,6 @@ async def step_policy(
|
|
|
481
496
|
return "".join(parts)
|
|
482
497
|
return str(content)
|
|
483
498
|
|
|
484
|
-
system_prompt_records: list[dict[str, Any]] = []
|
|
485
|
-
user_prompt_records: list[dict[str, Any]] = []
|
|
486
499
|
for message in msgs:
|
|
487
500
|
role = message.get("role")
|
|
488
501
|
raw_content = message.get("content")
|
|
@@ -525,6 +538,11 @@ async def step_policy(
|
|
|
525
538
|
|
|
526
539
|
if tracing_context is not None:
|
|
527
540
|
try:
|
|
541
|
+
logger.info(
|
|
542
|
+
"[TRACE_DEBUG] record_policy_prompts sys=%s user=%s",
|
|
543
|
+
len(system_prompt_records),
|
|
544
|
+
len(user_prompt_records),
|
|
545
|
+
)
|
|
528
546
|
await tracing_context.record_policy_prompts(
|
|
529
547
|
system_prompt_records, user_prompt_records
|
|
530
548
|
)
|
|
@@ -541,6 +559,14 @@ async def step_policy(
|
|
|
541
559
|
|
|
542
560
|
# Ensure meta carries the final target URL for downstream logging/clients
|
|
543
561
|
with contextlib.suppress(Exception):
|
|
562
|
+
# Bulletproof normalizer at the call site (in addition to client-side)
|
|
563
|
+
try:
|
|
564
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.utils import (
|
|
565
|
+
force_normalize_chat_completions_url,
|
|
566
|
+
)
|
|
567
|
+
target_url = force_normalize_chat_completions_url(target_url)
|
|
568
|
+
except Exception:
|
|
569
|
+
pass
|
|
544
570
|
sanitized_target = ensure_chat_completions_url(target_url)
|
|
545
571
|
if sanitized_target and sanitized_target != target_url:
|
|
546
572
|
logger.warning(
|
|
@@ -589,6 +615,28 @@ async def step_policy(
|
|
|
589
615
|
except Exception:
|
|
590
616
|
api_key_override = None
|
|
591
617
|
|
|
618
|
+
# Fallback: If target is OpenAI but OPENAI_API_KEY is missing, route to Synth API
|
|
619
|
+
try:
|
|
620
|
+
import os as _os2
|
|
621
|
+
_low = str(target_url or "").lower()
|
|
622
|
+
if ("api.openai.com" in _low) and not (_os2.getenv("OPENAI_API_KEY")):
|
|
623
|
+
# Prefer task_app.synth_base_url if available; else default
|
|
624
|
+
synth_base = getattr(task_app, "synth_base_url", None)
|
|
625
|
+
if isinstance(synth_base, str) and synth_base.strip():
|
|
626
|
+
base = synth_base.rstrip("/")
|
|
627
|
+
fallback = base + "/inference/v1/chat/completions"
|
|
628
|
+
else:
|
|
629
|
+
fallback = "https://api.synth.run/api/inference/v1/chat/completions"
|
|
630
|
+
fixed = ensure_chat_completions_url(fallback)
|
|
631
|
+
logger.warning(
|
|
632
|
+
"POLICY_STEP: OPENAI key missing; falling back to Synth route %s",
|
|
633
|
+
fixed,
|
|
634
|
+
)
|
|
635
|
+
meta["inference_url"] = fixed
|
|
636
|
+
target_url = fixed
|
|
637
|
+
except Exception:
|
|
638
|
+
pass
|
|
639
|
+
|
|
592
640
|
if api_key_override:
|
|
593
641
|
try:
|
|
594
642
|
masked = f"{api_key_override[:6]}…{api_key_override[-4:]}"
|
|
@@ -780,9 +828,10 @@ async def step_policy(
|
|
|
780
828
|
"sokoban-react",
|
|
781
829
|
"crafter-react",
|
|
782
830
|
) and getattr(policy, "use_tools", True):
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
831
|
+
inf_req = meta.get("inference_request", {})
|
|
832
|
+
req_tools = inf_req.get("tools")
|
|
833
|
+
req_tool_choice = inf_req.get("tool_choice")
|
|
834
|
+
req_stop_after = inf_req.get("stop_after_tool_calls")
|
|
786
835
|
logger.info(
|
|
787
836
|
f"TOOLCALL_CONFIG: policy={policy_name} tools_present={bool(req_tools)} tool_choice={req_tool_choice} stop_after={req_stop_after}"
|
|
788
837
|
)
|
|
@@ -791,6 +840,8 @@ async def step_policy(
|
|
|
791
840
|
status_code=500,
|
|
792
841
|
detail=f"TOOLCALL_ASSERTION_FAIL: Missing tools or tool_choice!=required for policy {policy_name}",
|
|
793
842
|
)
|
|
843
|
+
if req_stop_after is None:
|
|
844
|
+
inf_req["stop_after_tool_calls"] = 1
|
|
794
845
|
|
|
795
846
|
# Call inference service with retries for Flash cold-start (503)
|
|
796
847
|
import time as _t
|
|
@@ -967,13 +1018,14 @@ async def step_policy(
|
|
|
967
1018
|
|
|
968
1019
|
_t_start = _t.time()
|
|
969
1020
|
call_started_at = datetime.utcnow()
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
1021
|
+
async with _inference_sem:
|
|
1022
|
+
inference_response = await client.generate_with_retries(
|
|
1023
|
+
request=meta["inference_request"],
|
|
1024
|
+
base_url=meta["inference_url"],
|
|
1025
|
+
max_retries=12,
|
|
1026
|
+
backoff_factor=2.0,
|
|
1027
|
+
extra_headers=extra_headers,
|
|
1028
|
+
)
|
|
977
1029
|
meta["inference_ms"] = int((_t.time() - _t_start) * 1000)
|
|
978
1030
|
call_completed_at = datetime.utcnow()
|
|
979
1031
|
|
|
@@ -1053,6 +1105,23 @@ async def step_policy(
|
|
|
1053
1105
|
except Exception as exc:
|
|
1054
1106
|
logger.debug(f"TRACING_LLM_FAIL: {exc}")
|
|
1055
1107
|
|
|
1108
|
+
if not tool_calls:
|
|
1109
|
+
preview = ""
|
|
1110
|
+
try:
|
|
1111
|
+
preview = str(meta.get("raw_response") or "")[:400]
|
|
1112
|
+
except Exception:
|
|
1113
|
+
preview = "<unavailable>"
|
|
1114
|
+
logger.error(
|
|
1115
|
+
{
|
|
1116
|
+
"rollout.policy_step": True,
|
|
1117
|
+
"policy_id": request.policy_id,
|
|
1118
|
+
"error": "no_tool_calls",
|
|
1119
|
+
"inference_url": meta.get("inference_url"),
|
|
1120
|
+
"raw_preview": preview,
|
|
1121
|
+
}
|
|
1122
|
+
)
|
|
1123
|
+
raise RuntimeError("Policy step produced no tool calls; inference response unusable.")
|
|
1124
|
+
|
|
1056
1125
|
return PolicyStepResponse(
|
|
1057
1126
|
tool_calls=tool_calls,
|
|
1058
1127
|
meta=meta,
|
|
@@ -491,6 +491,10 @@ class RolloutTracingContext:
|
|
|
491
491
|
getattr(request.record, "trace_format", "compact") or "compact"
|
|
492
492
|
).lower()
|
|
493
493
|
self.return_trace = bool(getattr(request.record, "return_trace", False))
|
|
494
|
+
print(
|
|
495
|
+
f"[TRACE_DEBUG] RolloutTracingContext init: trace_format={self.trace_format} return_trace={self.return_trace}",
|
|
496
|
+
flush=True,
|
|
497
|
+
)
|
|
494
498
|
self.sft_output_dir = getattr(fastapi_request.app.state, "sft_output_dir", None)
|
|
495
499
|
self.session_trace = None
|
|
496
500
|
self.metadata_updates: dict[str, Any] = {}
|
|
@@ -513,19 +517,24 @@ class RolloutTracingContext:
|
|
|
513
517
|
|
|
514
518
|
async def start_session(self) -> None:
|
|
515
519
|
if not self.enabled or self.tracer is None:
|
|
520
|
+
print("[TRACE_DEBUG] start_session skipped: tracer disabled", flush=True)
|
|
516
521
|
return
|
|
517
522
|
try:
|
|
518
523
|
await self.tracer.initialize()
|
|
524
|
+
print("[TRACE_DEBUG] tracer initialized", flush=True)
|
|
519
525
|
except Exception as exc:
|
|
520
526
|
logger.debug("TRACING_INIT_FAIL: %s", exc)
|
|
527
|
+
# Hard fail: tracing requested but cannot initialize
|
|
528
|
+
raise
|
|
521
529
|
try:
|
|
522
530
|
await self.tracer.start_session(
|
|
523
531
|
session_id=self.run_id, metadata=dict(self.metadata_base)
|
|
524
532
|
)
|
|
533
|
+
print(f"[TRACE_DEBUG] start_session succeeded for run_id={self.run_id}", flush=True)
|
|
525
534
|
except Exception as exc:
|
|
526
535
|
logger.info("TRACING_START_FAIL: %s", exc)
|
|
527
|
-
|
|
528
|
-
|
|
536
|
+
# Hard fail: tracing requested but cannot start session
|
|
537
|
+
raise
|
|
529
538
|
|
|
530
539
|
async def start_decision(self, turn_number: int) -> None:
|
|
531
540
|
self.current_turn = turn_number
|
|
@@ -590,7 +599,7 @@ class RolloutTracingContext:
|
|
|
590
599
|
# Debug: Check message count
|
|
591
600
|
if self.tracer and self.tracer._current_trace:
|
|
592
601
|
msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
|
|
593
|
-
|
|
602
|
+
print(f"[TRACE_DEBUG] After record_policy_prompts: {msg_count} messages", flush=True)
|
|
594
603
|
|
|
595
604
|
def _content_to_text(self, content: Any) -> str:
|
|
596
605
|
if isinstance(content, str):
|
|
@@ -664,11 +673,20 @@ class RolloutTracingContext:
|
|
|
664
673
|
return
|
|
665
674
|
if self.enabled and self.tracer is not None:
|
|
666
675
|
try:
|
|
676
|
+
payload = {
|
|
677
|
+
"role": "assistant",
|
|
678
|
+
"tool_calls": tool_calls,
|
|
679
|
+
}
|
|
667
680
|
await self.tracer.record_message(
|
|
668
|
-
content=
|
|
669
|
-
message_type="assistant",
|
|
681
|
+
content=payload,
|
|
682
|
+
message_type="assistant",
|
|
670
683
|
metadata={**self._message_metadata(), "is_tool_call": True},
|
|
671
684
|
)
|
|
685
|
+
if self.tracer._current_trace:
|
|
686
|
+
print(
|
|
687
|
+
f"[TRACE_DEBUG] After tool invocation: messages={len(self.tracer._current_trace.markov_blanket_message_history)}",
|
|
688
|
+
flush=True,
|
|
689
|
+
)
|
|
672
690
|
except Exception as exc:
|
|
673
691
|
logger.debug("TRACING_TOOL_MSG_FAIL: %s", exc)
|
|
674
692
|
|
|
@@ -774,9 +792,33 @@ class RolloutTracingContext:
|
|
|
774
792
|
}
|
|
775
793
|
)
|
|
776
794
|
|
|
795
|
+
assistant_structured = assistant_content if assistant_content is not None else ""
|
|
796
|
+
assistant_text = self._content_to_text(assistant_content)
|
|
797
|
+
|
|
798
|
+
if self.enabled and self.tracer is not None:
|
|
799
|
+
assistant_payload: dict[str, Any] = {
|
|
800
|
+
"role": "assistant",
|
|
801
|
+
"content": assistant_structured,
|
|
802
|
+
"text": assistant_text,
|
|
803
|
+
}
|
|
804
|
+
if isinstance(assistant_message, dict):
|
|
805
|
+
if assistant_message.get("tool_calls"):
|
|
806
|
+
assistant_payload["tool_calls"] = assistant_message.get("tool_calls")
|
|
807
|
+
if assistant_message.get("reasoning"):
|
|
808
|
+
assistant_payload["reasoning"] = assistant_message.get("reasoning")
|
|
809
|
+
if assistant_message.get("thinking"):
|
|
810
|
+
assistant_payload["thinking"] = assistant_message.get("thinking")
|
|
811
|
+
try:
|
|
812
|
+
await self.tracer.record_message(
|
|
813
|
+
content=assistant_payload,
|
|
814
|
+
message_type="assistant",
|
|
815
|
+
metadata=self._message_metadata(),
|
|
816
|
+
)
|
|
817
|
+
except Exception as exc:
|
|
818
|
+
logger.debug("TRACING_ASSISTANT_MSG_FAIL: %s", exc)
|
|
819
|
+
|
|
777
820
|
if self.sft_output_dir is not None:
|
|
778
821
|
assistant_structured = assistant_content if assistant_content is not None else ""
|
|
779
|
-
assistant_text = self._content_to_text(assistant_content)
|
|
780
822
|
dialogue_structured: list[dict[str, Any]] = []
|
|
781
823
|
for content in self.latest_system_prompt_content:
|
|
782
824
|
if content is None:
|
|
@@ -941,17 +983,23 @@ class RolloutTracingContext:
|
|
|
941
983
|
# Debug: Check message count before end_session
|
|
942
984
|
if self.tracer._current_trace:
|
|
943
985
|
msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
|
|
944
|
-
|
|
945
|
-
|
|
986
|
+
print(f"[TRACE_DEBUG] Before end_session: {msg_count} messages in trace", flush=True)
|
|
987
|
+
|
|
946
988
|
self.session_trace = await self.tracer.end_session()
|
|
947
989
|
|
|
948
990
|
# Debug: Check if session was saved
|
|
949
991
|
if self.session_trace:
|
|
950
|
-
|
|
992
|
+
print(
|
|
993
|
+
f"[TRACE_DEBUG] Session ended successfully, session_id={self.session_trace.session_id}",
|
|
994
|
+
flush=True,
|
|
995
|
+
)
|
|
951
996
|
self.session_trace.metadata.update(self.metadata_updates)
|
|
952
|
-
|
|
997
|
+
print(
|
|
998
|
+
f"[TRACE_DEBUG] session_trace.metadata keys: {list(self.session_trace.metadata.keys())}",
|
|
999
|
+
flush=True,
|
|
1000
|
+
)
|
|
953
1001
|
else:
|
|
954
|
-
|
|
1002
|
+
print("[TRACE_DEBUG] end_session returned None!", flush=True)
|
|
955
1003
|
except Exception as exc:
|
|
956
1004
|
logger.warning(f"TRACING_END_SESSION_FAIL: {exc}", exc_info=True)
|
|
957
1005
|
self.session_trace = None
|
|
@@ -991,6 +1039,10 @@ class RolloutTracingContext:
|
|
|
991
1039
|
if self.trace_format in ("full", "structured"):
|
|
992
1040
|
payload = session_trace.to_dict()
|
|
993
1041
|
payload.setdefault("metadata", {}).update(self.metadata_updates)
|
|
1042
|
+
print(
|
|
1043
|
+
f"[TRACE_DEBUG] build_trace_payload returning structured trace with messages={len(payload.get('markov_blanket_message_history') or [])}",
|
|
1044
|
+
flush=True,
|
|
1045
|
+
)
|
|
994
1046
|
return payload
|
|
995
1047
|
|
|
996
1048
|
# For "compact" format, return only summary stats
|
|
@@ -1929,6 +1981,15 @@ async def execute_rollout(
|
|
|
1929
1981
|
if 'policy_config_snapshot' not in locals():
|
|
1930
1982
|
policy_config_snapshot = {}
|
|
1931
1983
|
|
|
1984
|
+
# Normalize inference URL for trajectory (and ensure no path in query)
|
|
1985
|
+
try:
|
|
1986
|
+
from .utils import force_normalize_chat_completions_url, ensure_chat_completions_url
|
|
1987
|
+
inference_url = force_normalize_chat_completions_url(inference_url)
|
|
1988
|
+
# apply mode-aware normalization too (keeps cid, appends path if missing)
|
|
1989
|
+
inference_url = ensure_chat_completions_url(inference_url, mode=request.mode)
|
|
1990
|
+
except Exception:
|
|
1991
|
+
pass
|
|
1992
|
+
|
|
1932
1993
|
logger.info(
|
|
1933
1994
|
"ROLLOUT_TRAJECTORY: run_id=%s policy_id=%s inference_url=%s trace_id=%s",
|
|
1934
1995
|
request.run_id,
|
|
@@ -2043,6 +2104,16 @@ async def execute_rollout(
|
|
|
2043
2104
|
if metrics.num_steps <= 0:
|
|
2044
2105
|
raise HTTPException(status_code=500, detail="no_steps_executed: avg_turns == 0")
|
|
2045
2106
|
|
|
2107
|
+
# Ensure at least one tool call executed successfully
|
|
2108
|
+
tool_call_executed = any(
|
|
2109
|
+
isinstance(step.tool_calls, list) and len(step.tool_calls) > 0 for step in trajectory_steps
|
|
2110
|
+
)
|
|
2111
|
+
if not tool_call_executed:
|
|
2112
|
+
raise HTTPException(
|
|
2113
|
+
status_code=502,
|
|
2114
|
+
detail="no_tool_calls_executed: model failed to produce actionable tool calls.",
|
|
2115
|
+
)
|
|
2116
|
+
|
|
2046
2117
|
response = RolloutResponse(
|
|
2047
2118
|
run_id=request.run_id,
|
|
2048
2119
|
trajectories=[trajectory],
|
|
@@ -11,6 +11,129 @@ logger = logging.getLogger(__name__)
|
|
|
11
11
|
_CHAT_COMPLETIONS_SUFFIX = "/v1/chat/completions"


def force_normalize_chat_completions_url(raw_url: Any) -> Any:
    """
    Repair a possibly malformed inference URL into chat-completions form.

    Rules:
      - Final path MUST end with /v1/chat/completions (no trailing slash).
      - Query MUST NOT contain any '/' characters (no path segments in query).
      - If the original query contained a path
        (e.g. '?cid=.../v1/chat/completions'), that path is moved out of the
        query and appended to the URL's existing path; the remaining query
        parameters are kept in their original order.
      - Scheme, host, port and any path already present are preserved.

    Examples:
      https://host?cid=trace_123/v1/chat/completions
        -> https://host/v1/chat/completions?cid=trace_123
      https://host:8000?cid=trace_abc/v1/chat/completions&foo=bar
        -> https://host:8000/v1/chat/completions?cid=trace_abc&foo=bar
      https://host?cid=trace_123/v1/chat/completions?other=param
        -> https://host/v1/chat/completions?cid=trace_123&other=param
      https://host/api?cid=trace_123/v1/chat/completions
        -> https://host/api/v1/chat/completions?cid=trace_123

    Args:
        raw_url: Candidate inference URL. Non-string values and blank strings
            are returned unchanged.

    Returns:
        The normalized URL string, or ``raw_url`` itself when it is not a
        non-blank string.
    """
    if not isinstance(raw_url, str):
        return raw_url
    url = raw_url.strip()
    if not url:
        return raw_url

    parsed = urlparse(url)
    path = (parsed.path or "").rstrip("/")
    query = parsed.query or ""

    if "/" in query:
        # Everything before the first '/' is genuine query text; the rest is a
        # path that upstream code appended after the query by mistake.
        real_query, _, tail = query.partition("/")

        # The misplaced path may be followed by more parameters, introduced by
        # '&' or by a second (malformed) '?'. Cut at whichever comes first.
        cut_points = [i for i in (tail.find("&"), tail.find("?")) if i >= 0]
        cut = min(cut_points) if cut_points else len(tail)
        misplaced_path = ("/" + tail[:cut]).rstrip("/")
        extra_query = tail[cut + 1 :]

        query = "&".join(part for part in (real_query, extra_query) if part)

        # Append the recovered path to the path the URL already had instead of
        # overwriting it; skip it when the real path is already the endpoint.
        if not path.endswith(_CHAT_COMPLETIONS_SUFFIX):
            path = f"{path}{misplaced_path}"

    if not path.endswith(_CHAT_COMPLETIONS_SUFFIX):
        path = f"{path}{_CHAT_COMPLETIONS_SUFFIX}"

    if "/" in query:
        # Last resort: drop everything from the first remaining '/' onwards.
        query = query.partition("/")[0]

    if path == parsed.path and query == parsed.query:
        # Nothing needed repair; return the (stripped) input untouched.
        return url
    return urlunparse(parsed._replace(path=path, query=query))
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _validate_url_structure(url: str, context: str = "") -> None:
|
|
92
|
+
"""
|
|
93
|
+
Validate that a URL has correct structure (path before query, not vice versa).
|
|
94
|
+
|
|
95
|
+
Raises ValueError if URL is malformed.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
url: The URL to validate
|
|
99
|
+
context: Optional context for error messages
|
|
100
|
+
|
|
101
|
+
Raises:
|
|
102
|
+
ValueError: If URL is malformed (path-like segments in query string)
|
|
103
|
+
"""
|
|
104
|
+
if not isinstance(url, str) or not url.strip():
|
|
105
|
+
return
|
|
106
|
+
|
|
107
|
+
try:
|
|
108
|
+
parsed = urlparse(url)
|
|
109
|
+
query = parsed.query or ""
|
|
110
|
+
|
|
111
|
+
# CRITICAL CHECK: If query contains path-like segments (contains /), it's malformed
|
|
112
|
+
if query and "/" in query:
|
|
113
|
+
path_segment = query.split("/", 1)[1] if "/" in query else ""
|
|
114
|
+
error_msg = (
|
|
115
|
+
f"FATAL [TASK_APP_URL_VALIDATION]: Malformed inference URL detected!\n"
|
|
116
|
+
f"\n"
|
|
117
|
+
f"URL: {url}\n"
|
|
118
|
+
f"Context: {context}\n"
|
|
119
|
+
f"\n"
|
|
120
|
+
f"The URL has a path-like segment ('/{path_segment}') in the query string.\n"
|
|
121
|
+
f"This indicates incorrect URL construction upstream.\n"
|
|
122
|
+
f"\n"
|
|
123
|
+
f"Expected: https://host/v1/chat/completions?cid=trace_123\n"
|
|
124
|
+
f"Malformed: https://host?cid=trace_123/v1/chat/completions\n"
|
|
125
|
+
f"\n"
|
|
126
|
+
f"This should be caught by the trainer, but if you see this,\n"
|
|
127
|
+
f"the trainer's URL validation may have failed.\n"
|
|
128
|
+
)
|
|
129
|
+
logger.error(error_msg)
|
|
130
|
+
raise ValueError(error_msg)
|
|
131
|
+
except ValueError:
|
|
132
|
+
raise
|
|
133
|
+
except Exception as e:
|
|
134
|
+
logger.warning(f"[URL_VALIDATION] Failed to parse URL: {url} (context: {context}, error: {e})")
|
|
135
|
+
|
|
136
|
+
|
|
14
137
|
def ensure_chat_completions_url(raw_url: Any, mode: str | None = None) -> Any:
|
|
15
138
|
"""
|
|
16
139
|
Ensure inference URLs point at the chat completions endpoint.
|
|
@@ -43,9 +166,75 @@ def ensure_chat_completions_url(raw_url: Any, mode: str | None = None) -> Any:
|
|
|
43
166
|
|
|
44
167
|
parsed = urlparse(url)
|
|
45
168
|
path = (parsed.path or "").rstrip("/")
|
|
169
|
+
query = parsed.query
|
|
170
|
+
|
|
171
|
+
logger.debug(
|
|
172
|
+
"ensure_chat_completions_url: parsing url=%s -> path=%r query=%r",
|
|
173
|
+
url,
|
|
174
|
+
path,
|
|
175
|
+
query,
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
# CRITICAL: Check for malformed URLs (path in query) and fix them FIRST
|
|
179
|
+
# Example: https://host?cid=trace_123/v1/chat/completions
|
|
180
|
+
# Should be: https://host/v1/chat/completions?cid=trace_123
|
|
181
|
+
if query and "/" in query:
|
|
182
|
+
logger.error(
|
|
183
|
+
f"[URL_FIX] Detected malformed URL in ensure_chat_completions_url: {url}\n"
|
|
184
|
+
f"Path-like segment found in query string. Attempting to fix..."
|
|
185
|
+
)
|
|
186
|
+
# Split query at first "/" to separate query params from path
|
|
187
|
+
query_parts = query.split("/", 1)
|
|
188
|
+
if len(query_parts) == 2:
|
|
189
|
+
# query_parts[0] is the actual query (e.g., "cid=trace_123")
|
|
190
|
+
# query_parts[1] is the path that was incorrectly put in query
|
|
191
|
+
actual_query = query_parts[0]
|
|
192
|
+
path_and_more = query_parts[1] # Could be "v1/chat/completions" or "v1/chat/completions&foo=bar"
|
|
193
|
+
|
|
194
|
+
# Extract the path part (everything before "&" or "?" if present)
|
|
195
|
+
# Handle both "&" (query param separator) and "?" (another malformed query separator)
|
|
196
|
+
if "&" in path_and_more:
|
|
197
|
+
# Path is followed by more query params (separated by &)
|
|
198
|
+
path_segment, extra_query = path_and_more.split("&", 1)
|
|
199
|
+
path_in_query = "/" + path_segment # Restore leading slash
|
|
200
|
+
# Merge extra query params with actual_query
|
|
201
|
+
actual_query = f"{actual_query}&{extra_query}"
|
|
202
|
+
elif "?" in path_and_more:
|
|
203
|
+
# Path is followed by more query params (separated by ?, which is malformed)
|
|
204
|
+
path_segment, extra_query = path_and_more.split("?", 1)
|
|
205
|
+
path_in_query = "/" + path_segment # Restore leading slash
|
|
206
|
+
# Merge extra query params with actual_query (use & as separator)
|
|
207
|
+
actual_query = f"{actual_query}&{extra_query}"
|
|
208
|
+
else:
|
|
209
|
+
# No extra query params, just the path
|
|
210
|
+
path_in_query = "/" + path_and_more # Restore leading slash
|
|
211
|
+
|
|
212
|
+
# If the path_in_query already contains /v1/chat/completions, use it
|
|
213
|
+
# Otherwise, append /v1/chat/completions
|
|
214
|
+
if path_in_query.startswith("/v1/chat/completions"):
|
|
215
|
+
final_path = path_in_query
|
|
216
|
+
else:
|
|
217
|
+
# Append /v1/chat/completions to whatever path we found
|
|
218
|
+
final_path = path_in_query.rstrip("/") + "/v1/chat/completions"
|
|
219
|
+
|
|
220
|
+
# Reconstruct URL correctly: path comes before query
|
|
221
|
+
parsed = parsed._replace(path=final_path, query=actual_query)
|
|
222
|
+
fixed_url = urlunparse(parsed)
|
|
223
|
+
logger.warning(f"[URL_FIX] Fixed malformed URL:\n FROM: {url}\n TO: {fixed_url}")
|
|
224
|
+
url = fixed_url
|
|
225
|
+
# Re-parse after fix
|
|
226
|
+
parsed = urlparse(url)
|
|
227
|
+
path = parsed.path.rstrip("/")
|
|
228
|
+
query = parsed.query
|
|
229
|
+
else:
|
|
230
|
+
# Can't parse - this shouldn't happen but validate will catch it
|
|
231
|
+
logger.error(f"[URL_FIX] Could not parse malformed query: {query}")
|
|
232
|
+
_validate_url_structure(url, context="ensure_chat_completions_url input - cannot fix")
|
|
233
|
+
|
|
46
234
|
if path.endswith("/v1/chat/completions"):
|
|
47
235
|
logger.debug("ensure_chat_completions_url: URL already normalized %s", url)
|
|
48
|
-
#
|
|
236
|
+
# Validate final URL
|
|
237
|
+
_validate_url_structure(url, context="ensure_chat_completions_url output")
|
|
49
238
|
return url
|
|
50
239
|
|
|
51
240
|
if not path:
|
|
@@ -55,6 +244,10 @@ def ensure_chat_completions_url(raw_url: Any, mode: str | None = None) -> Any:
|
|
|
55
244
|
|
|
56
245
|
rebuilt = parsed._replace(path=new_path)
|
|
57
246
|
normalized = urlunparse(rebuilt)
|
|
247
|
+
|
|
248
|
+
# CRITICAL: Validate the normalized URL
|
|
249
|
+
_validate_url_structure(normalized, context="ensure_chat_completions_url output")
|
|
250
|
+
|
|
58
251
|
logger.info(
|
|
59
252
|
"ensure_chat_completions_url: RL mode - normalized inference URL from %s to %s",
|
|
60
253
|
url,
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
This mirrors the structure of the Crafter task app wrapper while delegating
|
|
4
4
|
all configuration to the colocated `grpo_enron.py` module. Normal usage should
|
|
5
|
-
prefer invoking `uvx synth-ai
|
|
5
|
+
prefer invoking `uvx synth-ai deploy --runtime uvicorn grpo-enron`, but this module remains for
|
|
6
6
|
direct execution or importing the FastAPI app object.
|
|
7
7
|
"""
|
|
8
8
|
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""GEPA benchmark task apps (HotpotQA, IFBench, HoVer, PUPA)."""
|
|
2
|
+
|
|
3
|
+
# Import modules for side effects (task app registration) when package is imported.
|
|
4
|
+
from . import hotpotqa_task_app # noqa: F401
|
|
5
|
+
from . import hover_task_app # noqa: F401
|
|
6
|
+
from . import ifbench_task_app # noqa: F401
|
|
7
|
+
from . import pupa_task_app # noqa: F401
|