synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/analyze_semantic_words.sh +2 -2
- examples/baseline/banking77_baseline.py +204 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
- examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -3
- examples/qwen_vl/README.md +10 -12
- examples/qwen_vl/SETUP_COMPLETE.md +7 -8
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
- examples/qwen_vl/collect_data_via_cli.md +76 -84
- examples/qwen_vl/collect_vision_traces.py +4 -4
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
- examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
- examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
- examples/qwen_vl/run_vision_comparison.sh +6 -7
- examples/rl/README.md +5 -5
- examples/rl/configs/rl_from_base_qwen.toml +26 -1
- examples/rl/configs/rl_from_base_qwen17.toml +6 -2
- examples/rl/task_app/README.md +1 -2
- examples/rl/task_app/math_single_step.py +2 -2
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +1 -1
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
- examples/swe/task_app/README.md +32 -2
- examples/swe/task_app/grpo_swe_mini.py +4 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
- examples/swe/task_app/hosted/inference/openai_client.py +4 -38
- examples/swe/task_app/hosted/policy_routes.py +17 -0
- examples/swe/task_app/hosted/rollout.py +4 -2
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +841 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +288 -39
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +1 -1
- examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
- synth_ai/api/train/builders.py +99 -4
- synth_ai/api/train/cli.py +516 -26
- synth_ai/api/train/config_finder.py +13 -2
- synth_ai/api/train/configs/__init__.py +23 -2
- synth_ai/api/train/configs/prompt_learning.py +442 -0
- synth_ai/api/train/configs/rl.py +61 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/task_app.py +1 -1
- synth_ai/api/train/validators.py +277 -0
- synth_ai/auth/credentials.py +119 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cli/__init__.py +94 -18
- synth_ai/cli/__main__.py +0 -0
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +84 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1436 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +30 -158
- synth_ai/cli/deploy/__init__.py +43 -0
- synth_ai/cli/deploy.py +162 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +14 -8
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +107 -0
- synth_ai/cli/root.py +9 -5
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +20 -265
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +1 -10
- synth_ai/cli/task_app_modal_serve.py +4 -9
- synth_ai/cli/task_app_serve.py +4 -11
- synth_ai/cli/task_apps.py +51 -1480
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +1 -14
- synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/http.py +12 -0
- synth_ai/judge_schemas.py +10 -10
- synth_ai/learning/__init__.py +10 -0
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +184 -0
- synth_ai/learning/rl/client.py +3 -1
- synth_ai/pricing/__init__.py +2 -0
- synth_ai/pricing/model_pricing.py +57 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +518 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +2 -0
- synth_ai/task/tracing_utils.py +25 -25
- synth_ai/task/validators.py +45 -9
- synth_ai/task_app_cfgs.py +21 -0
- synth_ai/tracing_v3/config.py +162 -19
- synth_ai/tracing_v3/constants.py +1 -1
- synth_ai/tracing_v3/db_config.py +24 -38
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/storage/config.py +47 -13
- synth_ai/tracing_v3/storage/factory.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +113 -11
- synth_ai/tracing_v3/turso/native_manager.py +92 -16
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +30 -1
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/cli.py +149 -5
- synth_ai/utils/env.py +40 -33
- synth_ai/utils/http.py +4 -1
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/modal.py +285 -3
- synth_ai/utils/paths.py +48 -0
- synth_ai/utils/uvicorn.py +113 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
- synth_ai/cli/tui.py +0 -62
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -911
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
|
@@ -7,7 +7,9 @@ import logging
|
|
|
7
7
|
import os
|
|
8
8
|
import time
|
|
9
9
|
from typing import Any
|
|
10
|
+
from urllib.parse import urlparse, urlunparse
|
|
10
11
|
|
|
12
|
+
import click
|
|
11
13
|
import httpx
|
|
12
14
|
|
|
13
15
|
logger = logging.getLogger(__name__)
|
|
@@ -50,20 +52,19 @@ class OpenAIClient:
|
|
|
50
52
|
# Make a copy to avoid modifying the original
|
|
51
53
|
fixed_request = request.copy()
|
|
52
54
|
|
|
53
|
-
# Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI
|
|
54
|
-
#
|
|
55
|
+
# Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI).
|
|
56
|
+
# Groq shares the API surface but we keep tool enforcement fields intact.
|
|
55
57
|
is_openai = False
|
|
58
|
+
is_groq = False
|
|
56
59
|
try:
|
|
57
60
|
if isinstance(target_url, str):
|
|
58
61
|
low = target_url.lower()
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
or ("/proxy/openai" in low)
|
|
66
|
-
)
|
|
62
|
+
if "groq.com" in low or "/proxy/groq" in low:
|
|
63
|
+
is_groq = True
|
|
64
|
+
elif ("openai.com" in low) or ("azure" in low and ".openai." in low) or (
|
|
65
|
+
"/proxy/openai" in low
|
|
66
|
+
):
|
|
67
|
+
is_openai = True
|
|
67
68
|
except Exception:
|
|
68
69
|
is_openai = False
|
|
69
70
|
|
|
@@ -149,11 +150,169 @@ class OpenAIClient:
|
|
|
149
150
|
OpenAI-compatible chat completion response
|
|
150
151
|
"""
|
|
151
152
|
base = (base_url or self.base_url).rstrip("/")
|
|
152
|
-
#
|
|
153
|
-
|
|
153
|
+
# Ensure processed_request is defined for error logging paths
|
|
154
|
+
processed_request: dict[str, Any] = dict(request or {})
|
|
155
|
+
|
|
156
|
+
# Bulletproof normalization BEFORE any parsing
|
|
157
|
+
def _local_force_normalize(u: str) -> str:
|
|
158
|
+
if not isinstance(u, str) or not u:
|
|
159
|
+
return u
|
|
160
|
+
p = urlparse(u)
|
|
161
|
+
path = (p.path or "").rstrip("/")
|
|
162
|
+
q = p.query or ""
|
|
163
|
+
# If query contains a path segment, extract and repair
|
|
164
|
+
if q and "/" in q:
|
|
165
|
+
before, after = q.split("/", 1)
|
|
166
|
+
# Split off any extra query parameters that were appended after the path
|
|
167
|
+
cut_positions = [i for i in [after.find("&"), after.find("?")] if i >= 0]
|
|
168
|
+
cut = min(cut_positions) if cut_positions else len(after)
|
|
169
|
+
path_from_query = "/" + after[:cut]
|
|
170
|
+
extra_query = after[cut + 1 :] if cut < len(after) else ""
|
|
171
|
+
merged_query = before
|
|
172
|
+
if extra_query:
|
|
173
|
+
merged_query = f"{merged_query}&{extra_query}" if merged_query else extra_query
|
|
174
|
+
# Ensure final path
|
|
175
|
+
final_path = path_from_query if path_from_query.startswith("/v1/chat/completions") else f"{path_from_query.rstrip('/')}/v1/chat/completions"
|
|
176
|
+
p = p._replace(path=final_path, query=merged_query)
|
|
177
|
+
u = urlunparse(p)
|
|
178
|
+
p = urlparse(u)
|
|
179
|
+
path = p.path or ""
|
|
180
|
+
q = p.query or ""
|
|
181
|
+
if not path.endswith("/v1/chat/completions"):
|
|
182
|
+
new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
|
|
183
|
+
p = p._replace(path=new_path)
|
|
184
|
+
u = urlunparse(p)
|
|
185
|
+
p = urlparse(u)
|
|
186
|
+
q = p.query or ""
|
|
187
|
+
if q and "/" in q:
|
|
188
|
+
# Last-resort: drop anything after first '/'
|
|
189
|
+
safe_q = q.split("/")[0]
|
|
190
|
+
p = p._replace(query=safe_q)
|
|
191
|
+
u = urlunparse(p)
|
|
192
|
+
return u
|
|
193
|
+
|
|
194
|
+
norm_base = None
|
|
195
|
+
try:
|
|
196
|
+
# Try importing shared normalizer first
|
|
197
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.utils import (
|
|
198
|
+
force_normalize_chat_completions_url,
|
|
199
|
+
)
|
|
200
|
+
norm_base = force_normalize_chat_completions_url(base)
|
|
201
|
+
except Exception:
|
|
202
|
+
norm_base = _local_force_normalize(base)
|
|
203
|
+
base = norm_base or base
|
|
204
|
+
# Parse URL to handle query parameters correctly
|
|
205
|
+
parsed = urlparse(base)
|
|
206
|
+
path = parsed.path.rstrip("/")
|
|
207
|
+
query = parsed.query
|
|
208
|
+
|
|
209
|
+
# Debug: Log URL parsing
|
|
210
|
+
logger.error(f"[URL_PARSE] base={base} parsed.path={parsed.path} parsed.query={parsed.query}")
|
|
211
|
+
|
|
212
|
+
# CRITICAL FIX: Handle malformed URLs where path is incorrectly in the query string
|
|
213
|
+
# Example: https://host?cid=trace_123/v1/chat/completions
|
|
214
|
+
# Should be: https://host/v1/chat/completions?cid=trace_123
|
|
215
|
+
|
|
216
|
+
# ALWAYS check for malformed URLs - this is CRITICAL
|
|
217
|
+
# CRASH IMMEDIATELY if URL is malformed - don't let it through!
|
|
218
|
+
if query and "/" in query:
|
|
219
|
+
logger.error(f"[URL_FATAL] MALFORMED URL DETECTED AT START: base={base} query={query}")
|
|
220
|
+
# Try to fix it
|
|
221
|
+
logger.error(f"[URL_FIX_TRIGGERED] Query contains '/': query={query}")
|
|
222
|
+
# This is a malformed URL - extract path from query and fix it
|
|
223
|
+
logger.error(
|
|
224
|
+
f"[URL_FIX] Malformed URL detected: {base}\n"
|
|
225
|
+
f"Query contains path segments. Fixing..."
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
# Find where the path starts in the query string
|
|
229
|
+
# The query format is: "cid=value/path" or similar
|
|
230
|
+
# We need to find the first "/" that starts a path segment
|
|
231
|
+
query_parts = query.split("/", 1)
|
|
232
|
+
if len(query_parts) == 2:
|
|
233
|
+
# query_parts[0] is the actual query (e.g., "cid=trace_123")
|
|
234
|
+
# query_parts[1] is the path that was incorrectly put in query
|
|
235
|
+
actual_query = query_parts[0]
|
|
236
|
+
path_and_more = query_parts[1] # Could be "v1/chat/completions" or "v1/chat/completions&foo=bar"
|
|
237
|
+
|
|
238
|
+
# Extract the path part (everything before "&" or "?" if present)
|
|
239
|
+
# Handle both "&" (query param separator) and "?" (another malformed query separator)
|
|
240
|
+
if "&" in path_and_more:
|
|
241
|
+
# Path is followed by more query params (separated by &)
|
|
242
|
+
path_segment, extra_query = path_and_more.split("&", 1)
|
|
243
|
+
path_in_query = "/" + path_segment # Restore leading slash
|
|
244
|
+
# Merge extra query params with actual_query
|
|
245
|
+
actual_query = f"{actual_query}&{extra_query}"
|
|
246
|
+
elif "?" in path_and_more:
|
|
247
|
+
# Path is followed by more query params (separated by ?, which is malformed)
|
|
248
|
+
path_segment, extra_query = path_and_more.split("?", 1)
|
|
249
|
+
path_in_query = "/" + path_segment # Restore leading slash
|
|
250
|
+
# Merge extra query params with actual_query (use & as separator)
|
|
251
|
+
actual_query = f"{actual_query}&{extra_query}"
|
|
252
|
+
else:
|
|
253
|
+
# No extra query params, just the path
|
|
254
|
+
path_in_query = "/" + path_and_more # Restore leading slash
|
|
255
|
+
|
|
256
|
+
# If the path_in_query already contains /v1/chat/completions, use it
|
|
257
|
+
# Otherwise, append /v1/chat/completions
|
|
258
|
+
if path_in_query.startswith("/v1/chat/completions"):
|
|
259
|
+
final_path = path_in_query
|
|
260
|
+
else:
|
|
261
|
+
# Append /v1/chat/completions to whatever path we found
|
|
262
|
+
final_path = path_in_query.rstrip("/") + "/v1/chat/completions"
|
|
263
|
+
|
|
264
|
+
# Reconstruct URL correctly: path comes before query
|
|
265
|
+
parsed = parsed._replace(path=final_path, query=actual_query)
|
|
266
|
+
url = urlunparse(parsed)
|
|
267
|
+
logger.warning(f"[URL_FIX] Fixed malformed URL:\n FROM: {base}\n TO: {url}")
|
|
268
|
+
else:
|
|
269
|
+
# Can't parse, fall through to normal processing
|
|
270
|
+
logger.error(f"[URL_FIX] Could not parse malformed query: {query}")
|
|
271
|
+
path = parsed.path.rstrip("/")
|
|
272
|
+
if not path.endswith("/v1/chat/completions"):
|
|
273
|
+
new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
|
|
274
|
+
parsed = parsed._replace(path=new_path)
|
|
275
|
+
url = urlunparse(parsed)
|
|
276
|
+
else:
|
|
277
|
+
url = base
|
|
278
|
+
# Normal case: query params are separate from path
|
|
279
|
+
elif path.endswith("/v1/chat/completions"):
|
|
154
280
|
url = base
|
|
155
281
|
else:
|
|
156
|
-
|
|
282
|
+
# Append /v1/chat/completions to the path, preserving query params
|
|
283
|
+
new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
|
|
284
|
+
parsed = parsed._replace(path=new_path)
|
|
285
|
+
url = urlunparse(parsed)
|
|
286
|
+
logger.debug(f"[URL_CONSTRUCT] Added path to URL: {base} -> {url}")
|
|
287
|
+
|
|
288
|
+
# FINAL VALIDATION: Ensure the constructed URL is correct
|
|
289
|
+
final_parsed = urlparse(url)
|
|
290
|
+
final_path = final_parsed.path or ""
|
|
291
|
+
final_query = final_parsed.query or ""
|
|
292
|
+
|
|
293
|
+
# Verify path is correct
|
|
294
|
+
if not final_path.endswith("/v1/chat/completions"):
|
|
295
|
+
error_msg = (
|
|
296
|
+
f"FATAL [OpenAIClient]: URL missing /v1/chat/completions path!\n"
|
|
297
|
+
f"Original: {base}\n"
|
|
298
|
+
f"Constructed: {url}\n"
|
|
299
|
+
f"Path: {final_path}\n"
|
|
300
|
+
)
|
|
301
|
+
logger.error(error_msg)
|
|
302
|
+
raise ValueError(error_msg)
|
|
303
|
+
|
|
304
|
+
# Verify query doesn't contain path segments
|
|
305
|
+
if final_query and "/" in final_query:
|
|
306
|
+
error_msg = (
|
|
307
|
+
f"FATAL [OpenAIClient]: Query still contains path segments after fix!\n"
|
|
308
|
+
f"Original: {base}\n"
|
|
309
|
+
f"Constructed: {url}\n"
|
|
310
|
+
f"Query: {final_query}\n"
|
|
311
|
+
f"This indicates a bug in URL construction logic."
|
|
312
|
+
)
|
|
313
|
+
logger.error(error_msg)
|
|
314
|
+
raise ValueError(error_msg)
|
|
315
|
+
|
|
157
316
|
timeout = timeout_s or self.timeout_s
|
|
158
317
|
|
|
159
318
|
# Merge headers
|
|
@@ -234,38 +393,104 @@ class OpenAIClient:
|
|
|
234
393
|
logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Message[1] content value: {msg1_content_post if not isinstance(msg1_content_post, list) else f'list[{len(msg1_content_post)}]'}")
|
|
235
394
|
|
|
236
395
|
# Log request (redact messages in production)
|
|
396
|
+
# CRITICAL: Verify URL is correct BEFORE making HTTP request
|
|
397
|
+
final_parsed_check = urlparse(url)
|
|
398
|
+
logger.error(f"[URL_FINAL_CHECK] Before HTTP request: url={url} path={final_parsed_check.path} query={final_parsed_check.query}")
|
|
399
|
+
|
|
400
|
+
# CRASH IF URL IS STILL MALFORMED - DO NOT PROCEED
|
|
401
|
+
if final_parsed_check.query and "/" in final_parsed_check.query:
|
|
402
|
+
error_msg = (
|
|
403
|
+
f"FATAL [OpenAIClient]: URL IS STILL MALFORMED AFTER FIX ATTEMPT!\n"
|
|
404
|
+
f"Original base_url: {base_url or self.base_url}\n"
|
|
405
|
+
f"Constructed URL: {url}\n"
|
|
406
|
+
f"Path: {final_parsed_check.path}\n"
|
|
407
|
+
f"Query (contains path): {final_parsed_check.query}\n"
|
|
408
|
+
f"This will cause a 404 error. CRASHING NOW to prevent bad request."
|
|
409
|
+
)
|
|
410
|
+
logger.error(error_msg)
|
|
411
|
+
raise ValueError(error_msg)
|
|
412
|
+
|
|
413
|
+
# Verify path is correct
|
|
414
|
+
if not final_parsed_check.path.endswith("/v1/chat/completions"):
|
|
415
|
+
error_msg = (
|
|
416
|
+
f"FATAL [OpenAIClient]: URL missing /v1/chat/completions path!\n"
|
|
417
|
+
f"URL: {url}\n"
|
|
418
|
+
f"Path: {final_parsed_check.path}\n"
|
|
419
|
+
)
|
|
420
|
+
logger.error(error_msg)
|
|
421
|
+
raise ValueError(error_msg)
|
|
422
|
+
|
|
423
|
+
# Log request with detailed prompts/tools preview and sampling settings (Authorization is not logged)
|
|
237
424
|
logger.info(f"Inference POST target: {url}")
|
|
238
425
|
if extra_headers:
|
|
239
426
|
logger.info(f"Extra headers: {extra_headers}")
|
|
240
427
|
with contextlib.suppress(Exception):
|
|
241
428
|
keys_preview = sorted(processed_request.keys())
|
|
242
429
|
logger.info(f"Request keys: {keys_preview}")
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
430
|
+
|
|
431
|
+
# Detailed IO log: messages/tools/sampling and final payload fields
|
|
432
|
+
try:
|
|
433
|
+
import json as _json
|
|
434
|
+
|
|
435
|
+
def _truncate(text: str, limit: int = 2000) -> str:
|
|
436
|
+
return text if len(text) <= limit else text[:limit] + "…"
|
|
437
|
+
|
|
438
|
+
def _messages_preview(msgs: Any) -> str:
|
|
439
|
+
try:
|
|
440
|
+
out: list[dict[str, Any]] = []
|
|
441
|
+
if isinstance(msgs, list):
|
|
442
|
+
for m in msgs:
|
|
443
|
+
if not isinstance(m, dict):
|
|
444
|
+
continue
|
|
445
|
+
role = m.get("role")
|
|
446
|
+
content = m.get("content")
|
|
447
|
+
if isinstance(content, str):
|
|
448
|
+
text = content
|
|
449
|
+
elif isinstance(content, list):
|
|
450
|
+
parts: list[str] = []
|
|
451
|
+
for seg in content:
|
|
452
|
+
if isinstance(seg, dict) and isinstance(seg.get("text"), str):
|
|
453
|
+
parts.append(seg["text"])
|
|
454
|
+
text = "\n".join(parts)
|
|
258
455
|
else:
|
|
259
|
-
|
|
260
|
-
|
|
456
|
+
text = ""
|
|
457
|
+
out.append({"role": role, "content": _truncate(str(text), 4000)})
|
|
458
|
+
return _json.dumps(out)
|
|
459
|
+
except Exception:
|
|
460
|
+
return "[]"
|
|
261
461
|
|
|
262
|
-
|
|
462
|
+
def _tools_preview(tools: Any) -> str:
|
|
463
|
+
try:
|
|
464
|
+
return _truncate(_json.dumps(tools), 4000)
|
|
465
|
+
except Exception:
|
|
466
|
+
return "[]"
|
|
467
|
+
|
|
468
|
+
msgs = processed_request.get("messages") if isinstance(processed_request, dict) else None
|
|
469
|
+
tools = processed_request.get("tools") if isinstance(processed_request, dict) else None
|
|
470
|
+
io_log: dict[str, Any] = {
|
|
471
|
+
"llm.call": True,
|
|
472
|
+
"model": processed_request.get("model") if isinstance(processed_request, dict) else None,
|
|
473
|
+
"tool_choice": processed_request.get("tool_choice") if isinstance(processed_request, dict) else None,
|
|
474
|
+
"parallel_tool_calls": processed_request.get("parallel_tool_calls") if isinstance(processed_request, dict) else None,
|
|
475
|
+
"stop_after_tool_calls": processed_request.get("stop_after_tool_calls") if isinstance(processed_request, dict) else None,
|
|
476
|
+
"temperature": processed_request.get("temperature") if isinstance(processed_request, dict) else None,
|
|
477
|
+
"top_p": processed_request.get("top_p") if isinstance(processed_request, dict) else None,
|
|
478
|
+
"max_tokens": processed_request.get("max_tokens") if isinstance(processed_request, dict) else None,
|
|
479
|
+
"max_completion_tokens": processed_request.get("max_completion_tokens") if isinstance(processed_request, dict) else None,
|
|
480
|
+
"messages_preview": _messages_preview(msgs),
|
|
481
|
+
"tools_preview": _tools_preview(tools),
|
|
482
|
+
}
|
|
483
|
+
logger.info(io_log)
|
|
484
|
+
except Exception:
|
|
485
|
+
pass
|
|
486
|
+
|
|
487
|
+
# Final hard-guard for OpenAI/Groq: drop unsupported field
|
|
263
488
|
try:
|
|
264
|
-
|
|
489
|
+
low_url = url.lower()
|
|
490
|
+
if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
|
|
265
491
|
processed_request.pop("stop_after_tool_calls", None)
|
|
266
|
-
logger.info("Removed stop_after_tool_calls for
|
|
492
|
+
logger.info("Removed stop_after_tool_calls for %s request", "Groq/OpenAI")
|
|
267
493
|
# Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
|
|
268
|
-
low_url = url.lower()
|
|
269
494
|
if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
|
|
270
495
|
processed_request, dict
|
|
271
496
|
):
|
|
@@ -330,10 +555,70 @@ class OpenAIClient:
|
|
|
330
555
|
logger.info(
|
|
331
556
|
f"Inference response status=200, content-type={content_type}, bytes={len(body_text)}"
|
|
332
557
|
)
|
|
333
|
-
|
|
558
|
+
if body_text:
|
|
559
|
+
# Log raw output with generous preview to debug no-tool-call issues
|
|
560
|
+
preview_len = min(4000, len(body_text))
|
|
561
|
+
logger.info({
|
|
562
|
+
"llm.raw_response": True,
|
|
563
|
+
"bytes": len(body_text),
|
|
564
|
+
"preview": body_text[:preview_len],
|
|
565
|
+
})
|
|
334
566
|
|
|
335
567
|
result = response.json()
|
|
336
568
|
logger.info(f"Inference response parsed_type={type(result).__name__}")
|
|
569
|
+
|
|
570
|
+
tool_call_count = -1
|
|
571
|
+
# Normalize tool calls so downstream always sees a function tool call
|
|
572
|
+
try:
|
|
573
|
+
if isinstance(result, dict):
|
|
574
|
+
choices = result.get("choices")
|
|
575
|
+
if isinstance(choices, list) and choices:
|
|
576
|
+
msg = choices[0].get("message")
|
|
577
|
+
if isinstance(msg, dict):
|
|
578
|
+
# Prefer tool_calls; if missing but function_call is present, synthesize tool_calls
|
|
579
|
+
tc = msg.get("tool_calls")
|
|
580
|
+
fc = msg.get("function_call")
|
|
581
|
+
if (not isinstance(tc, list) or not tc) and isinstance(fc, dict):
|
|
582
|
+
name = fc.get("name") or "interact_many"
|
|
583
|
+
args = fc.get("arguments") or "{}"
|
|
584
|
+
msg["tool_calls"] = [
|
|
585
|
+
{
|
|
586
|
+
"id": "call_norm",
|
|
587
|
+
"type": "function",
|
|
588
|
+
"function": {"name": name, "arguments": args},
|
|
589
|
+
}
|
|
590
|
+
]
|
|
591
|
+
if isinstance(choices[0], dict):
|
|
592
|
+
choices[0]["finish_reason"] = "tool_calls"
|
|
593
|
+
# Log tool call count for debugging
|
|
594
|
+
try:
|
|
595
|
+
tc2 = msg.get("tool_calls")
|
|
596
|
+
count = len(tc2) if isinstance(tc2, list) else 0
|
|
597
|
+
logger.info({
|
|
598
|
+
"llm.tool_calls": True,
|
|
599
|
+
"count": count,
|
|
600
|
+
"finish_reason": choices[0].get("finish_reason") if isinstance(choices[0], dict) else None,
|
|
601
|
+
})
|
|
602
|
+
if count == 0:
|
|
603
|
+
click.echo(
|
|
604
|
+
"[openai-client] ✗ upstream response missing tool_calls; dumping preview to logs",
|
|
605
|
+
err=True,
|
|
606
|
+
)
|
|
607
|
+
logger.error(
|
|
608
|
+
"Inference response missing tool_calls; failing fast. Raw body preview: %s",
|
|
609
|
+
body_text[:500] if body_text else "<empty>",
|
|
610
|
+
)
|
|
611
|
+
raise ValueError("Inference response missing tool_calls")
|
|
612
|
+
tool_call_count = count
|
|
613
|
+
except Exception:
|
|
614
|
+
pass
|
|
615
|
+
except Exception:
|
|
616
|
+
pass
|
|
617
|
+
|
|
618
|
+
click.echo(
|
|
619
|
+
f"[openai-client] ✓ response ok with tool_calls={tool_call_count}",
|
|
620
|
+
err=True,
|
|
621
|
+
)
|
|
337
622
|
return result
|
|
338
623
|
|
|
339
624
|
except httpx.TimeoutException:
|
|
@@ -342,11 +627,31 @@ class OpenAIClient:
|
|
|
342
627
|
except httpx.HTTPStatusError as e:
|
|
343
628
|
status = e.response.status_code if e.response is not None else None
|
|
344
629
|
text = e.response.text if e.response is not None else str(e)
|
|
345
|
-
# Log
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
630
|
+
# Log full body and request diagnostics for debugging remote failures
|
|
631
|
+
try:
|
|
632
|
+
redacted_headers = dict(headers)
|
|
633
|
+
if "Authorization" in redacted_headers:
|
|
634
|
+
redacted_headers["Authorization"] = "***REDACTED***"
|
|
635
|
+
logger.error(
|
|
636
|
+
{
|
|
637
|
+
"openai_http_error": True,
|
|
638
|
+
"status": status,
|
|
639
|
+
"url": url,
|
|
640
|
+
"body": text,
|
|
641
|
+
}
|
|
642
|
+
)
|
|
643
|
+
logger.error(
|
|
644
|
+
{
|
|
645
|
+
"request_debug": True,
|
|
646
|
+
"status": status,
|
|
647
|
+
"target": url,
|
|
648
|
+
"headers": redacted_headers,
|
|
649
|
+
"payload": processed_request,
|
|
650
|
+
}
|
|
651
|
+
)
|
|
652
|
+
except Exception:
|
|
653
|
+
logger.error(f"HTTP error from {url}: {status} - {text}")
|
|
654
|
+
# Special case: token budget exceeded handled below, else 422 degrade, else re-raise
|
|
350
655
|
try:
|
|
351
656
|
if status == 400 and e.response is not None:
|
|
352
657
|
data = e.response.json()
|
|
@@ -399,6 +704,8 @@ class OpenAIClient:
|
|
|
399
704
|
logger.warning(
|
|
400
705
|
{
|
|
401
706
|
"token_budget_recovery": True,
|
|
707
|
+
"messages_tokens": messages_tokens,
|
|
708
|
+
"model_limit": model_limit,
|
|
402
709
|
"retry_max_tokens": new_max,
|
|
403
710
|
}
|
|
404
711
|
)
|
|
@@ -413,35 +720,6 @@ class OpenAIClient:
|
|
|
413
720
|
pass
|
|
414
721
|
except Exception:
|
|
415
722
|
pass
|
|
416
|
-
# Gracefully degrade on 422 so rollouts can still produce a trajectory
|
|
417
|
-
if status == 422:
|
|
418
|
-
try:
|
|
419
|
-
# Best-effort parse of error for diagnostics
|
|
420
|
-
err = None
|
|
421
|
-
try:
|
|
422
|
-
err = e.response.json()
|
|
423
|
-
except Exception:
|
|
424
|
-
err = {"error": "unprocessable"}
|
|
425
|
-
logger.warning({"inference_422_recovered": True})
|
|
426
|
-
except Exception:
|
|
427
|
-
pass
|
|
428
|
-
# Return a minimal OpenAI-compatible response with no tool_calls/content
|
|
429
|
-
import time as _t
|
|
430
|
-
|
|
431
|
-
return {
|
|
432
|
-
"id": f"cmpl-{int(_t.time())}",
|
|
433
|
-
"object": "chat.completion",
|
|
434
|
-
"created": int(_t.time()),
|
|
435
|
-
"model": processed_request.get("model") or "unknown",
|
|
436
|
-
"choices": [
|
|
437
|
-
{
|
|
438
|
-
"index": 0,
|
|
439
|
-
"message": {"role": "assistant", "content": "", "tool_calls": []},
|
|
440
|
-
"finish_reason": "stop",
|
|
441
|
-
}
|
|
442
|
-
],
|
|
443
|
-
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
|
|
444
|
-
}
|
|
445
723
|
raise
|
|
446
724
|
except Exception as e:
|
|
447
725
|
logger.error(f"Unexpected error calling {url}: {e}")
|
|
@@ -507,14 +785,29 @@ class OpenAIClient:
|
|
|
507
785
|
OpenAI-compatible chat completion response
|
|
508
786
|
"""
|
|
509
787
|
last_error = None
|
|
788
|
+
processed_request: dict[str, Any] = dict(request or {})
|
|
510
789
|
wait_time = 1.0
|
|
511
790
|
|
|
512
791
|
for attempt in range(max_retries + 1):
|
|
513
792
|
try:
|
|
514
793
|
# Apply parameter fixes to the request
|
|
794
|
+
# CRITICAL: Use proper URL parsing, not string concatenation!
|
|
795
|
+
target_base = base_url or self.base_url
|
|
796
|
+
if target_base:
|
|
797
|
+
parsed_target = urlparse(target_base)
|
|
798
|
+
target_path = parsed_target.path.rstrip("/")
|
|
799
|
+
if not target_path.endswith("/v1/chat/completions"):
|
|
800
|
+
new_target_path = f"{target_path}/v1/chat/completions" if target_path else "/v1/chat/completions"
|
|
801
|
+
parsed_target = parsed_target._replace(path=new_target_path)
|
|
802
|
+
target_url = urlunparse(parsed_target)
|
|
803
|
+
else:
|
|
804
|
+
target_url = target_base
|
|
805
|
+
else:
|
|
806
|
+
target_url = None
|
|
807
|
+
|
|
515
808
|
processed_request = self._fix_model_parameters(
|
|
516
809
|
request,
|
|
517
|
-
target_url=
|
|
810
|
+
target_url=target_url,
|
|
518
811
|
)
|
|
519
812
|
return await self.generate(
|
|
520
813
|
request=processed_request,
|
|
@@ -546,47 +839,16 @@ class OpenAIClient:
|
|
|
546
839
|
error_block.get("code") or error_block.get("type") or ""
|
|
547
840
|
).lower()
|
|
548
841
|
if error_code in {"tool_use_failed", "tool_call_failed"}:
|
|
549
|
-
logger.
|
|
842
|
+
logger.error(
|
|
550
843
|
{
|
|
551
844
|
"tool_use_failed": True,
|
|
552
845
|
"target": (base_url or self.base_url),
|
|
553
846
|
"message": error_block.get("message") if isinstance(error_block, dict) else None,
|
|
554
847
|
}
|
|
555
848
|
)
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
"object": "chat.completion",
|
|
560
|
-
"created": int(time.time()),
|
|
561
|
-
"model": processed_request.get("model"),
|
|
562
|
-
"choices": [
|
|
563
|
-
{
|
|
564
|
-
"index": 0,
|
|
565
|
-
"message": {
|
|
566
|
-
"role": "assistant",
|
|
567
|
-
"content": "",
|
|
568
|
-
"tool_calls": [
|
|
569
|
-
{
|
|
570
|
-
"id": f"call_fallback_{int(time.time() * 1000)}",
|
|
571
|
-
"type": "function",
|
|
572
|
-
"function": {
|
|
573
|
-
"name": "interact_many",
|
|
574
|
-
"arguments": json.dumps(
|
|
575
|
-
{"actions": fallback_actions}
|
|
576
|
-
),
|
|
577
|
-
},
|
|
578
|
-
}
|
|
579
|
-
],
|
|
580
|
-
},
|
|
581
|
-
"finish_reason": "tool_calls",
|
|
582
|
-
}
|
|
583
|
-
],
|
|
584
|
-
}
|
|
585
|
-
if isinstance(response_data.get("usage"), dict):
|
|
586
|
-
fallback_response["usage"] = response_data["usage"]
|
|
587
|
-
if isinstance(error_block, dict):
|
|
588
|
-
fallback_response["error"] = error_block
|
|
589
|
-
return fallback_response
|
|
849
|
+
raise RuntimeError(
|
|
850
|
+
f"Inference 400 response (tool call failed): {error_block.get('message') if isinstance(error_block, dict) else 'Tool call failed'}"
|
|
851
|
+
) from e
|
|
590
852
|
# This is a different type of 400 error, don't retry
|
|
591
853
|
try:
|
|
592
854
|
redacted_headers = {}
|
|
@@ -651,7 +913,9 @@ class OpenAIClient:
|
|
|
651
913
|
await asyncio.sleep(wait_time)
|
|
652
914
|
wait_time *= backoff_factor
|
|
653
915
|
|
|
654
|
-
|
|
916
|
+
if last_error is not None:
|
|
917
|
+
raise last_error
|
|
918
|
+
raise RuntimeError("RL inference retries exhausted with no captured exception")
|
|
655
919
|
|
|
656
920
|
|
|
657
921
|
def create_inference_client(
|
|
@@ -726,7 +990,8 @@ def create_inference_client(
|
|
|
726
990
|
) -> dict[str, Any]:
|
|
727
991
|
return {"status": "ok", "dummy": True}
|
|
728
992
|
|
|
729
|
-
|
|
993
|
+
import typing as _t
|
|
994
|
+
return _t.cast(OpenAIClient, _DummyClient())
|
|
730
995
|
|
|
731
996
|
return OpenAIClient(
|
|
732
997
|
base_url=task_app.vllm_base_url,
|