synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/analyze_semantic_words.sh +2 -2
- examples/baseline/banking77_baseline.py +204 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
- examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -3
- examples/qwen_vl/README.md +10 -12
- examples/qwen_vl/SETUP_COMPLETE.md +7 -8
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
- examples/qwen_vl/collect_data_via_cli.md +76 -84
- examples/qwen_vl/collect_vision_traces.py +4 -4
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
- examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
- examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
- examples/qwen_vl/run_vision_comparison.sh +6 -7
- examples/rl/README.md +5 -5
- examples/rl/configs/rl_from_base_qwen.toml +26 -1
- examples/rl/configs/rl_from_base_qwen17.toml +6 -2
- examples/rl/task_app/README.md +1 -2
- examples/rl/task_app/math_single_step.py +2 -2
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +1 -1
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
- examples/swe/task_app/README.md +32 -2
- examples/swe/task_app/grpo_swe_mini.py +4 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
- examples/swe/task_app/hosted/inference/openai_client.py +4 -38
- examples/swe/task_app/hosted/policy_routes.py +17 -0
- examples/swe/task_app/hosted/rollout.py +4 -2
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +841 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +288 -39
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +1 -1
- examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
- synth_ai/api/train/builders.py +99 -4
- synth_ai/api/train/cli.py +516 -26
- synth_ai/api/train/config_finder.py +13 -2
- synth_ai/api/train/configs/__init__.py +23 -2
- synth_ai/api/train/configs/prompt_learning.py +442 -0
- synth_ai/api/train/configs/rl.py +61 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/task_app.py +1 -1
- synth_ai/api/train/validators.py +277 -0
- synth_ai/auth/credentials.py +119 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cli/__init__.py +94 -18
- synth_ai/cli/__main__.py +0 -0
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +84 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1436 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +30 -158
- synth_ai/cli/deploy/__init__.py +43 -0
- synth_ai/cli/deploy.py +162 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +14 -8
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +107 -0
- synth_ai/cli/root.py +9 -5
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +20 -265
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +1 -10
- synth_ai/cli/task_app_modal_serve.py +4 -9
- synth_ai/cli/task_app_serve.py +4 -11
- synth_ai/cli/task_apps.py +51 -1480
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +1 -14
- synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/http.py +12 -0
- synth_ai/judge_schemas.py +10 -10
- synth_ai/learning/__init__.py +10 -0
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +184 -0
- synth_ai/learning/rl/client.py +3 -1
- synth_ai/pricing/__init__.py +2 -0
- synth_ai/pricing/model_pricing.py +57 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +518 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +2 -0
- synth_ai/task/tracing_utils.py +25 -25
- synth_ai/task/validators.py +45 -9
- synth_ai/task_app_cfgs.py +21 -0
- synth_ai/tracing_v3/config.py +162 -19
- synth_ai/tracing_v3/constants.py +1 -1
- synth_ai/tracing_v3/db_config.py +24 -38
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/storage/config.py +47 -13
- synth_ai/tracing_v3/storage/factory.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +113 -11
- synth_ai/tracing_v3/turso/native_manager.py +92 -16
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +30 -1
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/cli.py +149 -5
- synth_ai/utils/env.py +40 -33
- synth_ai/utils/http.py +4 -1
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/modal.py +285 -3
- synth_ai/utils/paths.py +48 -0
- synth_ai/utils/uvicorn.py +113 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
- synth_ai/cli/tui.py +0 -62
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -911
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
|
@@ -5,14 +5,21 @@ from __future__ import annotations
|
|
|
5
5
|
import logging
|
|
6
6
|
import os
|
|
7
7
|
import sys
|
|
8
|
+
from urllib.parse import parse_qs, urlparse
|
|
8
9
|
from collections.abc import Iterable, Sequence
|
|
9
10
|
from contextlib import suppress
|
|
10
11
|
from dataclasses import dataclass
|
|
12
|
+
from datetime import UTC, datetime
|
|
11
13
|
from pathlib import Path
|
|
12
14
|
from typing import Any
|
|
13
15
|
|
|
16
|
+
from fastapi import HTTPException
|
|
17
|
+
from pydantic import BaseModel
|
|
18
|
+
|
|
19
|
+
from pydantic import BaseModel
|
|
20
|
+
|
|
14
21
|
from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
|
|
15
|
-
from synth_ai.task.contracts import RolloutMetrics, RolloutRequest, RolloutResponse, TaskInfo
|
|
22
|
+
from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
|
|
16
23
|
from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
|
|
17
24
|
from synth_ai.task.json import to_jsonable # noqa: F401 (imported for side-effect compatibility)
|
|
18
25
|
from synth_ai.task.rubrics import load_rubric
|
|
@@ -115,6 +122,27 @@ try:
|
|
|
115
122
|
except Exception:
|
|
116
123
|
pass
|
|
117
124
|
|
|
125
|
+
try:
|
|
126
|
+
from .synth_envs_hosted.utils import (
|
|
127
|
+
ensure_chat_completions_url,
|
|
128
|
+
extract_trace_correlation_id,
|
|
129
|
+
)
|
|
130
|
+
except Exception: # pragma: no cover - fallback when optional deps missing
|
|
131
|
+
def ensure_chat_completions_url(raw_url, mode=None):
|
|
132
|
+
return raw_url
|
|
133
|
+
|
|
134
|
+
def extract_trace_correlation_id(_raw_url, mode=None):
|
|
135
|
+
if not isinstance(_raw_url, str):
|
|
136
|
+
return None
|
|
137
|
+
parsed = urlparse(_raw_url)
|
|
138
|
+
query_params = parse_qs(parsed.query or "")
|
|
139
|
+
for key in ("cid", "trace", "trace_correlation_id"):
|
|
140
|
+
values = query_params.get(key) or []
|
|
141
|
+
for value in values:
|
|
142
|
+
if isinstance(value, str) and value.strip():
|
|
143
|
+
return value.strip()
|
|
144
|
+
return None
|
|
145
|
+
|
|
118
146
|
HAS_HOSTED = True
|
|
119
147
|
try:
|
|
120
148
|
import crafter # type: ignore
|
|
@@ -306,7 +334,7 @@ def build_dataset() -> tuple[TaskDatasetRegistry, CrafterDataset]:
|
|
|
306
334
|
def _base_task_info(dataset: CrafterDataset) -> TaskInfo:
|
|
307
335
|
return TaskInfo(
|
|
308
336
|
task={"id": "crafter_classic", "name": "Crafter Classic", "version": "1.0.0"},
|
|
309
|
-
|
|
337
|
+
environment="crafter",
|
|
310
338
|
action_space={
|
|
311
339
|
"type": "discrete",
|
|
312
340
|
"size": len(crafter_constants.actions),
|
|
@@ -397,22 +425,29 @@ def provide_task_instances(
|
|
|
397
425
|
dataset: CrafterDataset, base_info: TaskInfo, seeds: Sequence[int]
|
|
398
426
|
) -> Iterable[TaskInfo]:
|
|
399
427
|
infos: list[TaskInfo] = []
|
|
428
|
+
base_observation = getattr(base_info, "observation", None)
|
|
429
|
+
if hasattr(base_observation, "model_dump"):
|
|
430
|
+
observation_template = base_observation.model_dump()
|
|
431
|
+
elif isinstance(base_observation, dict):
|
|
432
|
+
observation_template = dict(base_observation)
|
|
433
|
+
else:
|
|
434
|
+
observation_template = {}
|
|
400
435
|
for seed_value in seeds:
|
|
401
436
|
summary = dataset.describe_seed(seed_value)
|
|
402
437
|
infos.append(
|
|
403
438
|
TaskInfo(
|
|
404
439
|
task=base_info.task,
|
|
405
|
-
|
|
440
|
+
environment=base_info.environment,
|
|
406
441
|
action_space=base_info.action_space,
|
|
407
442
|
observation={
|
|
408
|
-
**
|
|
443
|
+
**observation_template,
|
|
409
444
|
"seed": seed_value,
|
|
410
445
|
"traits": summary["traits"],
|
|
411
446
|
"inventory": summary["inventory"],
|
|
412
447
|
"player_position": summary["player_position"],
|
|
413
448
|
},
|
|
414
449
|
dataset={
|
|
415
|
-
**base_info.dataset,
|
|
450
|
+
**base_info.dataset.model_dump(),
|
|
416
451
|
"seed": seed_value,
|
|
417
452
|
"difficulty": summary["difficulty"],
|
|
418
453
|
"config": summary["config"],
|
|
@@ -536,7 +571,47 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
|
|
|
536
571
|
|
|
537
572
|
request = _coerce_math_to_crafter(request)
|
|
538
573
|
|
|
574
|
+
record_cfg = request.record.model_copy(
|
|
575
|
+
update={
|
|
576
|
+
"return_trace": True,
|
|
577
|
+
"trace_format": "structured",
|
|
578
|
+
}
|
|
579
|
+
)
|
|
580
|
+
request = request.model_copy(update={"record": record_cfg})
|
|
581
|
+
|
|
539
582
|
policy_cfg = dict(request.policy.config or {})
|
|
583
|
+
logger.info(
|
|
584
|
+
"ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
|
|
585
|
+
sorted(policy_cfg.keys()),
|
|
586
|
+
policy_cfg.get("inference_url"),
|
|
587
|
+
request.run_id,
|
|
588
|
+
request.mode,
|
|
589
|
+
)
|
|
590
|
+
inferred_url = ensure_chat_completions_url(policy_cfg.get("inference_url"), mode=request.mode)
|
|
591
|
+
if isinstance(inferred_url, str) and inferred_url:
|
|
592
|
+
policy_cfg["inference_url"] = inferred_url
|
|
593
|
+
else:
|
|
594
|
+
logger.warning(
|
|
595
|
+
"ROLLOUT_EXEC: inference_url missing or not normalized run_id=%s raw=%s",
|
|
596
|
+
request.run_id,
|
|
597
|
+
policy_cfg.get("inference_url"),
|
|
598
|
+
)
|
|
599
|
+
|
|
600
|
+
trace_correlation_id = extract_trace_correlation_id(policy_cfg.get("inference_url"), mode=request.mode)
|
|
601
|
+
if request.mode == RolloutMode.RL:
|
|
602
|
+
assert trace_correlation_id, (
|
|
603
|
+
f"FATAL: trace_correlation_id extraction failed for run_id={request.run_id}. "
|
|
604
|
+
f"policy_cfg_keys={sorted(policy_cfg.keys())} inference_url={policy_cfg.get('inference_url')}"
|
|
605
|
+
)
|
|
606
|
+
if trace_correlation_id:
|
|
607
|
+
policy_cfg["trace_correlation_id"] = trace_correlation_id
|
|
608
|
+
|
|
609
|
+
pipeline_metadata: dict[str, Any] = {}
|
|
610
|
+
if trace_correlation_id:
|
|
611
|
+
pipeline_metadata["trace_correlation_id"] = trace_correlation_id
|
|
612
|
+
if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
|
|
613
|
+
pipeline_metadata.setdefault("inference_url", policy_cfg["inference_url"])
|
|
614
|
+
|
|
540
615
|
try:
|
|
541
616
|
max_llm_calls = int(policy_cfg.get("max_llm_calls") or 10)
|
|
542
617
|
except Exception:
|
|
@@ -585,17 +660,122 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
|
|
|
585
660
|
safety=LegacyRolloutSafetyConfig(**request.safety.model_dump()),
|
|
586
661
|
training_session_id=request.training_session_id,
|
|
587
662
|
synth_base_url=request.synth_base_url,
|
|
663
|
+
mode=request.mode,
|
|
588
664
|
)
|
|
589
665
|
|
|
590
666
|
legacy_response: LegacyRolloutResponse = await legacy_execute_rollout(
|
|
591
667
|
legacy_request, fastapi_request
|
|
592
668
|
)
|
|
593
669
|
data = legacy_response.model_dump()
|
|
670
|
+
logger.debug(
|
|
671
|
+
"ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
|
|
672
|
+
sorted(data.keys()),
|
|
673
|
+
bool(data.get("trace")),
|
|
674
|
+
)
|
|
594
675
|
metrics = data.get("metrics", {}) or {}
|
|
595
676
|
metrics.setdefault("outcome_score", None)
|
|
596
677
|
metrics.setdefault("events_score", None)
|
|
597
678
|
metrics.setdefault("details", {})
|
|
598
679
|
data["metrics"] = metrics
|
|
680
|
+
|
|
681
|
+
if data.get("trace") is None:
|
|
682
|
+
legacy_trace = getattr(legacy_response, "trace", None)
|
|
683
|
+
if legacy_trace is not None:
|
|
684
|
+
data["trace"] = legacy_trace
|
|
685
|
+
else:
|
|
686
|
+
tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
|
|
687
|
+
if callable(tracer_factory):
|
|
688
|
+
tracer = tracer_factory()
|
|
689
|
+
logger.debug(
|
|
690
|
+
"ROLLOUT_EXEC: trace backfill factory=%s", type(tracer)
|
|
691
|
+
)
|
|
692
|
+
if isinstance(tracer, SessionTracer):
|
|
693
|
+
try:
|
|
694
|
+
await tracer.initialize()
|
|
695
|
+
if tracer.db is not None:
|
|
696
|
+
trace_row = await tracer.db.get_session_trace(request.run_id)
|
|
697
|
+
if trace_row is not None:
|
|
698
|
+
data["trace"] = trace_row
|
|
699
|
+
except Exception as exc:
|
|
700
|
+
logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
|
|
701
|
+
finally:
|
|
702
|
+
with suppress(Exception):
|
|
703
|
+
await tracer.close()
|
|
704
|
+
|
|
705
|
+
final_cid = trace_correlation_id or f"trace_{request.run_id}"
|
|
706
|
+
data["trace_correlation_id"] = final_cid
|
|
707
|
+
|
|
708
|
+
existing_meta = data.get("pipeline_metadata")
|
|
709
|
+
if not isinstance(existing_meta, dict):
|
|
710
|
+
existing_meta = {}
|
|
711
|
+
existing_meta.setdefault("trace_correlation_id", final_cid)
|
|
712
|
+
if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
|
|
713
|
+
existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
|
|
714
|
+
data["pipeline_metadata"] = existing_meta
|
|
715
|
+
|
|
716
|
+
# Propagate inference_url into each legacy trajectory entry for downstream tooling.
|
|
717
|
+
inferred_url = policy_cfg.get("inference_url")
|
|
718
|
+
# Normalize the url before propagating into trajectories
|
|
719
|
+
try:
|
|
720
|
+
from .synth_envs_hosted.utils import (
|
|
721
|
+
ensure_chat_completions_url as _ensure_cc,
|
|
722
|
+
force_normalize_chat_completions_url as _force_cc,
|
|
723
|
+
)
|
|
724
|
+
if isinstance(inferred_url, str) and inferred_url:
|
|
725
|
+
inferred_url = _force_cc(inferred_url)
|
|
726
|
+
inferred_url = _ensure_cc(inferred_url, mode=request.mode)
|
|
727
|
+
except Exception:
|
|
728
|
+
pass
|
|
729
|
+
|
|
730
|
+
if "trajectories" in data:
|
|
731
|
+
normalized_trajs: list[dict[str, Any]] = []
|
|
732
|
+
for traj in data.get("trajectories", []):
|
|
733
|
+
if isinstance(traj, BaseModel):
|
|
734
|
+
traj_dict = traj.model_dump()
|
|
735
|
+
elif isinstance(traj, dict):
|
|
736
|
+
traj_dict = dict(traj)
|
|
737
|
+
else:
|
|
738
|
+
continue
|
|
739
|
+
traj_dict.setdefault("trace_correlation_id", final_cid)
|
|
740
|
+
if isinstance(inferred_url, str) and inferred_url and not traj_dict.get("inference_url"):
|
|
741
|
+
traj_dict["inference_url"] = inferred_url
|
|
742
|
+
|
|
743
|
+
# Inject nested info.meta.inference_url for each step (required by RL trainer)
|
|
744
|
+
try:
|
|
745
|
+
steps = traj_dict.get("steps", [])
|
|
746
|
+
if isinstance(steps, list):
|
|
747
|
+
for step in steps:
|
|
748
|
+
if not isinstance(step, dict):
|
|
749
|
+
continue
|
|
750
|
+
info = step.get("info")
|
|
751
|
+
if not isinstance(info, dict):
|
|
752
|
+
info = {}
|
|
753
|
+
meta = info.get("meta")
|
|
754
|
+
if not isinstance(meta, dict):
|
|
755
|
+
meta = {}
|
|
756
|
+
if isinstance(inferred_url, str) and inferred_url and not meta.get("inference_url"):
|
|
757
|
+
meta["inference_url"] = inferred_url
|
|
758
|
+
info["meta"] = meta
|
|
759
|
+
step["info"] = info
|
|
760
|
+
except Exception:
|
|
761
|
+
pass
|
|
762
|
+
|
|
763
|
+
normalized_trajs.append(traj_dict)
|
|
764
|
+
if normalized_trajs:
|
|
765
|
+
data["trajectories"] = normalized_trajs
|
|
766
|
+
|
|
767
|
+
if data.get("trace") is None:
|
|
768
|
+
data["trace"] = {
|
|
769
|
+
"session_id": request.run_id,
|
|
770
|
+
"created_at": datetime.now(UTC).isoformat(),
|
|
771
|
+
"metadata": dict(existing_meta),
|
|
772
|
+
"event_history": [],
|
|
773
|
+
"markov_blanket_message_history": [],
|
|
774
|
+
}
|
|
775
|
+
raise HTTPException(
|
|
776
|
+
status_code=500, detail="trace_payload_missing: task app did not emit a SessionTrace"
|
|
777
|
+
)
|
|
778
|
+
|
|
599
779
|
return RolloutResponse.model_validate(data)
|
|
600
780
|
|
|
601
781
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
This module now delegates to the TaskAppConfig defined in the colocated example at
|
|
4
4
|
`examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
|
|
5
5
|
(running the file directly or targeting `fastapi_app` from external tooling). Prefer using
|
|
6
|
-
`uvx synth-ai
|
|
6
|
+
`uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
from __future__ import annotations
|
|
@@ -148,8 +148,8 @@ class CrafterPolicy(Policy):
|
|
|
148
148
|
if self.use_tools:
|
|
149
149
|
payload["tools"] = TOOLS_SCHEMA
|
|
150
150
|
payload["tool_choice"] = "required"
|
|
151
|
-
|
|
152
|
-
|
|
151
|
+
payload["function_call"] = {"name": "interact_many"}
|
|
152
|
+
payload["parallel_tool_calls"] = False
|
|
153
153
|
payload["stop_after_tool_calls"] = 1
|
|
154
154
|
return payload
|
|
155
155
|
|
|
@@ -158,13 +158,7 @@ class CrafterPolicy(Policy):
|
|
|
158
158
|
response: dict[str, Any],
|
|
159
159
|
use_tools: bool = True,
|
|
160
160
|
) -> list[dict[str, Any]]:
|
|
161
|
-
"""Turn an inference response into environment tool calls.
|
|
162
|
-
|
|
163
|
-
- If tools were used, expect tool_calls-compatible output and forward as-is
|
|
164
|
-
in our simple JSON format: {"tool_name": str, "arguments": {...}}.
|
|
165
|
-
- If no tools, parse plain-text actions using CrafterReActAgent parser and
|
|
166
|
-
wrap them into a single interact_many tool call.
|
|
167
|
-
"""
|
|
161
|
+
"""Turn an inference response into environment tool calls."""
|
|
168
162
|
# First check if we got actual tool calls
|
|
169
163
|
choices = response.get("choices", [])
|
|
170
164
|
tool_calls: list[dict[str, Any]] = []
|
|
@@ -223,24 +217,6 @@ class CrafterPolicy(Policy):
|
|
|
223
217
|
normalized.append(tc)
|
|
224
218
|
return normalized
|
|
225
219
|
|
|
226
|
-
# Otherwise, parse plain text content for actions
|
|
227
|
-
text = ""
|
|
228
|
-
for choice in choices:
|
|
229
|
-
msg = choice.get("message", {})
|
|
230
|
-
content = msg.get("content", "")
|
|
231
|
-
if content:
|
|
232
|
-
text = content
|
|
233
|
-
break
|
|
234
|
-
|
|
235
|
-
if text:
|
|
236
|
-
# Try to parse actions from the text
|
|
237
|
-
from .shared import parse_actions
|
|
238
|
-
|
|
239
|
-
actions = parse_actions(text)
|
|
240
|
-
if actions:
|
|
241
|
-
# Wrap actions in interact_many tool call
|
|
242
|
-
return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
|
|
243
|
-
|
|
244
220
|
# No actions found
|
|
245
221
|
return []
|
|
246
222
|
|
|
@@ -46,7 +46,7 @@ class CrafterReActAgent:
|
|
|
46
46
|
"- Always return a single tool call: interact_many({actions: [...]})\n"
|
|
47
47
|
"- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
|
|
48
48
|
"- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
|
|
49
|
-
"
|
|
49
|
+
"\n"
|
|
50
50
|
"Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
|
|
51
51
|
"place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
|
|
52
52
|
"make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import contextlib
|
|
4
|
+
import logging
|
|
4
5
|
import os
|
|
5
6
|
|
|
6
7
|
from fastapi import FastAPI
|
|
@@ -9,6 +10,52 @@ from fastapi.responses import JSONResponse
|
|
|
9
10
|
from pydantic import BaseModel
|
|
10
11
|
from starlette.requests import Request
|
|
11
12
|
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
_VERSION_LOGGED = False
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _resolve_task_app_version() -> str:
|
|
19
|
+
env_version = os.getenv("TASK_APP_VERSION")
|
|
20
|
+
if isinstance(env_version, str) and env_version.strip():
|
|
21
|
+
return env_version.strip()
|
|
22
|
+
|
|
23
|
+
try:
|
|
24
|
+
import importlib.metadata as importlib_metadata
|
|
25
|
+
|
|
26
|
+
pkg_version = importlib_metadata.version("synth-ai")
|
|
27
|
+
if isinstance(pkg_version, str) and pkg_version.strip():
|
|
28
|
+
return pkg_version.strip()
|
|
29
|
+
except Exception:
|
|
30
|
+
pass
|
|
31
|
+
|
|
32
|
+
try:
|
|
33
|
+
import synth_ai
|
|
34
|
+
|
|
35
|
+
attr_version = getattr(synth_ai, "__version__", None)
|
|
36
|
+
if isinstance(attr_version, str) and attr_version.strip():
|
|
37
|
+
return attr_version.strip()
|
|
38
|
+
except Exception:
|
|
39
|
+
pass
|
|
40
|
+
|
|
41
|
+
return "unknown"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _log_task_app_version_once() -> None:
|
|
45
|
+
global _VERSION_LOGGED
|
|
46
|
+
if _VERSION_LOGGED:
|
|
47
|
+
return
|
|
48
|
+
|
|
49
|
+
version = _resolve_task_app_version()
|
|
50
|
+
build_id = os.getenv("TASK_APP_BUILD_ID")
|
|
51
|
+
|
|
52
|
+
if build_id:
|
|
53
|
+
logger.info("TASK_APP_VERSION: %s (build=%s)", version, build_id)
|
|
54
|
+
else:
|
|
55
|
+
logger.info("TASK_APP_VERSION: %s", version)
|
|
56
|
+
|
|
57
|
+
_VERSION_LOGGED = True
|
|
58
|
+
|
|
12
59
|
|
|
13
60
|
class TaskApp:
|
|
14
61
|
"""Holds service configuration and shared state."""
|
|
@@ -56,6 +103,8 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
|
|
|
56
103
|
allow_headers=["*"],
|
|
57
104
|
)
|
|
58
105
|
|
|
106
|
+
_log_task_app_version_once()
|
|
107
|
+
|
|
59
108
|
# Initialize task app configuration
|
|
60
109
|
task_app = TaskApp()
|
|
61
110
|
app.state.task_app = task_app
|
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import contextlib
|
|
5
5
|
import logging
|
|
6
|
+
import os
|
|
6
7
|
from typing import Any
|
|
7
8
|
|
|
8
9
|
import httpx
|
|
@@ -23,6 +24,15 @@ class OpenAIClient:
|
|
|
23
24
|
self.api_key = api_key
|
|
24
25
|
self.timeout_s = timeout_s
|
|
25
26
|
self.headers = {}
|
|
27
|
+
self._env_api_key: str | None = None
|
|
28
|
+
|
|
29
|
+
try:
|
|
30
|
+
env_key = os.getenv("ENVIRONMENT_API_KEY") or ""
|
|
31
|
+
env_key = env_key.strip()
|
|
32
|
+
if env_key:
|
|
33
|
+
self._env_api_key = env_key
|
|
34
|
+
except Exception:
|
|
35
|
+
self._env_api_key = None
|
|
26
36
|
|
|
27
37
|
if api_key:
|
|
28
38
|
self.headers["Authorization"] = f"Bearer {api_key}"
|
|
@@ -137,18 +147,49 @@ class OpenAIClient:
|
|
|
137
147
|
Returns:
|
|
138
148
|
OpenAI-compatible chat completion response
|
|
139
149
|
"""
|
|
140
|
-
|
|
150
|
+
# Build target URL robustly: if a full endpoint is given (with query or already ending
|
|
151
|
+
# in /chat/completions), preserve it; otherwise, append the path BEFORE query params.
|
|
152
|
+
from urllib.parse import urlparse, urlunparse
|
|
153
|
+
|
|
154
|
+
candidate = (base_url or self.base_url).strip()
|
|
155
|
+
try:
|
|
156
|
+
parsed = urlparse(candidate)
|
|
157
|
+
# If no scheme, treat as relative base (pass-through)
|
|
158
|
+
if not parsed.scheme or not parsed.netloc:
|
|
159
|
+
base_no_slash = candidate.rstrip("/")
|
|
160
|
+
url = f"{base_no_slash}/v1/chat/completions"
|
|
161
|
+
else:
|
|
162
|
+
path = (parsed.path or "").rstrip("/")
|
|
163
|
+
if path.endswith("/v1/chat/completions") or path.endswith("/chat/completions"):
|
|
164
|
+
new_path = path
|
|
165
|
+
elif path.endswith("/v1"):
|
|
166
|
+
new_path = f"{path}/chat/completions"
|
|
167
|
+
elif path.endswith("/chat"):
|
|
168
|
+
new_path = f"{path}/completions"
|
|
169
|
+
else:
|
|
170
|
+
new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
|
|
171
|
+
url = urlunparse(parsed._replace(path=new_path))
|
|
172
|
+
except Exception:
|
|
173
|
+
# Fallback to legacy behavior
|
|
174
|
+
url = (base_url or self.base_url).rstrip("/") + "/v1/chat/completions"
|
|
141
175
|
timeout = timeout_s or self.timeout_s
|
|
142
176
|
|
|
143
177
|
# Merge headers
|
|
144
178
|
headers = self.headers.copy()
|
|
179
|
+
try:
|
|
180
|
+
parsed_target = urlparse(url)
|
|
181
|
+
path_for_auth = (parsed_target.path or "") if parsed_target else ""
|
|
182
|
+
if self._env_api_key and "/proxy/" in path_for_auth:
|
|
183
|
+
headers.setdefault("X-API-Key", self._env_api_key)
|
|
184
|
+
except Exception:
|
|
185
|
+
pass
|
|
145
186
|
if extra_headers:
|
|
146
187
|
headers.update(extra_headers)
|
|
147
188
|
|
|
148
189
|
# Fix parameter compatibility for newer models
|
|
149
190
|
processed_request = self._fix_model_parameters(request, target_url=url)
|
|
150
191
|
|
|
151
|
-
# Log request (
|
|
192
|
+
# Log request with detailed prompts/tools preview and sampling settings (Authorization is not logged)
|
|
152
193
|
logger.info(f"Inference POST target: {url}")
|
|
153
194
|
if extra_headers:
|
|
154
195
|
logger.info(f"Extra headers: {extra_headers}")
|
|
@@ -156,13 +197,69 @@ class OpenAIClient:
|
|
|
156
197
|
keys_preview = sorted(processed_request.keys())
|
|
157
198
|
logger.info(f"Request keys: {keys_preview}")
|
|
158
199
|
|
|
159
|
-
#
|
|
200
|
+
# Detailed IO log: messages/tools/sampling and final payload fields
|
|
201
|
+
try:
|
|
202
|
+
import json as _json
|
|
203
|
+
|
|
204
|
+
def _truncate(text: str, limit: int = 2000) -> str:
|
|
205
|
+
return text if len(text) <= limit else text[:limit] + "…"
|
|
206
|
+
|
|
207
|
+
def _messages_preview(msgs: Any) -> str:
|
|
208
|
+
try:
|
|
209
|
+
out: list[dict[str, Any]] = []
|
|
210
|
+
if isinstance(msgs, list):
|
|
211
|
+
for m in msgs:
|
|
212
|
+
if not isinstance(m, dict):
|
|
213
|
+
continue
|
|
214
|
+
role = m.get("role")
|
|
215
|
+
content = m.get("content")
|
|
216
|
+
if isinstance(content, str):
|
|
217
|
+
text = content
|
|
218
|
+
elif isinstance(content, list):
|
|
219
|
+
parts: list[str] = []
|
|
220
|
+
for seg in content:
|
|
221
|
+
if isinstance(seg, dict) and isinstance(seg.get("text"), str):
|
|
222
|
+
parts.append(seg["text"])
|
|
223
|
+
text = "\n".join(parts)
|
|
224
|
+
else:
|
|
225
|
+
text = ""
|
|
226
|
+
out.append({"role": role, "content": _truncate(str(text), 4000)})
|
|
227
|
+
return _json.dumps(out)
|
|
228
|
+
except Exception:
|
|
229
|
+
return "[]"
|
|
230
|
+
|
|
231
|
+
def _tools_preview(tools: Any) -> str:
|
|
232
|
+
try:
|
|
233
|
+
return _truncate(_json.dumps(tools), 4000)
|
|
234
|
+
except Exception:
|
|
235
|
+
return "[]"
|
|
236
|
+
|
|
237
|
+
msgs = processed_request.get("messages") if isinstance(processed_request, dict) else None
|
|
238
|
+
tools = processed_request.get("tools") if isinstance(processed_request, dict) else None
|
|
239
|
+
io_log: dict[str, Any] = {
|
|
240
|
+
"llm.call": True,
|
|
241
|
+
"model": processed_request.get("model") if isinstance(processed_request, dict) else None,
|
|
242
|
+
"tool_choice": processed_request.get("tool_choice") if isinstance(processed_request, dict) else None,
|
|
243
|
+
"parallel_tool_calls": processed_request.get("parallel_tool_calls") if isinstance(processed_request, dict) else None,
|
|
244
|
+
"stop_after_tool_calls": processed_request.get("stop_after_tool_calls") if isinstance(processed_request, dict) else None,
|
|
245
|
+
"temperature": processed_request.get("temperature") if isinstance(processed_request, dict) else None,
|
|
246
|
+
"top_p": processed_request.get("top_p") if isinstance(processed_request, dict) else None,
|
|
247
|
+
"max_tokens": processed_request.get("max_tokens") if isinstance(processed_request, dict) else None,
|
|
248
|
+
"max_completion_tokens": processed_request.get("max_completion_tokens") if isinstance(processed_request, dict) else None,
|
|
249
|
+
"messages_preview": _messages_preview(msgs),
|
|
250
|
+
"tools_preview": _tools_preview(tools),
|
|
251
|
+
}
|
|
252
|
+
logger.info(io_log)
|
|
253
|
+
except Exception:
|
|
254
|
+
pass
|
|
255
|
+
|
|
256
|
+
# Final hard-guard for OpenAI/Groq: ensure unsupported field is not present
|
|
160
257
|
try:
|
|
161
|
-
|
|
258
|
+
low_url = url.lower()
|
|
259
|
+
if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
|
|
162
260
|
processed_request.pop("stop_after_tool_calls", None)
|
|
163
|
-
logger.info("Removed stop_after_tool_calls for OpenAI request")
|
|
261
|
+
logger.info("Removed stop_after_tool_calls for Groq/OpenAI request")
|
|
164
262
|
# Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
|
|
165
|
-
low_url = url.lower()
|
|
166
263
|
if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
|
|
167
264
|
processed_request, dict
|
|
168
265
|
):
|
|
@@ -228,13 +325,54 @@ class OpenAIClient:
|
|
|
228
325
|
f"Inference response status=200, content-type={content_type}, bytes={len(body_text)}"
|
|
229
326
|
)
|
|
230
327
|
if body_text:
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
328
|
+
# Log raw output with generous preview to debug no-tool-call issues
|
|
329
|
+
preview_len = min(4000, len(body_text))
|
|
330
|
+
logger.info({
|
|
331
|
+
"llm.raw_response": True,
|
|
332
|
+
"bytes": len(body_text),
|
|
333
|
+
"preview": body_text[:preview_len],
|
|
334
|
+
})
|
|
235
335
|
|
|
236
336
|
result = response.json()
|
|
237
337
|
logger.info(f"Inference response parsed_type={type(result).__name__}")
|
|
338
|
+
|
|
339
|
+
# Normalize tool calls so downstream always sees a function tool call
|
|
340
|
+
try:
|
|
341
|
+
if isinstance(result, dict):
|
|
342
|
+
choices = result.get("choices")
|
|
343
|
+
if isinstance(choices, list) and choices:
|
|
344
|
+
msg = choices[0].get("message")
|
|
345
|
+
if isinstance(msg, dict):
|
|
346
|
+
# Prefer tool_calls; if missing but function_call is present, synthesize tool_calls
|
|
347
|
+
tc = msg.get("tool_calls")
|
|
348
|
+
fc = msg.get("function_call")
|
|
349
|
+
if (not isinstance(tc, list) or not tc) and isinstance(fc, dict):
|
|
350
|
+
name = fc.get("name") or "interact_many"
|
|
351
|
+
args = fc.get("arguments") or "{}"
|
|
352
|
+
msg["tool_calls"] = [
|
|
353
|
+
{
|
|
354
|
+
"id": "call_norm",
|
|
355
|
+
"type": "function",
|
|
356
|
+
"function": {"name": name, "arguments": args},
|
|
357
|
+
}
|
|
358
|
+
]
|
|
359
|
+
# Encourage downstream to treat this as a tool call
|
|
360
|
+
if isinstance(choices[0], dict):
|
|
361
|
+
choices[0]["finish_reason"] = "tool_calls"
|
|
362
|
+
# Log tool call count for debugging
|
|
363
|
+
try:
|
|
364
|
+
tc2 = msg.get("tool_calls")
|
|
365
|
+
count = len(tc2) if isinstance(tc2, list) else 0
|
|
366
|
+
logger.info({
|
|
367
|
+
"llm.tool_calls": True,
|
|
368
|
+
"count": count,
|
|
369
|
+
"finish_reason": choices[0].get("finish_reason") if isinstance(choices[0], dict) else None,
|
|
370
|
+
})
|
|
371
|
+
except Exception:
|
|
372
|
+
pass
|
|
373
|
+
except Exception:
|
|
374
|
+
pass
|
|
375
|
+
|
|
238
376
|
return result
|
|
239
377
|
|
|
240
378
|
except httpx.TimeoutException:
|
|
@@ -340,40 +478,6 @@ class OpenAIClient:
|
|
|
340
478
|
pass
|
|
341
479
|
except Exception:
|
|
342
480
|
pass
|
|
343
|
-
# Gracefully degrade on 422 so rollouts can still produce a trajectory
|
|
344
|
-
if status == 422:
|
|
345
|
-
try:
|
|
346
|
-
# Best-effort parse of error for diagnostics
|
|
347
|
-
err = None
|
|
348
|
-
try:
|
|
349
|
-
err = e.response.json()
|
|
350
|
-
except Exception:
|
|
351
|
-
err = {"error": "unprocessable", "detail": (text or "")[:200]}
|
|
352
|
-
logger.warning(
|
|
353
|
-
{
|
|
354
|
-
"inference_422_recovered": True,
|
|
355
|
-
"detail": err,
|
|
356
|
-
}
|
|
357
|
-
)
|
|
358
|
-
except Exception:
|
|
359
|
-
pass
|
|
360
|
-
# Return a minimal OpenAI-compatible response with no tool_calls/content
|
|
361
|
-
import time as _t
|
|
362
|
-
|
|
363
|
-
return {
|
|
364
|
-
"id": f"cmpl-{int(_t.time())}",
|
|
365
|
-
"object": "chat.completion",
|
|
366
|
-
"created": int(_t.time()),
|
|
367
|
-
"model": processed_request.get("model") or "unknown",
|
|
368
|
-
"choices": [
|
|
369
|
-
{
|
|
370
|
-
"index": 0,
|
|
371
|
-
"message": {"role": "assistant", "content": "", "tool_calls": []},
|
|
372
|
-
"finish_reason": "stop",
|
|
373
|
-
}
|
|
374
|
-
],
|
|
375
|
-
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
|
|
376
|
-
}
|
|
377
481
|
raise
|
|
378
482
|
except Exception as e:
|
|
379
483
|
logger.error(f"Unexpected error calling {url}: {e}")
|
|
@@ -399,7 +503,14 @@ class OpenAIClient:
|
|
|
399
503
|
|
|
400
504
|
try:
|
|
401
505
|
async with httpx.AsyncClient(timeout=timeout) as client:
|
|
402
|
-
|
|
506
|
+
headers = self.headers.copy()
|
|
507
|
+
try:
|
|
508
|
+
parsed = httpx.URL(url)
|
|
509
|
+
if self._env_api_key and "/proxy/" in (parsed.path or ""):
|
|
510
|
+
headers.setdefault("X-API-Key", self._env_api_key)
|
|
511
|
+
except Exception:
|
|
512
|
+
pass
|
|
513
|
+
response = await client.get(url, headers=headers)
|
|
403
514
|
response.raise_for_status()
|
|
404
515
|
return response.json()
|
|
405
516
|
except httpx.HTTPStatusError as e:
|