PyPI - synth-ai - Versions diffs - 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl - Mend

synth-ai 0.2.17-py3-none-any.whl → 0.2.19-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (169)

examples/baseline/banking77_baseline.py +204 -0
examples/baseline/crafter_baseline.py +407 -0
examples/baseline/pokemon_red_baseline.py +326 -0
examples/baseline/simple_baseline.py +56 -0
examples/baseline/warming_up_to_rl_baseline.py +239 -0
examples/blog_posts/gepa/README.md +355 -0
examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
examples/blog_posts/gepa/gepa_baseline.py +204 -0
examples/blog_posts/gepa/query_prompts_example.py +97 -0
examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
examples/blog_posts/gepa/task_apps.py +105 -0
examples/blog_posts/gepa/test_gepa_local.sh +67 -0
examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +12 -10
examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +1 -0
examples/blog_posts/pokemon_vl/extract_images.py +239 -0
examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +1 -1
examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +60 -10
examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +1 -1
examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
examples/multi_step/configs/crafter_rl_outcome.toml +1 -0
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -0
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -0
examples/rl/configs/rl_from_base_qwen17.toml +1 -0
examples/swe/task_app/hosted/inference/openai_client.py +0 -34
examples/swe/task_app/hosted/policy_routes.py +17 -0
examples/swe/task_app/hosted/rollout.py +4 -2
examples/task_apps/banking77/__init__.py +6 -0
examples/task_apps/banking77/banking77_task_app.py +841 -0
examples/task_apps/banking77/deploy_wrapper.py +46 -0
examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
examples/task_apps/crafter/task_app/grpo_crafter.py +24 -2
examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +355 -58
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +68 -7
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +78 -21
examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
examples/task_apps/gepa_benchmarks/__init__.py +7 -0
examples/task_apps/gepa_benchmarks/common.py +260 -0
examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
examples/task_apps/pokemon_red/task_app.py +254 -36
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +1 -0
examples/warming_up_to_rl/task_app/grpo_crafter.py +53 -4
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +152 -41
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +31 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +1 -0
synth_ai/api/train/builders.py +90 -1
synth_ai/api/train/cli.py +396 -21
synth_ai/api/train/config_finder.py +13 -2
synth_ai/api/train/configs/__init__.py +15 -1
synth_ai/api/train/configs/prompt_learning.py +442 -0
synth_ai/api/train/configs/rl.py +29 -0
synth_ai/api/train/task_app.py +1 -1
synth_ai/api/train/validators.py +277 -0
synth_ai/baseline/__init__.py +25 -0
synth_ai/baseline/config.py +209 -0
synth_ai/baseline/discovery.py +214 -0
synth_ai/baseline/execution.py +146 -0
synth_ai/cli/__init__.py +85 -17
synth_ai/cli/__main__.py +0 -0
synth_ai/cli/claude.py +70 -0
synth_ai/cli/codex.py +84 -0
synth_ai/cli/commands/__init__.py +1 -0
synth_ai/cli/commands/baseline/__init__.py +12 -0
synth_ai/cli/commands/baseline/core.py +637 -0
synth_ai/cli/commands/baseline/list.py +93 -0
synth_ai/cli/commands/eval/core.py +13 -10
synth_ai/cli/commands/filter/core.py +53 -17
synth_ai/cli/commands/help/core.py +0 -1
synth_ai/cli/commands/smoke/__init__.py +7 -0
synth_ai/cli/commands/smoke/core.py +1436 -0
synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
synth_ai/cli/commands/status/subcommands/usage.py +203 -0
synth_ai/cli/commands/train/judge_schemas.py +1 -0
synth_ai/cli/commands/train/judge_validation.py +1 -0
synth_ai/cli/commands/train/validation.py +0 -57
synth_ai/cli/demo.py +35 -3
synth_ai/cli/deploy/__init__.py +40 -25
synth_ai/cli/deploy.py +162 -0
synth_ai/cli/legacy_root_backup.py +14 -8
synth_ai/cli/opencode.py +107 -0
synth_ai/cli/root.py +9 -5
synth_ai/cli/task_app_deploy.py +1 -1
synth_ai/cli/task_apps.py +53 -53
synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
synth_ai/judge_schemas.py +1 -0
synth_ai/learning/__init__.py +10 -0
synth_ai/learning/prompt_learning_client.py +276 -0
synth_ai/learning/prompt_learning_types.py +184 -0
synth_ai/pricing/__init__.py +2 -0
synth_ai/pricing/model_pricing.py +57 -0
synth_ai/streaming/handlers.py +53 -4
synth_ai/streaming/streamer.py +19 -0
synth_ai/task/apps/__init__.py +1 -0
synth_ai/task/config.py +2 -0
synth_ai/task/tracing_utils.py +25 -25
synth_ai/task/validators.py +44 -8
synth_ai/task_app_cfgs.py +21 -0
synth_ai/tracing_v3/config.py +162 -19
synth_ai/tracing_v3/constants.py +1 -1
synth_ai/tracing_v3/db_config.py +24 -38
synth_ai/tracing_v3/storage/config.py +47 -13
synth_ai/tracing_v3/storage/factory.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +113 -11
synth_ai/tracing_v3/turso/native_manager.py +92 -16
synth_ai/types.py +8 -0
synth_ai/urls.py +11 -0
synth_ai/utils/__init__.py +30 -1
synth_ai/utils/agents.py +74 -0
synth_ai/utils/bin.py +39 -0
synth_ai/utils/cli.py +149 -5
synth_ai/utils/env.py +17 -17
synth_ai/utils/json.py +72 -0
synth_ai/utils/modal.py +283 -1
synth_ai/utils/paths.py +48 -0
synth_ai/utils/uvicorn.py +113 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/METADATA +102 -4
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/RECORD +162 -88
synth_ai/cli/commands/deploy/__init__.py +0 -23
synth_ai/cli/commands/deploy/core.py +0 -614
synth_ai/cli/commands/deploy/errors.py +0 -72
synth_ai/cli/commands/deploy/validation.py +0 -11
synth_ai/cli/deploy/core.py +0 -5
synth_ai/cli/deploy/errors.py +0 -23
synth_ai/cli/deploy/validation.py +0 -5
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0

examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py CHANGED Viewed

@@ -7,7 +7,9 @@ import logging
 import os
 import time
 from typing import Any
+from urllib.parse import urlparse, urlunparse
+import click
 import httpx
 logger = logging.getLogger(__name__)
@@ -148,11 +150,169 @@ class OpenAIClient:
             OpenAI-compatible chat completion response
         """
         base = (base_url or self.base_url).rstrip("/")
-        # Don't append /v1/chat/completions if the URL already contains it
-        if "/v1/chat/completions" in base:
+        # Ensure processed_request is defined for error logging paths
+        processed_request: dict[str, Any] = dict(request or {})
+        # Bulletproof normalization BEFORE any parsing
+        def _local_force_normalize(u: str) -> str:
+            if not isinstance(u, str) or not u:
+                return u
+            p = urlparse(u)
+            path = (p.path or "").rstrip("/")
+            q = p.query or ""
+            # If query contains a path segment, extract and repair
+            if q and "/" in q:
+                before, after = q.split("/", 1)
+                # Split off any extra query parameters that were appended after the path
+                cut_positions = [i for i in [after.find("&"), after.find("?")] if i >= 0]
+                cut = min(cut_positions) if cut_positions else len(after)
+                path_from_query = "/" + after[:cut]
+                extra_query = after[cut + 1 :] if cut < len(after) else ""
+                merged_query = before
+                if extra_query:
+                    merged_query = f"{merged_query}&{extra_query}" if merged_query else extra_query
+                # Ensure final path
+                final_path = path_from_query if path_from_query.startswith("/v1/chat/completions") else f"{path_from_query.rstrip('/')}/v1/chat/completions"
+                p = p._replace(path=final_path, query=merged_query)
+                u = urlunparse(p)
+                p = urlparse(u)
+                path = p.path or ""
+                q = p.query or ""
+            if not path.endswith("/v1/chat/completions"):
+                new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
+                p = p._replace(path=new_path)
+                u = urlunparse(p)
+                p = urlparse(u)
+                q = p.query or ""
+            if q and "/" in q:
+                # Last-resort: drop anything after first '/'
+                safe_q = q.split("/")[0]
+                p = p._replace(query=safe_q)
+                u = urlunparse(p)
+            return u
+        norm_base = None
+        try:
+            # Try importing shared normalizer first
+            from examples.task_apps.crafter.task_app.synth_envs_hosted.utils import (
+                force_normalize_chat_completions_url,
+            )
+            norm_base = force_normalize_chat_completions_url(base)
+        except Exception:
+            norm_base = _local_force_normalize(base)
+        base = norm_base or base
+        # Parse URL to handle query parameters correctly
+        parsed = urlparse(base)
+        path = parsed.path.rstrip("/")
+        query = parsed.query
+        # Debug: Log URL parsing
+        logger.error(f"[URL_PARSE] base={base} parsed.path={parsed.path} parsed.query={parsed.query}")
+        # CRITICAL FIX: Handle malformed URLs where path is incorrectly in the query string
+        # Example: https://host?cid=trace_123/v1/chat/completions
+        # Should be: https://host/v1/chat/completions?cid=trace_123
+        # ALWAYS check for malformed URLs - this is CRITICAL
+        # CRASH IMMEDIATELY if URL is malformed - don't let it through!
+        if query and "/" in query:
+            logger.error(f"[URL_FATAL] MALFORMED URL DETECTED AT START: base={base} query={query}")
+            # Try to fix it
+            logger.error(f"[URL_FIX_TRIGGERED] Query contains '/': query={query}")
+            # This is a malformed URL - extract path from query and fix it
+            logger.error(
+                f"[URL_FIX] Malformed URL detected: {base}\n"
+                f"Query contains path segments. Fixing..."
+            )
+            # Find where the path starts in the query string
+            # The query format is: "cid=value/path" or similar
+            # We need to find the first "/" that starts a path segment
+            query_parts = query.split("/", 1)
+            if len(query_parts) == 2:
+                # query_parts[0] is the actual query (e.g., "cid=trace_123")
+                # query_parts[1] is the path that was incorrectly put in query
+                actual_query = query_parts[0]
+                path_and_more = query_parts[1]  # Could be "v1/chat/completions" or "v1/chat/completions&foo=bar"
+                # Extract the path part (everything before "&" or "?" if present)
+                # Handle both "&" (query param separator) and "?" (another malformed query separator)
+                if "&" in path_and_more:
+                    # Path is followed by more query params (separated by &)
+                    path_segment, extra_query = path_and_more.split("&", 1)
+                    path_in_query = "/" + path_segment  # Restore leading slash
+                    # Merge extra query params with actual_query
+                    actual_query = f"{actual_query}&{extra_query}"
+                elif "?" in path_and_more:
+                    # Path is followed by more query params (separated by ?, which is malformed)
+                    path_segment, extra_query = path_and_more.split("?", 1)
+                    path_in_query = "/" + path_segment  # Restore leading slash
+                    # Merge extra query params with actual_query (use & as separator)
+                    actual_query = f"{actual_query}&{extra_query}"
+                else:
+                    # No extra query params, just the path
+                    path_in_query = "/" + path_and_more  # Restore leading slash
+                # If the path_in_query already contains /v1/chat/completions, use it
+                # Otherwise, append /v1/chat/completions
+                if path_in_query.startswith("/v1/chat/completions"):
+                    final_path = path_in_query
+                else:
+                    # Append /v1/chat/completions to whatever path we found
+                    final_path = path_in_query.rstrip("/") + "/v1/chat/completions"
+                # Reconstruct URL correctly: path comes before query
+                parsed = parsed._replace(path=final_path, query=actual_query)
+                url = urlunparse(parsed)
+                logger.warning(f"[URL_FIX] Fixed malformed URL:\n  FROM: {base}\n  TO:   {url}")
+            else:
+                # Can't parse, fall through to normal processing
+                logger.error(f"[URL_FIX] Could not parse malformed query: {query}")
+                path = parsed.path.rstrip("/")
+                if not path.endswith("/v1/chat/completions"):
+                    new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
+                    parsed = parsed._replace(path=new_path)
+                    url = urlunparse(parsed)
+                else:
+                    url = base
+        # Normal case: query params are separate from path
+        elif path.endswith("/v1/chat/completions"):
             url = base
         else:
-            url = base + "/v1/chat/completions"
+            # Append /v1/chat/completions to the path, preserving query params
+            new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
+            parsed = parsed._replace(path=new_path)
+            url = urlunparse(parsed)
+            logger.debug(f"[URL_CONSTRUCT] Added path to URL: {base} -> {url}")
+        # FINAL VALIDATION: Ensure the constructed URL is correct
+        final_parsed = urlparse(url)
+        final_path = final_parsed.path or ""
+        final_query = final_parsed.query or ""
+        # Verify path is correct
+        if not final_path.endswith("/v1/chat/completions"):
+            error_msg = (
+                f"FATAL [OpenAIClient]: URL missing /v1/chat/completions path!\n"
+                f"Original: {base}\n"
+                f"Constructed: {url}\n"
+                f"Path: {final_path}\n"
+            )
+            logger.error(error_msg)
+            raise ValueError(error_msg)
+        # Verify query doesn't contain path segments
+        if final_query and "/" in final_query:
+            error_msg = (
+                f"FATAL [OpenAIClient]: Query still contains path segments after fix!\n"
+                f"Original: {base}\n"
+                f"Constructed: {url}\n"
+                f"Query: {final_query}\n"
+                f"This indicates a bug in URL construction logic."
+            )
+            logger.error(error_msg)
+            raise ValueError(error_msg)
         timeout = timeout_s or self.timeout_s
         # Merge headers
@@ -233,31 +393,97 @@ class OpenAIClient:
             logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Message[1] content value: {msg1_content_post if not isinstance(msg1_content_post, list) else f'list[{len(msg1_content_post)}]'}")
         # Log request (redact messages in production)
+        # CRITICAL: Verify URL is correct BEFORE making HTTP request
+        final_parsed_check = urlparse(url)
+        logger.error(f"[URL_FINAL_CHECK] Before HTTP request: url={url} path={final_parsed_check.path} query={final_parsed_check.query}")
+        # CRASH IF URL IS STILL MALFORMED - DO NOT PROCEED
+        if final_parsed_check.query and "/" in final_parsed_check.query:
+            error_msg = (
+                f"FATAL [OpenAIClient]: URL IS STILL MALFORMED AFTER FIX ATTEMPT!\n"
+                f"Original base_url: {base_url or self.base_url}\n"
+                f"Constructed URL: {url}\n"
+                f"Path: {final_parsed_check.path}\n"
+                f"Query (contains path): {final_parsed_check.query}\n"
+                f"This will cause a 404 error. CRASHING NOW to prevent bad request."
+            )
+            logger.error(error_msg)
+            raise ValueError(error_msg)
+        # Verify path is correct
+        if not final_parsed_check.path.endswith("/v1/chat/completions"):
+            error_msg = (
+                f"FATAL [OpenAIClient]: URL missing /v1/chat/completions path!\n"
+                f"URL: {url}\n"
+                f"Path: {final_parsed_check.path}\n"
+            )
+            logger.error(error_msg)
+            raise ValueError(error_msg)
+        # Log request with detailed prompts/tools preview and sampling settings (Authorization is not logged)
         logger.info(f"Inference POST target: {url}")
         if extra_headers:
             logger.info(f"Extra headers: {extra_headers}")
         with contextlib.suppress(Exception):
             keys_preview = sorted(processed_request.keys())
             logger.info(f"Request keys: {keys_preview}")
-            # DEBUG: Log message structure for vision debugging
-            if "messages" in processed_request:
-                msgs = processed_request["messages"]
-                if isinstance(msgs, list):
-                    logger.debug(f"🔊 [OPENAI_CLIENT] Request has {len(msgs)} messages")
-                    for idx, msg in enumerate(msgs):
-                        if isinstance(msg, dict):
-                            role = msg.get("role")
-                            content = msg.get("content")
-                            if isinstance(content, list):
-                                logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content=list[{len(content)}]")
-                                for part_idx, part in enumerate(content):
-                                    if isinstance(part, dict):
-                                        part_type = part.get("type")
-                                        logger.debug(f"🔊 [OPENAI_CLIENT]   Part[{part_idx}]: type={part_type}")
+        # Detailed IO log: messages/tools/sampling and final payload fields
+        try:
+            import json as _json
+            def _truncate(text: str, limit: int = 2000) -> str:
+                return text if len(text) <= limit else text[:limit] + "…"
+            def _messages_preview(msgs: Any) -> str:
+                try:
+                    out: list[dict[str, Any]] = []
+                    if isinstance(msgs, list):
+                        for m in msgs:
+                            if not isinstance(m, dict):
+                                continue
+                            role = m.get("role")
+                            content = m.get("content")
+                            if isinstance(content, str):
+                                text = content
+                            elif isinstance(content, list):
+                                parts: list[str] = []
+                                for seg in content:
+                                    if isinstance(seg, dict) and isinstance(seg.get("text"), str):
+                                        parts.append(seg["text"])
+                                text = "\n".join(parts)
                             else:
-                                content_len = len(str(content)) if content else 0
-                                logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content_type={type(content).__name__}, len={content_len}")
+                                text = ""
+                            out.append({"role": role, "content": _truncate(str(text), 4000)})
+                    return _json.dumps(out)
+                except Exception:
+                    return "[]"
+            def _tools_preview(tools: Any) -> str:
+                try:
+                    return _truncate(_json.dumps(tools), 4000)
+                except Exception:
+                    return "[]"
+            msgs = processed_request.get("messages") if isinstance(processed_request, dict) else None
+            tools = processed_request.get("tools") if isinstance(processed_request, dict) else None
+            io_log: dict[str, Any] = {
+                "llm.call": True,
+                "model": processed_request.get("model") if isinstance(processed_request, dict) else None,
+                "tool_choice": processed_request.get("tool_choice") if isinstance(processed_request, dict) else None,
+                "parallel_tool_calls": processed_request.get("parallel_tool_calls") if isinstance(processed_request, dict) else None,
+                "stop_after_tool_calls": processed_request.get("stop_after_tool_calls") if isinstance(processed_request, dict) else None,
+                "temperature": processed_request.get("temperature") if isinstance(processed_request, dict) else None,
+                "top_p": processed_request.get("top_p") if isinstance(processed_request, dict) else None,
+                "max_tokens": processed_request.get("max_tokens") if isinstance(processed_request, dict) else None,
+                "max_completion_tokens": processed_request.get("max_completion_tokens") if isinstance(processed_request, dict) else None,
+                "messages_preview": _messages_preview(msgs),
+                "tools_preview": _tools_preview(tools),
+            }
+            logger.info(io_log)
+        except Exception:
+            pass
         # Final hard-guard for OpenAI/Groq: drop unsupported field
         try:
             low_url = url.lower()
@@ -329,10 +555,70 @@ class OpenAIClient:
                 logger.info(
                     f"Inference response status=200, content-type={content_type}, bytes={len(body_text)}"
                 )
-                # Do not log prompt or full response body
+                if body_text:
+                    # Log raw output with generous preview to debug no-tool-call issues
+                    preview_len = min(4000, len(body_text))
+                    logger.info({
+                        "llm.raw_response": True,
+                        "bytes": len(body_text),
+                        "preview": body_text[:preview_len],
+                    })
                 result = response.json()
                 logger.info(f"Inference response parsed_type={type(result).__name__}")
+                tool_call_count = -1
+                # Normalize tool calls so downstream always sees a function tool call
+                try:
+                    if isinstance(result, dict):
+                        choices = result.get("choices")
+                        if isinstance(choices, list) and choices:
+                            msg = choices[0].get("message")
+                            if isinstance(msg, dict):
+                                # Prefer tool_calls; if missing but function_call is present, synthesize tool_calls
+                                tc = msg.get("tool_calls")
+                                fc = msg.get("function_call")
+                                if (not isinstance(tc, list) or not tc) and isinstance(fc, dict):
+                                    name = fc.get("name") or "interact_many"
+                                    args = fc.get("arguments") or "{}"
+                                    msg["tool_calls"] = [
+                                        {
+                                            "id": "call_norm",
+                                            "type": "function",
+                                            "function": {"name": name, "arguments": args},
+                                        }
+                                    ]
+                                    if isinstance(choices[0], dict):
+                                        choices[0]["finish_reason"] = "tool_calls"
+                                # Log tool call count for debugging
+                                try:
+                                    tc2 = msg.get("tool_calls")
+                                    count = len(tc2) if isinstance(tc2, list) else 0
+                                    logger.info({
+                                        "llm.tool_calls": True,
+                                        "count": count,
+                                        "finish_reason": choices[0].get("finish_reason") if isinstance(choices[0], dict) else None,
+                                    })
+                                    if count == 0:
+                                        click.echo(
+                                            "[openai-client] ✗ upstream response missing tool_calls; dumping preview to logs",
+                                            err=True,
+                                        )
+                                        logger.error(
+                                            "Inference response missing tool_calls; failing fast. Raw body preview: %s",
+                                            body_text[:500] if body_text else "<empty>",
+                                        )
+                                        raise ValueError("Inference response missing tool_calls")
+                                    tool_call_count = count
+                                except Exception:
+                                    pass
+                except Exception:
+                    pass
+                click.echo(
+                    f"[openai-client] ✓ response ok with tool_calls={tool_call_count}",
+                    err=True,
+                )
                 return result
             except httpx.TimeoutException:
@@ -341,11 +627,31 @@ class OpenAIClient:
             except httpx.HTTPStatusError as e:
                 status = e.response.status_code if e.response is not None else None
                 text = e.response.text if e.response is not None else str(e)
-                # Log minimal error info only
-                logger.error({"openai_http_error": True, "status": status})
-                # For 4xx/5xx, print full sanitized request to aid debugging (especially Groq 400s)
-                # Suppress prompt/payload logging entirely
-                # Special case: token budget exceeded (OpenAI-compatible error schema)
+                # Log full body and request diagnostics for debugging remote failures
+                try:
+                    redacted_headers = dict(headers)
+                    if "Authorization" in redacted_headers:
+                        redacted_headers["Authorization"] = "***REDACTED***"
+                    logger.error(
+                        {
+                            "openai_http_error": True,
+                            "status": status,
+                            "url": url,
+                            "body": text,
+                        }
+                    )
+                    logger.error(
+                        {
+                            "request_debug": True,
+                            "status": status,
+                            "target": url,
+                            "headers": redacted_headers,
+                            "payload": processed_request,
+                        }
+                    )
+                except Exception:
+                    logger.error(f"HTTP error from {url}: {status} - {text}")
+                # Special case: token budget exceeded handled below, else 422 degrade, else re-raise
                 try:
                     if status == 400 and e.response is not None:
                         data = e.response.json()
@@ -398,6 +704,8 @@ class OpenAIClient:
                                     logger.warning(
                                         {
                                             "token_budget_recovery": True,
+                                            "messages_tokens": messages_tokens,
+                                            "model_limit": model_limit,
                                             "retry_max_tokens": new_max,
                                         }
                                     )
@@ -412,35 +720,6 @@ class OpenAIClient:
                                 pass
                 except Exception:
                     pass
-                # Gracefully degrade on 422 so rollouts can still produce a trajectory
-                if status == 422:
-                    try:
-                        # Best-effort parse of error for diagnostics
-                        err = None
-                        try:
-                            err = e.response.json()
-                        except Exception:
-                            err = {"error": "unprocessable"}
-                        logger.warning({"inference_422_recovered": True})
-                    except Exception:
-                        pass
-                    # Return a minimal OpenAI-compatible response with no tool_calls/content
-                    import time as _t
-                    return {
-                        "id": f"cmpl-{int(_t.time())}",
-                        "object": "chat.completion",
-                        "created": int(_t.time()),
-                        "model": processed_request.get("model") or "unknown",
-                        "choices": [
-                            {
-                                "index": 0,
-                                "message": {"role": "assistant", "content": "", "tool_calls": []},
-                                "finish_reason": "stop",
-                            }
-                        ],
-                        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
-                    }
                 raise
             except Exception as e:
                 logger.error(f"Unexpected error calling {url}: {e}")
@@ -506,14 +785,29 @@ class OpenAIClient:
             OpenAI-compatible chat completion response
         """
         last_error = None
+        processed_request: dict[str, Any] = dict(request or {})
         wait_time = 1.0
         for attempt in range(max_retries + 1):
             try:
                 # Apply parameter fixes to the request
+                # CRITICAL: Use proper URL parsing, not string concatenation!
+                target_base = base_url or self.base_url
+                if target_base:
+                    parsed_target = urlparse(target_base)
+                    target_path = parsed_target.path.rstrip("/")
+                    if not target_path.endswith("/v1/chat/completions"):
+                        new_target_path = f"{target_path}/v1/chat/completions" if target_path else "/v1/chat/completions"
+                        parsed_target = parsed_target._replace(path=new_target_path)
+                        target_url = urlunparse(parsed_target)
+                    else:
+                        target_url = target_base
+                else:
+                    target_url = None
                 processed_request = self._fix_model_parameters(
                     request,
-                    target_url=(base_url or self.base_url).rstrip("/") + "/v1/chat/completions",
+                    target_url=target_url,
                 )
                 return await self.generate(
                     request=processed_request,
@@ -619,7 +913,9 @@ class OpenAIClient:
                 await asyncio.sleep(wait_time)
                 wait_time *= backoff_factor
-        raise last_error
+        if last_error is not None:
+            raise last_error
+        raise RuntimeError("RL inference retries exhausted with no captured exception")
 def create_inference_client(
@@ -694,7 +990,8 @@ def create_inference_client(
             ) -> dict[str, Any]:
                 return {"status": "ok", "dummy": True}
-        return _DummyClient()
+        import typing as _t
+        return _t.cast(OpenAIClient, _DummyClient())
     return OpenAIClient(
         base_url=task_app.vllm_base_url,

examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py CHANGED Viewed

@@ -4,6 +4,7 @@ import contextlib
 import logging
 import os
 from datetime import datetime
+import asyncio
 from typing import Any
 from fastapi import APIRouter, HTTPException, Request
@@ -35,6 +36,13 @@ logger = logging.getLogger(__name__)
 router = APIRouter()
+# Global concurrency limit for outbound inference to avoid backend overload/timeouts
+try:
+    _INFERENCE_CONCURRENCY = int(os.getenv("INFERENCE_CONCURRENCY", "2") or "2")
+except Exception:  # pragma: no cover
+    _INFERENCE_CONCURRENCY = 2
+_inference_sem = asyncio.Semaphore(max(1, _INFERENCE_CONCURRENCY))
 class PolicyCreateRequest(BaseModel):
     policy_name: str
@@ -250,6 +258,11 @@ async def step_policy(
         task_app = req.app.state.task_app
         policy = handle.policy
         tracing_context = getattr(req.state, "rollout_tracing", None)
+        if tracing_context is None:
+            print(
+                f"[TRACE_DEBUG] Missing tracing context on policy step; policy_id={request.policy_id}",
+                flush=True,
+            )
         obs_text = request.observation
         if isinstance(request.observation, dict):
@@ -546,6 +559,14 @@ async def step_policy(
             # Ensure meta carries the final target URL for downstream logging/clients
             with contextlib.suppress(Exception):
+                # Bulletproof normalizer at the call site (in addition to client-side)
+                try:
+                    from examples.task_apps.crafter.task_app.synth_envs_hosted.utils import (
+                        force_normalize_chat_completions_url,
+                    )
+                    target_url = force_normalize_chat_completions_url(target_url)
+                except Exception:
+                    pass
                 sanitized_target = ensure_chat_completions_url(target_url)
                 if sanitized_target and sanitized_target != target_url:
                     logger.warning(
@@ -594,6 +615,28 @@ async def step_policy(
             except Exception:
                 api_key_override = None
+            # Fallback: If target is OpenAI but OPENAI_API_KEY is missing, route to Synth API
+            try:
+                import os as _os2
+                _low = str(target_url or "").lower()
+                if ("api.openai.com" in _low) and not (_os2.getenv("OPENAI_API_KEY")):
+                    # Prefer task_app.synth_base_url if available; else default
+                    synth_base = getattr(task_app, "synth_base_url", None)
+                    if isinstance(synth_base, str) and synth_base.strip():
+                        base = synth_base.rstrip("/")
+                        fallback = base + "/inference/v1/chat/completions"
+                    else:
+                        fallback = "https://api.synth.run/api/inference/v1/chat/completions"
+                    fixed = ensure_chat_completions_url(fallback)
+                    logger.warning(
+                        "POLICY_STEP: OPENAI key missing; falling back to Synth route %s",
+                        fixed,
+                    )
+                    meta["inference_url"] = fixed
+                    target_url = fixed
+            except Exception:
+                pass
             if api_key_override:
                 try:
                     masked = f"{api_key_override[:6]}…{api_key_override[-4:]}"
@@ -975,13 +1018,14 @@ async def step_policy(
             _t_start = _t.time()
             call_started_at = datetime.utcnow()
-            inference_response = await client.generate_with_retries(
-                request=meta["inference_request"],
-                base_url=meta["inference_url"],
-                max_retries=12,
-                backoff_factor=2.0,
-                extra_headers=extra_headers,
-            )
+            async with _inference_sem:
+                inference_response = await client.generate_with_retries(
+                    request=meta["inference_request"],
+                    base_url=meta["inference_url"],
+                    max_retries=12,
+                    backoff_factor=2.0,
+                    extra_headers=extra_headers,
+                )
             meta["inference_ms"] = int((_t.time() - _t_start) * 1000)
             call_completed_at = datetime.utcnow()
@@ -1061,6 +1105,23 @@ async def step_policy(
                 except Exception as exc:
                     logger.debug(f"TRACING_LLM_FAIL: {exc}")
+        if not tool_calls:
+            preview = ""
+            try:
+                preview = str(meta.get("raw_response") or "")[:400]
+            except Exception:
+                preview = "<unavailable>"
+            logger.error(
+                {
+                    "rollout.policy_step": True,
+                    "policy_id": request.policy_id,
+                    "error": "no_tool_calls",
+                    "inference_url": meta.get("inference_url"),
+                    "raw_preview": preview,
+                }
+            )
+            raise RuntimeError("Policy step produced no tool calls; inference response unusable.")
         return PolicyStepResponse(
             tool_calls=tool_calls,
             meta=meta,

synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl