synth-ai 0.2.2.dev0__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions exactly as they appear in the public registry.
- synth_ai/cli/__init__.py +66 -0
- synth_ai/cli/balance.py +205 -0
- synth_ai/cli/calc.py +70 -0
- synth_ai/cli/demo.py +74 -0
- synth_ai/{cli.py → cli/legacy_root_backup.py} +60 -15
- synth_ai/cli/man.py +103 -0
- synth_ai/cli/recent.py +126 -0
- synth_ai/cli/root.py +184 -0
- synth_ai/cli/status.py +126 -0
- synth_ai/cli/traces.py +136 -0
- synth_ai/cli/watch.py +508 -0
- synth_ai/config/base_url.py +53 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +252 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_duckdb_v2_backup.py +413 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +646 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_synth.py +34 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth.py +1740 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth_v2_backup.py +1318 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_duckdb_v2_backup.py +386 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v2_backup.py +1352 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/test_crafter_react_agent_openai_v2_backup.py +2551 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1 -1
- synth_ai/environments/examples/crafter_classic/agent_demos/old/traces/session_crafter_episode_16_15227b68-2906-416f-acc4-d6a9b4fa5828_20250725_001154.json +1363 -1
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +3 -3
- synth_ai/environments/examples/enron/dataset/corbt___enron_emails_sample_questions/default/0.0.0/293c9fe8170037e01cc9cf5834e0cd5ef6f1a6bb/dataset_info.json +1 -0
- synth_ai/environments/examples/nethack/helpers/achievements.json +64 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +1 -1
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +5 -5
- synth_ai/environments/examples/red/units/test_movement_debug.py +2 -2
- synth_ai/environments/examples/red/units/test_retry_movement.py +1 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/available_envs.json +122 -0
- synth_ai/environments/examples/sokoban/verified_puzzles.json +54987 -0
- synth_ai/experimental/synth_oss.py +446 -0
- synth_ai/learning/core.py +21 -0
- synth_ai/learning/gateway.py +4 -0
- synth_ai/learning/prompts/mipro.py +0 -0
- synth_ai/lm/__init__.py +3 -0
- synth_ai/lm/core/main.py +4 -0
- synth_ai/lm/core/main_v3.py +68 -13
- synth_ai/lm/core/vendor_clients.py +4 -0
- synth_ai/lm/provider_support/openai.py +11 -2
- synth_ai/lm/vendors/base.py +7 -0
- synth_ai/lm/vendors/openai_standard.py +339 -4
- synth_ai/lm/vendors/openai_standard_responses.py +243 -0
- synth_ai/lm/vendors/synth_client.py +155 -5
- synth_ai/lm/warmup.py +54 -17
- synth_ai/tracing/__init__.py +18 -0
- synth_ai/tracing_v1/__init__.py +29 -14
- synth_ai/tracing_v3/config.py +13 -7
- synth_ai/tracing_v3/db_config.py +6 -6
- synth_ai/tracing_v3/turso/manager.py +8 -8
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/dashboard.py +329 -0
- synth_ai/v0/tracing/__init__.py +0 -0
- synth_ai/{tracing → v0/tracing}/base_client.py +3 -3
- synth_ai/{tracing → v0/tracing}/client_manager.py +1 -1
- synth_ai/{tracing → v0/tracing}/context.py +1 -1
- synth_ai/{tracing → v0/tracing}/decorators.py +11 -11
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/{tracing → v0/tracing}/events/manage.py +4 -4
- synth_ai/{tracing → v0/tracing}/events/scope.py +6 -6
- synth_ai/{tracing → v0/tracing}/events/store.py +3 -3
- synth_ai/{tracing → v0/tracing}/immediate_client.py +6 -6
- synth_ai/{tracing → v0/tracing}/log_client_base.py +2 -2
- synth_ai/{tracing → v0/tracing}/retry_queue.py +3 -3
- synth_ai/{tracing → v0/tracing}/trackers.py +2 -2
- synth_ai/{tracing → v0/tracing}/upload.py +4 -4
- synth_ai/v0/tracing_v1/__init__.py +16 -0
- synth_ai/{tracing_v1 → v0/tracing_v1}/base_client.py +3 -3
- synth_ai/{tracing_v1 → v0/tracing_v1}/client_manager.py +1 -1
- synth_ai/{tracing_v1 → v0/tracing_v1}/context.py +1 -1
- synth_ai/{tracing_v1 → v0/tracing_v1}/decorators.py +11 -11
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/{tracing_v1 → v0/tracing_v1}/events/manage.py +4 -4
- synth_ai/{tracing_v1 → v0/tracing_v1}/events/scope.py +6 -6
- synth_ai/{tracing_v1 → v0/tracing_v1}/events/store.py +3 -3
- synth_ai/{tracing_v1 → v0/tracing_v1}/immediate_client.py +6 -6
- synth_ai/{tracing_v1 → v0/tracing_v1}/log_client_base.py +2 -2
- synth_ai/{tracing_v1 → v0/tracing_v1}/retry_queue.py +3 -3
- synth_ai/{tracing_v1 → v0/tracing_v1}/trackers.py +2 -2
- synth_ai/{tracing_v1 → v0/tracing_v1}/upload.py +4 -4
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/METADATA +98 -4
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/RECORD +98 -62
- /synth_ai/{tracing/events/__init__.py → environments/examples/crafter_classic/debug_translation.py} +0 -0
- /synth_ai/{tracing_v1/events/__init__.py → learning/prompts/gepa.py} +0 -0
- /synth_ai/{tracing → v0/tracing}/abstractions.py +0 -0
- /synth_ai/{tracing → v0/tracing}/config.py +0 -0
- /synth_ai/{tracing → v0/tracing}/local.py +0 -0
- /synth_ai/{tracing → v0/tracing}/utils.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/abstractions.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/config.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/local.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/utils.py +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.3.dist-info}/top_level.txt +0 -0
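The rename map above moves the legacy tracing packages under a new `v0` namespace (`synth_ai/tracing → synth_ai/v0/tracing`, and likewise `tracing_v1`), while small `synth_ai/tracing/__init__.py` and `synth_ai/tracing_v1/__init__.py` files remain at the old locations, which suggests compatibility shims. Assuming the moved modules keep their public names, downstream imports would migrate as in this sketch (based only on the rename map; verify against the shipped shims):

```python
# Old location (pre-0.2.3), per the rename map above:
#   from synth_ai.tracing import decorators
# New location under the v0 namespace:
from synth_ai.v0.tracing import decorators  # noqa: F401
from synth_ai.v0.tracing.events import manage  # the events/ subpackage moved too
```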
synth_ai/lm/vendors/base.py
CHANGED
@@ -18,10 +18,17 @@ class BaseLMResponse(BaseModel):
         raw_response: The raw text response from the model
         structured_output: Optional parsed Pydantic model if structured output was requested
         tool_calls: Optional list of tool calls if tools were provided
+        response_id: Optional response ID for thread management (Responses API)
+        reasoning: Optional reasoning trace from the model (o1 models)
+        api_type: Optional API type used ("chat", "responses", or "harmony")
     """
     raw_response: str
     structured_output: Optional[BaseModel] = None
     tool_calls: Optional[List[Dict]] = None
+    response_id: Optional[str] = None
+    reasoning: Optional[str] = None
+    api_type: Optional[str] = None
+    usage: Optional[Dict[str, Any]] = None


 class VendorBase(ABC):
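The response model gains four optional fields: a Responses-API `response_id` for thread continuation, a `reasoning` trace, an `api_type` tag, and a token `usage` dict. A minimal sketch of populating them (field names come from the diff above; the values are illustrative):

```python
from synth_ai.lm.vendors.base import BaseLMResponse

resp = BaseLMResponse(
    raw_response="The answer is 42.",
    response_id="resp_abc123",       # hypothetical ID; used for Responses API threads
    reasoning="Computed 6 * 7.",     # reasoning trace, when the model returns one
    api_type="responses",            # "chat", "responses", or "harmony"
    usage={"prompt_tokens": 12, "completion_tokens": 8, "total_tokens": 20},
)

# Callers can branch on how the response was produced:
if resp.api_type == "responses" and resp.response_id:
    print(f"continue thread via {resp.response_id}")
```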
synth_ai/lm/vendors/openai_standard.py
CHANGED
@@ -1,7 +1,10 @@
 from typing import Any, Dict, List, Optional, Union
+import asyncio
+import time

 import groq
 import openai
+import os
 import pydantic_core
 from pydantic import BaseModel

@@ -12,6 +15,7 @@ from synth_ai.lm.tools.base import BaseTool
 from synth_ai.lm.vendors.base import BaseLMResponse, VendorBase
 from synth_ai.lm.constants import SPECIAL_BASE_TEMPS
 from synth_ai.lm.vendors.retries import MAX_BACKOFF
+from synth_ai.lm.vendors.openai_standard_responses import OpenAIResponsesAPIMixin
 import backoff

 DEFAULT_EXCEPTIONS_TO_RETRY = (
@@ -50,7 +54,7 @@ def _silent_backoff_handler(_details):
     pass


-class OpenAIStandard(VendorBase):
+class OpenAIStandard(VendorBase, OpenAIResponsesAPIMixin):
     """
     Standard OpenAI-compatible vendor implementation.

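`OpenAIStandard` now also inherits from `OpenAIResponsesAPIMixin` (provided by the new `openai_standard_responses.py`), layering Responses-API methods onto the existing chat-completions vendor. A toy sketch of the composition pattern; the class and method names below are hypothetical, not the synth_ai API:

```python
import asyncio

class ChatVendor:
    async def chat(self, model: str, prompt: str) -> str:
        return f"[chat:{model}] {prompt}"

class ResponsesMixin:
    # The mixin adds a second call style without touching the base class.
    async def respond(self, model: str, prompt: str) -> str:
        return f"[responses:{model}] {prompt}"

class Vendor(ChatVendor, ResponsesMixin):
    """Supports both call styles through multiple inheritance."""

v = Vendor()
print(asyncio.run(v.chat("gpt-4o", "hello")))
print(asyncio.run(v.respond("gpt-4o", "hello")))
```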
@@ -79,6 +83,16 @@ class OpenAIStandard(VendorBase):
         self.async_client = async_client
         self.used_for_structured_outputs = used_for_structured_outputs
         self.exceptions_to_retry = exceptions_to_retry
+
+        # Initialize Harmony support for OSS models
+        self.harmony_available = False
+        self.harmony_enc = None
+        try:
+            from openai_harmony import load_harmony_encoding, HarmonyEncodingName
+            self.harmony_available = True
+            self.harmony_enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
+        except ImportError:
+            pass

     @backoff.on_exception(
         backoff.expo,
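The constructor probes for the optional `openai_harmony` dependency and degrades gracefully when it is missing, leaving `harmony_available` False instead of failing at construction time. The same guard in isolation (a sketch; `openai_harmony` is an optional extra, and the encoding name is taken from the diff):

```python
# Optional-dependency guard, as in __init__ above: Harmony support is a
# feature flag, not a hard requirement.
harmony_available = False
harmony_enc = None
try:
    from openai_harmony import HarmonyEncodingName, load_harmony_encoding
    harmony_enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
    harmony_available = True
except ImportError:
    pass  # openai_harmony not installed; callers must check the flag

if harmony_available:
    print("Harmony encoding ready")
else:
    print("Harmony features disabled")
```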
@@ -99,6 +113,15 @@
         assert lm_config.get("response_model", None) is None, (
             "response_model is not supported for standard calls"
         )
+
+        DEBUG = os.getenv("SYNTH_OPENAI_DEBUG") == "1"
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: _hit_api_async called with:")
+            print(f"   Model: {model}")
+            print(f"   Messages: {len(messages)} messages")
+            print(f"   Tools: {len(tools) if tools else 0} tools")
+            print(f"   LM config: {lm_config}")
+
         messages = special_orion_transform(model, messages)
         used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
         lm_config["reasoning_effort"] = reasoning_effort
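Verbose request/response logging throughout this file is now opt-in via the `SYNTH_OPENAI_DEBUG` environment variable. A minimal sketch of the toggle (the variable name comes from the diff; the rest is illustrative):

```python
import os

DEBUG = os.getenv("SYNTH_OPENAI_DEBUG") == "1"

def log_request(model: str, messages: list) -> None:
    # No-ops unless the caller exported SYNTH_OPENAI_DEBUG=1.
    if DEBUG:
        print(f"🔍 OPENAI DEBUG: calling {model} with {len(messages)} messages")

log_request("gpt-4o", [{"role": "user", "content": "hi"}])
```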
@@ -106,7 +129,14 @@
             model, messages, lm_config=lm_config, tools=tools
         )
         if cache_result:
-            return cache_result
+            if DEBUG:
+                print(f"🔍 OPENAI DEBUG: Cache hit! Returning cached result")
+                print(f"   Cache result type: {type(cache_result)}")
+                print(f"🔍 OPENAI DEBUG: DISABLING CACHE FOR DEBUGGING - forcing API call")
+            # return cache_result  # Commented out to force API call
+
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: Cache miss, making actual API call")

         # Common API call params
         api_params = {
@@ -126,12 +156,205 @@
             "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
         )

+        # Forward additional sampling / control params if provided
+        if lm_config.get("max_tokens") is not None:
+            api_params["max_tokens"] = lm_config["max_tokens"]
+        if lm_config.get("top_p") is not None:
+            api_params["top_p"] = lm_config["top_p"]
+        if lm_config.get("frequency_penalty") is not None:
+            api_params["frequency_penalty"] = lm_config["frequency_penalty"]
+        if lm_config.get("presence_penalty") is not None:
+            api_params["presence_penalty"] = lm_config["presence_penalty"]
+        if lm_config.get("stop") is not None:
+            api_params["stop"] = lm_config["stop"]
+        if lm_config.get("tool_choice") is not None:
+            api_params["tool_choice"] = lm_config["tool_choice"]
+        # Forward GPU preference to backend (body + header)
+        if lm_config.get("gpu_preference") is not None:
+            api_params["gpu_preference"] = lm_config["gpu_preference"]
+            # Also set header so proxies that read headers can honor it
+            hdrs = api_params.get("extra_headers", {})
+            hdrs["X-GPU-Preference"] = lm_config["gpu_preference"]
+            api_params["extra_headers"] = hdrs
+        # Also mirror stop_after_tool_calls into a header for robustness
+        try:
+            satc_val = None
+            if isinstance(lm_config.get("extra_body"), dict):
+                satc_val = lm_config["extra_body"].get("stop_after_tool_calls")
+            if satc_val is not None:
+                hdrs = api_params.get("extra_headers", {})
+                hdrs["X-Stop-After-Tool-Calls"] = str(satc_val)
+                api_params["extra_headers"] = hdrs
+        except Exception:
+            pass
+        # Forward Qwen3 chat template kwargs via extra_body when requested
+        if lm_config.get("enable_thinking") is not None:
+            api_params["extra_body"] = api_params.get("extra_body", {})
+            ctk = api_params["extra_body"].get("chat_template_kwargs", {})
+            ctk["enable_thinking"] = lm_config["enable_thinking"]
+            api_params["extra_body"]["chat_template_kwargs"] = ctk
+        # Forward arbitrary extra_body from lm_config if provided (merge)
+        if lm_config.get("extra_body") is not None:
+            # Shallow-merge top-level keys; nested keys (like chat_template_kwargs) should be provided whole
+            api_params["extra_body"] = {**api_params.get("extra_body", {}), **(lm_config.get("extra_body") or {})}
+        # Forward Qwen3 chat template kwargs via extra_body when requested
+        if lm_config.get("enable_thinking") is not None:
+            api_params["extra_body"] = api_params.get("extra_body", {})
+            ctk = api_params["extra_body"].get("chat_template_kwargs", {})
+            ctk["enable_thinking"] = lm_config["enable_thinking"]
+            api_params["extra_body"]["chat_template_kwargs"] = ctk
+
         # Add reasoning_effort only for o3-mini
         if model in ["o3-mini"]:
             print("Reasoning effort:", reasoning_effort)
             api_params["reasoning_effort"] = reasoning_effort

-        output = await self.async_client.chat.completions.create(**api_params)
+        # Filter Synth-only params when calling external OpenAI-compatible providers
+        # External providers (e.g., OpenAI, Groq) reject unknown fields like
+        # extra_body.chat_template_kwargs or stop_after_tool_calls.
+        try:
+            base_url_obj = getattr(self.async_client, "base_url", None)
+            base_url_str = str(base_url_obj) if base_url_obj is not None else ""
+        except Exception:
+            base_url_str = ""
+
+        is_external_provider = (
+            "openai.com" in base_url_str or "api.groq.com" in base_url_str
+        )
+
+        if is_external_provider:
+            # Remove extra_body entirely; this is Synth-specific plumbing
+            if "extra_body" in api_params:
+                api_params.pop("extra_body", None)
+
+            # Also ensure we don't pass stray vendor-specific fields if present
+            # (defensive in case upstream added them at top-level later)
+            for k in ["chat_template_kwargs", "stop_after_tool_calls"]:
+                api_params.pop(k, None)
+
+            # GPT-5 models: parameter normalization
+            if model.startswith("gpt-5"):
+                # Require max_completion_tokens instead of max_tokens
+                if "max_tokens" in api_params:
+                    api_params["max_completion_tokens"] = api_params.pop("max_tokens")
+                # Only default temperature=1 supported; omit custom temperature
+                if "temperature" in api_params:
+                    api_params.pop("temperature", None)
+
+        # Call API with better auth error reporting
+        #try:
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: Making request with params:")
+            print(f"   Model: {api_params.get('model')}")
+            print(f"   Messages: {len(api_params.get('messages', []))} messages")
+            print(f"   Tools: {len(api_params.get('tools', []))} tools")
+            print(f"   Max tokens: {api_params.get('max_tokens', 'NOT SET')}")
+            print(f"   Temperature: {api_params.get('temperature', 'NOT SET')}")
+            if 'tools' in api_params:
+                print(f"   First tool: {api_params['tools'][0]}")
+            print(f"   FULL API PARAMS: {api_params}")
+
+        # Quiet targeted retry for OpenAI 400 tool_use_failed during tool-calling
+        try:
+            max_attempts_for_tool_use = int(os.getenv("SYNTH_TOOL_USE_RETRIES", "5"))
+        except Exception:
+            max_attempts_for_tool_use = 5
+        try:
+            backoff_seconds = float(os.getenv("SYNTH_TOOL_USE_BACKOFF_INITIAL", "0.5"))
+        except Exception:
+            backoff_seconds = 0.5
+
+        attempt_index = 0
+        while True:
+            try:
+                output = await self.async_client.chat.completions.create(**api_params)
+                break
+            except openai.BadRequestError as err:
+                # Detect tool-use failure from various SDK surfaces
+                should_retry = False
+                # 1) Body dict
+                body = getattr(err, "body", None)
+                if isinstance(body, dict):
+                    try:
+                        err_obj = body.get("error") if isinstance(body.get("error"), dict) else {}
+                        code_val = err_obj.get("code")
+                        msg_val = err_obj.get("message")
+                        if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                            should_retry = True
+                    except Exception:
+                        pass
+                # 2) Response JSON
+                if not should_retry:
+                    try:
+                        resp = getattr(err, "response", None)
+                        if resp is not None:
+                            j = resp.json()
+                            if isinstance(j, dict):
+                                err_obj = j.get("error") if isinstance(j.get("error"), dict) else {}
+                                code_val = err_obj.get("code")
+                                msg_val = err_obj.get("message")
+                                if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                                    should_retry = True
+                    except Exception:
+                        pass
+                # 3) Fallback to string match
+                if not should_retry:
+                    err_text = str(err)
+                    if "tool_use_failed" in err_text or "Failed to call a function" in err_text:
+                        should_retry = True
+                if should_retry and attempt_index + 1 < max_attempts_for_tool_use:
+                    await asyncio.sleep(backoff_seconds)
+                    backoff_seconds = min(backoff_seconds * 2.0, 2.0)
+                    attempt_index += 1
+                    continue
+                raise
+
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: Response received:")
+            print(f"   Type: {type(output)}")
+            print(f"   Choices: {len(output.choices) if hasattr(output, 'choices') else 'N/A'}")
+            if hasattr(output, 'choices') and output.choices:
+                choice = output.choices[0]
+                print(f"   Choice type: {type(choice)}")
+                if hasattr(choice, 'message'):
+                    message = choice.message
+                    print(f"   Message type: {type(message)}")
+                    print(f"   Has tool_calls: {hasattr(message, 'tool_calls')}")
+                    if hasattr(message, 'tool_calls'):
+                        print(f"   Tool calls: {message.tool_calls}")
+                    print(f"   Content: {message.content[:200] if hasattr(message, 'content') and message.content else 'None'}...")
+            # Show finish_reason and usage if available
+            try:
+                print(f"   finish_reason: {getattr(choice, 'finish_reason', None)}")
+                usage = getattr(output, 'usage', None)
+                if usage:
+                    print(f"   usage: prompt_tokens={getattr(usage, 'prompt_tokens', None)}, completion_tokens={getattr(usage, 'completion_tokens', None)}, total_tokens={getattr(usage, 'total_tokens', None)}")
+            except Exception:
+                pass

+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: FULL RAW RESPONSE:")
+            if hasattr(output.choices[0].message, 'content') and output.choices[0].message.content:
+                print(f"   FULL CONTENT:\n{output.choices[0].message.content}")
+            print(f"   Raw choice: {choice}")
+            print(f"   Raw message: {message}")
+        # except Exception as e:
+        #     try:
+        #         from openai import AuthenticationError as _OpenAIAuthErr  # type: ignore
+        #     except ModuleNotFoundError:
+        #         _OpenAIAuthErr = type(e)
+        #     if isinstance(e, _OpenAIAuthErr):
+        #         key_preview = (os.getenv("OPENAI_API_KEY") or "")[:8]
+        #         # Create a more informative error message but preserve the original exception
+        #         enhanced_msg = f"Invalid API key format. Expected prefix 'sk-' or 'sk_live_'. Provided key begins with '{key_preview}'. Original error: {str(e)}"
+        #         # Re-raise the original exception with enhanced message if possible
+        #         if hasattr(e, 'response') and hasattr(e, 'body'):
+        #             raise _OpenAIAuthErr(enhanced_msg, response=e.response, body=e.body) from None
+        #         else:
+        #             # Fallback: just re-raise the original with a print for debugging
+        #             print(f"🔑 API Key Debug: {enhanced_msg}")
+        #             raise e from None
+        #     raise
         message = output.choices[0].message

         # Convert tool calls to dict format
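This hunk does three things: it forwards optional sampling controls from `lm_config` into the request, threads Synth-specific knobs (GPU preference, `stop_after_tool_calls`, Qwen3 `enable_thinking`) through headers and `extra_body`, and then strips that plumbing again when the client points at an external provider. A condensed sketch of the forwarding and merge, assuming `lm_config` is a plain dict (key names from the diff, values illustrative):

```python
lm_config = {
    "max_tokens": 256,
    "top_p": 0.9,
    "enable_thinking": True,
    "extra_body": {"stop_after_tool_calls": 1},
}
api_params = {"model": "Qwen/Qwen3-8B", "messages": [{"role": "user", "content": "hi"}]}

# Copy plain sampling params only when explicitly set.
for key in ("max_tokens", "top_p", "frequency_penalty", "presence_penalty", "stop", "tool_choice"):
    if lm_config.get(key) is not None:
        api_params[key] = lm_config[key]

# Qwen3 "thinking" toggle rides inside extra_body.chat_template_kwargs.
if lm_config.get("enable_thinking") is not None:
    extra = api_params.setdefault("extra_body", {})
    extra.setdefault("chat_template_kwargs", {})["enable_thinking"] = lm_config["enable_thinking"]

# Shallow-merge any caller-supplied extra_body on top.
if lm_config.get("extra_body") is not None:
    api_params["extra_body"] = {**api_params.get("extra_body", {}), **(lm_config["extra_body"] or {})}

print(api_params["extra_body"])
# {'chat_template_kwargs': {'enable_thinking': True}, 'stop_after_tool_calls': 1}
```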
@@ -149,10 +372,24 @@
             for tc in message.tool_calls
         ]

+        # Attach basic usage if available
+        usage_dict = None
+        try:
+            usage_obj = getattr(output, 'usage', None)
+            if usage_obj is not None:
+                usage_dict = {
+                    "prompt_tokens": getattr(usage_obj, 'prompt_tokens', None),
+                    "completion_tokens": getattr(usage_obj, 'completion_tokens', None),
+                    "total_tokens": getattr(usage_obj, 'total_tokens', None),
+                }
+        except Exception:
+            usage_dict = None
+
         lm_response = BaseLMResponse(
             raw_response=message.content or "",  # Use empty string if no content
             structured_output=None,
             tool_calls=tool_calls,
+            usage=usage_dict,
         )
         lm_config["reasoning_effort"] = reasoning_effort
         used_cache_handler.add_to_managed_cache(
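Token usage is read defensively with `getattr`, so responses lacking a `usage` object simply yield `usage=None` rather than raising. The same extraction against a stand-in response object (the stand-in is hypothetical; the real code receives the OpenAI SDK response):

```python
from types import SimpleNamespace

# Stand-in for an SDK response object.
output = SimpleNamespace(
    usage=SimpleNamespace(prompt_tokens=12, completion_tokens=8, total_tokens=20)
)

usage_dict = None
usage_obj = getattr(output, "usage", None)
if usage_obj is not None:
    usage_dict = {
        "prompt_tokens": getattr(usage_obj, "prompt_tokens", None),
        "completion_tokens": getattr(usage_obj, "completion_tokens", None),
        "total_tokens": getattr(usage_obj, "total_tokens", None),
    }

print(usage_dict)
# {'prompt_tokens': 12, 'completion_tokens': 8, 'total_tokens': 20}
```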
@@ -206,12 +443,84 @@
             "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
         )

+        # Forward additional sampling / control params if provided
+        if lm_config.get("max_tokens") is not None:
+            api_params["max_tokens"] = lm_config["max_tokens"]
+        if lm_config.get("top_p") is not None:
+            api_params["top_p"] = lm_config["top_p"]
+        if lm_config.get("frequency_penalty") is not None:
+            api_params["frequency_penalty"] = lm_config["frequency_penalty"]
+        if lm_config.get("presence_penalty") is not None:
+            api_params["presence_penalty"] = lm_config["presence_penalty"]
+        if lm_config.get("stop") is not None:
+            api_params["stop"] = lm_config["stop"]
+        if lm_config.get("tool_choice") is not None:
+            api_params["tool_choice"] = lm_config["tool_choice"]
+
         # Add reasoning_effort only for o3-mini
         if model in ["o3-mini"]:
             api_params["reasoning_effort"] = reasoning_effort

-        output = self.sync_client.chat.completions.create(**api_params)
+        # Sync path: apply the same targeted retry
+        try:
+            max_attempts_for_tool_use = int(os.getenv("SYNTH_TOOL_USE_RETRIES", "5"))
+        except Exception:
+            max_attempts_for_tool_use = 5
+        try:
+            backoff_seconds = float(os.getenv("SYNTH_TOOL_USE_BACKOFF_INITIAL", "0.5"))
+        except Exception:
+            backoff_seconds = 0.5
+
+        attempt_index = 0
+        while True:
+            try:
+                output = self.sync_client.chat.completions.create(**api_params)
+                break
+            except openai.BadRequestError as err:
+                should_retry = False
+                body = getattr(err, "body", None)
+                if isinstance(body, dict):
+                    try:
+                        err_obj = body.get("error") if isinstance(body.get("error"), dict) else {}
+                        code_val = err_obj.get("code")
+                        msg_val = err_obj.get("message")
+                        if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                            should_retry = True
+                    except Exception:
+                        pass
+                if not should_retry:
+                    try:
+                        resp = getattr(err, "response", None)
+                        if resp is not None:
+                            j = resp.json()
+                            if isinstance(j, dict):
+                                err_obj = j.get("error") if isinstance(j.get("error"), dict) else {}
+                                code_val = err_obj.get("code")
+                                msg_val = err_obj.get("message")
+                                if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                                    should_retry = True
+                    except Exception:
+                        pass
+                if not should_retry:
+                    err_text = str(err)
+                    if "tool_use_failed" in err_text or "Failed to call a function" in err_text:
+                        should_retry = True
+                if should_retry and attempt_index + 1 < max_attempts_for_tool_use:
+                    time.sleep(backoff_seconds)
+                    backoff_seconds = min(backoff_seconds * 2.0, 2.0)
+                    attempt_index += 1
+                    continue
+                raise
         message = output.choices[0].message
+        DEBUG = os.getenv("SYNTH_OPENAI_DEBUG") == "1"
+        if DEBUG:
+            try:
+                print(f"🔍 OPENAI DEBUG (sync): finish_reason={getattr(output.choices[0], 'finish_reason', None)}")
+                usage = getattr(output, 'usage', None)
+                if usage:
+                    print(f"🔍 OPENAI DEBUG (sync): usage prompt_tokens={getattr(usage, 'prompt_tokens', None)}, completion_tokens={getattr(usage, 'completion_tokens', None)}, total_tokens={getattr(usage, 'total_tokens', None)}")
+            except Exception:
+                pass

         # Convert tool calls to dict format
         tool_calls = None
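Both the async and sync paths wrap the completion call in a quiet retry that fires only on the provider's `tool_use_failed` 400 error and backs off exponentially (capped at 2 s), honoring `SYNTH_TOOL_USE_RETRIES` and `SYNTH_TOOL_USE_BACKOFF_INITIAL`. A self-contained sketch of the same pattern against a fake flaky call (the fake error class and function are hypothetical stand-ins):

```python
import os
import time

class FakeBadRequestError(Exception):
    """Stand-in for openai.BadRequestError in this sketch."""

calls = {"n": 0}

def flaky_create() -> str:
    # Fails twice with a tool_use_failed-style message, then succeeds.
    calls["n"] += 1
    if calls["n"] < 3:
        raise FakeBadRequestError("tool_use_failed: Failed to call a function")
    return "ok"

max_attempts = int(os.getenv("SYNTH_TOOL_USE_RETRIES", "5"))
backoff_seconds = float(os.getenv("SYNTH_TOOL_USE_BACKOFF_INITIAL", "0.5"))

attempt = 0
while True:
    try:
        result = flaky_create()
        break
    except FakeBadRequestError as err:
        retryable = "tool_use_failed" in str(err) or "Failed to call a function" in str(err)
        if retryable and attempt + 1 < max_attempts:
            time.sleep(backoff_seconds)
            backoff_seconds = min(backoff_seconds * 2.0, 2.0)  # exponential, capped at 2s
            attempt += 1
            continue
        raise

print(result, "after", calls["n"], "calls")  # ok after 3 calls
```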
@@ -228,10 +537,24 @@
             for tc in message.tool_calls
         ]

+        # Attach basic usage if available
+        usage_dict = None
+        try:
+            usage_obj = getattr(output, 'usage', None)
+            if usage_obj is not None:
+                usage_dict = {
+                    "prompt_tokens": getattr(usage_obj, 'prompt_tokens', None),
+                    "completion_tokens": getattr(usage_obj, 'completion_tokens', None),
+                    "total_tokens": getattr(usage_obj, 'total_tokens', None),
+                }
+        except Exception:
+            usage_dict = None
+
         lm_response = BaseLMResponse(
             raw_response=message.content or "",  # Use empty string if no content
             structured_output=None,
             tool_calls=tool_calls,
+            usage=usage_dict,
         )
         lm_config["reasoning_effort"] = reasoning_effort
         used_cache_handler.add_to_managed_cache(
@@ -342,6 +665,18 @@
         if model in ["o3-mini"]:
             api_params["reasoning_effort"] = reasoning_effort

+        # Normalize for external OpenAI as well in sync path
+        try:
+            base_url_obj = getattr(self.sync_client, "base_url", None)
+            base_url_str_sync = str(base_url_obj) if base_url_obj is not None else ""
+        except Exception:
+            base_url_str_sync = ""
+        if ("openai.com" in base_url_str_sync or "api.groq.com" in base_url_str_sync) and model.startswith("gpt-5"):
+            if "max_tokens" in api_params:
+                api_params["max_completion_tokens"] = api_params.pop("max_tokens")
+            if "temperature" in api_params:
+                api_params.pop("temperature", None)
+
         output = self.sync_client.chat.completions.create(**api_params)

         structured_output_api_result = response_model(**output.choices[0].message.content)
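For GPT-5-family models on external providers, the request is normalized before the call: `max_tokens` becomes `max_completion_tokens` and any custom `temperature` is dropped, since only the default is accepted. A sketch of that transform on a plain dict (parameter values illustrative):

```python
# Sketch of the GPT-5 request normalization applied above.
api_params = {"model": "gpt-5-mini", "max_tokens": 256, "temperature": 0.2}

if api_params["model"].startswith("gpt-5"):
    if "max_tokens" in api_params:
        # GPT-5 chat endpoints expect max_completion_tokens instead.
        api_params["max_completion_tokens"] = api_params.pop("max_tokens")
    # Only the default temperature is supported; drop any custom value.
    api_params.pop("temperature", None)

print(api_params)  # {'model': 'gpt-5-mini', 'max_completion_tokens': 256}
```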