synth-ai 0.2.2.dev0__py3-none-any.whl → 0.2.4.dev2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to their public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
- synth_ai/cli/__init__.py +66 -0
- synth_ai/cli/balance.py +205 -0
- synth_ai/cli/calc.py +70 -0
- synth_ai/cli/demo.py +74 -0
- synth_ai/{cli.py → cli/legacy_root_backup.py} +60 -15
- synth_ai/cli/man.py +103 -0
- synth_ai/cli/recent.py +126 -0
- synth_ai/cli/root.py +184 -0
- synth_ai/cli/status.py +126 -0
- synth_ai/cli/traces.py +136 -0
- synth_ai/cli/watch.py +508 -0
- synth_ai/config/base_url.py +53 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +252 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_duckdb_v2_backup.py +413 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +760 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_synth.py +34 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth.py +1740 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth_v2_backup.py +1318 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_duckdb_v2_backup.py +386 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v2_backup.py +1352 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +4 -4
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/test_crafter_react_agent_openai_v2_backup.py +2551 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1 -1
- synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +1 -1
- synth_ai/environments/examples/crafter_classic/agent_demos/old/traces/session_crafter_episode_16_15227b68-2906-416f-acc4-d6a9b4fa5828_20250725_001154.json +1363 -1
- synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +3 -3
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/crafter_custom/environment.py +1 -1
- synth_ai/environments/examples/enron/dataset/corbt___enron_emails_sample_questions/default/0.0.0/293c9fe8170037e01cc9cf5834e0cd5ef6f1a6bb/dataset_info.json +1 -0
- synth_ai/environments/examples/nethack/helpers/achievements.json +64 -0
- synth_ai/environments/examples/red/units/test_exploration_strategy.py +1 -1
- synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +5 -5
- synth_ai/environments/examples/red/units/test_movement_debug.py +2 -2
- synth_ai/environments/examples/red/units/test_retry_movement.py +1 -1
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/available_envs.json +122 -0
- synth_ai/environments/examples/sokoban/verified_puzzles.json +54987 -0
- synth_ai/environments/service/core_routes.py +1 -1
- synth_ai/experimental/synth_oss.py +446 -0
- synth_ai/learning/core.py +21 -0
- synth_ai/learning/gateway.py +4 -0
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/mipro.py +8 -0
- synth_ai/lm/__init__.py +3 -0
- synth_ai/lm/core/main.py +4 -0
- synth_ai/lm/core/main_v3.py +238 -122
- synth_ai/lm/core/vendor_clients.py +4 -0
- synth_ai/lm/provider_support/openai.py +11 -2
- synth_ai/lm/vendors/base.py +7 -0
- synth_ai/lm/vendors/openai_standard.py +339 -4
- synth_ai/lm/vendors/openai_standard_responses.py +243 -0
- synth_ai/lm/vendors/synth_client.py +155 -5
- synth_ai/lm/warmup.py +54 -17
- synth_ai/tracing/__init__.py +18 -0
- synth_ai/tracing_v1/__init__.py +29 -14
- synth_ai/tracing_v3/__init__.py +2 -2
- synth_ai/tracing_v3/abstractions.py +62 -17
- synth_ai/tracing_v3/config.py +13 -7
- synth_ai/tracing_v3/db_config.py +6 -6
- synth_ai/tracing_v3/hooks.py +1 -1
- synth_ai/tracing_v3/llm_call_record_helpers.py +350 -0
- synth_ai/tracing_v3/lm_call_record_abstractions.py +257 -0
- synth_ai/tracing_v3/session_tracer.py +5 -5
- synth_ai/tracing_v3/tests/test_concurrent_operations.py +1 -1
- synth_ai/tracing_v3/tests/test_llm_call_records.py +672 -0
- synth_ai/tracing_v3/tests/test_session_tracer.py +43 -9
- synth_ai/tracing_v3/tests/test_turso_manager.py +1 -1
- synth_ai/tracing_v3/turso/manager.py +18 -11
- synth_ai/tracing_v3/turso/models.py +1 -0
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/dashboard.py +329 -0
- synth_ai/v0/tracing/__init__.py +0 -0
- synth_ai/{tracing → v0/tracing}/base_client.py +3 -3
- synth_ai/{tracing → v0/tracing}/client_manager.py +1 -1
- synth_ai/{tracing → v0/tracing}/context.py +1 -1
- synth_ai/{tracing → v0/tracing}/decorators.py +11 -11
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/{tracing → v0/tracing}/events/manage.py +4 -4
- synth_ai/{tracing → v0/tracing}/events/scope.py +6 -6
- synth_ai/{tracing → v0/tracing}/events/store.py +3 -3
- synth_ai/{tracing → v0/tracing}/immediate_client.py +6 -6
- synth_ai/{tracing → v0/tracing}/log_client_base.py +2 -2
- synth_ai/{tracing → v0/tracing}/retry_queue.py +3 -3
- synth_ai/{tracing → v0/tracing}/trackers.py +2 -2
- synth_ai/{tracing → v0/tracing}/upload.py +4 -4
- synth_ai/v0/tracing_v1/__init__.py +16 -0
- synth_ai/{tracing_v1 → v0/tracing_v1}/base_client.py +3 -3
- synth_ai/{tracing_v1 → v0/tracing_v1}/client_manager.py +1 -1
- synth_ai/{tracing_v1 → v0/tracing_v1}/context.py +1 -1
- synth_ai/{tracing_v1 → v0/tracing_v1}/decorators.py +11 -11
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/{tracing_v1 → v0/tracing_v1}/events/manage.py +4 -4
- synth_ai/{tracing_v1 → v0/tracing_v1}/events/scope.py +6 -6
- synth_ai/{tracing_v1 → v0/tracing_v1}/events/store.py +3 -3
- synth_ai/{tracing_v1 → v0/tracing_v1}/immediate_client.py +6 -6
- synth_ai/{tracing_v1 → v0/tracing_v1}/log_client_base.py +2 -2
- synth_ai/{tracing_v1 → v0/tracing_v1}/retry_queue.py +3 -3
- synth_ai/{tracing_v1 → v0/tracing_v1}/trackers.py +2 -2
- synth_ai/{tracing_v1 → v0/tracing_v1}/upload.py +4 -4
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/METADATA +100 -5
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/RECORD +115 -75
- /synth_ai/{tracing/events/__init__.py → compound/cais.py} +0 -0
- /synth_ai/{tracing_v1/events/__init__.py → environments/examples/crafter_classic/debug_translation.py} +0 -0
- /synth_ai/{tracing → v0/tracing}/abstractions.py +0 -0
- /synth_ai/{tracing → v0/tracing}/config.py +0 -0
- /synth_ai/{tracing → v0/tracing}/local.py +0 -0
- /synth_ai/{tracing → v0/tracing}/utils.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/abstractions.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/config.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/local.py +0 -0
- /synth_ai/{tracing_v1 → v0/tracing_v1}/utils.py +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/top_level.txt +0 -0
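The legacy tracing packages move under `synth_ai/v0/`. Assuming the renames above are plain relocations (the diff does not show whether the old top-level modules keep compatibility shims), downstream imports follow the files to their new homes:

```python
# Hypothetical caller code, not part of this diff: the module paths below are
# taken from the rename list above; everything else is illustrative.
# synth-ai 0.2.2.dev0:
#   from synth_ai.tracing import decorators
# synth-ai 0.2.4.dev2:
from synth_ai.v0.tracing import decorators                      # moved from synth_ai/tracing/
from synth_ai.v0.tracing_v1 import decorators as decorators_v1  # moved from synth_ai/tracing_v1/
```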
--- a/synth_ai/lm/vendors/openai_standard.py
+++ b/synth_ai/lm/vendors/openai_standard.py
@@ -1,7 +1,10 @@
 from typing import Any, Dict, List, Optional, Union
+import asyncio
+import time

 import groq
 import openai
+import os
 import pydantic_core
 from pydantic import BaseModel

@@ -12,6 +15,7 @@ from synth_ai.lm.tools.base import BaseTool
 from synth_ai.lm.vendors.base import BaseLMResponse, VendorBase
 from synth_ai.lm.constants import SPECIAL_BASE_TEMPS
 from synth_ai.lm.vendors.retries import MAX_BACKOFF
+from synth_ai.lm.vendors.openai_standard_responses import OpenAIResponsesAPIMixin
 import backoff

 DEFAULT_EXCEPTIONS_TO_RETRY = (
@@ -50,7 +54,7 @@ def _silent_backoff_handler(_details):
     pass


-class OpenAIStandard(VendorBase):
+class OpenAIStandard(VendorBase, OpenAIResponsesAPIMixin):
     """
     Standard OpenAI-compatible vendor implementation.

@@ -79,6 +83,16 @@ class OpenAIStandard(VendorBase):
         self.async_client = async_client
         self.used_for_structured_outputs = used_for_structured_outputs
         self.exceptions_to_retry = exceptions_to_retry
+
+        # Initialize Harmony support for OSS models
+        self.harmony_available = False
+        self.harmony_enc = None
+        try:
+            from openai_harmony import load_harmony_encoding, HarmonyEncodingName
+            self.harmony_available = True
+            self.harmony_enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
+        except ImportError:
+            pass

     @backoff.on_exception(
         backoff.expo,
@@ -99,6 +113,15 @@ class OpenAIStandard(VendorBase):
         assert lm_config.get("response_model", None) is None, (
             "response_model is not supported for standard calls"
         )
+
+        DEBUG = os.getenv("SYNTH_OPENAI_DEBUG") == "1"
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: _hit_api_async called with:")
+            print(f" Model: {model}")
+            print(f" Messages: {len(messages)} messages")
+            print(f" Tools: {len(tools) if tools else 0} tools")
+            print(f" LM config: {lm_config}")
+
         messages = special_orion_transform(model, messages)
         used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
         lm_config["reasoning_effort"] = reasoning_effort
@@ -106,7 +129,14 @@ class OpenAIStandard(VendorBase):
             model, messages, lm_config=lm_config, tools=tools
         )
         if cache_result:
-            return cache_result
+            if DEBUG:
+                print(f"🔍 OPENAI DEBUG: Cache hit! Returning cached result")
+                print(f" Cache result type: {type(cache_result)}")
+                print(f"🔍 OPENAI DEBUG: DISABLING CACHE FOR DEBUGGING - forcing API call")
+            # return cache_result  # Commented out to force API call
+
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: Cache miss, making actual API call")

         # Common API call params
         api_params = {
@@ -126,12 +156,205 @@ class OpenAIStandard(VendorBase):
             "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
         )

+        # Forward additional sampling / control params if provided
+        if lm_config.get("max_tokens") is not None:
+            api_params["max_tokens"] = lm_config["max_tokens"]
+        if lm_config.get("top_p") is not None:
+            api_params["top_p"] = lm_config["top_p"]
+        if lm_config.get("frequency_penalty") is not None:
+            api_params["frequency_penalty"] = lm_config["frequency_penalty"]
+        if lm_config.get("presence_penalty") is not None:
+            api_params["presence_penalty"] = lm_config["presence_penalty"]
+        if lm_config.get("stop") is not None:
+            api_params["stop"] = lm_config["stop"]
+        if lm_config.get("tool_choice") is not None:
+            api_params["tool_choice"] = lm_config["tool_choice"]
+        # Forward GPU preference to backend (body + header)
+        if lm_config.get("gpu_preference") is not None:
+            api_params["gpu_preference"] = lm_config["gpu_preference"]
+            # Also set header so proxies that read headers can honor it
+            hdrs = api_params.get("extra_headers", {})
+            hdrs["X-GPU-Preference"] = lm_config["gpu_preference"]
+            api_params["extra_headers"] = hdrs
+        # Also mirror stop_after_tool_calls into a header for robustness
+        try:
+            satc_val = None
+            if isinstance(lm_config.get("extra_body"), dict):
+                satc_val = lm_config["extra_body"].get("stop_after_tool_calls")
+            if satc_val is not None:
+                hdrs = api_params.get("extra_headers", {})
+                hdrs["X-Stop-After-Tool-Calls"] = str(satc_val)
+                api_params["extra_headers"] = hdrs
+        except Exception:
+            pass
+        # Forward Qwen3 chat template kwargs via extra_body when requested
+        if lm_config.get("enable_thinking") is not None:
+            api_params["extra_body"] = api_params.get("extra_body", {})
+            ctk = api_params["extra_body"].get("chat_template_kwargs", {})
+            ctk["enable_thinking"] = lm_config["enable_thinking"]
+            api_params["extra_body"]["chat_template_kwargs"] = ctk
+        # Forward arbitrary extra_body from lm_config if provided (merge)
+        if lm_config.get("extra_body") is not None:
+            # Shallow-merge top-level keys; nested keys (like chat_template_kwargs) should be provided whole
+            api_params["extra_body"] = {**api_params.get("extra_body", {}), **(lm_config.get("extra_body") or {})}
+        # Forward Qwen3 chat template kwargs via extra_body when requested
+        if lm_config.get("enable_thinking") is not None:
+            api_params["extra_body"] = api_params.get("extra_body", {})
+            ctk = api_params["extra_body"].get("chat_template_kwargs", {})
+            ctk["enable_thinking"] = lm_config["enable_thinking"]
+            api_params["extra_body"]["chat_template_kwargs"] = ctk
+
         # Add reasoning_effort only for o3-mini
         if model in ["o3-mini"]:
             print("Reasoning effort:", reasoning_effort)
             api_params["reasoning_effort"] = reasoning_effort

-        output = await self.async_client.chat.completions.create(**api_params)
+        # Filter Synth-only params when calling external OpenAI-compatible providers
+        # External providers (e.g., OpenAI, Groq) reject unknown fields like
+        # extra_body.chat_template_kwargs or stop_after_tool_calls.
+        try:
+            base_url_obj = getattr(self.async_client, "base_url", None)
+            base_url_str = str(base_url_obj) if base_url_obj is not None else ""
+        except Exception:
+            base_url_str = ""
+
+        is_external_provider = (
+            "openai.com" in base_url_str or "api.groq.com" in base_url_str
+        )
+
+        if is_external_provider:
+            # Remove extra_body entirely; this is Synth-specific plumbing
+            if "extra_body" in api_params:
+                api_params.pop("extra_body", None)
+
+            # Also ensure we don't pass stray vendor-specific fields if present
+            # (defensive in case upstream added them at top-level later)
+            for k in ["chat_template_kwargs", "stop_after_tool_calls"]:
+                api_params.pop(k, None)
+
+        # GPT-5 models: parameter normalization
+        if model.startswith("gpt-5"):
+            # Require max_completion_tokens instead of max_tokens
+            if "max_tokens" in api_params:
+                api_params["max_completion_tokens"] = api_params.pop("max_tokens")
+            # Only default temperature=1 supported; omit custom temperature
+            if "temperature" in api_params:
+                api_params.pop("temperature", None)
+
+        # Call API with better auth error reporting
+        # try:
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: Making request with params:")
+            print(f" Model: {api_params.get('model')}")
+            print(f" Messages: {len(api_params.get('messages', []))} messages")
+            print(f" Tools: {len(api_params.get('tools', []))} tools")
+            print(f" Max tokens: {api_params.get('max_tokens', 'NOT SET')}")
+            print(f" Temperature: {api_params.get('temperature', 'NOT SET')}")
+            if 'tools' in api_params:
+                print(f" First tool: {api_params['tools'][0]}")
+            print(f" FULL API PARAMS: {api_params}")
+
+        # Quiet targeted retry for OpenAI 400 tool_use_failed during tool-calling
+        try:
+            max_attempts_for_tool_use = int(os.getenv("SYNTH_TOOL_USE_RETRIES", "5"))
+        except Exception:
+            max_attempts_for_tool_use = 5
+        try:
+            backoff_seconds = float(os.getenv("SYNTH_TOOL_USE_BACKOFF_INITIAL", "0.5"))
+        except Exception:
+            backoff_seconds = 0.5
+
+        attempt_index = 0
+        while True:
+            try:
+                output = await self.async_client.chat.completions.create(**api_params)
+                break
+            except openai.BadRequestError as err:
+                # Detect tool-use failure from various SDK surfaces
+                should_retry = False
+                # 1) Body dict
+                body = getattr(err, "body", None)
+                if isinstance(body, dict):
+                    try:
+                        err_obj = body.get("error") if isinstance(body.get("error"), dict) else {}
+                        code_val = err_obj.get("code")
+                        msg_val = err_obj.get("message")
+                        if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                            should_retry = True
+                    except Exception:
+                        pass
+                # 2) Response JSON
+                if not should_retry:
+                    try:
+                        resp = getattr(err, "response", None)
+                        if resp is not None:
+                            j = resp.json()
+                            if isinstance(j, dict):
+                                err_obj = j.get("error") if isinstance(j.get("error"), dict) else {}
+                                code_val = err_obj.get("code")
+                                msg_val = err_obj.get("message")
+                                if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                                    should_retry = True
+                    except Exception:
+                        pass
+                # 3) Fallback to string match
+                if not should_retry:
+                    err_text = str(err)
+                    if "tool_use_failed" in err_text or "Failed to call a function" in err_text:
+                        should_retry = True
+                if should_retry and attempt_index + 1 < max_attempts_for_tool_use:
+                    await asyncio.sleep(backoff_seconds)
+                    backoff_seconds = min(backoff_seconds * 2.0, 2.0)
+                    attempt_index += 1
+                    continue
+                raise
+
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: Response received:")
+            print(f" Type: {type(output)}")
+            print(f" Choices: {len(output.choices) if hasattr(output, 'choices') else 'N/A'}")
+            if hasattr(output, 'choices') and output.choices:
+                choice = output.choices[0]
+                print(f" Choice type: {type(choice)}")
+                if hasattr(choice, 'message'):
+                    message = choice.message
+                    print(f" Message type: {type(message)}")
+                    print(f" Has tool_calls: {hasattr(message, 'tool_calls')}")
+                    if hasattr(message, 'tool_calls'):
+                        print(f" Tool calls: {message.tool_calls}")
+                    print(f" Content: {message.content[:200] if hasattr(message, 'content') and message.content else 'None'}...")
+            # Show finish_reason and usage if available
+            try:
+                print(f" finish_reason: {getattr(choice, 'finish_reason', None)}")
+                usage = getattr(output, 'usage', None)
+                if usage:
+                    print(f" usage: prompt_tokens={getattr(usage, 'prompt_tokens', None)}, completion_tokens={getattr(usage, 'completion_tokens', None)}, total_tokens={getattr(usage, 'total_tokens', None)}")
+            except Exception:
+                pass
+
+        if DEBUG:
+            print(f"🔍 OPENAI DEBUG: FULL RAW RESPONSE:")
+            if hasattr(output.choices[0].message, 'content') and output.choices[0].message.content:
+                print(f" FULL CONTENT:\n{output.choices[0].message.content}")
+            print(f" Raw choice: {choice}")
+            print(f" Raw message: {message}")
+        # except Exception as e:
+        #     try:
+        #         from openai import AuthenticationError as _OpenAIAuthErr  # type: ignore
+        #     except ModuleNotFoundError:
+        #         _OpenAIAuthErr = type(e)
+        #     if isinstance(e, _OpenAIAuthErr):
+        #         key_preview = (os.getenv("OPENAI_API_KEY") or "")[:8]
+        #         # Create a more informative error message but preserve the original exception
+        #         enhanced_msg = f"Invalid API key format. Expected prefix 'sk-' or 'sk_live_'. Provided key begins with '{key_preview}'. Original error: {str(e)}"
+        #         # Re-raise the original exception with enhanced message if possible
+        #         if hasattr(e, 'response') and hasattr(e, 'body'):
+        #             raise _OpenAIAuthErr(enhanced_msg, response=e.response, body=e.body) from None
+        #         else:
+        #             # Fallback: just re-raise the original with a print for debugging
+        #             print(f"🔑 API Key Debug: {enhanced_msg}")
+        #             raise e from None
+        #     raise
         message = output.choices[0].message

         # Convert tool calls to dict format
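For illustration, the targeted retry added in the hunk above can be read as the standalone sketch below. The wrapper function is an assumption for demonstration only; the environment variables, the error matching, and the 2-second backoff cap mirror the diff.

```python
# Minimal sketch of the targeted retry added above (illustrative wrapper,
# not an API of synth_ai): retry chat.completions.create only when the
# provider returns a 400 "tool_use_failed" error, with a capped backoff.
import asyncio
import os

import openai


async def create_with_tool_use_retry(client: openai.AsyncOpenAI, **api_params):
    max_attempts = int(os.getenv("SYNTH_TOOL_USE_RETRIES", "5"))
    delay = float(os.getenv("SYNTH_TOOL_USE_BACKOFF_INITIAL", "0.5"))
    attempt = 0
    while True:
        try:
            return await client.chat.completions.create(**api_params)
        except openai.BadRequestError as err:
            text = str(err)
            retryable = "tool_use_failed" in text or "Failed to call a function" in text
            if retryable and attempt + 1 < max_attempts:
                await asyncio.sleep(delay)
                delay = min(delay * 2.0, 2.0)  # exponential backoff capped at 2 s
                attempt += 1
                continue
            raise
```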
@@ -149,10 +372,24 @@ class OpenAIStandard(VendorBase):
                 for tc in message.tool_calls
             ]

+        # Attach basic usage if available
+        usage_dict = None
+        try:
+            usage_obj = getattr(output, 'usage', None)
+            if usage_obj is not None:
+                usage_dict = {
+                    "prompt_tokens": getattr(usage_obj, 'prompt_tokens', None),
+                    "completion_tokens": getattr(usage_obj, 'completion_tokens', None),
+                    "total_tokens": getattr(usage_obj, 'total_tokens', None),
+                }
+        except Exception:
+            usage_dict = None
+
         lm_response = BaseLMResponse(
             raw_response=message.content or "",  # Use empty string if no content
             structured_output=None,
             tool_calls=tool_calls,
+            usage=usage_dict,
         )
         lm_config["reasoning_effort"] = reasoning_effort
         used_cache_handler.add_to_managed_cache(
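With this hunk the vendor attaches a plain token-usage dict to the `BaseLMResponse` it returns (the field itself is presumably among the `+7` lines added to `synth_ai/lm/vendors/base.py`). A minimal sketch of what a caller can now read, using the same constructor keywords as the hunk; the numbers are placeholders:

```python
# Illustrative only: constructing a BaseLMResponse the way the updated vendor
# does and reading the new usage field. Values are made up for the example.
from synth_ai.lm.vendors.base import BaseLMResponse

resp = BaseLMResponse(
    raw_response="Hello!",
    structured_output=None,
    tool_calls=None,
    usage={"prompt_tokens": 12, "completion_tokens": 3, "total_tokens": 15},
)
if resp.usage is not None:
    # Any key may be None when the provider omits usage information.
    print(resp.usage["total_tokens"])  # 15
```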
@@ -206,12 +443,84 @@ class OpenAIStandard(VendorBase):
             "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
         )

+        # Forward additional sampling / control params if provided
+        if lm_config.get("max_tokens") is not None:
+            api_params["max_tokens"] = lm_config["max_tokens"]
+        if lm_config.get("top_p") is not None:
+            api_params["top_p"] = lm_config["top_p"]
+        if lm_config.get("frequency_penalty") is not None:
+            api_params["frequency_penalty"] = lm_config["frequency_penalty"]
+        if lm_config.get("presence_penalty") is not None:
+            api_params["presence_penalty"] = lm_config["presence_penalty"]
+        if lm_config.get("stop") is not None:
+            api_params["stop"] = lm_config["stop"]
+        if lm_config.get("tool_choice") is not None:
+            api_params["tool_choice"] = lm_config["tool_choice"]
+
         # Add reasoning_effort only for o3-mini
         if model in ["o3-mini"]:
             api_params["reasoning_effort"] = reasoning_effort

-        output = self.sync_client.chat.completions.create(**api_params)
+        # Sync path: apply the same targeted retry
+        try:
+            max_attempts_for_tool_use = int(os.getenv("SYNTH_TOOL_USE_RETRIES", "5"))
+        except Exception:
+            max_attempts_for_tool_use = 5
+        try:
+            backoff_seconds = float(os.getenv("SYNTH_TOOL_USE_BACKOFF_INITIAL", "0.5"))
+        except Exception:
+            backoff_seconds = 0.5
+
+        attempt_index = 0
+        while True:
+            try:
+                output = self.sync_client.chat.completions.create(**api_params)
+                break
+            except openai.BadRequestError as err:
+                should_retry = False
+                body = getattr(err, "body", None)
+                if isinstance(body, dict):
+                    try:
+                        err_obj = body.get("error") if isinstance(body.get("error"), dict) else {}
+                        code_val = err_obj.get("code")
+                        msg_val = err_obj.get("message")
+                        if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                            should_retry = True
+                    except Exception:
+                        pass
+                if not should_retry:
+                    try:
+                        resp = getattr(err, "response", None)
+                        if resp is not None:
+                            j = resp.json()
+                            if isinstance(j, dict):
+                                err_obj = j.get("error") if isinstance(j.get("error"), dict) else {}
+                                code_val = err_obj.get("code")
+                                msg_val = err_obj.get("message")
+                                if code_val == "tool_use_failed" or (isinstance(msg_val, str) and "Failed to call a function" in msg_val):
+                                    should_retry = True
+                    except Exception:
+                        pass
+                if not should_retry:
+                    err_text = str(err)
+                    if "tool_use_failed" in err_text or "Failed to call a function" in err_text:
+                        should_retry = True
+                if should_retry and attempt_index + 1 < max_attempts_for_tool_use:
+                    time.sleep(backoff_seconds)
+                    backoff_seconds = min(backoff_seconds * 2.0, 2.0)
+                    attempt_index += 1
+                    continue
+                raise
         message = output.choices[0].message
+        DEBUG = os.getenv("SYNTH_OPENAI_DEBUG") == "1"
+        if DEBUG:
+            try:
+                print(f"🔍 OPENAI DEBUG (sync): finish_reason={getattr(output.choices[0], 'finish_reason', None)}")
+                usage = getattr(output, 'usage', None)
+                if usage:
+                    print(f"🔍 OPENAI DEBUG (sync): usage prompt_tokens={getattr(usage, 'prompt_tokens', None)}, completion_tokens={getattr(usage, 'completion_tokens', None)}, total_tokens={getattr(usage, 'total_tokens', None)}")
+            except Exception:
+                pass

         # Convert tool calls to dict format
         tool_calls = None
@@ -228,10 +537,24 @@ class OpenAIStandard(VendorBase):
                 for tc in message.tool_calls
             ]

+        # Attach basic usage if available
+        usage_dict = None
+        try:
+            usage_obj = getattr(output, 'usage', None)
+            if usage_obj is not None:
+                usage_dict = {
+                    "prompt_tokens": getattr(usage_obj, 'prompt_tokens', None),
+                    "completion_tokens": getattr(usage_obj, 'completion_tokens', None),
+                    "total_tokens": getattr(usage_obj, 'total_tokens', None),
+                }
+        except Exception:
+            usage_dict = None
+
         lm_response = BaseLMResponse(
             raw_response=message.content or "",  # Use empty string if no content
             structured_output=None,
             tool_calls=tool_calls,
+            usage=usage_dict,
         )
         lm_config["reasoning_effort"] = reasoning_effort
         used_cache_handler.add_to_managed_cache(
@@ -342,6 +665,18 @@ class OpenAIStandard(VendorBase):
         if model in ["o3-mini"]:
             api_params["reasoning_effort"] = reasoning_effort

+        # Normalize for external OpenAI as well in sync path
+        try:
+            base_url_obj = getattr(self.sync_client, "base_url", None)
+            base_url_str_sync = str(base_url_obj) if base_url_obj is not None else ""
+        except Exception:
+            base_url_str_sync = ""
+        if ("openai.com" in base_url_str_sync or "api.groq.com" in base_url_str_sync) and model.startswith("gpt-5"):
+            if "max_tokens" in api_params:
+                api_params["max_completion_tokens"] = api_params.pop("max_tokens")
+            if "temperature" in api_params:
+                api_params.pop("temperature", None)
+
         output = self.sync_client.chat.completions.create(**api_params)

         structured_output_api_result = response_model(**output.choices[0].message.content)
--- /dev/null
+++ b/synth_ai/lm/vendors/openai_standard_responses.py
@@ -0,0 +1,243 @@
+"""
+OpenAI Responses API extensions for OpenAIStandard vendor.
+
+This module contains the Responses API and Harmony encoding methods
+that extend the OpenAIStandard class functionality.
+"""
+
+from typing import Any, Dict, List, Optional
+import uuid
+from pydantic import BaseModel
+
+from synth_ai.lm.tools.base import BaseTool
+from synth_ai.lm.vendors.base import BaseLMResponse
+from synth_ai.lm.vendors.retries import MAX_BACKOFF
+import backoff
+
+
+def _silent_backoff_handler(_details):
+    """No-op handler to keep stdout clean while still allowing visibility via logging if desired."""
+    pass
+
+
+DEFAULT_EXCEPTIONS_TO_RETRY = (
+    Exception,  # Will be more specific when imported
+)
+
+
+class OpenAIResponsesAPIMixin:
+    """Mixin class providing Responses API functionality for OpenAI vendors."""
+
+    async def _hit_api_async_responses(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        lm_config: Dict[str, Any],
+        previous_response_id: Optional[str] = None,
+        use_ephemeral_cache_only: bool = False,
+        tools: Optional[List[BaseTool]] = None,
+    ) -> BaseLMResponse:
+        """Use OpenAI Responses API for supported models."""
+
+        print(f"🔍 RESPONSES API: Called for model {model}")
+        print(f"🔍 RESPONSES API: previous_response_id = {previous_response_id}")
+
+        # Check if the client has responses attribute
+        if not hasattr(self.async_client, 'responses'):
+            print("🔍 RESPONSES API: Client doesn't have responses attribute, using fallback")
+            # Fallback - use chat completions with simulated response_id
+            response = await self._hit_api_async(
+                model=model,
+                messages=messages,
+                lm_config=lm_config,
+                use_ephemeral_cache_only=use_ephemeral_cache_only,
+                tools=tools,
+            )
+
+            # Add Responses API fields
+            if not response.response_id:
+                import uuid
+                response.response_id = str(uuid.uuid4())
+            response.api_type = "responses"
+            return response
+
+        # Use the official Responses API
+        try:
+            # Common API call params for Responses API
+            api_params = {
+                "model": model,
+            }
+
+            # For Responses API, we use 'input' parameter
+            if previous_response_id:
+                # Continue existing thread
+                api_params["previous_response_id"] = previous_response_id
+                # Only pass the new user input
+                if messages and len(messages) > 0:
+                    # Get the last user message content
+                    last_message = messages[-1]
+                    api_params["input"] = last_message.get("content", "")
+            else:
+                # Start new thread - combine system and user messages into input
+                if messages and len(messages) > 0:
+                    # Combine messages into a single input string
+                    input_parts = []
+                    for msg in messages:
+                        role = msg.get("role", "")
+                        content = msg.get("content", "")
+                        if role == "system":
+                            input_parts.append(f"System: {content}")
+                        elif role == "user":
+                            input_parts.append(f"User: {content}")
+                        elif role == "assistant":
+                            input_parts.append(f"Assistant: {content}")
+                    api_params["input"] = "\n".join(input_parts)
+
+            # Add tools if provided
+            if tools and all(isinstance(tool, BaseTool) for tool in tools):
+                api_params["tools"] = [tool.to_openai_tool() for tool in tools]
+            elif tools:
+                api_params["tools"] = tools
+
+            # Add other parameters from lm_config if needed
+            if "max_tokens" in lm_config:
+                api_params["max_tokens"] = lm_config["max_tokens"]
+
+            print(f"🔍 RESPONSES API: Calling with params: {list(api_params.keys())}")
+
+            # Call the Responses API
+            response = await self.async_client.responses.create(**api_params)
+
+            print(f"🔍 RESPONSES API: Response received, type: {type(response)}")
+
+            # Extract fields from response
+            output_text = getattr(response, 'output_text', getattr(response, 'content', ''))
+            reasoning_obj = getattr(response, 'reasoning', None)
+            response_id = getattr(response, 'id', None)
+
+            # Debug reasoning type (only first time)
+            if reasoning_obj and not hasattr(self, '_reasoning_logged'):
+                print(f"🔍 RESPONSES API: Reasoning type: {type(reasoning_obj)}")
+                print(f"🔍 RESPONSES API: Reasoning attributes: {[x for x in dir(reasoning_obj) if not x.startswith('_')]}")
+                self._reasoning_logged = True
+
+            # Handle reasoning - it might be an object or a string
+            reasoning = None
+            if reasoning_obj:
+                if isinstance(reasoning_obj, str):
+                    # Synth backend returns full reasoning as string
+                    reasoning = reasoning_obj
+                else:
+                    # OpenAI returns a Reasoning object
+                    # Try to get summary first, but preserve entire object if no summary
+                    if hasattr(reasoning_obj, 'summary') and reasoning_obj.summary:
+                        reasoning = reasoning_obj.summary
+                    else:
+                        # Preserve the full object structure as JSON
+                        # This includes effort level and any other fields
+                        if hasattr(reasoning_obj, 'model_dump_json'):
+                            reasoning = reasoning_obj.model_dump_json()
+                        elif hasattr(reasoning_obj, 'to_dict'):
+                            import json
+                            reasoning = json.dumps(reasoning_obj.to_dict())
+                        else:
+                            reasoning = str(reasoning_obj)
+
+            # Handle tool calls if present
+            tool_calls = None
+            if hasattr(response, 'tool_calls') and response.tool_calls:
+                tool_calls = [
+                    {
+                        "id": tc.id,
+                        "type": tc.type,
+                        "function": {
+                            "name": tc.function.name,
+                            "arguments": tc.function.arguments,
+                        },
+                    }
+                    for tc in response.tool_calls
+                ]
+
+            print(f"🔍 RESPONSES API: Extracted response_id = {response_id}")
+
+            return BaseLMResponse(
+                raw_response=output_text,
+                response_id=response_id,
+                reasoning=reasoning,
+                api_type="responses",
+                tool_calls=tool_calls,
+            )
+
+        except (AttributeError, Exception) as e:
+            print(f"🔍 RESPONSES API: Error calling Responses API: {e}")
+            # No fallback - raise the error
+            raise
+
+    async def _hit_api_async_harmony(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        lm_config: Dict[str, Any],
+        previous_response_id: Optional[str] = None,
+        use_ephemeral_cache_only: bool = False,
+        tools: Optional[List[BaseTool]] = None,
+    ) -> BaseLMResponse:
+        """Use Harmony encoding for OSS-GPT models."""
+        if not self.harmony_available:
+            raise ImportError("openai-harmony package required for OSS-GPT models. Install with: pip install openai-harmony")
+
+        from openai_harmony import Message, Role, Conversation
+
+        # Convert messages to Harmony format
+        harmony_messages = []
+        for msg in messages:
+            role = Role.SYSTEM if msg["role"] == "system" else (
+                Role.USER if msg["role"] == "user" else Role.ASSISTANT
+            )
+            content = msg["content"]
+            # Handle multimodal content
+            if isinstance(content, list):
+                # Extract text content for now
+                text_parts = [part.get("text", "") for part in content if part.get("type") == "text"]
+                content = " ".join(text_parts)
+            harmony_messages.append(Message.from_role_and_content(role, content))
+
+        conv = Conversation.from_messages(harmony_messages)
+        tokens = self.harmony_enc.render_conversation_for_completion(conv, Role.ASSISTANT)
+
+        # For now, we'll need to integrate with Synth GPU endpoint
+        # This would require the actual endpoint to be configured
+        # Placeholder for actual Synth GPU call
+        import aiohttp
+        import os
+
+        synth_gpu_endpoint = os.getenv("SYNTH_GPU_HARMONY_ENDPOINT")
+        if not synth_gpu_endpoint:
+            raise ValueError("SYNTH_GPU_HARMONY_ENDPOINT environment variable not set")
+
+        async with aiohttp.ClientSession() as session:
+            async with session.post(
+                f"{synth_gpu_endpoint}/v1/completions",
+                json={
+                    "model": model,
+                    "prompt": tokens,
+                    "max_tokens": lm_config.get("max_tokens", 4096),
+                    "temperature": lm_config.get("temperature", 0.8),
+                }
+            ) as resp:
+                result = await resp.json()
+
+        # Parse response using Harmony
+        response_tokens = result.get("choices", [{}])[0].get("text", "")
+        parsed = self.harmony_enc.parse_messages_from_completion_tokens(response_tokens, Role.ASSISTANT)
+
+        if parsed:
+            assistant_msg = parsed[-1].content_text() if hasattr(parsed[-1], 'content_text') else str(parsed[-1])
+        else:
+            assistant_msg = response_tokens
+
+        return BaseLMResponse(
+            raw_response=assistant_msg,
+            response_id=previous_response_id or str(uuid.uuid4()),
+            api_type="harmony",
+        )