synth-ai 0.2.2.dev0__py3-none-any.whl → 0.2.4.dev2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (115)
  1. synth_ai/cli/__init__.py +66 -0
  2. synth_ai/cli/balance.py +205 -0
  3. synth_ai/cli/calc.py +70 -0
  4. synth_ai/cli/demo.py +74 -0
  5. synth_ai/{cli.py → cli/legacy_root_backup.py} +60 -15
  6. synth_ai/cli/man.py +103 -0
  7. synth_ai/cli/recent.py +126 -0
  8. synth_ai/cli/root.py +184 -0
  9. synth_ai/cli/status.py +126 -0
  10. synth_ai/cli/traces.py +136 -0
  11. synth_ai/cli/watch.py +508 -0
  12. synth_ai/config/base_url.py +53 -0
  13. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +252 -0
  14. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_duckdb_v2_backup.py +413 -0
  15. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +760 -0
  16. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_synth.py +34 -0
  17. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth.py +1740 -0
  18. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/test_crafter_react_agent_lm_synth_v2_backup.py +1318 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_duckdb_v2_backup.py +386 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v2_backup.py +1352 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +4 -4
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/test_crafter_react_agent_openai_v2_backup.py +2551 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1 -1
  25. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +1 -1
  26. synth_ai/environments/examples/crafter_classic/agent_demos/old/traces/session_crafter_episode_16_15227b68-2906-416f-acc4-d6a9b4fa5828_20250725_001154.json +1363 -1
  27. synth_ai/environments/examples/crafter_classic/agent_demos/test_crafter_react_agent.py +3 -3
  28. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  29. synth_ai/environments/examples/crafter_custom/environment.py +1 -1
  30. synth_ai/environments/examples/enron/dataset/corbt___enron_emails_sample_questions/default/0.0.0/293c9fe8170037e01cc9cf5834e0cd5ef6f1a6bb/dataset_info.json +1 -0
  31. synth_ai/environments/examples/nethack/helpers/achievements.json +64 -0
  32. synth_ai/environments/examples/red/units/test_exploration_strategy.py +1 -1
  33. synth_ai/environments/examples/red/units/test_menu_bug_reproduction.py +5 -5
  34. synth_ai/environments/examples/red/units/test_movement_debug.py +2 -2
  35. synth_ai/environments/examples/red/units/test_retry_movement.py +1 -1
  36. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/available_envs.json +122 -0
  37. synth_ai/environments/examples/sokoban/verified_puzzles.json +54987 -0
  38. synth_ai/environments/service/core_routes.py +1 -1
  39. synth_ai/experimental/synth_oss.py +446 -0
  40. synth_ai/learning/core.py +21 -0
  41. synth_ai/learning/gateway.py +4 -0
  42. synth_ai/learning/prompts/gepa.py +0 -0
  43. synth_ai/learning/prompts/mipro.py +8 -0
  44. synth_ai/lm/__init__.py +3 -0
  45. synth_ai/lm/core/main.py +4 -0
  46. synth_ai/lm/core/main_v3.py +238 -122
  47. synth_ai/lm/core/vendor_clients.py +4 -0
  48. synth_ai/lm/provider_support/openai.py +11 -2
  49. synth_ai/lm/vendors/base.py +7 -0
  50. synth_ai/lm/vendors/openai_standard.py +339 -4
  51. synth_ai/lm/vendors/openai_standard_responses.py +243 -0
  52. synth_ai/lm/vendors/synth_client.py +155 -5
  53. synth_ai/lm/warmup.py +54 -17
  54. synth_ai/tracing/__init__.py +18 -0
  55. synth_ai/tracing_v1/__init__.py +29 -14
  56. synth_ai/tracing_v3/__init__.py +2 -2
  57. synth_ai/tracing_v3/abstractions.py +62 -17
  58. synth_ai/tracing_v3/config.py +13 -7
  59. synth_ai/tracing_v3/db_config.py +6 -6
  60. synth_ai/tracing_v3/hooks.py +1 -1
  61. synth_ai/tracing_v3/llm_call_record_helpers.py +350 -0
  62. synth_ai/tracing_v3/lm_call_record_abstractions.py +257 -0
  63. synth_ai/tracing_v3/session_tracer.py +5 -5
  64. synth_ai/tracing_v3/tests/test_concurrent_operations.py +1 -1
  65. synth_ai/tracing_v3/tests/test_llm_call_records.py +672 -0
  66. synth_ai/tracing_v3/tests/test_session_tracer.py +43 -9
  67. synth_ai/tracing_v3/tests/test_turso_manager.py +1 -1
  68. synth_ai/tracing_v3/turso/manager.py +18 -11
  69. synth_ai/tracing_v3/turso/models.py +1 -0
  70. synth_ai/tui/__main__.py +13 -0
  71. synth_ai/tui/dashboard.py +329 -0
  72. synth_ai/v0/tracing/__init__.py +0 -0
  73. synth_ai/{tracing → v0/tracing}/base_client.py +3 -3
  74. synth_ai/{tracing → v0/tracing}/client_manager.py +1 -1
  75. synth_ai/{tracing → v0/tracing}/context.py +1 -1
  76. synth_ai/{tracing → v0/tracing}/decorators.py +11 -11
  77. synth_ai/v0/tracing/events/__init__.py +0 -0
  78. synth_ai/{tracing → v0/tracing}/events/manage.py +4 -4
  79. synth_ai/{tracing → v0/tracing}/events/scope.py +6 -6
  80. synth_ai/{tracing → v0/tracing}/events/store.py +3 -3
  81. synth_ai/{tracing → v0/tracing}/immediate_client.py +6 -6
  82. synth_ai/{tracing → v0/tracing}/log_client_base.py +2 -2
  83. synth_ai/{tracing → v0/tracing}/retry_queue.py +3 -3
  84. synth_ai/{tracing → v0/tracing}/trackers.py +2 -2
  85. synth_ai/{tracing → v0/tracing}/upload.py +4 -4
  86. synth_ai/v0/tracing_v1/__init__.py +16 -0
  87. synth_ai/{tracing_v1 → v0/tracing_v1}/base_client.py +3 -3
  88. synth_ai/{tracing_v1 → v0/tracing_v1}/client_manager.py +1 -1
  89. synth_ai/{tracing_v1 → v0/tracing_v1}/context.py +1 -1
  90. synth_ai/{tracing_v1 → v0/tracing_v1}/decorators.py +11 -11
  91. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  92. synth_ai/{tracing_v1 → v0/tracing_v1}/events/manage.py +4 -4
  93. synth_ai/{tracing_v1 → v0/tracing_v1}/events/scope.py +6 -6
  94. synth_ai/{tracing_v1 → v0/tracing_v1}/events/store.py +3 -3
  95. synth_ai/{tracing_v1 → v0/tracing_v1}/immediate_client.py +6 -6
  96. synth_ai/{tracing_v1 → v0/tracing_v1}/log_client_base.py +2 -2
  97. synth_ai/{tracing_v1 → v0/tracing_v1}/retry_queue.py +3 -3
  98. synth_ai/{tracing_v1 → v0/tracing_v1}/trackers.py +2 -2
  99. synth_ai/{tracing_v1 → v0/tracing_v1}/upload.py +4 -4
  100. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/METADATA +100 -5
  101. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/RECORD +115 -75
  102. /synth_ai/{tracing/events/__init__.py → compound/cais.py} +0 -0
  103. /synth_ai/{tracing_v1/events/__init__.py → environments/examples/crafter_classic/debug_translation.py} +0 -0
  104. /synth_ai/{tracing → v0/tracing}/abstractions.py +0 -0
  105. /synth_ai/{tracing → v0/tracing}/config.py +0 -0
  106. /synth_ai/{tracing → v0/tracing}/local.py +0 -0
  107. /synth_ai/{tracing → v0/tracing}/utils.py +0 -0
  108. /synth_ai/{tracing_v1 → v0/tracing_v1}/abstractions.py +0 -0
  109. /synth_ai/{tracing_v1 → v0/tracing_v1}/config.py +0 -0
  110. /synth_ai/{tracing_v1 → v0/tracing_v1}/local.py +0 -0
  111. /synth_ai/{tracing_v1 → v0/tracing_v1}/utils.py +0 -0
  112. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/WHEEL +0 -0
  113. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/entry_points.txt +0 -0
  114. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/licenses/LICENSE +0 -0
  115. {synth_ai-0.2.2.dev0.dist-info → synth_ai-0.2.4.dev2.dist-info}/top_level.txt +0 -0
synth_ai/lm/core/main_v3.py

@@ -5,37 +5,39 @@ This module provides the LM class with async v3 tracing support,
 replacing the v2 DuckDB-based implementation.
 """

-from typing import Any, Dict, List, Literal, Optional, Union
-import os
-import functools
+from typing import Any, Literal
 import asyncio
 import time

-from pydantic import BaseModel, Field
+from pydantic import BaseModel

-from synth_ai.lm.core.exceptions import StructuredOutputCoercionFailureException
+from synth_ai.lm.config import reasoning_models
 from synth_ai.lm.core.vendor_clients import (
     anthropic_naming_regexes,
     get_client,
     openai_naming_regexes,
 )
 from synth_ai.lm.structured_outputs.handler import StructuredOutputHandler
-from synth_ai.lm.vendors.base import VendorBase, BaseLMResponse
 from synth_ai.lm.tools.base import BaseTool
-from synth_ai.lm.config import reasoning_models
+from synth_ai.lm.vendors.base import BaseLMResponse, VendorBase

 # V3 tracing imports
-from synth_ai.tracing_v3.session_tracer import SessionTracer
-from synth_ai.tracing_v3.decorators import set_session_id, set_turn_number, set_session_tracer
 from synth_ai.tracing_v3.abstractions import LMCAISEvent, TimeRecord
+from synth_ai.tracing_v3.decorators import set_turn_number
+from synth_ai.tracing_v3.llm_call_record_helpers import (
+    compute_aggregates_from_call_records,
+    create_llm_call_record_from_response,
+)
+from synth_ai.tracing_v3.session_tracer import SessionTracer


 def build_messages(
     sys_msg: str,
     user_msg: str,
-    images_bytes: List = [],
-    model_name: Optional[str] = None,
-) -> List[Dict]:
+    images_bytes: list | None = None,
+    model_name: str | None = None,
+) -> list[dict]:
+    images_bytes = images_bytes or []
     if len(images_bytes) > 0 and any(regex.match(model_name) for regex in openai_naming_regexes):
         return [
             {"role": "system", "content": sys_msg},
@@ -51,9 +53,7 @@ def build_messages(
             ],
         },
     ]
-    elif len(images_bytes) > 0 and any(
-        regex.match(model_name) for regex in anthropic_naming_regexes
-    ):
+    elif len(images_bytes) > 0 and any(regex.match(model_name) for regex in anthropic_naming_regexes):
         return [
             {"role": "system", "content": sys_msg},
             {
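The replaced mutable default matters: a shared images_bytes list would leak state across calls. A minimal usage sketch of the new signature (model name illustrative; assumes the no-image branch builds plain system/user messages):

from synth_ai.lm.core.main_v3 import build_messages

# Text-only call: images_bytes can now be omitted instead of defaulting
# to a module-level mutable list shared by every caller.
messages = build_messages(
    sys_msg="You are a concise assistant.",
    user_msg="Summarize the v3 tracing changes.",
    model_name="gpt-4o-mini",  # illustrative; any OpenAI-style name works here
)
# Expected shape: [{"role": "system", ...}, {"role": "user", ...}]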
@@ -84,24 +84,27 @@ class LM:

     def __init__(
         self,
-        vendor: Optional[str] = None,
-        model: Optional[str] = None,
+        vendor: str | None = None,
+        model: str | None = None,
         # v2 compatibility parameters
-        model_name: Optional[str] = None,  # Alias for model
-        formatting_model_name: Optional[str] = None,  # For structured outputs
-        provider: Optional[str] = None,  # Alias for vendor
+        model_name: str | None = None,  # Alias for model
+        formatting_model_name: str | None = None,  # For structured outputs
+        provider: str | None = None,  # Alias for vendor
         synth_logging: bool = True,  # v2 compatibility
         max_retries: Literal["None", "Few", "Many"] = "Few",  # v2 compatibility
         # v3 parameters
-        is_structured: Optional[bool] = None,
-        structured_outputs_vendor: Optional[str] = None,
-        response_format: Union[BaseModel, Dict[str, Any], None] = None,
+        is_structured: bool | None = None,
+        structured_outputs_vendor: str | None = None,
+        response_format: type[BaseModel] | dict[str, Any] | None = None,
         json_mode: bool = False,
         temperature: float = 0.8,
-        session_tracer: Optional[SessionTracer] = None,
-        system_id: Optional[str] = None,
+        session_tracer: SessionTracer | None = None,
+        system_id: str | None = None,
         enable_v3_tracing: bool = True,
-        enable_v2_tracing: Optional[bool] = None,  # v2 compatibility
+        enable_v2_tracing: bool | None = None,  # v2 compatibility
+        # Responses API parameters
+        auto_store_responses: bool = True,
+        use_responses_api: bool | None = None,
         **additional_params,
     ):
         # Handle v2 compatibility parameters
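A hedged construction sketch for the new Responses API knobs (model name illustrative; the other arguments keep their defaults from the signature above):

from synth_ai.lm.core.main_v3 import LM

lm = LM(
    model="o4-mini",            # vendor is auto-detected from the model name
    temperature=0.2,
    use_responses_api=True,     # None (default) defers to model-based auto-detection
    auto_store_responses=True,  # chain each call to the previously stored response_id
)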
@@ -116,14 +119,14 @@ class LM:
         if vendor is None and model is not None:
             # Import vendor detection logic
             from synth_ai.lm.core.vendor_clients import (
-                openai_naming_regexes,
                 anthropic_naming_regexes,
-                gemini_naming_regexes,
+                custom_endpoint_naming_regexes,
                 deepseek_naming_regexes,
-                groq_naming_regexes,
+                gemini_naming_regexes,
                 grok_naming_regexes,
+                groq_naming_regexes,
+                openai_naming_regexes,
                 openrouter_naming_regexes,
-                custom_endpoint_naming_regexes,
                 together_naming_regexes,
             )

@@ -160,18 +163,52 @@ class LM:
         self.system_id = system_id or f"lm_{self.vendor or 'unknown'}_{self.model or 'unknown'}"
         self.enable_v3_tracing = enable_v3_tracing
         self.additional_params = additional_params
+
+        # Initialize vendor wrapper early, before any potential usage
+        # (e.g., within StructuredOutputHandler initialization below)
+        self._vendor_wrapper = None
+
+        # Responses API thread management
+        self.auto_store_responses = auto_store_responses
+        self.use_responses_api = use_responses_api
+        self._last_response_id: str | None = None

         # Set structured output handler if needed
         if self.response_format:
             self.is_structured = True
+            # Choose mode automatically: prefer forced_json for OpenAI/reasoning models
+            forced_json_preferred = (self.vendor == "openai") or (
+                self.model in reasoning_models if self.model else False
+            )
+            structured_output_mode = "forced_json" if forced_json_preferred else "stringified_json"
+
+            # Build core and formatting clients
+            core_client = get_client(
+                self.model,
+                with_formatting=(structured_output_mode == "forced_json"),
+                provider=self.vendor,
+            )
+            formatting_model = formatting_model_name or self.model
+            formatting_client = get_client(
+                formatting_model,
+                with_formatting=True,
+                provider=self.vendor if self.vendor != "custom_endpoint" else None,
+            )
+
+            # Map retries
+            max_retries_dict = {"None": 0, "Few": 2, "Many": 5}
+            handler_params = {"max_retries": max_retries_dict.get(max_retries, 2)}
+
             self.structured_output_handler = StructuredOutputHandler(
-                response_format=self.response_format, vendor_wrapper=self.get_vendor_wrapper()
+                core_client,
+                formatting_client,
+                structured_output_mode,
+                handler_params,
             )
         else:
             self.structured_output_handler = None

-        # Initialize vendor wrapper
-        self._vendor_wrapper = None
+        # Vendor wrapper lazy-instantiated via get_vendor_wrapper()

     def get_vendor_wrapper(self) -> VendorBase:
         """Get or create the vendor wrapper."""
@@ -180,31 +217,68 @@ class LM:
             self._vendor_wrapper = get_client(self.model, provider=self.vendor)
         return self._vendor_wrapper

+    def _should_use_responses_api(self) -> bool:
+        """Determine if Responses API should be used."""
+        if self.use_responses_api is not None:
+            return self.use_responses_api
+
+        # Auto-detect based on model
+        responses_models = {
+            "o4-mini", "o3", "o3-mini",  # Supported Synth-hosted models
+            "gpt-oss-120b", "gpt-oss-20b"  # OSS models via Synth
+        }
+        return self.model in responses_models or (self.model and self.model in reasoning_models)
+
+    def _should_use_harmony(self) -> bool:
+        """Determine if Harmony encoding should be used for OSS models."""
+        # Only use Harmony for OSS models when NOT using OpenAI vendor
+        # OpenAI hosts these models directly via Responses API
+        harmony_models = {"gpt-oss-120b", "gpt-oss-20b"}
+        return self.model in harmony_models and self.vendor != "openai"
+
     async def respond_async(
         self,
-        system_message: Optional[str] = None,
-        user_message: Optional[str] = None,
-        messages: Optional[List[Dict]] = None,  # v2 compatibility
-        images_bytes: List[bytes] = [],
-        images_as_bytes: Optional[List[bytes]] = None,  # v2 compatibility
-        response_model: Optional[BaseModel] = None,  # v2 compatibility
-        tools: Optional[List[BaseTool]] = None,
-        turn_number: Optional[int] = None,
+        system_message: str | None = None,
+        user_message: str | None = None,
+        messages: list[dict] | None = None,  # v2 compatibility
+        images_bytes: list[bytes] | None = None,
+        images_as_bytes: list[bytes] | None = None,  # v2 compatibility
+        response_model: type[BaseModel] | None = None,  # v2 compatibility
+        tools: list[BaseTool] | None = None,
+        turn_number: int | None = None,
+        previous_response_id: str | None = None,  # Responses API thread management
         **kwargs,
     ) -> BaseLMResponse:
         """Async method to get LM response with v3 tracing."""
         start_time = time.time()

         # Handle v2 compatibility
-        if images_as_bytes is not None:
-            images_bytes = images_as_bytes
+        images_bytes = images_as_bytes if images_as_bytes is not None else (images_bytes or [])

-        # Handle response_model for structured outputs
+        # Handle response_model for structured outputs (runtime-provided)
         if response_model and not self.response_format:
             self.response_format = response_model
             self.is_structured = True
+            # Mirror initialization logic from __init__
+            forced_json_preferred = (self.vendor == "openai") or (
+                self.model in reasoning_models if self.model else False
+            )
+            structured_output_mode = "forced_json" if forced_json_preferred else "stringified_json"
+            core_client = get_client(
+                self.model,
+                with_formatting=(structured_output_mode == "forced_json"),
+                provider=self.vendor,
+            )
+            formatting_client = get_client(
+                self.model,
+                with_formatting=True,
+                provider=self.vendor if self.vendor != "custom_endpoint" else None,
+            )
             self.structured_output_handler = StructuredOutputHandler(
-                response_format=self.response_format, vendor_wrapper=self.get_vendor_wrapper()
+                core_client,
+                formatting_client,
+                structured_output_mode,
+                {"max_retries": 2},
             )

         # Set turn number if provided
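The two predicates compose into a three-way routing decision: Harmony wins for OSS models hosted off-OpenAI, then the Responses API, then chat completions. A standalone restatement under those rules (simplified sketch; the reasoning_models fallback is omitted):

def route(model: str, vendor: str, use_responses_api: bool | None = None) -> str:
    # Simplified restatement of _should_use_harmony/_should_use_responses_api.
    harmony_models = {"gpt-oss-120b", "gpt-oss-20b"}
    responses_models = {"o4-mini", "o3", "o3-mini"} | harmony_models
    if model in harmony_models and vendor != "openai":
        return "harmony"
    if use_responses_api or (use_responses_api is None and model in responses_models):
        return "responses"
    return "chat"

assert route("gpt-oss-20b", "synth") == "harmony"     # OSS model, non-OpenAI host
assert route("gpt-oss-20b", "openai") == "responses"  # OpenAI hosts it directly
assert route("gpt-4o-mini", "openai") == "chat"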
@@ -227,57 +301,94 @@ class LM:
             )
         messages_to_use = build_messages(system_message, user_message, images_bytes, self.model)

-        # Get vendor wrapper
-        vendor_wrapper = self.get_vendor_wrapper()
-
-        # Prepare parameters based on vendor type
-        if hasattr(vendor_wrapper, "_hit_api_async"):
-            # OpenAIStandard expects lm_config
-            lm_config = {"temperature": self.temperature, **self.additional_params, **kwargs}
-            if self.json_mode:
-                lm_config["response_format"] = {"type": "json_object"}
-
-            params = {"model": self.model, "messages": messages_to_use, "lm_config": lm_config}
+        # If using structured outputs, route through the handler
+        if self.structured_output_handler and self.response_format:
             if tools:
-                params["tools"] = tools
+                raise ValueError("Tools are not supported with structured output mode")
+            response = await self.structured_output_handler.call_async(
+                messages=messages_to_use,
+                model=self.model,
+                response_model=self.response_format,
+                use_ephemeral_cache_only=False,
+                lm_config={"temperature": self.temperature, **self.additional_params, **kwargs},
+                reasoning_effort="high",
+            )
         else:
-            # Other vendors use flat params
-            params = {
-                "model": self.model,
-                "messages": messages_to_use,
-                "temperature": self.temperature,
-                **self.additional_params,
-                **kwargs,
-            }
+            # Get vendor wrapper
+            vendor_wrapper = self.get_vendor_wrapper()

-            if tools:
-                params["tools"] = [tool.to_dict() for tool in tools]
+            # Determine API type to use
+            use_responses = self._should_use_responses_api()
+            use_harmony = self._should_use_harmony()

-            if self.json_mode:
-                params["response_format"] = {"type": "json_object"}
+            # Decide response ID to use for thread management
+            response_id_to_use = None
+            if previous_response_id:
+                response_id_to_use = previous_response_id  # Manual override
+            elif self.auto_store_responses and self._last_response_id:
+                response_id_to_use = self._last_response_id  # Auto-chain

-        # Call vendor
-        try:
-            # Try the standard method names
+            # Prepare parameters based on vendor type
             if hasattr(vendor_wrapper, "_hit_api_async"):
-                response = await vendor_wrapper._hit_api_async(**params)
-            elif hasattr(vendor_wrapper, "respond_async"):
-                response = await vendor_wrapper.respond_async(**params)
-            elif hasattr(vendor_wrapper, "respond"):
-                # Fallback to sync in executor
-                loop = asyncio.get_event_loop()
-                response = await loop.run_in_executor(None, vendor_wrapper.respond, params)
+                # OpenAIStandard expects lm_config
+                lm_config = {"temperature": self.temperature, **self.additional_params, **kwargs}
+                if self.json_mode:
+                    lm_config["response_format"] = {"type": "json_object"}
+
+                params = {"model": self.model, "messages": messages_to_use, "lm_config": lm_config}
+                if tools:
+                    params["tools"] = tools
             else:
-                raise AttributeError(
-                    f"Vendor wrapper {type(vendor_wrapper).__name__} has no suitable response method"
-                )
-        except Exception as e:
-            print(f"Error calling vendor: {e}")
-            raise
+                # Other vendors use flat params
+                params = {
+                    "model": self.model,
+                    "messages": messages_to_use,
+                    "temperature": self.temperature,
+                    **self.additional_params,
+                    **kwargs,
+                }
+
+                if tools:
+                    params["tools"] = [tool.to_dict() for tool in tools]
+
+                if self.json_mode:
+                    params["response_format"] = {"type": "json_object"}
+
+            # Call vendor with appropriate API type
+            try:
+                # Route to appropriate API
+                if use_harmony and hasattr(vendor_wrapper, "_hit_api_async_harmony"):
+                    params["previous_response_id"] = response_id_to_use
+                    response = await vendor_wrapper._hit_api_async_harmony(**params)
+                elif use_responses and hasattr(vendor_wrapper, "_hit_api_async_responses"):
+                    params["previous_response_id"] = response_id_to_use
+                    response = await vendor_wrapper._hit_api_async_responses(**params)
+                else:
+                    # Standard chat completions API
+                    if hasattr(vendor_wrapper, "_hit_api_async"):
+                        response = await vendor_wrapper._hit_api_async(**params)
+                    elif hasattr(vendor_wrapper, "respond_async"):
+                        response = await vendor_wrapper.respond_async(**params)
+                    elif hasattr(vendor_wrapper, "respond"):
+                        # Fallback to sync in executor
+                        loop = asyncio.get_event_loop()
+                        response = await loop.run_in_executor(None, vendor_wrapper.respond, params)
+                    else:
+                        raise AttributeError(
+                            f"Vendor wrapper {type(vendor_wrapper).__name__} has no suitable response method"
+                        )
+                if not hasattr(response, 'api_type'):
+                    response.api_type = "chat"

-        # Handle structured output
-        if self.structured_output_handler:
-            response = self.structured_output_handler.process_response(response)
+                # Update stored response ID if auto-storing
+                if self.auto_store_responses and hasattr(response, 'response_id') and response.response_id:
+                    self._last_response_id = response.response_id
+
+            except Exception as e:
+                print(f"Error calling vendor: {e}")
+                raise
+
+        # No additional post-processing needed for structured outputs here

         # Record tracing event if enabled
         if (
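This gives two complementary ways to thread Responses API calls; a hedged usage sketch (inside an async function, messages abbreviated):

# Auto-chaining: with auto_store_responses=True the second call implicitly
# reuses the response_id stored by the first.
first = await lm.respond_async(system_message="...", user_message="Draft a plan.")
second = await lm.respond_async(system_message="...", user_message="Refine step 2.")

# Manual override: branch the thread from an earlier response instead.
branched = await lm.respond_async(
    system_message="...",
    user_message="Try an alternative step 2.",
    previous_response_id=first.response_id,
)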
@@ -286,36 +397,40 @@ class LM:
             and hasattr(self.session_tracer, "current_session")
         ):
             latency_ms = int((time.time() - start_time) * 1000)
+
+            # Create LLMCallRecord from the response
+            from datetime import datetime
+            started_at = datetime.utcnow()
+            completed_at = datetime.utcnow()
+
+            call_record = create_llm_call_record_from_response(
+                response=response,
+                model_name=self.model or self.vendor,
+                provider=self.vendor,
+                messages=messages_to_use,
+                temperature=self.temperature,
+                request_params={**self.additional_params, **kwargs},
+                tools=tools,
+                started_at=started_at,
+                completed_at=completed_at,
+                latency_ms=latency_ms,
+            )
+
+            # Compute aggregates from the call record
+            aggregates = compute_aggregates_from_call_records([call_record])

-            # Extract usage info if available
-            usage_info = {}
-            if hasattr(response, "usage") and response.usage:
-                usage_info = {
-                    "input_tokens": response.usage.get("input_tokens", 0),
-                    "output_tokens": response.usage.get("output_tokens", 0),
-                    "total_tokens": response.usage.get("total_tokens", 0),
-                    "cost_usd": response.usage.get("cost_usd", 0.0),
-                }
-            else:
-                # Default values when usage is not available
-                usage_info = {
-                    "input_tokens": 0,
-                    "output_tokens": 0,
-                    "total_tokens": 0,
-                    "cost_usd": 0.0,
-                }
-
-            # Create LM event
+            # Create LM event with call_records
             lm_event = LMCAISEvent(
                 system_instance_id=self.system_id,
                 time_record=TimeRecord(event_time=time.time(), message_time=turn_number),
-                model_name=self.model or self.vendor,
-                provider=self.vendor,
-                input_tokens=usage_info["input_tokens"],
-                output_tokens=usage_info["output_tokens"],
-                total_tokens=usage_info["total_tokens"],
-                cost_usd=usage_info["cost_usd"],
-                latency_ms=latency_ms,
+                # Aggregates at event level
+                input_tokens=aggregates["input_tokens"],
+                output_tokens=aggregates["output_tokens"],
+                total_tokens=aggregates["total_tokens"],
+                cost_usd=aggregates["cost_usd"],
+                latency_ms=aggregates["latency_ms"],
+                # Store the call record
+                call_records=[call_record],
                 metadata={
                     "temperature": self.temperature,
                     "json_mode": self.json_mode,
@@ -363,14 +478,15 @@ class LM:

     def respond(
         self,
-        system_message: Optional[str] = None,
-        user_message: Optional[str] = None,
-        messages: Optional[List[Dict]] = None,  # v2 compatibility
-        images_bytes: List[bytes] = [],
-        images_as_bytes: Optional[List[bytes]] = None,  # v2 compatibility
-        response_model: Optional[BaseModel] = None,  # v2 compatibility
-        tools: Optional[List[BaseTool]] = None,
-        turn_number: Optional[int] = None,
+        system_message: str | None = None,
+        user_message: str | None = None,
+        messages: list[dict] | None = None,  # v2 compatibility
+        images_bytes: list[bytes] | None = None,
+        images_as_bytes: list[bytes] | None = None,  # v2 compatibility
+        response_model: type[BaseModel] | None = None,  # v2 compatibility
+        tools: list[BaseTool] | None = None,
+        previous_response_id: str | None = None,  # Responses API thread management
+        turn_number: int | None = None,
         **kwargs,
     ) -> BaseLMResponse:
         """Synchronous wrapper for respond_async."""
synth_ai/lm/core/vendor_clients.py

@@ -68,6 +68,10 @@ grok_naming_regexes: List[Pattern] = [
 ]


+openrouter_naming_regexes: List[Pattern] = [
+    re.compile(r"^openrouter/.*$"),  # openrouter/model-name pattern
+]
+
 openrouter_naming_regexes: List[Pattern] = [
     re.compile(r"^openrouter/.*$"),  # openrouter/model-name pattern
 ]
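Note the hunk inserts a definition identical to the one already present below it; at import time the later binding wins, so behavior is unchanged. The pattern itself routes any openrouter/-prefixed model string to the OpenRouter client; a quick check:

import re

openrouter_pattern = re.compile(r"^openrouter/.*$")
assert openrouter_pattern.match("openrouter/qwen-2.5-72b-instruct")
assert openrouter_pattern.match("gpt-4o-mini") is None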
synth_ai/lm/provider_support/openai.py

@@ -103,7 +103,7 @@ OPENAI_METHODS_V1 = [
         sync=False,
     ),
     OpenAiDefinition(
-        module="openai.resources.beta.chat.completions",
+        module="openai.resources.chat.completions",
         object="Completions",
         method="parse",
         type="chat",
@@ -111,7 +111,7 @@ OPENAI_METHODS_V1 = [
         min_version="1.50.0",
     ),
     OpenAiDefinition(
-        module="openai.resources.beta.chat.completions",
+        module="openai.resources.chat.completions",
         object="AsyncCompletions",
         method="parse",
         type="chat",
@@ -776,6 +776,15 @@ class OpenAILangfuse:
             ):
                 continue

+            # Check if the method actually exists before trying to wrap it
+            try:
+                module = __import__(resource.module, fromlist=[resource.object])
+                obj = getattr(module, resource.object, None)
+                if obj and not hasattr(obj, resource.method):
+                    continue  # Skip if method doesn't exist
+            except (ImportError, AttributeError):
+                continue  # Skip if module or object doesn't exist
+
             wrap_function_wrapper(
                 resource.module,
                 f"{resource.object}.{resource.method}",
synth_ai/lm/vendors/base.py

@@ -18,10 +18,17 @@ class BaseLMResponse(BaseModel):
         raw_response: The raw text response from the model
         structured_output: Optional parsed Pydantic model if structured output was requested
         tool_calls: Optional list of tool calls if tools were provided
+        response_id: Optional response ID for thread management (Responses API)
+        reasoning: Optional reasoning trace from the model (o1 models)
+        api_type: Optional API type used ("chat", "responses", or "harmony")
     """

    raw_response: str
    structured_output: Optional[BaseModel] = None
    tool_calls: Optional[List[Dict]] = None
+    response_id: Optional[str] = None
+    reasoning: Optional[str] = None
+    api_type: Optional[str] = None
+    usage: Optional[Dict[str, Any]] = None


 class VendorBase(ABC):
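A construction sketch for the extended response model (field values illustrative):

from synth_ai.lm.vendors.base import BaseLMResponse

resp = BaseLMResponse(
    raw_response="All tests passed.",
    response_id="resp_abc123",   # set by the Responses API path for thread chaining
    reasoning=None,              # populated for reasoning models when available
    api_type="responses",        # one of "chat", "responses", "harmony"
    usage={"input_tokens": 12, "output_tokens": 5, "total_tokens": 17, "cost_usd": 0.0},
)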