synth-ai 0.1.0.dev50__py3-none-any.whl → 0.1.0.dev52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. synth_ai/__init__.py +1 -1
  2. synth_ai/zyk/__init__.py +3 -0
  3. synth_ai/zyk/lms/__init__.py +0 -0
  4. synth_ai/zyk/lms/caching/__init__.py +0 -0
  5. synth_ai/zyk/lms/caching/constants.py +1 -0
  6. synth_ai/zyk/lms/caching/dbs.py +0 -0
  7. synth_ai/zyk/lms/caching/ephemeral.py +72 -0
  8. synth_ai/zyk/lms/caching/handler.py +137 -0
  9. synth_ai/zyk/lms/caching/initialize.py +13 -0
  10. synth_ai/zyk/lms/caching/persistent.py +83 -0
  11. synth_ai/zyk/lms/config.py +10 -0
  12. synth_ai/zyk/lms/constants.py +22 -0
  13. synth_ai/zyk/lms/core/__init__.py +0 -0
  14. synth_ai/zyk/lms/core/all.py +47 -0
  15. synth_ai/zyk/lms/core/exceptions.py +9 -0
  16. synth_ai/zyk/lms/core/main.py +268 -0
  17. synth_ai/zyk/lms/core/vendor_clients.py +85 -0
  18. synth_ai/zyk/lms/cost/__init__.py +0 -0
  19. synth_ai/zyk/lms/cost/monitor.py +1 -0
  20. synth_ai/zyk/lms/cost/statefulness.py +1 -0
  21. synth_ai/zyk/lms/structured_outputs/__init__.py +0 -0
  22. synth_ai/zyk/lms/structured_outputs/handler.py +441 -0
  23. synth_ai/zyk/lms/structured_outputs/inject.py +314 -0
  24. synth_ai/zyk/lms/structured_outputs/rehabilitate.py +187 -0
  25. synth_ai/zyk/lms/tools/base.py +118 -0
  26. synth_ai/zyk/lms/vendors/__init__.py +0 -0
  27. synth_ai/zyk/lms/vendors/base.py +31 -0
  28. synth_ai/zyk/lms/vendors/core/__init__.py +0 -0
  29. synth_ai/zyk/lms/vendors/core/anthropic_api.py +365 -0
  30. synth_ai/zyk/lms/vendors/core/gemini_api.py +282 -0
  31. synth_ai/zyk/lms/vendors/core/mistral_api.py +331 -0
  32. synth_ai/zyk/lms/vendors/core/openai_api.py +187 -0
  33. synth_ai/zyk/lms/vendors/local/__init__.py +0 -0
  34. synth_ai/zyk/lms/vendors/local/ollama.py +0 -0
  35. synth_ai/zyk/lms/vendors/openai_standard.py +345 -0
  36. synth_ai/zyk/lms/vendors/retries.py +3 -0
  37. synth_ai/zyk/lms/vendors/supported/__init__.py +0 -0
  38. synth_ai/zyk/lms/vendors/supported/deepseek.py +73 -0
  39. synth_ai/zyk/lms/vendors/supported/groq.py +16 -0
  40. synth_ai/zyk/lms/vendors/supported/ollama.py +14 -0
  41. synth_ai/zyk/lms/vendors/supported/together.py +11 -0
  42. {synth_ai-0.1.0.dev50.dist-info → synth_ai-0.1.0.dev52.dist-info}/METADATA +2 -1
  43. synth_ai-0.1.0.dev52.dist-info/RECORD +46 -0
  44. synth_ai-0.1.0.dev50.dist-info/RECORD +0 -6
  45. {synth_ai-0.1.0.dev50.dist-info → synth_ai-0.1.0.dev52.dist-info}/WHEEL +0 -0
  46. {synth_ai-0.1.0.dev50.dist-info → synth_ai-0.1.0.dev52.dist-info}/licenses/LICENSE +0 -0
  47. {synth_ai-0.1.0.dev50.dist-info → synth_ai-0.1.0.dev52.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ b/synth_ai/zyk/lms/vendors/core/anthropic_api.py
@@ -0,0 +1,365 @@
+import json
+from typing import Any, Dict, List, Optional, Tuple, Type
+
+import anthropic
+import pydantic
+from pydantic import BaseModel
+
+from synth_ai.zyk.lms.caching.initialize import (
+    get_cache_handler,
+)
+from synth_ai.zyk.lms.tools.base import BaseTool
+from synth_ai.zyk.lms.vendors.base import BaseLMResponse, VendorBase
+from synth_ai.zyk.lms.constants import SPECIAL_BASE_TEMPS, CLAUDE_REASONING_MODELS, SONNET_37_BUDGETS
+from synth_ai.zyk.lms.vendors.core.openai_api import OpenAIStructuredOutputClient
+
+ANTHROPIC_EXCEPTIONS_TO_RETRY: Tuple[Type[Exception], ...] = (anthropic.APIError,)
+
+
+class AnthropicAPI(VendorBase):
+    used_for_structured_outputs: bool = True
+    exceptions_to_retry: Tuple = ANTHROPIC_EXCEPTIONS_TO_RETRY
+    sync_client: Any
+    async_client: Any
+
+    def __init__(
+        self,
+        exceptions_to_retry: Tuple[
+            Type[Exception], ...
+        ] = ANTHROPIC_EXCEPTIONS_TO_RETRY,
+        used_for_structured_outputs: bool = False,
+        reasoning_effort: str = "high",
+    ):
+        self.sync_client = anthropic.Anthropic()
+        self.async_client = anthropic.AsyncAnthropic()
+        self.used_for_structured_outputs = used_for_structured_outputs
+        self.exceptions_to_retry = exceptions_to_retry
+        self._openai_fallback = None
+        self.reasoning_effort = reasoning_effort
+
+    # @backoff.on_exception(
+    #     backoff.expo,
+    #     exceptions_to_retry,
+    #     max_tries=BACKOFF_TOLERANCE,
+    #     on_giveup=lambda e: print(e),
+    # )
+    async def _hit_api_async(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        lm_config: Dict[str, Any],
+        use_ephemeral_cache_only: bool = False,
+        reasoning_effort: str = "high",
+        tools: Optional[List[BaseTool]] = None,
+        **vendor_params: Dict[str, Any],
+    ) -> BaseLMResponse:
+        assert (
+            lm_config.get("response_model", None) is None
+        ), "response_model is not supported for standard calls"
+        used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
+        lm_config["reasoning_effort"] = reasoning_effort
+        cache_result = used_cache_handler.hit_managed_cache(
+            model, messages, lm_config=lm_config, tools=tools
+        )
+        if cache_result:
+            return cache_result
+
+        # Common API parameters
+        api_params = {
+            "system": messages[0]["content"],
+            "messages": messages[1:],
+            "model": model,
+            "max_tokens": lm_config.get("max_tokens", 4096),
+            "temperature": lm_config.get(
+                "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
+            ),
+        }
+
+        # Add tools if provided
+        if tools:
+            api_params["tools"] = [tool.to_anthropic_tool() for tool in tools]
+
+        # Only try to add thinking if supported by the SDK
+        try:
+            import inspect
+
+            create_sig = inspect.signature(self.async_client.messages.create)
+            if "thinking" in create_sig.parameters and model in CLAUDE_REASONING_MODELS:
+                if reasoning_effort in ["high", "medium"]:
+                    budget = SONNET_37_BUDGETS[reasoning_effort]
+                    api_params["thinking"] = {
+                        "type": "enabled",
+                        "budget_tokens": budget,
+                    }
+                    api_params["max_tokens"] = budget + 4096
+                    api_params["temperature"] = 1
+        except (ImportError, AttributeError, TypeError):
+            pass
+
+        # Make the API call
+        response = await self.async_client.messages.create(**api_params)
+
+        # Extract text content and tool calls
+        raw_response = ""
+        tool_calls = []
+
+        for content in response.content:
+            if content.type == "text":
+                raw_response += content.text
+            elif content.type == "tool_use":
+                tool_calls.append(
+                    {
+                        "id": content.id,
+                        "type": "function",
+                        "function": {
+                            "name": content.name,
+                            "arguments": json.dumps(content.input),
+                        },
+                    }
+                )
+
+        lm_response = BaseLMResponse(
+            raw_response=raw_response,
+            structured_output=None,
+            tool_calls=tool_calls if tool_calls else None,
+        )
+
+        lm_config["reasoning_effort"] = reasoning_effort
+        used_cache_handler.add_to_managed_cache(
+            model, messages, lm_config=lm_config, output=lm_response, tools=tools
+        )
+        return lm_response
+
+    # @backoff.on_exception(
+    #     backoff.expo,
+    #     exceptions_to_retry,
+    #     max_tries=BACKOFF_TOLERANCE,
+    #     on_giveup=lambda e: print(e),
+    # )
+    def _hit_api_sync(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        lm_config: Dict[str, Any],
+        use_ephemeral_cache_only: bool = False,
+        reasoning_effort: str = "high",
+        tools: Optional[List[BaseTool]] = None,
+        **vendor_params: Dict[str, Any],
+    ) -> BaseLMResponse:
+        assert (
+            lm_config.get("response_model", None) is None
+        ), "response_model is not supported for standard calls"
+        used_cache_handler = get_cache_handler(
+            use_ephemeral_cache_only=use_ephemeral_cache_only
+        )
+        lm_config["reasoning_effort"] = reasoning_effort
+        cache_result = used_cache_handler.hit_managed_cache(
+            model, messages, lm_config=lm_config, tools=tools
+        )
+        if cache_result:
+            return cache_result
+
+        # Common API parameters
+        api_params = {
+            "system": messages[0]["content"],
+            "messages": messages[1:],
+            "model": model,
+            "max_tokens": lm_config.get("max_tokens", 4096),
+            "temperature": lm_config.get(
+                "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
+            ),
+        }
+
+        # Add tools if provided
+        if tools:
+            api_params["tools"] = [tool.to_anthropic_tool() for tool in tools]
+
+        # Only try to add thinking if supported by the SDK
+        try:
+            import inspect
+
+            create_sig = inspect.signature(self.sync_client.messages.create)
+            if "thinking" in create_sig.parameters and model in CLAUDE_REASONING_MODELS:
+                api_params["temperature"] = 1
+                if reasoning_effort in ["high", "medium"]:
+                    budgets = SONNET_37_BUDGETS
+                    budget = budgets[reasoning_effort]
+                    api_params["thinking"] = {
+                        "type": "enabled",
+                        "budget_tokens": budget,
+                    }
+                    api_params["max_tokens"] = budget + 4096
+                    api_params["temperature"] = 1
+        except (ImportError, AttributeError, TypeError):
+            pass
+
+        # Make the API call
+        response = self.sync_client.messages.create(**api_params)
+
+        # Extract text content and tool calls
+        raw_response = ""
+        tool_calls = []
+
+        for content in response.content:
+            if content.type == "text":
+                raw_response += content.text
+            elif content.type == "tool_use":
+                tool_calls.append(
+                    {
+                        "id": content.id,
+                        "type": "function",
+                        "function": {
+                            "name": content.name,
+                            "arguments": json.dumps(content.input),
+                        },
+                    }
+                )
+
+        lm_response = BaseLMResponse(
+            raw_response=raw_response,
+            structured_output=None,
+            tool_calls=tool_calls if tool_calls else None,
+        )
+
+        lm_config["reasoning_effort"] = reasoning_effort
+        used_cache_handler.add_to_managed_cache(
+            model, messages, lm_config=lm_config, output=lm_response, tools=tools
+        )
+        return lm_response
+
+    async def _hit_api_async_structured_output(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        response_model: BaseModel,
+        temperature: float,
+        use_ephemeral_cache_only: bool = False,
+        reasoning_effort: str = "high",
+        **vendor_params: Dict[str, Any],
+    ) -> BaseLMResponse:
+        try:
+            # First try with Anthropic
+            reasoning_effort = vendor_params.get("reasoning_effort", reasoning_effort)
+            if model in CLAUDE_REASONING_MODELS:
+
+                # if reasoning_effort in ["high", "medium"]:
+                budgets = SONNET_37_BUDGETS
+                budget = budgets[reasoning_effort]
+                max_tokens = budget + 4096
+                temperature = 1
+
+                response = await self.async_client.messages.create(
+                    system=messages[0]["content"],
+                    messages=messages[1:],
+                    model=model,
+                    max_tokens=max_tokens,
+                    thinking={"type": "enabled", "budget_tokens": budget},
+                    temperature=temperature,
+                )
+            else:
+                response = await self.async_client.messages.create(
+                    system=messages[0]["content"],
+                    messages=messages[1:],
+                    model=model,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                )
+            result = response.content[0].text
+            parsed = json.loads(result)
+            lm_response = BaseLMResponse(
+                raw_response="",
+                structured_output=response_model(**parsed),
+                tool_calls=None,
+            )
+            return lm_response
+        except (json.JSONDecodeError, pydantic.ValidationError):
+            # If Anthropic fails, fallback to OpenAI
+            if self._openai_fallback is None:
+                self._openai_fallback = OpenAIStructuredOutputClient()
+            return await self._openai_fallback._hit_api_async_structured_output(
+                model="gpt-4o",  # Fallback to GPT-4
+                messages=messages,
+                response_model=response_model,
+                temperature=temperature,
+                use_ephemeral_cache_only=use_ephemeral_cache_only,
+            )
+
+    def _hit_api_sync_structured_output(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        response_model: BaseModel,
+        temperature: float,
+        use_ephemeral_cache_only: bool = False,
+        reasoning_effort: str = "high",
+        **vendor_params: Dict[str, Any],
+    ) -> BaseLMResponse:
+        try:
+            # First try with Anthropic
+            reasoning_effort = vendor_params.get("reasoning_effort", reasoning_effort)
+            import time
+
+            if model in CLAUDE_REASONING_MODELS:
+                if reasoning_effort in ["high", "medium"]:
+                    budgets = SONNET_37_BUDGETS
+                    budget = budgets[reasoning_effort]
+                    max_tokens = budget + 4096
+                    temperature = 1
+                response = self.sync_client.messages.create(
+                    system=messages[0]["content"],
+                    messages=messages[1:],
+                    model=model,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                    thinking={"type": "enabled", "budget_tokens": budget},
+                )
+            else:
+                response = self.sync_client.messages.create(
+                    system=messages[0]["content"],
+                    messages=messages[1:],
+                    model=model,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                )
+            # print("Time taken for API call", time.time() - t)
+            result = response.content[0].text
+            # Try to parse the result as JSON
+            parsed = json.loads(result)
+            lm_response = BaseLMResponse(
+                raw_response="",
+                structured_output=response_model(**parsed),
+                tool_calls=None,
+            )
+            return lm_response
+        except (json.JSONDecodeError, pydantic.ValidationError):
+            # If Anthropic fails, fallback to OpenAI
+            print("WARNING - Falling back to OpenAI - THIS IS SLOW")
+            if self._openai_fallback is None:
+                self._openai_fallback = OpenAIStructuredOutputClient()
+            return self._openai_fallback._hit_api_sync_structured_output(
+                model="gpt-4o",  # Fallback to GPT-4
+                messages=messages,
+                response_model=response_model,
+                temperature=temperature,
+                use_ephemeral_cache_only=use_ephemeral_cache_only,
+            )
+
+    async def _process_call_async(
+        self,
+        messages: List[Dict[str, Any]],
+        model: str,
+        response_model: BaseModel,
+        api_call_method,
+        temperature: float = 0.0,
+        use_ephemeral_cache_only: bool = False,
+        vendor_params: Dict[str, Any] = None,
+    ) -> BaseModel:
+        vendor_params = vendor_params or {}
+        # Each vendor can filter parameters they support
+        return await api_call_method(
+            messages=messages,
+            model=model,
+            temperature=temperature,
+            use_ephemeral_cache_only=use_ephemeral_cache_only,
+            **vendor_params,  # Pass all vendor-specific params
+        )
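For context, a minimal usage sketch of the new Anthropic client (not part of the diff). It assumes ANTHROPIC_API_KEY is set in the environment and uses a placeholder model id, since the actual contents of CLAUDE_REASONING_MODELS and SONNET_37_BUDGETS live in synth_ai/zyk/lms/constants.py and are not shown in this diff. Per the code above, the first message is treated as the system prompt and lm_config must not carry a response_model for standard calls.

from synth_ai.zyk.lms.vendors.core.anthropic_api import AnthropicAPI

client = AnthropicAPI(reasoning_effort="medium")
messages = [
    {"role": "system", "content": "You are a terse assistant."},
    {"role": "user", "content": "Name one prime number greater than 10."},
]
response = client._hit_api_sync(
    model="claude-3-7-sonnet-latest",  # placeholder model id, not taken from the diff
    messages=messages,
    lm_config={"max_tokens": 512},
    use_ephemeral_cache_only=True,  # keep the managed cache in memory for a quick test
)
print(response.raw_response)
print(response.tool_calls)  # None unless tools were passed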
--- /dev/null
+++ b/synth_ai/zyk/lms/vendors/core/gemini_api.py
@@ -0,0 +1,282 @@
+import json
+import logging
+import os
+import warnings
+from typing import Any, Dict, List, Optional, Tuple, Type
+
+from google import genai
+from google.api_core.exceptions import ResourceExhausted
+from google.genai import types
+from synth_ai.zyk.lms.caching.initialize import get_cache_handler
+from synth_ai.zyk.lms.tools.base import BaseTool
+from synth_ai.zyk.lms.vendors.base import BaseLMResponse, VendorBase
+from synth_ai.zyk.lms.constants import (
+    SPECIAL_BASE_TEMPS,
+    GEMINI_REASONING_MODELS,
+    GEMINI_THINKING_BUDGETS,
+)
+from synth_ai.zyk.lms.vendors.retries import BACKOFF_TOLERANCE, backoff
+import logging
+
+
+ALIASES = {
+    "gemini-2.5-flash": "gemini-2.5-flash-preview-04-17",
+}
+
+logger = logging.getLogger(__name__)
+_CLIENT = genai.Client()  # one client for everything
+GEMINI_EXCEPTIONS_TO_RETRY: Tuple[Type[Exception], ...] = (ResourceExhausted,)
+logging.getLogger("google.genai").setLevel(logging.ERROR)
+os.environ["GRPC_VERBOSITY"] = "ERROR"
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+warnings.filterwarnings("ignore")
+
+SAFETY_SETTINGS = {
+    types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: types.HarmBlockThreshold.BLOCK_NONE,
+    types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: types.HarmBlockThreshold.BLOCK_NONE,
+    types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: types.HarmBlockThreshold.BLOCK_NONE,
+    types.HarmCategory.HARM_CATEGORY_HARASSMENT: types.HarmBlockThreshold.BLOCK_NONE,
+}
+
+
+class GeminiAPI(VendorBase):
+    used_for_structured_outputs: bool = True
+    exceptions_to_retry: Tuple[Type[Exception], ...] = GEMINI_EXCEPTIONS_TO_RETRY
+
+    def __init__(
+        self,
+        exceptions_to_retry: Tuple[Type[Exception], ...] = GEMINI_EXCEPTIONS_TO_RETRY,
+        used_for_structured_outputs: bool = False,
+    ):
+        self.used_for_structured_outputs = used_for_structured_outputs
+        self.exceptions_to_retry = exceptions_to_retry
+
+    def get_aliased_model_name(self, model_name: str) -> str:
+        if model_name in ALIASES:
+            return ALIASES[model_name]
+        return model_name
+
+    @staticmethod
+    def _msg_to_contents(messages: List[Dict[str, Any]]) -> List[types.Content]:
+        # contents, sys_instr = [], None
+        contents = []
+        for m in messages:
+            # if m["role"] == "system":
+            #     sys_instr = f"<instructions>\n{m['content']}\n</instructions>"
+            #     continue
+            # text = (sys_instr + "\n" + m["content"]) if sys_instr else m["content"]
+            if m["role"].lower() not in ["user", "assistant"]:
+                continue
+            role = "user" if m["role"] == "user" else "assistant"
+            contents.append(types.Content(role=role, parts=[types.Part.from_text(text=m["content"])]))
+        return contents
+
+    @staticmethod
+    def _tools_to_genai(tools: List[BaseTool]) -> List[types.Tool]:
+        """Convert internal BaseTool → genai Tool."""
+        out: List[types.Tool] = []
+        for t in tools:
+            # Assume t.to_gemini_tool() now correctly returns a FunctionDeclaration
+            # func_decl = t.to_gemini_tool()
+            if isinstance(t, dict):
+                func_decl = t
+            else:
+                func_decl = t.to_gemini_tool()
+            if not isinstance(func_decl, types.FunctionDeclaration):
+                # Or fetch schema parts if to_gemini_tool still returns dict
+                # This depends on BaseTool.to_gemini_tool implementation
+                tool_dict = func_decl  # Assuming it's a dict for now
+                func_decl = types.FunctionDeclaration(
+                    name=tool_dict['name'],
+                    description=tool_dict['description'],
+                    parameters=tool_dict['parameters'],  # Expects OpenAPI-style dict
+                )
+            out.append(types.Tool(function_declarations=[func_decl]))
+        return out
+
+    async def _gen_content_async(
+        self,
+        messages: List[Dict],
+        temperature: float,
+        model_name: str,
+        reasoning_effort: str,
+        tools: Optional[List[BaseTool]],
+        lm_config: Optional[Dict[str, Any]],
+    ) -> Tuple[str, Optional[List[Dict]]]:
+        model_name = self.get_aliased_model_name(model_name)
+        cfg_kwargs: Dict[str, Any] = {"temperature": temperature}
+        if model_name in GEMINI_REASONING_MODELS and reasoning_effort in GEMINI_THINKING_BUDGETS:
+            cfg_kwargs["thinking_config"] = types.ThinkingConfig(
+                thinking_budget=GEMINI_THINKING_BUDGETS[reasoning_effort]
+            )
+
+        if any(m["role"] == "system" for m in messages):
+            cfg_kwargs["system_instruction"] = next(m["content"] for m in messages if m["role"] == "system")
+
+        generation_config = types.GenerateContentConfig(
+            **cfg_kwargs,
+            tool_config=lm_config.get("tool_config") if lm_config else None,
+            tools=self._tools_to_genai(tools) if tools else None
+        )
+        resp = await _CLIENT.aio.models.generate_content(
+            model=model_name,
+            contents=self._msg_to_contents(messages),
+            config=generation_config,
+            # safety_settings=SAFETY_SETTINGS,
+        )
+        return self._extract(resp)
+
+    def _gen_content_sync(
+        self,
+        messages: List[Dict],
+        temperature: float,
+        model_name: str,
+        reasoning_effort: str,
+        tools: Optional[List[BaseTool]],
+        lm_config: Optional[Dict[str, Any]],
+    ) -> Tuple[str, Optional[List[Dict]]]:
+        model_name = self.get_aliased_model_name(model_name)
+        cfg_kwargs: Dict[str, Any] = {"temperature": temperature}
+        if model_name in GEMINI_REASONING_MODELS and reasoning_effort in GEMINI_THINKING_BUDGETS:
+            cfg_kwargs["thinking_config"] = types.ThinkingConfig(
+                thinking_budget=GEMINI_THINKING_BUDGETS[reasoning_effort]
+            )
+        if any(m["role"] == "system" for m in messages):
+            cfg_kwargs["system_instruction"] = next(m["content"] for m in messages if m["role"] == "system")
+        generation_config = types.GenerateContentConfig(
+            **cfg_kwargs,
+            tool_config=lm_config.get("tool_config") if lm_config else None,
+            tools=self._tools_to_genai(tools) if tools else None
+        )
+
+        resp = _CLIENT.models.generate_content(
+            model=model_name,
+            contents=self._msg_to_contents(messages),
+            safety_settings=SAFETY_SETTINGS,
+            config=generation_config,
+        )
+        return self._extract(resp)
+
+    @staticmethod
+    def _extract(response) -> Tuple[str, Optional[List[Dict]]]:
+        # Extract text, handling cases where it might be missing
+        try:
+            text = response.text
+        except ValueError:  # Handle cases where only non-text parts exist
+            text = ""
+
+        calls = []
+        # Access parts through candidates[0].content
+        if response.candidates and response.candidates[0].content:
+            for part in response.candidates[0].content.parts:
+                if part.function_call:
+                    calls.append(
+                        {
+                            "id": f"call_{len(calls) + 1}",
+                            "type": "function",
+                            "function": {
+                                "name": part.function_call.name,
+                                "arguments": json.dumps(dict(part.function_call.args)),
+                            },
+                        }
+                    )
+        return text, calls or None
+
+    @backoff.on_exception(
+        backoff.expo,
+        exceptions_to_retry,
+        max_tries=BACKOFF_TOLERANCE,
+        on_giveup=lambda e: print(e),
+    )
+    async def _hit_api_async(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        lm_config: Dict[str, Any],
+        use_ephemeral_cache_only: bool = False,
+        reasoning_effort: str = "high",
+        tools: Optional[List[BaseTool]] = None,
+    ) -> BaseLMResponse:
+        assert (
+            lm_config.get("response_model", None) is None
+        ), "response_model is not supported for standard calls"
+        used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
+        lm_config["reasoning_effort"] = reasoning_effort
+        cache_result = used_cache_handler.hit_managed_cache(
+            model, messages, lm_config=lm_config, tools=tools
+        )
+        if cache_result:
+            return cache_result
+
+        raw_response, tool_calls = await self._gen_content_async(
+            messages,
+            temperature=lm_config.get("temperature", SPECIAL_BASE_TEMPS.get(model, 0)),
+            reasoning_effort=reasoning_effort,
+            tools=tools,
+            lm_config=lm_config,
+            model_name=model,
+        )
+        if not raw_response:
+            raw_response = ""
+        lm_response = BaseLMResponse(
+            raw_response=raw_response,
+            structured_output=None,
+            tool_calls=tool_calls,
+        )
+
+        lm_config["reasoning_effort"] = reasoning_effort
+        used_cache_handler.add_to_managed_cache(
+            model, messages, lm_config=lm_config, output=lm_response, tools=tools
+        )
+        return lm_response
+
+    @backoff.on_exception(
+        backoff.expo,
+        exceptions_to_retry,
+        max_tries=BACKOFF_TOLERANCE,
+        on_giveup=lambda e: print(e),
+    )
+    def _hit_api_sync(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        lm_config: Dict[str, Any],
+        use_ephemeral_cache_only: bool = False,
+        reasoning_effort: str = "high",
+        tools: Optional[List[BaseTool]] = None,
+    ) -> BaseLMResponse:
+        assert (
+            lm_config.get("response_model", None) is None
+        ), "response_model is not supported for standard calls"
+        used_cache_handler = get_cache_handler(
+            use_ephemeral_cache_only=use_ephemeral_cache_only
+        )
+        lm_config["reasoning_effort"] = reasoning_effort
+        cache_result = used_cache_handler.hit_managed_cache(
+            model, messages, lm_config=lm_config, tools=tools
+        )
+        if cache_result:
+            return cache_result
+
+        raw_response, tool_calls = self._gen_content_sync(
+            messages,
+            temperature=lm_config.get("temperature", SPECIAL_BASE_TEMPS.get(model, 0)),
+            reasoning_effort=reasoning_effort,
+            tools=tools,
+            lm_config=lm_config,
+            model_name=model,
+        )
+        if not raw_response:
+            raw_response = ""
+        lm_response = BaseLMResponse(
+            raw_response=raw_response,
+            structured_output=None,
+            tool_calls=tool_calls,
+        )
+
+        lm_config["reasoning_effort"] = reasoning_effort
+        used_cache_handler.add_to_managed_cache(
+            model, messages, lm_config=lm_config, output=lm_response, tools=tools
+        )
+        return lm_response
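Similarly, a minimal usage sketch of the new Gemini client (not part of the diff). It assumes a Gemini API key is already configured in the environment before import, since the module builds a shared genai.Client() at import time, and it relies on the "gemini-2.5-flash" alias defined in the ALIASES map above; the contents of GEMINI_REASONING_MODELS and GEMINI_THINKING_BUDGETS come from synth_ai/zyk/lms/constants.py and are not shown in this diff.

from synth_ai.zyk.lms.vendors.core.gemini_api import GeminiAPI

client = GeminiAPI()
messages = [
    {"role": "system", "content": "Answer in one word."},  # forwarded as system_instruction
    {"role": "user", "content": "What color is a clear daytime sky?"},
]
response = client._hit_api_sync(
    model="gemini-2.5-flash",  # resolved to the preview model via ALIASES
    messages=messages,
    lm_config={"temperature": 0.2},
    use_ephemeral_cache_only=True,
)
print(response.raw_response)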