synth-ai 0.1.0.dev28__py3-none-any.whl → 0.1.0.dev30__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that public registry.
- public_tests/test_agent.py +11 -11
- public_tests/test_all_structured_outputs.py +32 -37
- public_tests/test_anthropic_structured_outputs.py +0 -0
- public_tests/test_deepseek_structured_outputs.py +0 -0
- public_tests/test_deepseek_tools.py +64 -0
- public_tests/test_gemini_structured_outputs.py +106 -0
- public_tests/test_models.py +27 -27
- public_tests/test_openai_structured_outputs.py +106 -0
- public_tests/test_reasoning_models.py +9 -7
- public_tests/test_recursive_structured_outputs.py +30 -30
- public_tests/test_structured.py +137 -0
- public_tests/test_structured_outputs.py +22 -13
- public_tests/test_text.py +160 -0
- public_tests/test_tools.py +300 -0
- synth_ai/__init__.py +1 -4
- synth_ai/zyk/__init__.py +2 -2
- synth_ai/zyk/lms/caching/ephemeral.py +54 -32
- synth_ai/zyk/lms/caching/handler.py +43 -15
- synth_ai/zyk/lms/caching/persistent.py +55 -27
- synth_ai/zyk/lms/core/main.py +29 -16
- synth_ai/zyk/lms/core/vendor_clients.py +1 -1
- synth_ai/zyk/lms/structured_outputs/handler.py +79 -45
- synth_ai/zyk/lms/structured_outputs/rehabilitate.py +3 -2
- synth_ai/zyk/lms/tools/base.py +104 -0
- synth_ai/zyk/lms/vendors/base.py +22 -6
- synth_ai/zyk/lms/vendors/core/anthropic_api.py +130 -95
- synth_ai/zyk/lms/vendors/core/gemini_api.py +153 -34
- synth_ai/zyk/lms/vendors/core/mistral_api.py +160 -54
- synth_ai/zyk/lms/vendors/core/openai_api.py +64 -53
- synth_ai/zyk/lms/vendors/openai_standard.py +197 -41
- synth_ai/zyk/lms/vendors/supported/deepseek.py +55 -0
- {synth_ai-0.1.0.dev28.dist-info → synth_ai-0.1.0.dev30.dist-info}/METADATA +2 -5
- synth_ai-0.1.0.dev30.dist-info/RECORD +65 -0
- public_tests/test_sonnet_thinking.py +0 -217
- synth_ai-0.1.0.dev28.dist-info/RECORD +0 -57
- {synth_ai-0.1.0.dev28.dist-info → synth_ai-0.1.0.dev30.dist-info}/WHEEL +0 -0
- {synth_ai-0.1.0.dev28.dist-info → synth_ai-0.1.0.dev30.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.1.0.dev28.dist-info → synth_ai-0.1.0.dev30.dist-info}/top_level.txt +0 -0
synth_ai/zyk/lms/vendors/core/mistral_api.py

```diff
@@ -1,16 +1,16 @@
 import json
 import os
-from typing import Any, Dict, List, Tuple, Type
+from typing import Any, Dict, List, Optional, Tuple, Type

 import pydantic
 from mistralai import Mistral  # use Mistral as both sync and async client
 from pydantic import BaseModel

 from synth_ai.zyk.lms.caching.initialize import get_cache_handler
-from synth_ai.zyk.lms.
+from synth_ai.zyk.lms.tools.base import BaseTool
+from synth_ai.zyk.lms.vendors.base import BaseLMResponse, VendorBase
 from synth_ai.zyk.lms.vendors.constants import SPECIAL_BASE_TEMPS
 from synth_ai.zyk.lms.vendors.core.openai_api import OpenAIStructuredOutputClient
-from synth_ai.zyk.lms.vendors.retries import BACKOFF_TOLERANCE, backoff

 # Since the mistralai package doesn't expose an exceptions module,
 # we fallback to catching all Exceptions for retry.
@@ -31,97 +31,193 @@ class MistralAPI(VendorBase):
         self.exceptions_to_retry = exceptions_to_retry
         self._openai_fallback = None

-    @backoff.on_exception(
-
-
-
-
-    )
+    # @backoff.on_exception(
+    #     backoff.expo,
+    #     MISTRAL_EXCEPTIONS_TO_RETRY,
+    #     max_tries=BACKOFF_TOLERANCE,
+    #     on_giveup=lambda e: print(e),
+    # )
     async def _hit_api_async(
         self,
         model: str,
         messages: List[Dict[str, Any]],
         lm_config: Dict[str, Any],
+        response_model: Optional[BaseModel] = None,
         use_ephemeral_cache_only: bool = False,
-
+        reasoning_effort: str = "high",
+        tools: Optional[List[BaseTool]] = None,
+    ) -> BaseLMResponse:
         assert (
             lm_config.get("response_model", None) is None
         ), "response_model is not supported for standard calls"
+        assert not (response_model and tools), "Cannot provide both response_model and tools"
         used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
         cache_result = used_cache_handler.hit_managed_cache(
-            model, messages, lm_config=lm_config
+            model, messages, lm_config=lm_config, tools=tools
         )
         if cache_result:
+            assert type(cache_result) in [
+                BaseLMResponse,
+                str,
+            ], f"Expected BaseLMResponse or str, got {type(cache_result)}"
             return (
-                cache_result
-                if
-                else
+                cache_result
+                if type(cache_result) == BaseLMResponse
+                else BaseLMResponse(
+                    raw_response=cache_result, structured_output=None, tool_calls=None
+                )
             )

         mistral_messages = [
             {"role": msg["role"], "content": msg["content"]} for msg in messages
         ]
+        functions = [tool.to_mistral_tool() for tool in tools] if tools else None
+        params = {
+            "model": model,
+            "messages": mistral_messages,
+            "max_tokens": lm_config.get("max_tokens", 4096),
+            "temperature": lm_config.get(
+                "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
+            ),
+            "stream": False,
+            "tool_choice": "auto" if functions else None,
+
+        }
+        if response_model:
+            params["response_format"] = response_model
+        elif tools:
+            params["tools"] = functions
+
         async with Mistral(api_key=os.getenv("MISTRAL_API_KEY", "")) as client:
-            response = await client.chat.complete_async(
-
-
-
-
-
-
-
-
-
+            response = await client.chat.complete_async(**params)
+
+            message = response.choices[0].message
+            try:
+                raw_response = message.content
+            except AttributeError:
+                raw_response = ""
+
+            tool_calls = []
+            try:
+                if message.tool_calls:
+                    tool_calls = [
+                        {
+                            "id": call.id,
+                            "type": "function",
+                            "function": {
+                                "name": call.function.name,
+                                "arguments": call.function.arguments,
+                            },
+                        }
+                        for call in message.tool_calls
+                    ]
+            except AttributeError:
+                pass
+
+            lm_response = BaseLMResponse(
+                raw_response=raw_response,
+                structured_output=None,
+                tool_calls=tool_calls if tool_calls else None,
+            )
         used_cache_handler.add_to_managed_cache(
-            model, messages, lm_config=lm_config, output=
+            model, messages, lm_config=lm_config, output=lm_response, tools=tools
         )
-        return
-
-    @backoff.on_exception(
-
-
-
-
-    )
+        return lm_response
+
+    # @backoff.on_exception(
+    #     backoff.expo,
+    #     MISTRAL_EXCEPTIONS_TO_RETRY,
+    #     max_tries=BACKOFF_TOLERANCE,
+    #     on_giveup=lambda e: print(e),
+    # )
     def _hit_api_sync(
         self,
         model: str,
         messages: List[Dict[str, Any]],
         lm_config: Dict[str, Any],
+        response_model: Optional[BaseModel] = None,
         use_ephemeral_cache_only: bool = False,
-
+        reasoning_effort: str = "high",
+        tools: Optional[List[BaseTool]] = None,
+    ) -> BaseLMResponse:
         assert (
             lm_config.get("response_model", None) is None
         ), "response_model is not supported for standard calls"
+        assert not (response_model and tools), "Cannot provide both response_model and tools"
+
         used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
         cache_result = used_cache_handler.hit_managed_cache(
-            model, messages, lm_config=lm_config
+            model, messages, lm_config=lm_config, tools=tools
        )
         if cache_result:
+            assert type(cache_result) in [
+                BaseLMResponse,
+                str,
+            ], f"Expected BaseLMResponse or str, got {type(cache_result)}"
             return (
-                cache_result
-                if
-                else
+                cache_result
+                if type(cache_result) == BaseLMResponse
+                else BaseLMResponse(
+                    raw_response=cache_result, structured_output=None, tool_calls=None
+                )
             )

         mistral_messages = [
             {"role": msg["role"], "content": msg["content"]} for msg in messages
         ]
+        functions = [tool.to_mistral_tool() for tool in tools] if tools else None
+
+        params = {
+            "model": model,
+            "messages": mistral_messages,
+            "max_tokens": lm_config.get("max_tokens", 4096),
+            "temperature": lm_config.get(
+                "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
+            ),
+            "stream": False,
+            "tool_choice": "auto" if functions else None,
+            #"tools": functions,
+        }
+        if response_model:
+            params["response_format"] = response_model
+        elif tools:
+            params["tools"] = functions
+
         with Mistral(api_key=os.getenv("MISTRAL_API_KEY", "")) as client:
-            response = client.chat.complete(
-
-
-
-
-
-
-
-
-
+            response = client.chat.complete(**params)
+
+            message = response.choices[0].message
+            try:
+                raw_response = message.content
+            except AttributeError:
+                raw_response = ""
+
+            tool_calls = []
+            try:
+                if message.tool_calls:
+                    tool_calls = [
+                        {
+                            "id": call.id,
+                            "type": "function",
+                            "function": {
+                                "name": call.function.name,
+                                "arguments": call.function.arguments,
+                            },
+                        }
+                        for call in message.tool_calls
+                    ]
+            except AttributeError:
+                pass
+
+            lm_response = BaseLMResponse(
+                raw_response=raw_response,
+                structured_output=None,
+                tool_calls=tool_calls if tool_calls else None,
+            )
         used_cache_handler.add_to_managed_cache(
-            model, messages, lm_config=lm_config, output=
+            model, messages, lm_config=lm_config, output=lm_response, tools=tools
         )
-        return
+        return lm_response

     async def _hit_api_async_structured_output(
         self,
@@ -130,7 +226,7 @@ class MistralAPI(VendorBase):
         response_model: BaseModel,
         temperature: float,
         use_ephemeral_cache_only: bool = False,
-    ) ->
+    ) -> BaseLMResponse:
         try:
             mistral_messages = [
                 {"role": msg["role"], "content": msg["content"]} for msg in messages
@@ -145,7 +241,12 @@ class MistralAPI(VendorBase):
             )
             result = response.choices[0].message.content
             parsed = json.loads(result)
-
+            lm_response = BaseLMResponse(
+                raw_response="",
+                structured_output=response_model(**parsed),
+                tool_calls=None,
+            )
+            return lm_response
         except (json.JSONDecodeError, pydantic.ValidationError):
             if self._openai_fallback is None:
                 self._openai_fallback = OpenAIStructuredOutputClient()
@@ -164,7 +265,7 @@ class MistralAPI(VendorBase):
         response_model: BaseModel,
         temperature: float,
         use_ephemeral_cache_only: bool = False,
-    ) ->
+    ) -> BaseLMResponse:
         try:
             mistral_messages = [
                 {"role": msg["role"], "content": msg["content"]} for msg in messages
@@ -179,7 +280,12 @@ class MistralAPI(VendorBase):
             )
             result = response.choices[0].message.content
             parsed = json.loads(result)
-
+            lm_response = BaseLMResponse(
+                raw_response="",
+                structured_output=response_model(**parsed),
+                tool_calls=None,
+            )
+            return lm_response
         except (json.JSONDecodeError, pydantic.ValidationError):
             print("WARNING - Falling back to OpenAI - THIS IS SLOW")
             if self._openai_fallback is None:
```
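The hunks above add an optional tool-calling path to both MistralAPI._hit_api_async and _hit_api_sync: tools are converted with to_mistral_tool(), forwarded through the request params, and any tool calls on the response are surfaced on BaseLMResponse.tool_calls. A minimal sketch of how that path might be exercised; the new synth_ai/zyk/lms/tools/base.py is added in this release but not shown in this view, so the BaseTool fields, the MistralAPI constructor arguments, and the model name below are all assumptions:

```python
# Sketch only: BaseTool fields, the MistralAPI constructor arguments, and the
# model name are assumptions; they are not shown in this diff.
from pydantic import BaseModel

from synth_ai.zyk.lms.tools.base import BaseTool
from synth_ai.zyk.lms.vendors.core.mistral_api import MistralAPI


class WeatherArgs(BaseModel):
    city: str


weather_tool = BaseTool(  # assumed constructor signature
    name="get_weather",
    description="Look up the current weather for a city.",
    arguments=WeatherArgs,
)

client = MistralAPI(used_for_structured_outputs=False, exceptions_to_retry=[])  # assumed args
response = client._hit_api_sync(
    model="mistral-large-latest",
    messages=[{"role": "user", "content": "What is the weather in Paris?"}],
    lm_config={"temperature": 0},
    tools=[weather_tool],
)

# The new return type wraps both the plain text and any tool calls.
print(response.raw_response)
if response.tool_calls:
    for call in response.tool_calls:
        print(call["function"]["name"], call["function"]["arguments"])
```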
synth_ai/zyk/lms/vendors/core/openai_api.py

```diff
@@ -1,5 +1,5 @@
 import json
-from typing import Any, Dict, List, Tuple, Type
+from typing import Any, Dict, List, Optional, Tuple, Type

 import openai
 import pydantic_core
@@ -8,6 +8,8 @@ import pydantic_core
 from pydantic import BaseModel

 from synth_ai.zyk.lms.caching.initialize import get_cache_handler
+from synth_ai.zyk.lms.tools.base import BaseTool
+from synth_ai.zyk.lms.vendors.base import BaseLMResponse
 from synth_ai.zyk.lms.vendors.constants import SPECIAL_BASE_TEMPS
 from synth_ai.zyk.lms.vendors.openai_standard import OpenAIStandard

@@ -46,8 +48,11 @@ class OpenAIStructuredOutputClient(OpenAIStandard):
         response_model: BaseModel,
         temperature: float,
         use_ephemeral_cache_only: bool = False,
+        tools: Optional[List[BaseTool]] = None,
         reasoning_effort: str = "high",
     ) -> str:
+        if tools:
+            raise ValueError("Tools are not supported for async structured output")
         # "Hit client")
         lm_config = {"temperature": temperature, "response_model": response_model}
         used_cache_handler = get_cache_handler(
@@ -58,38 +63,40 @@ class OpenAIStructuredOutputClient(OpenAIStandard):
         )
         if cache_result:
             # print("Hit cache")
+            assert type(cache_result) in [
+                dict,
+                BaseLMResponse,
+            ], f"Expected dict or BaseLMResponse, got {type(cache_result)}"
             return (
-                cache_result["response"]
-                if isinstance(cache_result, dict)
-                else cache_result
+                cache_result["response"] if type(cache_result) == dict else cache_result
             )
-
-
-
-
-
-
-
-
-
-
-
-
+        if model in ["o3-mini", "o3", "o1-mini", "o1"]:
+            output = await self.async_client.beta.chat.completions.parse(
+                model=model,
+                messages=messages,
+                temperature=lm_config.get(
+                    "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
+                ),
+                response_format=response_model,
+                reasoning_effort=reasoning_effort,
+            )
+        else:
+            output = await self.async_client.beta.chat.completions.parse(
+                model=model,
+                messages=messages,
+                response_format=response_model,
             )
-
-        # Add reasoning_effort only for o3-mini
-        if "o3-mini" in model:
-            #print("Reasoning effort:", reasoning_effort)
-            api_params["reasoning_effort"] = reasoning_effort
-
-        output = await self.async_client.beta.chat.completions.parse(**api_params)
-
         # "Output", output)
         api_result = response_model(**json.loads(output.choices[0].message.content))
+        lm_response = BaseLMResponse(
+            raw_response="",
+            structured_output=api_result,
+            tool_calls=None,
+        )
         used_cache_handler.add_to_managed_cache(
-            model, messages, lm_config, output=
+            model, messages, lm_config, output=lm_response
         )
-        return
+        return lm_response

     def _hit_api_sync_structured_output(
         self,
@@ -98,8 +105,11 @@ class OpenAIStructuredOutputClient(OpenAIStandard):
         response_model: BaseModel,
         temperature: float,
         use_ephemeral_cache_only: bool = False,
+        tools: Optional[List[BaseTool]] = None,
         reasoning_effort: str = "high",
     ) -> str:
+        if tools:
+            raise ValueError("Tools are not supported for sync structured output")
         lm_config = {"temperature": temperature, "response_model": response_model}
         used_cache_handler = get_cache_handler(
             use_ephemeral_cache_only=use_ephemeral_cache_only
@@ -108,39 +118,40 @@ class OpenAIStructuredOutputClient(OpenAIStandard):
             model, messages, lm_config=lm_config
         )
         if cache_result:
+            assert type(cache_result) in [
+                dict,
+                BaseLMResponse,
+            ], f"Expected dict or BaseLMResponse, got {type(cache_result)}"
             return (
-                cache_result["response"]
-                if isinstance(cache_result, dict)
-                else cache_result
+                cache_result["response"] if type(cache_result) == dict else cache_result
             )
-
-
-
-
-
-
-
-
-
-
-
-
+        if model in ["o3-mini", "o3", "o1-mini", "o1"]:
+            output = self.sync_client.beta.chat.completions.parse(
+                model=model,
+                messages=messages,
+                temperature=lm_config.get(
+                    "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
+                ),
+                response_format=response_model,
+                reasoning_effort=reasoning_effort,
+            )
+        else:
+            output = self.sync_client.beta.chat.completions.parse(
+                model=model,
+                messages=messages,
+                response_format=response_model,
             )
-
-        # Add reasoning_effort only for o3-mini
-        if model in ["o3-mini"]:
-            api_params["reasoning_effort"] = reasoning_effort
-
-        output = self.sync_client.beta.chat.completions.parse(**api_params)
-
         api_result = response_model(**json.loads(output.choices[0].message.content))
+
+        lm_response = BaseLMResponse(
+            raw_response="",
+            structured_output=api_result,
+            tool_calls=None,
+        )
         used_cache_handler.add_to_managed_cache(
-            model,
-            messages,
-            lm_config=lm_config,
-            output=output.choices[0].message.content,
+            model, messages, lm_config=lm_config, output=lm_response
         )
-        return
+        return lm_response


 class OpenAIPrivate(OpenAIStandard):
```
|