synth-ai 0.1.0.dev28__py3-none-any.whl → 0.1.0.dev30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- public_tests/test_agent.py +11 -11
- public_tests/test_all_structured_outputs.py +32 -37
- public_tests/test_anthropic_structured_outputs.py +0 -0
- public_tests/test_deepseek_structured_outputs.py +0 -0
- public_tests/test_deepseek_tools.py +64 -0
- public_tests/test_gemini_structured_outputs.py +106 -0
- public_tests/test_models.py +27 -27
- public_tests/test_openai_structured_outputs.py +106 -0
- public_tests/test_reasoning_models.py +9 -7
- public_tests/test_recursive_structured_outputs.py +30 -30
- public_tests/test_structured.py +137 -0
- public_tests/test_structured_outputs.py +22 -13
- public_tests/test_text.py +160 -0
- public_tests/test_tools.py +300 -0
- synth_ai/__init__.py +1 -4
- synth_ai/zyk/__init__.py +2 -2
- synth_ai/zyk/lms/caching/ephemeral.py +54 -32
- synth_ai/zyk/lms/caching/handler.py +43 -15
- synth_ai/zyk/lms/caching/persistent.py +55 -27
- synth_ai/zyk/lms/core/main.py +29 -16
- synth_ai/zyk/lms/core/vendor_clients.py +1 -1
- synth_ai/zyk/lms/structured_outputs/handler.py +79 -45
- synth_ai/zyk/lms/structured_outputs/rehabilitate.py +3 -2
- synth_ai/zyk/lms/tools/base.py +104 -0
- synth_ai/zyk/lms/vendors/base.py +22 -6
- synth_ai/zyk/lms/vendors/core/anthropic_api.py +130 -95
- synth_ai/zyk/lms/vendors/core/gemini_api.py +153 -34
- synth_ai/zyk/lms/vendors/core/mistral_api.py +160 -54
- synth_ai/zyk/lms/vendors/core/openai_api.py +64 -53
- synth_ai/zyk/lms/vendors/openai_standard.py +197 -41
- synth_ai/zyk/lms/vendors/supported/deepseek.py +55 -0
- {synth_ai-0.1.0.dev28.dist-info → synth_ai-0.1.0.dev30.dist-info}/METADATA +2 -5
- synth_ai-0.1.0.dev30.dist-info/RECORD +65 -0
- public_tests/test_sonnet_thinking.py +0 -217
- synth_ai-0.1.0.dev28.dist-info/RECORD +0 -57
- {synth_ai-0.1.0.dev28.dist-info → synth_ai-0.1.0.dev30.dist-info}/WHEEL +0 -0
- {synth_ai-0.1.0.dev28.dist-info → synth_ai-0.1.0.dev30.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.1.0.dev28.dist-info → synth_ai-0.1.0.dev30.dist-info}/top_level.txt +0 -0
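
Reading the test changes below, the main API shift between dev28 and dev30 appears to be that `LM.respond_sync` / `respond_async` now return a `BaseLMResponse` wrapper (exposing `raw_response`, `structured_output`, and `tool_calls`) instead of a bare string or Pydantic model, and that tools are declared via a new `BaseTool` class in `synth_ai.zyk.lms.tools.base`. The following is a minimal sketch of that apparent usage, assembled only from the tests in this diff; the `Answer` model is a hypothetical illustration, and exact signatures should be confirmed against the package itself.

from pydantic import BaseModel

from synth_ai.zyk import LM, BaseLMResponse
from synth_ai.zyk.lms.tools.base import BaseTool


class Answer(BaseModel):
    # Hypothetical response model, for illustration only
    city: str


class WeatherParams(BaseModel):
    location: str


weather_tool = BaseTool(
    name="get_weather",
    description="Get current temperature for a given location.",
    arguments=WeatherParams,
)

lm = LM(model_name="gpt-4o-mini", formatting_model_name="gpt-4o-mini", temperature=0)

# Plain text: the answer now lives on .raw_response rather than being the return value.
resp: BaseLMResponse = lm.respond_sync(
    system_message="You are a helpful assistant.",
    user_message="What is the capital of France?",
)
print(resp.raw_response)

# Structured output: the parsed model now lives on .structured_output.
structured = lm.respond_sync(
    system_message="You are a helpful assistant.",
    user_message="Name the capital of France.",
    response_model=Answer,
)
print(structured.structured_output.city)

# Tool calling: tools are declared with BaseTool and surfaced on .tool_calls.
tooled = lm.respond_sync(
    system_message="You are a helpful assistant that uses tools when appropriate.",
    user_message="What's the weather in Paris?",
    tools=[weather_tool],
)
print(tooled.tool_calls)
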
public_tests/test_agent.py
CHANGED
@@ -311,7 +311,7 @@ You will be given a code_prompt_for_answer, which contains imports and the funct
 
 Your next actions / thought:
 
-Structured output: reasoning="I need to implement the function 'task_func' that replaces values in a DataFrame based on a dictionary and calculates the Pearson correlation coefficient between
+Structured output: reasoning="I need to implement the function 'task_func' that replaces values in a DataFrame based on a dictionary and calculates the Pearson correlation coefficient between each pair of columns. I will also ensure to handle the case where the input is not a DataFrame by raising a ValueError." action_name='edit_submission' action_args=[ActionArgument(key='first_line', value=4), ActionArgument(key='last_line', value=4), ActionArgument(key='new_code', value=" if not isinstance(df, pd.DataFrame):\n raise ValueError('Input must be a DataFrame')\n df.replace(dct, inplace=True)\n return df.corr(method='pearson')")]
 <System Message>
 # Premise
 You are a software engineer
@@ -489,11 +489,11 @@ class TestLMStructuredOutputs(unittest.TestCase):
             user_message=user_message,
             response_model=ReAct,
         )
-        self.assertIsInstance(result, ReAct)
-        self.assertIsInstance(result.reasoning, str)
-        self.assertIsInstance(result.action_name, str)
-        self.assertIsInstance(result.action_args, list)
-        for arg in result.action_args:
+        self.assertIsInstance(result.structured_output, ReAct)
+        self.assertIsInstance(result.structured_output.reasoning, str)
+        self.assertIsInstance(result.structured_output.action_name, str)
+        self.assertIsInstance(result.structured_output.action_args, list)
+        for arg in result.structured_output.action_args:
             self.assertIsInstance(arg, ActionArgument)
             self.assertIsInstance(arg.key, str)
             # self.assertIsInstance(arg.value, str)
@@ -512,11 +512,11 @@ class TestLMStructuredOutputs(unittest.TestCase):
             user_message=user_message,
             response_model=ReAct,
         )
-        self.assertIsInstance(result, ReAct)
-        self.assertIsInstance(result.reasoning, str)
-        self.assertIsInstance(result.action_name, str)
-        self.assertIsInstance(result.action_args, list)
-        for arg in result.action_args:
+        self.assertIsInstance(result.structured_output, ReAct)
+        self.assertIsInstance(result.structured_output.reasoning, str)
+        self.assertIsInstance(result.structured_output.action_name, str)
+        self.assertIsInstance(result.structured_output.action_args, list)
+        for arg in result.structured_output.action_args:
             self.assertIsInstance(arg, ActionArgument)
             self.assertIsInstance(arg.key, str)
             # self.assertIsInstance(arg.value, str)
public_tests/test_all_structured_outputs.py
CHANGED
@@ -3,8 +3,7 @@ from typing import Any, Dict, Optional
 import pytest
 from pydantic import BaseModel
 
-from synth_ai.zyk
-
+from synth_ai.zyk import LM, BaseLMResponse
 
 class StateUpdate(BaseModel):
     """Response model for state updates from LLM"""
@@ -117,8 +116,8 @@ def current_state():
 @pytest.mark.parametrize(
     "model_name",
     [
-
-
+        "gpt-4o-mini",
+        "gemini-1.5-flash",
         "claude-3-haiku-20240307",
         "deepseek-chat",
         "llama-3.1-8b-instant",
@@ -132,26 +131,27 @@ def test_state_delta_handling(
     state_delta_instructions = """Update the final_results to include findings about code quality issues. Add a recommendation to improve error handling."""
     user_message = f"Current state: {current_state}\nState delta instructions: {state_delta_instructions}\n\nHow should the state be updated?"
 
-    try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    #try:
+    result: BaseLMResponse = models[model_name].respond_sync(
+        system_message=system_message,
+        user_message=user_message,
+        response_model=StateUpdate,
+    )
+    print("Result", result)
+    # Verify response structure
+    assert isinstance(result, BaseLMResponse)
+    assert isinstance(result.structured_output, StateUpdate)
+
+    # Verify only allowed fields are present and have correct types
+    if result.structured_output.short_term_plan is not None:
+        assert isinstance(result.structured_output.short_term_plan, str)
+    if result.structured_output.objective is not None:
+        assert isinstance(result.structured_output.objective, str)
+    if result.structured_output.final_results is not None:
+        assert isinstance(result.structured_output.final_results, dict)
+
+    # except Exception as e:
+    #     pytest.fail(f"Model {model_name} failed: {str(e)}")
 
 
 @pytest.mark.timeout(15)
@@ -186,16 +186,11 @@ def test_state_delta_protected_fields(
     state_delta_instructions = """Update the message history to include new findings and update step summaries with recent progress."""
     user_message = f"Current state: {current_state}\nState delta instructions: {state_delta_instructions}\n\nHow should the state be updated?"
 
-    try:
-
-
-
-
-
-
-
-        assert not hasattr(result, "message_history")
-        assert not hasattr(result, "step_summaries")
-
-    except Exception as e:
-        pytest.fail(f"Model {model_name} failed: {str(e)}")
+    #try:
+    result = models[model_name].respond_sync(
+        system_message=system_message,
+        user_message=user_message,
+        response_model=StateUpdate,
+    )
+    # except Exception as e:
+    #     pytest.fail(f"Model {model_name} failed: {str(e)}")
public_tests/test_anthropic_structured_outputs.py
File without changes
public_tests/test_deepseek_structured_outputs.py
File without changes
public_tests/test_deepseek_tools.py
ADDED
@@ -0,0 +1,64 @@
+from pydantic import BaseModel
+
+from synth_ai.zyk.lms.core.main import LM
+from synth_ai.zyk.lms.tools.base import BaseTool
+from synth_ai.zyk.lms.vendors.supported.deepseek import DeepSeekAPI
+
+
+class WeatherParams(BaseModel):
+    location: str
+
+
+weather_tool = BaseTool(
+    name="get_weather",
+    description="Get current temperature for a given location.",
+    arguments=WeatherParams,
+)
+
+
+def test_weather_tool_direct():
+    client = DeepSeekAPI()
+
+    response = client._hit_api_sync(
+        model="deepseek-chat",
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a helpful assistant that uses tools when appropriate.",
+            },
+            {
+                "role": "user",
+                "content": "What's the weather in Paris? Use the tools and explain your reasoning.",
+            },
+        ],
+        tools=[weather_tool],
+        lm_config={
+            "temperature": 0,
+        },
+    )
+
+    # Check that we got a tool call
+    assert response.tool_calls is not None
+    assert len(response.tool_calls) == 1
+    assert response.tool_calls[0]["function"]["name"] == "get_weather"
+    assert "Paris" in response.tool_calls[0]["function"]["arguments"]
+
+
+def test_weather_tool_lm():
+    lm = LM(
+        model_name="deepseek-chat",
+        formatting_model_name="deepseek-chat",
+        temperature=0,
+    )
+
+    response = lm.respond_sync(
+        system_message="You are a helpful assistant that uses tools when appropriate.",
+        user_message="What's the weather in Paris? Use the tools and explain your reasoning.",
+        tools=[weather_tool],
+    )
+
+    # Check that we got a tool call
+    assert response.tool_calls is not None
+    assert len(response.tool_calls) == 1
+    assert response.tool_calls[0]["function"]["name"] == "get_weather"
+    assert "Paris" in response.tool_calls[0]["function"]["arguments"]
public_tests/test_gemini_structured_outputs.py
ADDED
@@ -0,0 +1,106 @@
+import asyncio
+import unittest
+from typing import List
+
+from pydantic import BaseModel, Field
+
+from synth_ai.zyk.lms.core.main import LM
+
+
+# Define example structured output models
+class SimpleResponse(BaseModel):
+    message: str
+    confidence_between_zero_one: float = Field(
+        ..., description="Confidence level between 0 and 1"
+    )
+
+
+class ComplexResponse(BaseModel):
+    title: str
+    tags: List[str]
+    content: str
+
+
+class NestedResponse(BaseModel):
+    main_category: str
+    subcategories: List[str]
+    details: SimpleResponse
+
+
+class TestLMStructuredOutputs(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        # Initialize LMs for both forced_json and stringified_json modes
+        cls.lm_forced_json = LM(
+            model_name="gpt-4o-mini",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.7,
+            max_retries="Few",
+            structured_output_mode="forced_json",
+        )
+        cls.lm_stringified_json = LM(
+            model_name="gemma3-27b-it",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.7,
+            max_retries="Few",
+            structured_output_mode="stringified_json",
+        )
+
+    def test_sync_simple_response(self):
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                result = lm.respond_sync(
+                    system_message="You are a helpful assistant.",
+                    user_message="Give me a short greeting and your confidence level.",
+                    response_model=SimpleResponse,
+                )
+                self.assertIsInstance(result.structured_output, SimpleResponse)
+                self.assertIsInstance(result.structured_output.message, str)
+                self.assertIsInstance(
+                    result.structured_output.confidence_between_zero_one, float
+                )
+                self.assertGreaterEqual(
+                    result.structured_output.confidence_between_zero_one, 0
+                )
+                self.assertLessEqual(
+                    result.structured_output.confidence_between_zero_one, 1
+                )
+
+    def test_sync_complex_response(self):
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                result = lm.respond_sync(
+                    system_message="You are a content creator.",
+                    user_message="Create a short blog post about AI.",
+                    response_model=ComplexResponse,
+                )
+                self.assertIsInstance(result.structured_output, ComplexResponse)
+                self.assertIsInstance(result.structured_output.title, str)
+                self.assertIsInstance(result.structured_output.tags, list)
+                self.assertIsInstance(result.structured_output.content, str)
+
+    async def async_nested_response(self, lm):
+        result = await lm.respond_async(
+            system_message="You are a categorization expert.",
+            user_message="Categorize 'Python' and provide a brief description.",
+            response_model=NestedResponse,
+        )
+        self.assertIsInstance(result.structured_output, NestedResponse)
+        self.assertIsInstance(result.structured_output.main_category, str)
+        self.assertIsInstance(result.structured_output.subcategories, list)
+        self.assertIsInstance(result.structured_output.details, SimpleResponse)
+
+    def test_async_nested_response(self):
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:  #
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                asyncio.run(self.async_nested_response(lm))
+
+
+if __name__ == "__main__":
+    unittest.main()
public_tests/test_models.py
CHANGED
@@ -1,4 +1,3 @@
-import asyncio
 import time
 
 import pytest
@@ -57,7 +56,9 @@ def model_instances():
 
     # Set reasoning_effort to "high" for specific models
     models["o3-mini-high-reasoning"].lm_config["reasoning_effort"] = "high"
-    models["claude-3-7-sonnet-latest-high-reasoning"].lm_config["reasoning_effort"] =
+    models["claude-3-7-sonnet-latest-high-reasoning"].lm_config["reasoning_effort"] = (
+        "high"
+    )
 
     return models
 
@@ -91,28 +92,27 @@ def test_model_simple_response(model_instances, model_name):
     elapsed = time.time() - start_time
 
     print(f"Response time: {elapsed:.2f} seconds")
-    print(f"Response length: {len(response)} characters")
-    print(f"Response sample: {response[:100]}...")
+    print(f"Response length: {len(response.raw_response)} characters")
+    print(f"Response sample: {response.raw_response[:100]}...")
 
     # Basic validation
-    assert isinstance(response, str)
-    assert len(response) > 0
+    assert isinstance(response.raw_response, str)
+    assert len(response.raw_response) > 0
     assert (
-        "Paris" in response
-    ), f"Expected 'Paris' in response, but got: {response[:200]}..."
+        "Paris" in response.raw_response
+    ), f"Expected 'Paris' in response, but got: {response.raw_response[:200]}..."
 
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     "model_name",
     [
-
-
-        "claude-3-7-sonnet-latest",
+        # "o3-mini",
+        # "claude-3-7-sonnet-latest",
         "claude-3-7-sonnet-latest-high-reasoning",
-        "gemini-2-flash",
-        "gemma3-27b-it",
-        "gpt-4o-mini",
+        # "gemini-2-flash",
+        # "gemma3-27b-it",
+        # "gpt-4o-mini",
     ],
 )
 async def test_reasoning_question(model_instances, model_name):
@@ -131,24 +131,24 @@ async def test_reasoning_question(model_instances, model_name):
     elapsed = time.time() - start_time
 
     print(f"Response time: {elapsed:.2f} seconds")
-    print(f"Response length: {len(response)} characters")
-    print(f"Response sample: {response[:100]}...")
+    print(f"Response length: {len(response.raw_response)} characters")
+    print(f"Response sample: {response.raw_response[:100]}...")
 
     # Basic validation
-    assert isinstance(response, str)
-    assert len(response) > 0
+    assert isinstance(response.raw_response, str)
+    assert len(response.raw_response) > 0
 
 
 @pytest.mark.parametrize(
     "model_name",
     [
         "o3-mini",
-
-        "claude-3-7-sonnet-latest",
+        # "o3-mini",
+        #"claude-3-7-sonnet-latest",
         "claude-3-7-sonnet-latest-high-reasoning",
-        "gemini-2-flash",
-        "gemma3-27b-it",
-        "gpt-4o-mini",
+        # "gemini-2-flash",
+        # "gemma3-27b-it",
+        # "gpt-4o-mini",
     ],
 )
 def test_model_context_and_factuality(model_instances, model_name):
@@ -171,11 +171,11 @@ def test_model_context_and_factuality(model_instances, model_name):
 
     # Check if the response contains the correct information
     assert (
-        "1968" in response
-    ), f"Expected '1968' in response for founding year, but got: {response[:200]}..."
+        "1968" in response.raw_response
+    ), f"Expected '1968' in response for founding year, but got: {response.raw_response[:200]}..."
     assert (
-        "Robert Neptune" in response
-    ), f"Expected 'Robert Neptune' in response for mayor, but got: {response[:200]}..."
+        "Robert Neptune" in response.raw_response
+    ), f"Expected 'Robert Neptune' in response for mayor, but got: {response.raw_response[:200]}..."
 
 
 if __name__ == "__main__":
public_tests/test_openai_structured_outputs.py
ADDED
@@ -0,0 +1,106 @@
+import asyncio
+import unittest
+from typing import List
+
+from pydantic import BaseModel, Field
+
+from synth_ai.zyk.lms.core.main import LM
+
+
+# Define example structured output models
+class SimpleResponse(BaseModel):
+    message: str
+    confidence_between_zero_one: float = Field(
+        ..., description="Confidence level between 0 and 1"
+    )
+
+
+class ComplexResponse(BaseModel):
+    title: str
+    tags: List[str]
+    content: str
+
+
+class NestedResponse(BaseModel):
+    main_category: str
+    subcategories: List[str]
+    details: SimpleResponse
+
+
+class TestLMStructuredOutputs(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        # Initialize LMs for both forced_json and stringified_json modes
+        cls.lm_forced_json = LM(
+            model_name="gpt-4o-mini",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.7,
+            max_retries="Few",
+            structured_output_mode="forced_json",
+        )
+        cls.lm_stringified_json = LM(
+            model_name="gpt-4o-mini",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.7,
+            max_retries="Few",
+            structured_output_mode="stringified_json",
+        )
+
+    def test_sync_simple_response(self):
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                result = lm.respond_sync(
+                    system_message="You are a helpful assistant.",
+                    user_message="Give me a short greeting and your confidence level.",
+                    response_model=SimpleResponse,
+                )
+                self.assertIsInstance(result.structured_output, SimpleResponse)
+                self.assertIsInstance(result.structured_output.message, str)
+                self.assertIsInstance(
+                    result.structured_output.confidence_between_zero_one, float
+                )
+                self.assertGreaterEqual(
+                    result.structured_output.confidence_between_zero_one, 0
+                )
+                self.assertLessEqual(
+                    result.structured_output.confidence_between_zero_one, 1
+                )
+
+    def test_sync_complex_response(self):
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                result = lm.respond_sync(
+                    system_message="You are a content creator.",
+                    user_message="Create a short blog post about AI.",
+                    response_model=ComplexResponse,
+                )
+                self.assertIsInstance(result.structured_output, ComplexResponse)
+                self.assertIsInstance(result.structured_output.title, str)
+                self.assertIsInstance(result.structured_output.tags, list)
+                self.assertIsInstance(result.structured_output.content, str)
+
+    async def async_nested_response(self, lm):
+        result = await lm.respond_async(
+            system_message="You are a categorization expert.",
+            user_message="Categorize 'Python' and provide a brief description.",
+            response_model=NestedResponse,
+        )
+        self.assertIsInstance(result.structured_output, NestedResponse)
+        self.assertIsInstance(result.structured_output.main_category, str)
+        self.assertIsInstance(result.structured_output.subcategories, list)
+        self.assertIsInstance(result.structured_output.details, SimpleResponse)
+
+    def test_async_nested_response(self):
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:  #
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                asyncio.run(self.async_nested_response(lm))
+
+
+if __name__ == "__main__":
+    unittest.main()
public_tests/test_reasoning_models.py
CHANGED
@@ -43,7 +43,7 @@ async def test_reasoning_effort():
     high_time = time.time() - start_time
 
     print(f"Time taken: {high_time:.2f} seconds")
-    print(f"Response length: {len(high_result)} characters")
+    print(f"Response length: {len(high_result.raw_response)} characters")
     print("-" * 60)
 
     # Create a separate instance for LOW reasoning
@@ -65,7 +65,7 @@ async def test_reasoning_effort():
     low_time = time.time() - start_time
 
     print(f"Time taken: {low_time:.2f} seconds")
-    print(f"Response length: {len(low_result)} characters")
+    print(f"Response length: {len(low_result.raw_response)} characters")
     print("-" * 60)
 
     # Print comparison
@@ -75,15 +75,17 @@ async def test_reasoning_effort():
     print(
         f"Difference: {high_time - low_time:.2f} seconds ({(high_time/low_time - 1)*100:.1f}% difference)"
     )
-    print(f"High response length: {len(high_result)} characters")
-    print(f"Low response length: {len(low_result)} characters")
-    print(
+    print(f"High response length: {len(high_result.raw_response)} characters")
+    print(f"Low response length: {len(low_result.raw_response)} characters")
+    print(
+        f"Response length ratio: {len(high_result.raw_response)/len(low_result.raw_response):.2f}x"
+    )
 
     # Print response samples
     print("\nHIGH Response Sample (first 200 chars):")
-    print(high_result[:200] + "...")
+    print(high_result.raw_response[:200] + "...")
     print("\nLOW Response Sample (first 200 chars):")
-    print(low_result[:200] + "...")
+    print(low_result.raw_response[:200] + "...")
 
 
 if __name__ == "__main__":