synth-ai 0.1.0.dev25__py3-none-any.whl → 0.1.0.dev27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- public_tests/test_sonnet_thinking.py +178 -0
- synth_ai/zyk/lms/vendors/core/anthropic_api.py +20 -21
- {synth_ai-0.1.0.dev25.dist-info → synth_ai-0.1.0.dev27.dist-info}/METADATA +1 -1
- {synth_ai-0.1.0.dev25.dist-info → synth_ai-0.1.0.dev27.dist-info}/RECORD +7 -6
- {synth_ai-0.1.0.dev25.dist-info → synth_ai-0.1.0.dev27.dist-info}/WHEEL +0 -0
- {synth_ai-0.1.0.dev25.dist-info → synth_ai-0.1.0.dev27.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.1.0.dev25.dist-info → synth_ai-0.1.0.dev27.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,178 @@
|
|
1
|
+
import asyncio
|
2
|
+
import unittest
|
3
|
+
|
4
|
+
from synth_ai.zyk import LM
|
5
|
+
|
6
|
+
|
7
|
+
class TestSonnetThinking(unittest.TestCase):
    """Integration tests for Claude 3.7 Sonnet extended-thinking via the LM wrapper.

    NOTE(review): the ``async def test_*`` methods below are discovered by
    unittest, but a plain ``TestCase`` never awaits coroutines, so under a
    stock runner they pass vacuously (with a "coroutine never awaited"
    warning). They are actually executed through ``test_all``; migrating to
    ``unittest.IsolatedAsyncioTestCase`` would let the runner await each one
    directly — TODO confirm.
    """

    @classmethod
    def setUpClass(cls):
        """Build one shared LM pointed at Claude 3.7 Sonnet with high reasoning effort."""
        cls.lm = LM(
            model_name="claude-3-7-sonnet-latest",
            formatting_model_name="gpt-4o-mini",
            temperature=0,
        )
        # Set reasoning_effort in lm_config
        cls.lm.lm_config["reasoning_effort"] = "high"

    @staticmethod
    def _train_problem_messages():
        """Return a fresh copy of the train-distance prompt shared by several tests."""
        return [
            {"role": "system", "content": "You are a helpful AI assistant."},
            {
                "role": "user",
                "content": "Please solve this math problem step by step: If a train travels at 60 mph for 2.5 hours, how far does it travel?",
            },
        ]

    async def test_thinking_response(self):
        """Plain-text response should be a non-empty string containing digits."""
        messages = self._train_problem_messages()

        response = await self.lm.respond_async(messages=messages)
        print("\n=== Math Problem Test ===")
        print(f"Response:\n{response}\n")
        self.assertIsInstance(response, str)
        self.assertGreater(len(response), 0)

        # Test that the response includes numerical calculation
        self.assertTrue(any(char.isdigit() for char in response))

    async def test_thinking_structured_output(self):
        """Structured output should parse into the requested pydantic model."""
        from pydantic import BaseModel

        class MathSolution(BaseModel):
            steps: list[str]
            final_answer: float
            units: str

        messages = [
            {"role": "system", "content": "You are a math problem solver."},
            {
                "role": "user",
                "content": "If a car travels at 30 mph for 45 minutes, how far does it travel? Provide steps.",
            },
        ]

        response = await self.lm.respond_async(
            messages=messages, response_model=MathSolution
        )

        print("\n=== Structured Math Problem Test ===")
        print("Steps:")
        for i, step in enumerate(response.steps, 1):
            print(f"{i}. {step}")
        print(f"Final Answer: {response.final_answer} {response.units}\n")

        self.assertIsInstance(response, MathSolution)
        self.assertGreater(len(response.steps), 0)
        self.assertIsInstance(response.final_answer, float)
        self.assertIsInstance(response.units, str)

    async def test_thinking_with_high_effort(self):
        """Compare a high-effort LM against a freshly built medium-effort LM."""
        messages = [
            {
                "role": "system",
                "content": "You are a problem-solving AI. Break down complex problems into detailed steps.",
            },
            {
                "role": "user",
                "content": "Design a system to automate a coffee shop's inventory management. Consider all aspects.",
            },
        ]

        print("\n=== High Effort Thinking Test ===")
        response = await self.lm.respond_async(messages=messages)
        print(f"High Effort Response:\n{response}\n")
        self.assertIsInstance(response, str)
        self.assertGreater(len(response), 100)  # Expecting detailed response

        # Test with medium effort
        lm_medium = LM(
            model_name="claude-3-7-sonnet-latest",
            formatting_model_name="gpt-4o-mini",
            temperature=0,
        )
        lm_medium.lm_config["reasoning_effort"] = "medium"
        print("\n=== Medium Effort Thinking Test ===")
        response_medium = await lm_medium.respond_async(messages=messages)
        print(f"Medium Effort Response:\n{response_medium}\n")
        self.assertIsInstance(response_medium, str)

    async def test_thinking_blocks_attributes(self):
        """Test to verify thinking blocks have the correct attributes and structure"""
        messages = self._train_problem_messages()

        print("\n=== Testing Thinking Blocks Structure ===")
        try:
            response = await self.lm.respond_async(messages=messages)
            print(f"Response received successfully: {response[:100]}...")
            self.assertIsInstance(response, str)
            self.assertGreater(len(response), 0)
        except AttributeError as e:
            # Convert the known vendor-layer attribute mismatch into a hard
            # failure; re-raise anything else unchanged.
            if "'TextBlock' object has no attribute 'value'" in str(e):
                self.fail(
                    "TextBlock missing 'value' attribute - API response structure may have changed"
                )
            raise

    async def test_thinking_blocks_with_structured_output(self):
        """Test thinking blocks with structured output to verify attribute handling"""
        from pydantic import BaseModel

        class SimpleResponse(BaseModel):
            answer: str
            explanation: str

        messages = [
            {"role": "system", "content": "You are a helpful AI assistant."},
            {"role": "user", "content": "What is 2+2? Provide answer and explanation."},
        ]

        print("\n=== Testing Thinking Blocks with Structured Output ===")
        try:
            response = await self.lm.respond_async(
                messages=messages, response_model=SimpleResponse
            )
            print(f"Structured response received: {response}")
            self.assertIsInstance(response, SimpleResponse)
            self.assertTrue(hasattr(response, "answer"))
            self.assertTrue(hasattr(response, "explanation"))
        except AttributeError as e:
            if "'TextBlock' object has no attribute 'value'" in str(e):
                self.fail("TextBlock missing 'value' attribute in structured output")
            raise

    async def test_thinking_blocks_raw_response(self):
        """Test to examine the raw response structure from the API"""
        messages = [
            {"role": "system", "content": "You are a helpful AI assistant."},
            {"role": "user", "content": "Count from 1 to 3."},
        ]

        print("\n=== Testing Raw Response Structure ===")
        try:
            # Access the raw response if possible
            response = await self.lm.respond_async(messages=messages)
            print(f"Raw response type: {type(response)}")
            print(f"Raw response content: {response}")
            self.assertIsInstance(response, str)
        except Exception as e:
            print(f"Exception type: {type(e)}")
            print(f"Exception message: {str(e)}")
            raise

    def test_all(self):
        """Single sync entry point that actually awaits every async check above."""
        print("\nStarting Claude 3.7 Sonnet Thinking Tests...")
        asyncio.run(self.test_thinking_response())
        asyncio.run(self.test_thinking_structured_output())
        asyncio.run(self.test_thinking_with_high_effort())
        asyncio.run(self.test_thinking_blocks_attributes())
        asyncio.run(self.test_thinking_blocks_with_structured_output())
        asyncio.run(self.test_thinking_blocks_raw_response())
        print("\nAll tests completed successfully!")
|
176
|
+
|
177
|
+
if __name__ == "__main__":
    # Run the suite with the standard unittest CLI when executed directly.
    unittest.main()
|
@@ -106,8 +106,16 @@ class AnthropicAPI(VendorBase):
|
|
106
106
|
# Make the API call
|
107
107
|
response = await self.async_client.messages.create(**api_params)
|
108
108
|
|
109
|
-
|
110
|
-
|
109
|
+
# Handle both regular and thinking responses
|
110
|
+
if hasattr(response.content[0], "text"):
|
111
|
+
api_result = response.content[0].text
|
112
|
+
else:
|
113
|
+
# For thinking responses, get the final output
|
114
|
+
thinking_blocks = [
|
115
|
+
block for block in response.content if block.type == "text"
|
116
|
+
]
|
117
|
+
api_result = thinking_blocks[-1].value if thinking_blocks else ""
|
118
|
+
|
111
119
|
used_cache_handler.add_to_managed_cache(
|
112
120
|
model, messages, lm_config=lm_config, output=api_result
|
113
121
|
)
|
@@ -180,7 +188,16 @@ class AnthropicAPI(VendorBase):
|
|
180
188
|
# Make the API call
|
181
189
|
response = self.sync_client.messages.create(**api_params)
|
182
190
|
|
183
|
-
|
191
|
+
# Handle both regular and thinking responses
|
192
|
+
if hasattr(response.content[0], "text"):
|
193
|
+
api_result = response.content[0].text
|
194
|
+
else:
|
195
|
+
# For thinking responses, get the final output
|
196
|
+
thinking_blocks = [
|
197
|
+
block for block in response.content if block.type == "text"
|
198
|
+
]
|
199
|
+
api_result = thinking_blocks[-1].value if thinking_blocks else ""
|
200
|
+
|
184
201
|
used_cache_handler.add_to_managed_cache(
|
185
202
|
model, messages, lm_config=lm_config, output=api_result
|
186
203
|
)
|
@@ -313,21 +330,3 @@ class AnthropicAPI(VendorBase):
|
|
313
330
|
use_ephemeral_cache_only=use_ephemeral_cache_only,
|
314
331
|
**vendor_params, # Pass all vendor-specific params
|
315
332
|
)
|
316
|
-
|
317
|
-
def _prepare_api_params(self, messages, **kwargs):
|
318
|
-
# Ensure max_tokens is at least thinking budget + 4096
|
319
|
-
if "max_tokens" not in kwargs:
|
320
|
-
kwargs["max_tokens"] = 4096 * 2 # Default to 2x context window
|
321
|
-
|
322
|
-
if "thinking" in kwargs and kwargs.get("thinking", {}).get("budget_tokens"):
|
323
|
-
thinking_budget = kwargs["thinking"]["budget_tokens"]
|
324
|
-
kwargs["max_tokens"] = max(kwargs["max_tokens"], thinking_budget + 4096)
|
325
|
-
|
326
|
-
api_params = {
|
327
|
-
"messages": formatted_messages,
|
328
|
-
"model": self.model_name,
|
329
|
-
"max_tokens": kwargs.get("max_tokens", 4096 * 2),
|
330
|
-
# ... rest of params ...
|
331
|
-
}
|
332
|
-
|
333
|
-
return api_params
|
@@ -4,6 +4,7 @@ public_tests/test_all_structured_outputs.py,sha256=x7Gj5Ykpw8Ut_XlSOEBHRLJSagYSH
|
|
4
4
|
public_tests/test_models.py,sha256=7ZJ2HPDZWhcIeZDDu8Iyt5lOy1xpKpYHM8FzsyEKQmc,5703
|
5
5
|
public_tests/test_reasoning_models.py,sha256=twKNTrWyeTgtqSC2A4V0g79Uq_SjZiBeWp6ntJIAGNM,2779
|
6
6
|
public_tests/test_recursive_structured_outputs.py,sha256=Ne-9XwnOxN7eSpGbNHOpegR-sRj589I84T6y8Z_4QnA,5781
|
7
|
+
public_tests/test_sonnet_thinking.py,sha256=XrV9Uxxl4T26zvgeycD7yPoPk2XR43A5bsRjejpEDKk,7003
|
7
8
|
public_tests/test_structured_outputs.py,sha256=MZitgGedFlvxeaVFzuDQb2xXs8apwvDLTINpGBfsTdM,3653
|
8
9
|
public_tests/test_synth_sdk.py,sha256=jqJHKpvBn9qj21P76z9onXfPg88jyUmBTKmdvCsQMk8,14885
|
9
10
|
synth_ai/__init__.py,sha256=2siivzLbT2r-EA7m91dcJB-6Vsurc5_sX3WiKf4_o8Y,198
|
@@ -35,7 +36,7 @@ synth_ai/zyk/lms/vendors/constants.py,sha256=zqCOyXZqo297wboR9EKVSkvpq6JCMSJyeso
|
|
35
36
|
synth_ai/zyk/lms/vendors/openai_standard.py,sha256=TJz1u6IcJ1KHjbofyHs0rlFa13smVXFTtqBSVqEYJqo,5818
|
36
37
|
synth_ai/zyk/lms/vendors/retries.py,sha256=m-WvAiPix9ovnO2S-m53Td5VZDWBVBFuHuSK9--OVxw,38
|
37
38
|
synth_ai/zyk/lms/vendors/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
38
|
-
synth_ai/zyk/lms/vendors/core/anthropic_api.py,sha256=
|
39
|
+
synth_ai/zyk/lms/vendors/core/anthropic_api.py,sha256=rVAVrjgMXeih3qbISBHf1euilue9Au1n-xDDyQB81n0,12594
|
39
40
|
synth_ai/zyk/lms/vendors/core/gemini_api.py,sha256=Cp8BpSk1yCC3SYrEK1pFOnCdUc65XIPonFEirZ6W2rA,5395
|
40
41
|
synth_ai/zyk/lms/vendors/core/mistral_api.py,sha256=m11ItQ46VyyCUy6hv6mw5OmiqwHr07wV_NJVNnPHgiA,8080
|
41
42
|
synth_ai/zyk/lms/vendors/core/openai_api.py,sha256=700M0QfAxDZXAURnlY--ReEwIEPJPMCwY0JIpu4vptM,5881
|
@@ -46,11 +47,11 @@ synth_ai/zyk/lms/vendors/supported/deepseek.py,sha256=diFfdhPMO5bLFZxnYj7VT0v6jK
|
|
46
47
|
synth_ai/zyk/lms/vendors/supported/groq.py,sha256=Fbi7QvhdLx0F-VHO5PY-uIQlPR0bo3C9h1MvIOx8nz0,388
|
47
48
|
synth_ai/zyk/lms/vendors/supported/ollama.py,sha256=K30VBFRTd7NYyPmyBVRZS2sm0UB651AHp9i3wd55W64,469
|
48
49
|
synth_ai/zyk/lms/vendors/supported/together.py,sha256=Ni_jBqqGPN0PkkY-Ew64s3gNKk51k3FCpLSwlNhKbf0,342
|
49
|
-
synth_ai-0.1.0.
|
50
|
+
synth_ai-0.1.0.dev27.dist-info/licenses/LICENSE,sha256=ynhjRQUfqA_RdGRATApfFA_fBAy9cno04sLtLUqxVFM,1069
|
50
51
|
tests/test_agent.py,sha256=CjPPWuMWC_TzX1DkDald-bbAxgjXE-HPQvFhq2B--5k,22363
|
51
52
|
tests/test_recursive_structured_outputs.py,sha256=Ne-9XwnOxN7eSpGbNHOpegR-sRj589I84T6y8Z_4QnA,5781
|
52
53
|
tests/test_structured_outputs.py,sha256=J7sfbGZ7OeB5ONIKpcCTymyayNyAdFfGokC1bcUrSx0,3651
|
53
|
-
synth_ai-0.1.0.
|
54
|
-
synth_ai-0.1.0.
|
55
|
-
synth_ai-0.1.0.
|
56
|
-
synth_ai-0.1.0.
|
54
|
+
synth_ai-0.1.0.dev27.dist-info/METADATA,sha256=JMIUzbECFL7CJ1EJv-D7Waelv_KYwhI0WyhiyJJCxfI,2795
|
55
|
+
synth_ai-0.1.0.dev27.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
56
|
+
synth_ai-0.1.0.dev27.dist-info/top_level.txt,sha256=5GzJO9j-KbJ_4ppxhmCUa_qdhHM4-9cHHNU76yAI8do,42
|
57
|
+
synth_ai-0.1.0.dev27.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|