synth-ai 0.1.0.dev28__py3-none-any.whl → 0.1.0.dev30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- public_tests/test_agent.py +11 -11
- public_tests/test_all_structured_outputs.py +32 -37
- public_tests/test_anthropic_structured_outputs.py +0 -0
- public_tests/test_deepseek_structured_outputs.py +0 -0
- public_tests/test_deepseek_tools.py +64 -0
- public_tests/test_gemini_structured_outputs.py +106 -0
- public_tests/test_models.py +27 -27
- public_tests/test_openai_structured_outputs.py +106 -0
- public_tests/test_reasoning_models.py +9 -7
- public_tests/test_recursive_structured_outputs.py +30 -30
- public_tests/test_structured.py +137 -0
- public_tests/test_structured_outputs.py +22 -13
- public_tests/test_text.py +160 -0
- public_tests/test_tools.py +300 -0
- synth_ai/__init__.py +1 -4
- synth_ai/zyk/__init__.py +2 -2
- synth_ai/zyk/lms/caching/ephemeral.py +54 -32
- synth_ai/zyk/lms/caching/handler.py +43 -15
- synth_ai/zyk/lms/caching/persistent.py +55 -27
- synth_ai/zyk/lms/core/main.py +29 -16
- synth_ai/zyk/lms/core/vendor_clients.py +1 -1
- synth_ai/zyk/lms/structured_outputs/handler.py +79 -45
- synth_ai/zyk/lms/structured_outputs/rehabilitate.py +3 -2
- synth_ai/zyk/lms/tools/base.py +104 -0
- synth_ai/zyk/lms/vendors/base.py +22 -6
- synth_ai/zyk/lms/vendors/core/anthropic_api.py +130 -95
- synth_ai/zyk/lms/vendors/core/gemini_api.py +153 -34
- synth_ai/zyk/lms/vendors/core/mistral_api.py +160 -54
- synth_ai/zyk/lms/vendors/core/openai_api.py +64 -53
- synth_ai/zyk/lms/vendors/openai_standard.py +197 -41
- synth_ai/zyk/lms/vendors/supported/deepseek.py +55 -0
- {synth_ai-0.1.0.dev28.dist-info → synth_ai-0.1.0.dev30.dist-info}/METADATA +2 -5
- synth_ai-0.1.0.dev30.dist-info/RECORD +65 -0
- public_tests/test_sonnet_thinking.py +0 -217
- synth_ai-0.1.0.dev28.dist-info/RECORD +0 -57
- {synth_ai-0.1.0.dev28.dist-info → synth_ai-0.1.0.dev30.dist-info}/WHEEL +0 -0
- {synth_ai-0.1.0.dev28.dist-info → synth_ai-0.1.0.dev30.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.1.0.dev28.dist-info → synth_ai-0.1.0.dev30.dist-info}/top_level.txt +0 -0
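
Reading the test changes below, the main API shift between dev28 and dev30 appears to be that `LM.respond_sync` / `respond_async` now return a `BaseLMResponse` wrapper (exposing `raw_response`, `structured_output`, and `tool_calls`) instead of a bare string or Pydantic model, and that tools are declared via a new `BaseTool` class in `synth_ai.zyk.lms.tools.base`. The following is a minimal sketch of that apparent usage, assembled only from the tests in this diff; the `Answer` model is a hypothetical illustration, and exact signatures should be confirmed against the package itself.

from pydantic import BaseModel

from synth_ai.zyk import LM, BaseLMResponse
from synth_ai.zyk.lms.tools.base import BaseTool


class Answer(BaseModel):
    # Hypothetical response model, for illustration only
    city: str


class WeatherParams(BaseModel):
    location: str


weather_tool = BaseTool(
    name="get_weather",
    description="Get current temperature for a given location.",
    arguments=WeatherParams,
)

lm = LM(model_name="gpt-4o-mini", formatting_model_name="gpt-4o-mini", temperature=0)

# Plain text: the answer now lives on .raw_response rather than being the return value.
resp: BaseLMResponse = lm.respond_sync(
    system_message="You are a helpful assistant.",
    user_message="What is the capital of France?",
)
print(resp.raw_response)

# Structured output: the parsed model now lives on .structured_output.
structured = lm.respond_sync(
    system_message="You are a helpful assistant.",
    user_message="Name the capital of France.",
    response_model=Answer,
)
print(structured.structured_output.city)

# Tool calling: tools are declared with BaseTool and surfaced on .tool_calls.
tooled = lm.respond_sync(
    system_message="You are a helpful assistant that uses tools when appropriate.",
    user_message="What's the weather in Paris?",
    tools=[weather_tool],
)
print(tooled.tool_calls)
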
public_tests/test_agent.py
CHANGED
@@ -311,7 +311,7 @@ You will be given a code_prompt_for_answer, which contains imports and the funct
 
 Your next actions / thought:
 
-Structured output: reasoning="I need to implement the function 'task_func' that replaces values in a DataFrame based on a dictionary and calculates the Pearson correlation coefficient between
+Structured output: reasoning="I need to implement the function 'task_func' that replaces values in a DataFrame based on a dictionary and calculates the Pearson correlation coefficient between each pair of columns. I will also ensure to handle the case where the input is not a DataFrame by raising a ValueError." action_name='edit_submission' action_args=[ActionArgument(key='first_line', value=4), ActionArgument(key='last_line', value=4), ActionArgument(key='new_code', value=" if not isinstance(df, pd.DataFrame):\n raise ValueError('Input must be a DataFrame')\n df.replace(dct, inplace=True)\n return df.corr(method='pearson')")]
 <System Message>
 # Premise
 You are a software engineer
@@ -489,11 +489,11 @@ class TestLMStructuredOutputs(unittest.TestCase):
             user_message=user_message,
             response_model=ReAct,
         )
-        self.assertIsInstance(result, ReAct)
-        self.assertIsInstance(result.reasoning, str)
-        self.assertIsInstance(result.action_name, str)
-        self.assertIsInstance(result.action_args, list)
-        for arg in result.action_args:
+        self.assertIsInstance(result.structured_output, ReAct)
+        self.assertIsInstance(result.structured_output.reasoning, str)
+        self.assertIsInstance(result.structured_output.action_name, str)
+        self.assertIsInstance(result.structured_output.action_args, list)
+        for arg in result.structured_output.action_args:
             self.assertIsInstance(arg, ActionArgument)
             self.assertIsInstance(arg.key, str)
             # self.assertIsInstance(arg.value, str)
@@ -512,11 +512,11 @@ class TestLMStructuredOutputs(unittest.TestCase):
             user_message=user_message,
             response_model=ReAct,
         )
-        self.assertIsInstance(result, ReAct)
-        self.assertIsInstance(result.reasoning, str)
-        self.assertIsInstance(result.action_name, str)
-        self.assertIsInstance(result.action_args, list)
-        for arg in result.action_args:
+        self.assertIsInstance(result.structured_output, ReAct)
+        self.assertIsInstance(result.structured_output.reasoning, str)
+        self.assertIsInstance(result.structured_output.action_name, str)
+        self.assertIsInstance(result.structured_output.action_args, list)
+        for arg in result.structured_output.action_args:
             self.assertIsInstance(arg, ActionArgument)
             self.assertIsInstance(arg.key, str)
             # self.assertIsInstance(arg.value, str)
public_tests/test_all_structured_outputs.py
CHANGED
@@ -3,8 +3,7 @@ from typing import Any, Dict, Optional
 import pytest
 from pydantic import BaseModel
 
-from synth_ai.zyk
-
+from synth_ai.zyk import LM, BaseLMResponse
 
 class StateUpdate(BaseModel):
     """Response model for state updates from LLM"""
@@ -117,8 +116,8 @@ def current_state():
 @pytest.mark.parametrize(
     "model_name",
     [
-
-
+        "gpt-4o-mini",
+        "gemini-1.5-flash",
         "claude-3-haiku-20240307",
         "deepseek-chat",
         "llama-3.1-8b-instant",
@@ -132,26 +131,27 @@ def test_state_delta_handling(
     state_delta_instructions = """Update the final_results to include findings about code quality issues. Add a recommendation to improve error handling."""
     user_message = f"Current state: {current_state}\nState delta instructions: {state_delta_instructions}\n\nHow should the state be updated?"
 
-    try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    #try:
+    result: BaseLMResponse = models[model_name].respond_sync(
+        system_message=system_message,
+        user_message=user_message,
+        response_model=StateUpdate,
+    )
+    print("Result", result)
+    # Verify response structure
+    assert isinstance(result, BaseLMResponse)
+    assert isinstance(result.structured_output, StateUpdate)
+
+    # Verify only allowed fields are present and have correct types
+    if result.structured_output.short_term_plan is not None:
+        assert isinstance(result.structured_output.short_term_plan, str)
+    if result.structured_output.objective is not None:
+        assert isinstance(result.structured_output.objective, str)
+    if result.structured_output.final_results is not None:
+        assert isinstance(result.structured_output.final_results, dict)
+
+    # except Exception as e:
+    #     pytest.fail(f"Model {model_name} failed: {str(e)}")
 
 
 @pytest.mark.timeout(15)
@@ -186,16 +186,11 @@ def test_state_delta_protected_fields(
     state_delta_instructions = """Update the message history to include new findings and update step summaries with recent progress."""
     user_message = f"Current state: {current_state}\nState delta instructions: {state_delta_instructions}\n\nHow should the state be updated?"
 
-    try:
-
-
-
-
-
-
-
-        assert not hasattr(result, "message_history")
-        assert not hasattr(result, "step_summaries")
-
-    except Exception as e:
-        pytest.fail(f"Model {model_name} failed: {str(e)}")
+    #try:
+    result = models[model_name].respond_sync(
+        system_message=system_message,
+        user_message=user_message,
+        response_model=StateUpdate,
+    )
+    # except Exception as e:
+    #     pytest.fail(f"Model {model_name} failed: {str(e)}")
public_tests/test_anthropic_structured_outputs.py
File without changes
public_tests/test_deepseek_structured_outputs.py
File without changes
public_tests/test_deepseek_tools.py
ADDED
@@ -0,0 +1,64 @@
+from pydantic import BaseModel
+
+from synth_ai.zyk.lms.core.main import LM
+from synth_ai.zyk.lms.tools.base import BaseTool
+from synth_ai.zyk.lms.vendors.supported.deepseek import DeepSeekAPI
+
+
+class WeatherParams(BaseModel):
+    location: str
+
+
+weather_tool = BaseTool(
+    name="get_weather",
+    description="Get current temperature for a given location.",
+    arguments=WeatherParams,
+)
+
+
+def test_weather_tool_direct():
+    client = DeepSeekAPI()
+
+    response = client._hit_api_sync(
+        model="deepseek-chat",
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a helpful assistant that uses tools when appropriate.",
+            },
+            {
+                "role": "user",
+                "content": "What's the weather in Paris? Use the tools and explain your reasoning.",
+            },
+        ],
+        tools=[weather_tool],
+        lm_config={
+            "temperature": 0,
+        },
+    )
+
+    # Check that we got a tool call
+    assert response.tool_calls is not None
+    assert len(response.tool_calls) == 1
+    assert response.tool_calls[0]["function"]["name"] == "get_weather"
+    assert "Paris" in response.tool_calls[0]["function"]["arguments"]
+
+
+def test_weather_tool_lm():
+    lm = LM(
+        model_name="deepseek-chat",
+        formatting_model_name="deepseek-chat",
+        temperature=0,
+    )
+
+    response = lm.respond_sync(
+        system_message="You are a helpful assistant that uses tools when appropriate.",
+        user_message="What's the weather in Paris? Use the tools and explain your reasoning.",
+        tools=[weather_tool],
+    )
+
+    # Check that we got a tool call
+    assert response.tool_calls is not None
+    assert len(response.tool_calls) == 1
+    assert response.tool_calls[0]["function"]["name"] == "get_weather"
+    assert "Paris" in response.tool_calls[0]["function"]["arguments"]
public_tests/test_gemini_structured_outputs.py
ADDED
@@ -0,0 +1,106 @@
+import asyncio
+import unittest
+from typing import List
+
+from pydantic import BaseModel, Field
+
+from synth_ai.zyk.lms.core.main import LM
+
+
+# Define example structured output models
+class SimpleResponse(BaseModel):
+    message: str
+    confidence_between_zero_one: float = Field(
+        ..., description="Confidence level between 0 and 1"
+    )
+
+
+class ComplexResponse(BaseModel):
+    title: str
+    tags: List[str]
+    content: str
+
+
+class NestedResponse(BaseModel):
+    main_category: str
+    subcategories: List[str]
+    details: SimpleResponse
+
+
+class TestLMStructuredOutputs(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        # Initialize LMs for both forced_json and stringified_json modes
+        cls.lm_forced_json = LM(
+            model_name="gpt-4o-mini",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.7,
+            max_retries="Few",
+            structured_output_mode="forced_json",
+        )
+        cls.lm_stringified_json = LM(
+            model_name="gemma3-27b-it",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.7,
+            max_retries="Few",
+            structured_output_mode="stringified_json",
+        )
+
+    def test_sync_simple_response(self):
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                result = lm.respond_sync(
+                    system_message="You are a helpful assistant.",
+                    user_message="Give me a short greeting and your confidence level.",
+                    response_model=SimpleResponse,
+                )
+                self.assertIsInstance(result.structured_output, SimpleResponse)
+                self.assertIsInstance(result.structured_output.message, str)
+                self.assertIsInstance(
+                    result.structured_output.confidence_between_zero_one, float
+                )
+                self.assertGreaterEqual(
+                    result.structured_output.confidence_between_zero_one, 0
+                )
+                self.assertLessEqual(
+                    result.structured_output.confidence_between_zero_one, 1
+                )
+
+    def test_sync_complex_response(self):
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                result = lm.respond_sync(
+                    system_message="You are a content creator.",
+                    user_message="Create a short blog post about AI.",
+                    response_model=ComplexResponse,
+                )
+                self.assertIsInstance(result.structured_output, ComplexResponse)
+                self.assertIsInstance(result.structured_output.title, str)
+                self.assertIsInstance(result.structured_output.tags, list)
+                self.assertIsInstance(result.structured_output.content, str)
+
+    async def async_nested_response(self, lm):
+        result = await lm.respond_async(
+            system_message="You are a categorization expert.",
+            user_message="Categorize 'Python' and provide a brief description.",
+            response_model=NestedResponse,
+        )
+        self.assertIsInstance(result.structured_output, NestedResponse)
+        self.assertIsInstance(result.structured_output.main_category, str)
+        self.assertIsInstance(result.structured_output.subcategories, list)
+        self.assertIsInstance(result.structured_output.details, SimpleResponse)
+
+    def test_async_nested_response(self):
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:  #
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                asyncio.run(self.async_nested_response(lm))
+
+
+if __name__ == "__main__":
+    unittest.main()
public_tests/test_models.py
CHANGED
@@ -1,4 +1,3 @@
-import asyncio
 import time
 
 import pytest
@@ -57,7 +56,9 @@ def model_instances():
 
     # Set reasoning_effort to "high" for specific models
     models["o3-mini-high-reasoning"].lm_config["reasoning_effort"] = "high"
-    models["claude-3-7-sonnet-latest-high-reasoning"].lm_config["reasoning_effort"] =
+    models["claude-3-7-sonnet-latest-high-reasoning"].lm_config["reasoning_effort"] = (
+        "high"
+    )
 
     return models
 
@@ -91,28 +92,27 @@ def test_model_simple_response(model_instances, model_name):
     elapsed = time.time() - start_time
 
     print(f"Response time: {elapsed:.2f} seconds")
-    print(f"Response length: {len(response)} characters")
-    print(f"Response sample: {response[:100]}...")
+    print(f"Response length: {len(response.raw_response)} characters")
+    print(f"Response sample: {response.raw_response[:100]}...")
 
     # Basic validation
-    assert isinstance(response, str)
-    assert len(response) > 0
+    assert isinstance(response.raw_response, str)
+    assert len(response.raw_response) > 0
     assert (
-        "Paris" in response
-    ), f"Expected 'Paris' in response, but got: {response[:200]}..."
+        "Paris" in response.raw_response
+    ), f"Expected 'Paris' in response, but got: {response.raw_response[:200]}..."
 
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize(
     "model_name",
     [
-
-
-        "claude-3-7-sonnet-latest",
+        # "o3-mini",
+        # "claude-3-7-sonnet-latest",
         "claude-3-7-sonnet-latest-high-reasoning",
-        "gemini-2-flash",
-        "gemma3-27b-it",
-        "gpt-4o-mini",
+        # "gemini-2-flash",
+        # "gemma3-27b-it",
+        # "gpt-4o-mini",
     ],
 )
 async def test_reasoning_question(model_instances, model_name):
@@ -131,24 +131,24 @@ async def test_reasoning_question(model_instances, model_name):
     elapsed = time.time() - start_time
 
     print(f"Response time: {elapsed:.2f} seconds")
-    print(f"Response length: {len(response)} characters")
-    print(f"Response sample: {response[:100]}...")
+    print(f"Response length: {len(response.raw_response)} characters")
+    print(f"Response sample: {response.raw_response[:100]}...")
 
     # Basic validation
-    assert isinstance(response, str)
-    assert len(response) > 0
+    assert isinstance(response.raw_response, str)
+    assert len(response.raw_response) > 0
 
 
 @pytest.mark.parametrize(
     "model_name",
     [
         "o3-mini",
-
-        "claude-3-7-sonnet-latest",
+        # "o3-mini",
+        #"claude-3-7-sonnet-latest",
         "claude-3-7-sonnet-latest-high-reasoning",
-        "gemini-2-flash",
-        "gemma3-27b-it",
-        "gpt-4o-mini",
+        # "gemini-2-flash",
+        # "gemma3-27b-it",
+        # "gpt-4o-mini",
     ],
 )
 def test_model_context_and_factuality(model_instances, model_name):
@@ -171,11 +171,11 @@ def test_model_context_and_factuality(model_instances, model_name):
 
     # Check if the response contains the correct information
     assert (
-        "1968" in response
-    ), f"Expected '1968' in response for founding year, but got: {response[:200]}..."
+        "1968" in response.raw_response
+    ), f"Expected '1968' in response for founding year, but got: {response.raw_response[:200]}..."
     assert (
-        "Robert Neptune" in response
-    ), f"Expected 'Robert Neptune' in response for mayor, but got: {response[:200]}..."
+        "Robert Neptune" in response.raw_response
+    ), f"Expected 'Robert Neptune' in response for mayor, but got: {response.raw_response[:200]}..."
 
 
 if __name__ == "__main__":
public_tests/test_openai_structured_outputs.py
ADDED
@@ -0,0 +1,106 @@
+import asyncio
+import unittest
+from typing import List
+
+from pydantic import BaseModel, Field
+
+from synth_ai.zyk.lms.core.main import LM
+
+
+# Define example structured output models
+class SimpleResponse(BaseModel):
+    message: str
+    confidence_between_zero_one: float = Field(
+        ..., description="Confidence level between 0 and 1"
+    )
+
+
+class ComplexResponse(BaseModel):
+    title: str
+    tags: List[str]
+    content: str
+
+
+class NestedResponse(BaseModel):
+    main_category: str
+    subcategories: List[str]
+    details: SimpleResponse
+
+
+class TestLMStructuredOutputs(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        # Initialize LMs for both forced_json and stringified_json modes
+        cls.lm_forced_json = LM(
+            model_name="gpt-4o-mini",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.7,
+            max_retries="Few",
+            structured_output_mode="forced_json",
+        )
+        cls.lm_stringified_json = LM(
+            model_name="gpt-4o-mini",
+            formatting_model_name="gpt-4o-mini",
+            temperature=0.7,
+            max_retries="Few",
+            structured_output_mode="stringified_json",
+        )
+
+    def test_sync_simple_response(self):
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                result = lm.respond_sync(
+                    system_message="You are a helpful assistant.",
+                    user_message="Give me a short greeting and your confidence level.",
+                    response_model=SimpleResponse,
+                )
+                self.assertIsInstance(result.structured_output, SimpleResponse)
+                self.assertIsInstance(result.structured_output.message, str)
+                self.assertIsInstance(
+                    result.structured_output.confidence_between_zero_one, float
+                )
+                self.assertGreaterEqual(
+                    result.structured_output.confidence_between_zero_one, 0
+                )
+                self.assertLessEqual(
+                    result.structured_output.confidence_between_zero_one, 1
+                )
+
+    def test_sync_complex_response(self):
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                result = lm.respond_sync(
+                    system_message="You are a content creator.",
+                    user_message="Create a short blog post about AI.",
+                    response_model=ComplexResponse,
+                )
+                self.assertIsInstance(result.structured_output, ComplexResponse)
+                self.assertIsInstance(result.structured_output.title, str)
+                self.assertIsInstance(result.structured_output.tags, list)
+                self.assertIsInstance(result.structured_output.content, str)
+
+    async def async_nested_response(self, lm):
+        result = await lm.respond_async(
+            system_message="You are a categorization expert.",
+            user_message="Categorize 'Python' and provide a brief description.",
+            response_model=NestedResponse,
+        )
+        self.assertIsInstance(result.structured_output, NestedResponse)
+        self.assertIsInstance(result.structured_output.main_category, str)
+        self.assertIsInstance(result.structured_output.subcategories, list)
+        self.assertIsInstance(result.structured_output.details, SimpleResponse)
+
+    def test_async_nested_response(self):
+        for lm in [self.lm_forced_json, self.lm_stringified_json]:  #
+            with self.subTest(
+                mode=lm.structured_output_handler.handler.structured_output_mode
+            ):
+                asyncio.run(self.async_nested_response(lm))
+
+
+if __name__ == "__main__":
+    unittest.main()
public_tests/test_reasoning_models.py
CHANGED
@@ -43,7 +43,7 @@ async def test_reasoning_effort():
     high_time = time.time() - start_time
 
     print(f"Time taken: {high_time:.2f} seconds")
-    print(f"Response length: {len(high_result)} characters")
+    print(f"Response length: {len(high_result.raw_response)} characters")
     print("-" * 60)
 
     # Create a separate instance for LOW reasoning
@@ -65,7 +65,7 @@ async def test_reasoning_effort():
     low_time = time.time() - start_time
 
     print(f"Time taken: {low_time:.2f} seconds")
-    print(f"Response length: {len(low_result)} characters")
+    print(f"Response length: {len(low_result.raw_response)} characters")
     print("-" * 60)
 
     # Print comparison
@@ -75,15 +75,17 @@ async def test_reasoning_effort():
     print(
         f"Difference: {high_time - low_time:.2f} seconds ({(high_time/low_time - 1)*100:.1f}% difference)"
     )
-    print(f"High response length: {len(high_result)} characters")
-    print(f"Low response length: {len(low_result)} characters")
-    print(
+    print(f"High response length: {len(high_result.raw_response)} characters")
+    print(f"Low response length: {len(low_result.raw_response)} characters")
+    print(
+        f"Response length ratio: {len(high_result.raw_response)/len(low_result.raw_response):.2f}x"
+    )
 
     # Print response samples
     print("\nHIGH Response Sample (first 200 chars):")
-    print(high_result[:200] + "...")
+    print(high_result.raw_response[:200] + "...")
     print("\nLOW Response Sample (first 200 chars):")
-    print(low_result[:200] + "...")
+    print(low_result.raw_response[:200] + "...")
 
 
 if __name__ == "__main__":