synth-ai 0.1.0.dev38__py3-none-any.whl → 0.1.0.dev49__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +3 -1
- {synth_ai-0.1.0.dev38.dist-info → synth_ai-0.1.0.dev49.dist-info}/METADATA +12 -11
- synth_ai-0.1.0.dev49.dist-info/RECORD +6 -0
- {synth_ai-0.1.0.dev38.dist-info → synth_ai-0.1.0.dev49.dist-info}/WHEEL +1 -1
- synth_ai-0.1.0.dev49.dist-info/top_level.txt +1 -0
- private_tests/try_synth_sdk.py +0 -1
- public_tests/test_agent.py +0 -538
- public_tests/test_all_structured_outputs.py +0 -196
- public_tests/test_anthropic_structured_outputs.py +0 -0
- public_tests/test_deepseek_structured_outputs.py +0 -0
- public_tests/test_deepseek_tools.py +0 -64
- public_tests/test_gemini_output.py +0 -188
- public_tests/test_gemini_structured_outputs.py +0 -106
- public_tests/test_models.py +0 -183
- public_tests/test_openai_structured_outputs.py +0 -106
- public_tests/test_reasoning_effort.py +0 -75
- public_tests/test_reasoning_models.py +0 -92
- public_tests/test_recursive_structured_outputs.py +0 -180
- public_tests/test_structured.py +0 -137
- public_tests/test_structured_outputs.py +0 -109
- public_tests/test_synth_sdk.py +0 -384
- public_tests/test_text.py +0 -160
- public_tests/test_tools.py +0 -319
- synth_ai/zyk/__init__.py +0 -3
- synth_ai/zyk/lms/__init__.py +0 -0
- synth_ai/zyk/lms/caching/__init__.py +0 -0
- synth_ai/zyk/lms/caching/constants.py +0 -1
- synth_ai/zyk/lms/caching/dbs.py +0 -0
- synth_ai/zyk/lms/caching/ephemeral.py +0 -72
- synth_ai/zyk/lms/caching/handler.py +0 -142
- synth_ai/zyk/lms/caching/initialize.py +0 -13
- synth_ai/zyk/lms/caching/persistent.py +0 -83
- synth_ai/zyk/lms/config.py +0 -8
- synth_ai/zyk/lms/core/__init__.py +0 -0
- synth_ai/zyk/lms/core/all.py +0 -47
- synth_ai/zyk/lms/core/exceptions.py +0 -9
- synth_ai/zyk/lms/core/main.py +0 -314
- synth_ai/zyk/lms/core/vendor_clients.py +0 -85
- synth_ai/zyk/lms/cost/__init__.py +0 -0
- synth_ai/zyk/lms/cost/monitor.py +0 -1
- synth_ai/zyk/lms/cost/statefulness.py +0 -1
- synth_ai/zyk/lms/structured_outputs/__init__.py +0 -0
- synth_ai/zyk/lms/structured_outputs/handler.py +0 -442
- synth_ai/zyk/lms/structured_outputs/inject.py +0 -314
- synth_ai/zyk/lms/structured_outputs/rehabilitate.py +0 -187
- synth_ai/zyk/lms/tools/base.py +0 -104
- synth_ai/zyk/lms/vendors/__init__.py +0 -0
- synth_ai/zyk/lms/vendors/base.py +0 -31
- synth_ai/zyk/lms/vendors/constants.py +0 -22
- synth_ai/zyk/lms/vendors/core/__init__.py +0 -0
- synth_ai/zyk/lms/vendors/core/anthropic_api.py +0 -413
- synth_ai/zyk/lms/vendors/core/gemini_api.py +0 -306
- synth_ai/zyk/lms/vendors/core/mistral_api.py +0 -327
- synth_ai/zyk/lms/vendors/core/openai_api.py +0 -185
- synth_ai/zyk/lms/vendors/local/__init__.py +0 -0
- synth_ai/zyk/lms/vendors/local/ollama.py +0 -0
- synth_ai/zyk/lms/vendors/openai_standard.py +0 -375
- synth_ai/zyk/lms/vendors/retries.py +0 -3
- synth_ai/zyk/lms/vendors/supported/__init__.py +0 -0
- synth_ai/zyk/lms/vendors/supported/deepseek.py +0 -73
- synth_ai/zyk/lms/vendors/supported/groq.py +0 -16
- synth_ai/zyk/lms/vendors/supported/ollama.py +0 -14
- synth_ai/zyk/lms/vendors/supported/together.py +0 -11
- synth_ai-0.1.0.dev38.dist-info/RECORD +0 -67
- synth_ai-0.1.0.dev38.dist-info/top_level.txt +0 -4
- tests/test_agent.py +0 -538
- tests/test_recursive_structured_outputs.py +0 -180
- tests/test_structured_outputs.py +0 -100
- {synth_ai-0.1.0.dev38.dist-info → synth_ai-0.1.0.dev49.dist-info}/licenses/LICENSE +0 -0
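Apart from metadata and wheel packaging files, every change in this release is a deletion: the bundled `synth_ai.zyk` LM layer and its accompanying `public_tests/`, `private_tests/`, and `tests/` suites are dropped. As a minimal sketch (assuming only the dev38 layout implied by the removed-file list above, and using import paths and constructor arguments that appear verbatim in the deleted tests reproduced below), these are imports that resolve against 0.1.0.dev38 but no longer ship in 0.1.0.dev49:

# Sketch only: paths taken from the removed-file list above and the deleted
# tests below; not an exhaustive inventory of the removed API surface.
from synth_ai.zyk import LM                    # backed by synth_ai/zyk/__init__.py (removed)
from synth_ai.zyk.lms.core.main import LM      # backed by synth_ai/zyk/lms/core/main.py (removed)

# The deleted tests use the two paths interchangeably with the same constructor;
# code depending on either will no longer import against the dev49 wheel.
lm = LM(model_name="gpt-4o-mini", formatting_model_name="gpt-4o-mini", temperature=0.7)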
public_tests/test_models.py
DELETED
@@ -1,183 +0,0 @@
import time

import pytest

from synth_ai.zyk import LM


# Use pytest fixtures instead of unittest setup
@pytest.fixture
def model_instances():
    """Initialize all model configurations for testing."""
    models = {
        # O3-mini standard
        "o3-mini": LM(
            model_name="o3-mini",
            formatting_model_name="gpt-4o-mini",
            temperature=0.7,
        ),
        # O3-mini with high reasoning
        "o3-mini-high-reasoning": LM(
            model_name="o3-mini",
            formatting_model_name="gpt-4o-mini",
            temperature=0.7,
        ),
        # Claude 3 Sonnet
        "claude-3-7-sonnet-latest": LM(
            model_name="claude-3-7-sonnet-latest",
            formatting_model_name="gpt-4o-mini",
            temperature=0.7,
        ),
        # Claude 3 Sonnet with high reasoning
        "claude-3-7-sonnet-latest-high-reasoning": LM(
            model_name="claude-3-7-sonnet-latest",
            formatting_model_name="gpt-4o-mini",
            temperature=0.7,
        ),
        # Gemini Flash
        "gemini-2-flash": LM(
            model_name="gemini-2-flash",
            formatting_model_name="gpt-4o-mini",
            temperature=0.7,
        ),
        # Gemma 3
        "gemma3-27b-it": LM(
            model_name="gemma3-27b-it",
            formatting_model_name="gpt-4o-mini",
            temperature=0.7,
        ),
        # GPT-4o mini
        "gpt-4o-mini": LM(
            model_name="gpt-4o-mini",
            formatting_model_name="gpt-4o-mini",
            temperature=0.7,
        ),
    }

    # Set reasoning_effort to "high" for specific models
    models["o3-mini-high-reasoning"].lm_config["reasoning_effort"] = "high"
    models["claude-3-7-sonnet-latest-high-reasoning"].lm_config["reasoning_effort"] = (
        "high"
    )

    return models


# Convert tests to pytest style
@pytest.mark.parametrize(
    "model_name",
    [
        "o3-mini",
        "o3-mini-high-reasoning",
        "claude-3-7-sonnet-latest",
        "claude-3-7-sonnet-latest-high-reasoning",
        "gemini-2-flash",
        "gemma3-27b-it",
        "gpt-4o-mini",
    ],
)
def test_model_simple_response(model_instances, model_name):
    """Test that models can generate a simple response."""
    lm = model_instances[model_name]
    system_message = "You are a helpful assistant."
    user_message = "What is the capital of France?"

    print(f"\nTesting {model_name}...")
    start_time = time.time()

    response = lm.respond_sync(
        system_message=system_message,
        user_message=user_message,
    )
    elapsed = time.time() - start_time

    print(f"Response time: {elapsed:.2f} seconds")
    print(f"Response length: {len(response.raw_response)} characters")
    print(f"Response sample: {response.raw_response[:100]}...")

    # Basic validation
    assert isinstance(response.raw_response, str)
    assert len(response.raw_response) > 0
    assert (
        "Paris" in response.raw_response
    ), f"Expected 'Paris' in response, but got: {response.raw_response[:200]}..."


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name",
    [
        # "o3-mini",
        # "claude-3-7-sonnet-latest",
        "claude-3-7-sonnet-latest-high-reasoning",
        # "gemini-2-flash",
        # "gemma3-27b-it",
        # "gpt-4o-mini",
    ],
)
async def test_reasoning_question(model_instances, model_name):
    """Test models with a question that requires reasoning."""
    lm = model_instances[model_name]
    system_message = "You are a helpful assistant."
    user_message = "If a train travels at 120 km/h and another train travels at 80 km/h in the opposite direction, how long will it take for them to be 500 km apart if they start 100 km apart?"

    print(f"\nTesting {model_name} with reasoning question...")
    start_time = time.time()

    response = await lm.respond_async(
        system_message=system_message,
        user_message=user_message,
    )
    elapsed = time.time() - start_time

    print(f"Response time: {elapsed:.2f} seconds")
    print(f"Response length: {len(response.raw_response)} characters")
    print(f"Response sample: {response.raw_response[:100]}...")

    # Basic validation
    assert isinstance(response.raw_response, str)
    assert len(response.raw_response) > 0


@pytest.mark.parametrize(
    "model_name",
    [
        "o3-mini",
        # "o3-mini",
        # "claude-3-7-sonnet-latest",
        "claude-3-7-sonnet-latest-high-reasoning",
        # "gemini-2-flash",
        # "gemma3-27b-it",
        # "gpt-4o-mini",
    ],
)
def test_model_context_and_factuality(model_instances, model_name):
    """Test models for factuality with a context-based question."""
    lm = model_instances[model_name]
    system_message = "You are a helpful assistant."
    context = """
    The city of Atlantis was founded in 1968 by marine archaeologist Dr. Sophia Maris.
    It has a population of 37,500 residents and is known for its underwater research facilities.
    The current mayor is Dr. Robert Neptune who was elected in 2020.
    """
    user_message = f"Based on the following information, when was Atlantis founded and who is its current mayor?\n\n{context}"

    print(f"\nTesting {model_name} for factuality...")

    response = lm.respond_sync(
        system_message=system_message,
        user_message=user_message,
    )

    # Check if the response contains the correct information
    assert (
        "1968" in response.raw_response
    ), f"Expected '1968' in response for founding year, but got: {response.raw_response[:200]}..."
    assert (
        "Robert Neptune" in response.raw_response
    ), f"Expected 'Robert Neptune' in response for mayor, but got: {response.raw_response[:200]}..."


if __name__ == "__main__":
    # For direct script execution
    pytest.main(["-xvs", __file__])
public_tests/test_openai_structured_outputs.py
DELETED
@@ -1,106 +0,0 @@
import asyncio
import unittest
from typing import List

from pydantic import BaseModel, Field

from synth_ai.zyk.lms.core.main import LM


# Define example structured output models
class SimpleResponse(BaseModel):
    message: str
    confidence_between_zero_one: float = Field(
        ..., description="Confidence level between 0 and 1"
    )


class ComplexResponse(BaseModel):
    title: str
    tags: List[str]
    content: str


class NestedResponse(BaseModel):
    main_category: str
    subcategories: List[str]
    details: SimpleResponse


class TestLMStructuredOutputs(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # Initialize LMs for both forced_json and stringified_json modes
        cls.lm_forced_json = LM(
            model_name="gpt-4o-mini",
            formatting_model_name="gpt-4o-mini",
            temperature=0.7,
            max_retries="Few",
            structured_output_mode="forced_json",
        )
        cls.lm_stringified_json = LM(
            model_name="gpt-4o-mini",
            formatting_model_name="gpt-4o-mini",
            temperature=0.7,
            max_retries="Few",
            structured_output_mode="stringified_json",
        )

    def test_sync_simple_response(self):
        for lm in [self.lm_forced_json, self.lm_stringified_json]:
            with self.subTest(
                mode=lm.structured_output_handler.handler.structured_output_mode
            ):
                result = lm.respond_sync(
                    system_message="You are a helpful assistant.",
                    user_message="Give me a short greeting and your confidence level.",
                    response_model=SimpleResponse,
                )
                self.assertIsInstance(result.structured_output, SimpleResponse)
                self.assertIsInstance(result.structured_output.message, str)
                self.assertIsInstance(
                    result.structured_output.confidence_between_zero_one, float
                )
                self.assertGreaterEqual(
                    result.structured_output.confidence_between_zero_one, 0
                )
                self.assertLessEqual(
                    result.structured_output.confidence_between_zero_one, 1
                )

    def test_sync_complex_response(self):
        for lm in [self.lm_forced_json, self.lm_stringified_json]:
            with self.subTest(
                mode=lm.structured_output_handler.handler.structured_output_mode
            ):
                result = lm.respond_sync(
                    system_message="You are a content creator.",
                    user_message="Create a short blog post about AI.",
                    response_model=ComplexResponse,
                )
                self.assertIsInstance(result.structured_output, ComplexResponse)
                self.assertIsInstance(result.structured_output.title, str)
                self.assertIsInstance(result.structured_output.tags, list)
                self.assertIsInstance(result.structured_output.content, str)

    async def async_nested_response(self, lm):
        result = await lm.respond_async(
            system_message="You are a categorization expert.",
            user_message="Categorize 'Python' and provide a brief description.",
            response_model=NestedResponse,
        )
        self.assertIsInstance(result.structured_output, NestedResponse)
        self.assertIsInstance(result.structured_output.main_category, str)
        self.assertIsInstance(result.structured_output.subcategories, list)
        self.assertIsInstance(result.structured_output.details, SimpleResponse)

    def test_async_nested_response(self):
        for lm in [self.lm_forced_json, self.lm_stringified_json]:
            with self.subTest(
                mode=lm.structured_output_handler.handler.structured_output_mode
            ):
                asyncio.run(self.async_nested_response(lm))


if __name__ == "__main__":
    unittest.main()
public_tests/test_reasoning_effort.py
DELETED
@@ -1,75 +0,0 @@
import asyncio
import pytest
from synth_ai.zyk.lms.core.main import LM

FORMATTING_MODEL_NAME = "gpt-4o-mini"

# List of reasoning models to test
# Note: Ensure these models are correctly configured and accessible in your environment
# And that they are included in REASONING_MODELS in main.py
REASONING_MODELS_TO_TEST = [
    "o4-mini",
    "claude-3-7-sonnet-latest",
    "gemini-2.5-pro",
]

# Define effort levels (adjust if specific models use different terms)
EFFORT_LEVELS = ["low", "medium", "high"]


@pytest.mark.parametrize("model_name", REASONING_MODELS_TO_TEST)
@pytest.mark.parametrize("effort", EFFORT_LEVELS)
@pytest.mark.asyncio
async def test_reasoning_effort_levels(model_name, effort):
    """
    Tests that the reasoning_effort parameter is accepted and calls succeed for various models and levels.

    Note: This test primarily checks for successful API calls across effort levels.
    Comparing output length or quality based on 'effort' is complex and model-specific.
    Anthropic's 'thinking' budget might correlate, but OpenAI/others might handle 'effort' differently or ignore it.
    """
    print(f"\nTesting model: {model_name} with effort: {effort}")
    lm = LM(
        model_name=model_name,
        formatting_model_name=FORMATTING_MODEL_NAME,
        temperature=0,
    )

    system_prompt = "You are a helpful assistant designed to explain complex topics simply."
    user_prompt = f"Explain the concept of quantum entanglement step by step using a simple analogy. Be concise if effort is low, detailed if high. Current effort: {effort}."

    try:
        result = await lm.respond_async(
            system_message=system_prompt,
            user_message=user_prompt,
            reasoning_effort=effort,  # Pass the effort level
        )

        response = result.raw_response

        # Assert call succeeded and response is non-empty
        assert isinstance(response, str), f"Model {model_name} (effort={effort}) failed. Response type: {type(response)}"
        assert len(response) > 0, f"Model {model_name} (effort={effort}): Response is empty."

        print(f" Response length (effort={effort}): {len(response)}")
        # print(f" Response snippet: {response[:100]}...")  # Optional: print snippet

    except Exception as e:
        pytest.fail(f"Model {model_name} (effort={effort}) raised an exception: {e}")

# Optional: Add a separate test to compare lengths between low and high effort for specific models if needed.

if __name__ == "__main__":
    async def main():
        print("Running effort tests directly...")
        test_models = REASONING_MODELS_TO_TEST
        effort_levels_to_run = EFFORT_LEVELS

        all_tasks = []
        for model in test_models:
            for effort_level in effort_levels_to_run:
                # Create a task for each combination
                all_tasks.append(test_reasoning_effort_levels(model, effort_level))

        # Run all tests concurrently (be mindful of rate limits)
        await asyncio.gather(*all_tasks)
        print("\nTest run finished.")

    asyncio.run(main())
public_tests/test_reasoning_models.py
DELETED
@@ -1,92 +0,0 @@
import asyncio
import os
import time

from pydantic import BaseModel

from synth_ai.zyk import LM


def get_openai_api_key():
    # Get API key from environment variable
    return os.environ.get("OPENAI_API_KEY")


class TestOutput(BaseModel):
    answer: str
    reasoning: str


async def test_reasoning_effort():
    # Define a question that requires reasoning
    question = "If a train travels at 120 km/h and another train travels at 80 km/h in the opposite direction, how long will it take for them to be 500 km apart if they start 100 km apart?"

    print("Testing o3-mini with different reasoning_effort settings:")
    print("-" * 60)

    # Create an instance for HIGH reasoning
    print("Testing with reasoning_effort='high'")
    lm_high = LM(
        model_name="o3-mini",
        formatting_model_name="gpt-4o-mini",
        temperature=1,
    )
    # Set reasoning_effort in lm_config
    lm_high.lm_config["reasoning_effort"] = "high"

    # Time the API call
    start_time = time.time()
    high_result = await lm_high.respond_async(
        system_message="You are a helpful assistant.",
        user_message=question,
    )
    high_time = time.time() - start_time

    print(f"Time taken: {high_time:.2f} seconds")
    print(f"Response length: {len(high_result.raw_response)} characters")
    print("-" * 60)

    # Create a separate instance for LOW reasoning
    print("Testing with reasoning_effort='low'")
    lm_low = LM(
        model_name="o3-mini",
        formatting_model_name="gpt-4o-mini",
        temperature=1,
    )
    # Set reasoning_effort in lm_config
    lm_low.lm_config["reasoning_effort"] = "low"

    # Time the API call
    start_time = time.time()
    low_result = await lm_low.respond_async(
        system_message="You are a helpful assistant.",
        user_message=question,
    )
    low_time = time.time() - start_time

    print(f"Time taken: {low_time:.2f} seconds")
    print(f"Response length: {len(low_result.raw_response)} characters")
    print("-" * 60)

    # Print comparison
    print("Results comparison:")
    print(f"High reasoning time: {high_time:.2f} seconds")
    print(f"Low reasoning time: {low_time:.2f} seconds")
    print(
        f"Difference: {high_time - low_time:.2f} seconds ({(high_time/low_time - 1)*100:.1f}% difference)"
    )
    print(f"High response length: {len(high_result.raw_response)} characters")
    print(f"Low response length: {len(low_result.raw_response)} characters")
    print(
        f"Response length ratio: {len(high_result.raw_response)/len(low_result.raw_response):.2f}x"
    )

    # Print response samples
    print("\nHIGH Response Sample (first 200 chars):")
    print(high_result.raw_response[:200] + "...")
    print("\nLOW Response Sample (first 200 chars):")
    print(low_result.raw_response[:200] + "...")


if __name__ == "__main__":
    asyncio.run(test_reasoning_effort())
public_tests/test_recursive_structured_outputs.py
DELETED
@@ -1,180 +0,0 @@
import asyncio
import unittest
from typing import List

from pydantic import BaseModel

from synth_ai.zyk.lms.core.main import LM


# Define example structured output models
class SimpleResponse(BaseModel):
    message: str
    confidence: float


class ComplexResponse(BaseModel):
    title: str
    tags: List[str]
    content: str


class NestedResponse(BaseModel):
    main_category: str
    subcategories: List[str]
    details: SimpleResponse


# Define nested structured output models
class Address(BaseModel):
    street: str
    city: str
    country: str


class PersonalInfo(BaseModel):
    name: str
    age: int
    address: Address


class WorkInfo(BaseModel):
    company: str
    position: str
    years_experience: int


class NestedPersonResponse(BaseModel):
    personal: PersonalInfo
    work: WorkInfo
    skills: List[str]


class ProjectDetails(BaseModel):
    name: str
    description: str
    technologies: List[str]


class NestedPortfolioResponse(BaseModel):
    developer: PersonalInfo
    projects: List[ProjectDetails]
    total_experience: int


class NestedCompanyResponse(BaseModel):
    name: str
    founded: int
    headquarters: Address
    employees: List[PersonalInfo]
    main_products: List[str]


class TestLMStructuredOutputs(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # Initialize the LM once for all tests
        cls.lm = LM(
            model_name="gpt-4o-mini",
            formatting_model_name="gpt-4o-mini",
            temperature=0.7,
            max_retries="Few",
            structured_output_mode="forced_json",
        )

    def test_sync_simple_response(self):
        result = self.lm.respond_sync(
            system_message="You are a helpful assistant.",
            user_message="Give me a short greeting and your confidence level.",
            response_model=SimpleResponse,
        )
        self.assertIsInstance(result.structured_output, SimpleResponse)
        self.assertIsInstance(result.structured_output.message, str)
        self.assertIsInstance(result.structured_output.confidence, float)
        self.assertGreaterEqual(result.structured_output.confidence, 0)
        self.assertLessEqual(result.structured_output.confidence, 1)

    def test_sync_complex_response(self):
        result = self.lm.respond_sync(
            system_message="You are a content creator.",
            user_message="Create a short blog post about AI.",
            response_model=ComplexResponse,
        )
        self.assertIsInstance(result.structured_output, ComplexResponse)
        self.assertIsInstance(result.structured_output.title, str)
        self.assertIsInstance(result.structured_output.tags, list)
        self.assertIsInstance(result.structured_output.content, str)

    async def async_nested_response(self):
        result = await self.lm.respond_async(
            system_message="You are a categorization expert.",
            user_message="Categorize 'Python' and provide a brief description.",
            response_model=NestedResponse,
        )
        self.assertIsInstance(result.structured_output, NestedResponse)
        self.assertIsInstance(result.structured_output.main_category, str)
        self.assertIsInstance(result.structured_output.subcategories, list)
        self.assertIsInstance(result.structured_output.details, SimpleResponse)

    def test_async_nested_response(self):
        asyncio.run(self.async_nested_response())


class TestLMNestedStructuredOutputs(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # Initialize the LM once for all tests
        cls.lm = LM(
            model_name="gpt-4o-mini",
            formatting_model_name="gpt-4o-mini",
            temperature=0.7,
            max_retries="Few",
            structured_output_mode="forced_json",
        )

    def test_sync_nested_person_response(self):
        result = self.lm.respond_sync(
            system_message="You are an HR assistant.",
            user_message="Provide detailed information about a fictional employee named John Doe.",
            response_model=NestedPersonResponse,
        )
        self.assertIsInstance(result.structured_output, NestedPersonResponse)
        self.assertIsInstance(result.structured_output.personal, PersonalInfo)
        self.assertIsInstance(result.structured_output.personal.address, Address)
        self.assertIsInstance(result.structured_output.work, WorkInfo)
        self.assertIsInstance(result.structured_output.skills, list)

    def test_sync_nested_portfolio_response(self):
        result = self.lm.respond_sync(
            system_message="You are a portfolio manager.",
            user_message="Create a portfolio for a fictional software developer with multiple projects.",
            response_model=NestedPortfolioResponse,
        )
        self.assertIsInstance(result.structured_output, NestedPortfolioResponse)
        self.assertIsInstance(result.structured_output.developer, PersonalInfo)
        self.assertIsInstance(result.structured_output.developer.address, Address)
        self.assertIsInstance(result.structured_output.projects, list)
        for project in result.structured_output.projects:
            self.assertIsInstance(project, ProjectDetails)
        self.assertIsInstance(result.structured_output.total_experience, int)

    async def async_nested_company_response(self):
        result = await self.lm.respond_async(
            system_message="You are a company information specialist.",
            user_message="Provide detailed information about a fictional tech company.",
            response_model=NestedCompanyResponse,
        )
        self.assertIsInstance(result.structured_output, NestedCompanyResponse)
        self.assertIsInstance(result.structured_output.headquarters, Address)
        self.assertIsInstance(result.structured_output.employees, list)
        for employee in result.structured_output.employees:
            self.assertIsInstance(employee, PersonalInfo)
            self.assertIsInstance(employee.address, Address)
        self.assertIsInstance(result.structured_output.main_products, list)

    def test_async_nested_company_response(self):
        asyncio.run(self.async_nested_company_response())


if __name__ == "__main__":
    unittest.main()