synth-ai 0.1.0.dev38__py3-none-any.whl → 0.1.0.dev49__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +3 -1
- {synth_ai-0.1.0.dev38.dist-info → synth_ai-0.1.0.dev49.dist-info}/METADATA +12 -11
- synth_ai-0.1.0.dev49.dist-info/RECORD +6 -0
- {synth_ai-0.1.0.dev38.dist-info → synth_ai-0.1.0.dev49.dist-info}/WHEEL +1 -1
- synth_ai-0.1.0.dev49.dist-info/top_level.txt +1 -0
- private_tests/try_synth_sdk.py +0 -1
- public_tests/test_agent.py +0 -538
- public_tests/test_all_structured_outputs.py +0 -196
- public_tests/test_anthropic_structured_outputs.py +0 -0
- public_tests/test_deepseek_structured_outputs.py +0 -0
- public_tests/test_deepseek_tools.py +0 -64
- public_tests/test_gemini_output.py +0 -188
- public_tests/test_gemini_structured_outputs.py +0 -106
- public_tests/test_models.py +0 -183
- public_tests/test_openai_structured_outputs.py +0 -106
- public_tests/test_reasoning_effort.py +0 -75
- public_tests/test_reasoning_models.py +0 -92
- public_tests/test_recursive_structured_outputs.py +0 -180
- public_tests/test_structured.py +0 -137
- public_tests/test_structured_outputs.py +0 -109
- public_tests/test_synth_sdk.py +0 -384
- public_tests/test_text.py +0 -160
- public_tests/test_tools.py +0 -319
- synth_ai/zyk/__init__.py +0 -3
- synth_ai/zyk/lms/__init__.py +0 -0
- synth_ai/zyk/lms/caching/__init__.py +0 -0
- synth_ai/zyk/lms/caching/constants.py +0 -1
- synth_ai/zyk/lms/caching/dbs.py +0 -0
- synth_ai/zyk/lms/caching/ephemeral.py +0 -72
- synth_ai/zyk/lms/caching/handler.py +0 -142
- synth_ai/zyk/lms/caching/initialize.py +0 -13
- synth_ai/zyk/lms/caching/persistent.py +0 -83
- synth_ai/zyk/lms/config.py +0 -8
- synth_ai/zyk/lms/core/__init__.py +0 -0
- synth_ai/zyk/lms/core/all.py +0 -47
- synth_ai/zyk/lms/core/exceptions.py +0 -9
- synth_ai/zyk/lms/core/main.py +0 -314
- synth_ai/zyk/lms/core/vendor_clients.py +0 -85
- synth_ai/zyk/lms/cost/__init__.py +0 -0
- synth_ai/zyk/lms/cost/monitor.py +0 -1
- synth_ai/zyk/lms/cost/statefulness.py +0 -1
- synth_ai/zyk/lms/structured_outputs/__init__.py +0 -0
- synth_ai/zyk/lms/structured_outputs/handler.py +0 -442
- synth_ai/zyk/lms/structured_outputs/inject.py +0 -314
- synth_ai/zyk/lms/structured_outputs/rehabilitate.py +0 -187
- synth_ai/zyk/lms/tools/base.py +0 -104
- synth_ai/zyk/lms/vendors/__init__.py +0 -0
- synth_ai/zyk/lms/vendors/base.py +0 -31
- synth_ai/zyk/lms/vendors/constants.py +0 -22
- synth_ai/zyk/lms/vendors/core/__init__.py +0 -0
- synth_ai/zyk/lms/vendors/core/anthropic_api.py +0 -413
- synth_ai/zyk/lms/vendors/core/gemini_api.py +0 -306
- synth_ai/zyk/lms/vendors/core/mistral_api.py +0 -327
- synth_ai/zyk/lms/vendors/core/openai_api.py +0 -185
- synth_ai/zyk/lms/vendors/local/__init__.py +0 -0
- synth_ai/zyk/lms/vendors/local/ollama.py +0 -0
- synth_ai/zyk/lms/vendors/openai_standard.py +0 -375
- synth_ai/zyk/lms/vendors/retries.py +0 -3
- synth_ai/zyk/lms/vendors/supported/__init__.py +0 -0
- synth_ai/zyk/lms/vendors/supported/deepseek.py +0 -73
- synth_ai/zyk/lms/vendors/supported/groq.py +0 -16
- synth_ai/zyk/lms/vendors/supported/ollama.py +0 -14
- synth_ai/zyk/lms/vendors/supported/together.py +0 -11
- synth_ai-0.1.0.dev38.dist-info/RECORD +0 -67
- synth_ai-0.1.0.dev38.dist-info/top_level.txt +0 -4
- tests/test_agent.py +0 -538
- tests/test_recursive_structured_outputs.py +0 -180
- tests/test_structured_outputs.py +0 -100
- {synth_ai-0.1.0.dev38.dist-info → synth_ai-0.1.0.dev49.dist-info}/licenses/LICENSE +0 -0
synth_ai/zyk/lms/caching/handler.py
DELETED
@@ -1,142 +0,0 @@
import hashlib
from typing import Any, Dict, List, Optional, Type

from pydantic import BaseModel

from synth_ai.zyk.lms.caching.ephemeral import EphemeralCache
from synth_ai.zyk.lms.caching.persistent import PersistentCache
from synth_ai.zyk.lms.tools.base import BaseTool
from synth_ai.zyk.lms.vendors.base import BaseLMResponse

persistent_cache = PersistentCache()
ephemeral_cache = EphemeralCache()


def map_params_to_key(
    messages: List[Dict],
    model: str,
    temperature: float,
    response_model: Optional[Type[BaseModel]],
    tools: Optional[List] = None,
    reasoning_effort: Optional[str] = None,
    max_tokens: Optional[int] = None,
) -> str:
    if not all([isinstance(msg["content"], str) for msg in messages]):
        normalized_messages = "".join([str(msg["content"]) for msg in messages])
    else:
        normalized_messages = "".join([msg["content"] for msg in messages])
    normalized_model = model
    normalized_temperature = f"{temperature:.2f}"[:4]
    normalized_response_model = str(response_model.schema()) if response_model else ""
    normalized_reasoning_effort = str(reasoning_effort) if reasoning_effort is not None else ""
    normalized_max_tokens = str(max_tokens) if max_tokens is not None else ""

    # Normalize tools if present
    normalized_tools = ""
    if tools:
        tool_schemas = []
        for tool in tools:
            tool_schema = {}
            try:
                tool_schema = {
                    "name": tool.name,
                    "description": tool.description,
                    "arguments": tool.arguments.model_json_schema(),
                }
            except AttributeError:
                if isinstance(tool, dict) and "name" in tool:
                    tool_schema = {
                        "name": tool.get("name", ""),
                        "description": tool.get("description", ""),
                        "parameters": tool.get("parameters", {}),
                    }

            tool_schemas.append(str(tool_schema))
        normalized_tools = "".join(tool_schemas)

    return hashlib.sha256(
        (
            normalized_messages
            + normalized_model
            + normalized_temperature
            + normalized_response_model
            + normalized_tools
            + normalized_reasoning_effort
            + normalized_max_tokens
        ).encode()
    ).hexdigest()


class CacheHandler:
    use_persistent_store: bool = False
    use_ephemeral_store: bool = True

    def __init__(
        self, use_persistent_store: bool = False, use_ephemeral_store: bool = True
    ):
        self.use_persistent_store = use_persistent_store
        self.use_ephemeral_store = use_ephemeral_store

    def _validate_messages(self, messages: List[Dict[str, Any]]) -> None:
        """Validate that messages are in the correct format."""
        assert all(
            [type(msg["content"]) == str for msg in messages]
        ), "All message contents must be strings"

    def hit_managed_cache(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        lm_config: Dict[str, Any],
        tools: Optional[List] = None,
        reasoning_effort: Optional[str] = None,
    ) -> Optional[BaseLMResponse]:
        """Hit the cache with the given key."""
        self._validate_messages(messages)
        assert type(lm_config) == dict, "lm_config must be a dictionary"
        key = map_params_to_key(
            messages,
            model,
            lm_config.get("temperature", 0.0),
            lm_config.get("response_model", None),
            tools,
            reasoning_effort,
            lm_config.get("max_tokens"),
        )
        if self.use_persistent_store:
            return persistent_cache.hit_cache(
                key=key, response_model=lm_config.get("response_model", None)
            )
        elif self.use_ephemeral_store:
            return ephemeral_cache.hit_cache(
                key=key, response_model=lm_config.get("response_model", None)
            )
        else:
            return None

    def add_to_managed_cache(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        lm_config: Dict[str, Any],
        output: BaseLMResponse,
        tools: Optional[List] = None,
        reasoning_effort: Optional[str] = None,
    ) -> None:
        """Add the given output to the cache."""
        self._validate_messages(messages)
        assert type(output) == BaseLMResponse, "output must be a BaseLMResponse"
        assert type(lm_config) == dict, "lm_config must be a dictionary"
        key = map_params_to_key(
            messages,
            model,
            lm_config.get("temperature", 0.0),
            lm_config.get("response_model", None),
            tools,
            reasoning_effort,
            lm_config.get("max_tokens"),
        )
        if self.use_persistent_store:
            persistent_cache.add_to_cache(key, output)
        if self.use_ephemeral_store:
            ephemeral_cache.add_to_cache(key, output)
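For context on the removed caching layer, here is a minimal, illustrative sketch of how CacheHandler was typically driven. The model name and message contents are made up, and the BaseLMResponse keyword arguments are assumed from the persistent-cache code shown later in this diff.

from synth_ai.zyk.lms.caching.handler import CacheHandler
from synth_ai.zyk.lms.vendors.base import BaseLMResponse

handler = CacheHandler(use_ephemeral_store=True, use_persistent_store=False)

messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Hello"},
]
lm_config = {"temperature": 0.0}

# Look up a prior response before hitting a vendor API.
cached = handler.hit_managed_cache(
    model="gpt-4o-mini", messages=messages, lm_config=lm_config
)

if cached is None:
    # ...call the vendor here, then store the result for next time.
    output = BaseLMResponse(raw_response="Hi!", structured_output=None, tool_calls=None)
    handler.add_to_managed_cache(
        model="gpt-4o-mini", messages=messages, lm_config=lm_config, output=output
    )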
synth_ai/zyk/lms/caching/initialize.py
DELETED
@@ -1,13 +0,0 @@
from synth_ai.zyk.lms.caching.handler import CacheHandler

cache_handler = CacheHandler(use_ephemeral_store=True, use_persistent_store=True)
ephemeral_cache_handler = CacheHandler(
    use_ephemeral_store=True, use_persistent_store=False
)


def get_cache_handler(use_ephemeral_cache_only: bool = False):
    if use_ephemeral_cache_only:
        return ephemeral_cache_handler
    else:
        return cache_handler
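A short illustration of how callers selected a handler through the removed helper (keyword as defined above):

from synth_ai.zyk.lms.caching.initialize import get_cache_handler

default_handler = get_cache_handler()                               # ephemeral + persistent stores
ephemeral_only = get_cache_handler(use_ephemeral_cache_only=True)   # ephemeral store only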
synth_ai/zyk/lms/caching/persistent.py
DELETED
@@ -1,83 +0,0 @@
import json
import os
import sqlite3
from dataclasses import dataclass
from typing import Optional, Type, Union

from pydantic import BaseModel

from synth_ai.zyk.lms.vendors.base import BaseLMResponse


@dataclass
class PersistentCache:
    def __init__(self, db_path: str = ".cache/persistent_cache.db"):
        os.makedirs(os.path.dirname(db_path), exist_ok=True)
        self.conn = sqlite3.connect(db_path)
        self.cursor = self.conn.cursor()
        self.cursor.execute("""CREATE TABLE IF NOT EXISTS cache
                              (key TEXT PRIMARY KEY, response TEXT)""")
        self.conn.commit()

    def hit_cache(
        self, key: str, response_model: Optional[Type[BaseModel]] = None
    ) -> Optional[BaseLMResponse]:
        self.cursor.execute("SELECT response FROM cache WHERE key = ?", (key,))
        result = self.cursor.fetchone()
        if not result:
            return None

        try:
            cache_data = json.loads(result[0])
        except json.JSONDecodeError:
            # Handle legacy string responses
            return BaseLMResponse(
                raw_response=result[0], structured_output=None, tool_calls=None
            )

        if not isinstance(cache_data, dict):
            return BaseLMResponse(
                raw_response=cache_data, structured_output=None, tool_calls=None
            )

        raw_response = cache_data.get("raw_response")
        tool_calls = cache_data.get("tool_calls")
        structured_output = cache_data.get("structured_output")

        if response_model and structured_output:
            structured_output = response_model(**structured_output)

        return BaseLMResponse(
            raw_response=raw_response,
            structured_output=structured_output,
            tool_calls=tool_calls,
        )

    def add_to_cache(self, key: str, response: Union[BaseLMResponse, str]) -> None:
        if isinstance(response, str):
            cache_data = response
        elif isinstance(response, BaseLMResponse):
            cache_data = {
                "raw_response": response.raw_response
                if response.raw_response is not None
                else None,
                "tool_calls": response.tool_calls
                if response.tool_calls is not None
                else None,
                "structured_output": (
                    response.structured_output.model_dump()
                    if response.structured_output is not None
                    else None
                ),
            }
        else:
            raise ValueError(f"Invalid response type: {type(response)}")

        self.cursor.execute(
            "INSERT OR REPLACE INTO cache (key, response) VALUES (?, ?)",
            (key, json.dumps(cache_data)),
        )
        self.conn.commit()

    def close(self) -> None:
        self.conn.close()
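A hedged sketch of the SQLite round trip the removed PersistentCache implemented; the Weather model, db_path, and cache key below are illustrative only (real keys came from map_params_to_key in handler.py):

from pydantic import BaseModel

from synth_ai.zyk.lms.caching.persistent import PersistentCache
from synth_ai.zyk.lms.vendors.base import BaseLMResponse


class Weather(BaseModel):
    city: str
    temperature_c: float


cache = PersistentCache(db_path=".cache/example_cache.db")

# Structured outputs are serialized with model_dump() on write...
cache.add_to_cache(
    "example-key",
    BaseLMResponse(
        raw_response='{"city": "Paris", "temperature_c": 21.0}',
        structured_output=Weather(city="Paris", temperature_c=21.0),
        tool_calls=None,
    ),
)

# ...and rehydrated into the supplied response_model on read.
hit = cache.hit_cache("example-key", response_model=Weather)
cache.close()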
synth_ai/zyk/lms/config.py
DELETED
File without changes
synth_ai/zyk/lms/core/all.py
DELETED
@@ -1,47 +0,0 @@
from synth_ai.zyk.lms.vendors.core.anthropic_api import AnthropicAPI
from synth_ai.zyk.lms.vendors.core.gemini_api import GeminiAPI
from synth_ai.zyk.lms.vendors.core.openai_api import (
    OpenAIPrivate,
    OpenAIStructuredOutputClient,
)
from synth_ai.zyk.lms.vendors.supported.deepseek import DeepSeekAPI
from synth_ai.zyk.lms.vendors.supported.together import TogetherAPI
from synth_ai.zyk.lms.vendors.supported.groq import GroqAPI
from synth_ai.zyk.lms.vendors.core.mistral_api import MistralAPI


class OpenAIClient(OpenAIPrivate):
    def __init__(self, synth_logging: bool = True):
        super().__init__(
            synth_logging=synth_logging,
        )


class AnthropicClient(AnthropicAPI):
    def __init__(self):
        super().__init__()


class GeminiClient(GeminiAPI):
    def __init__(self):
        super().__init__()


class DeepSeekClient(DeepSeekAPI):
    def __init__(self):
        super().__init__()


class TogetherClient(TogetherAPI):
    def __init__(self):
        super().__init__()


class GroqClient(GroqAPI):
    def __init__(self):
        super().__init__()


class MistralClient(MistralAPI):
    def __init__(self):
        super().__init__()
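These wrappers only pin constructor defaults on the underlying vendor API classes; an illustrative instantiation (assuming the relevant API keys are configured in the environment):

from synth_ai.zyk.lms.core.all import AnthropicClient, OpenAIClient

openai_client = OpenAIClient(synth_logging=False)  # passes the flag through to OpenAIPrivate
anthropic_client = AnthropicClient()               # takes no extra configuration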
synth_ai/zyk/lms/core/main.py
DELETED
@@ -1,314 +0,0 @@
from typing import Any, Dict, List, Literal, Optional

from pydantic import BaseModel, Field

from synth_ai.zyk.lms.core.exceptions import StructuredOutputCoercionFailureException
from synth_ai.zyk.lms.core.vendor_clients import (
    anthropic_naming_regexes,
    get_client,
    openai_naming_regexes,
)
from synth_ai.zyk.lms.structured_outputs.handler import StructuredOutputHandler
from synth_ai.zyk.lms.vendors.base import BaseLMResponse, VendorBase

REASONING_MODELS = ["deepseek-reasoner", "o1-mini", "o1-preview", "o1", "o3", "o4-mini", "claude-3-7-latest-thinking", "gemini-1.5-pro-latest"]


def build_messages(
    sys_msg: str,
    user_msg: str,
    images_bytes: List = [],
    model_name: Optional[str] = None,
) -> List[Dict]:
    if len(images_bytes) > 0 and any(
        regex.match(model_name) for regex in openai_naming_regexes
    ):
        return [
            {"role": "system", "content": sys_msg},
            {
                "role": "user",
                "content": [{"type": "text", "text": user_msg}]
                + [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{image_bytes}"},
                    }
                    for image_bytes in images_bytes
                ],
            },
        ]
    elif len(images_bytes) > 0 and any(
        regex.match(model_name) for regex in anthropic_naming_regexes
    ):
        system_info = {"role": "system", "content": sys_msg}
        user_info = {
            "role": "user",
            "content": [{"type": "text", "text": user_msg}]
            + [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/png",
                        "data": image_bytes,
                    },
                }
                for image_bytes in images_bytes
            ],
        }
        return [system_info, user_info]
    elif len(images_bytes) > 0:
        raise ValueError("Images are not yet supported for this model")
    else:
        return [
            {"role": "system", "content": sys_msg},
            {"role": "user", "content": user_msg},
        ]


class LM:
    # if str
    model_name: str
    client: VendorBase
    lm_config: Dict[str, Any]
    structured_output_handler: StructuredOutputHandler

    def __init__(
        self,
        model_name: str,
        formatting_model_name: str,
        temperature: float,
        max_retries: Literal["None", "Few", "Many"] = "Few",
        structured_output_mode: Literal[
            "stringified_json", "forced_json"
        ] = "stringified_json",
        synth_logging: bool = True,
    ):
        # print("Structured output mode", structured_output_mode)
        self.client = get_client(
            model_name,
            with_formatting=structured_output_mode == "forced_json",
            synth_logging=synth_logging,
        )
        # print(self.client.__class__)

        # Determine if the primary model supports forced JSON or specific formatting modes
        # primary_model_supports_forced_json = self.client.supports_forced_json()

        # Choose the structured output mode based on primary model capability
        # effective_structured_output_mode = structured_output_mode
        # if not primary_model_supports_forced_json and structured_output_mode == "forced_json":
        #     # Fallback or adjust if the primary model doesn't support the desired mode
        #     # For simplicity, let's assume we might want to fallback to stringified_json or handle differently
        #     # print(f"Warning: Model {model_name} does not support forced_json. Adjusting strategy.")
        #     effective_structured_output_mode = "stringified_json"  # Example fallback

        formatting_client = get_client(formatting_model_name, with_formatting=True)

        max_retries_dict = {"None": 0, "Few": 2, "Many": 5}
        # Use the effective mode for the primary handler
        self.structured_output_handler = StructuredOutputHandler(
            self.client,
            formatting_client,
            structured_output_mode,  # Use original mode
            {"max_retries": max_retries_dict.get(max_retries, 2)},
        )
        # Always have a forced_json backup handler ready
        self.backup_structured_output_handler = StructuredOutputHandler(
            self.client,  # This should ideally use a client capable of forced_json if primary isn't
            formatting_client,  # Formatting client must support forced_json
            "forced_json",
            {"max_retries": max_retries_dict.get(max_retries, 2)},
        )
        # Override temperature to 1 for reasoning models
        effective_temperature = 1.0 if model_name in REASONING_MODELS else temperature
        self.lm_config = {"temperature": effective_temperature}
        self.model_name = model_name

    def respond_sync(
        self,
        system_message: Optional[str] = None,
        user_message: Optional[str] = None,
        messages: Optional[List[Dict]] = None,
        images_as_bytes: List[Any] = [],
        response_model: Optional[BaseModel] = None,
        use_ephemeral_cache_only: bool = False,
        tools: Optional[List] = None,
        reasoning_effort: Optional[str] = None,
    ) -> BaseLMResponse:
        assert (system_message is None) == (
            user_message is None
        ), "Must provide both system_message and user_message or neither"
        assert (
            (messages is None) != (system_message is None)
        ), "Must provide either messages or system_message/user_message pair, but not both"
        assert not (
            response_model and tools
        ), "Cannot provide both response_model and tools"

        current_lm_config = self.lm_config.copy()
        if self.model_name in REASONING_MODELS:
            # Removed logic that set max_tokens based on reasoning_tokens
            # Vendor clients will now receive reasoning_effort directly
            pass

        if messages is None:
            messages = build_messages(
                system_message, user_message, images_as_bytes, self.model_name
            )
        result = None
        if response_model:
            try:
                result = self.structured_output_handler.call_sync(
                    messages=messages,
                    model=self.model_name,
                    response_model=response_model,
                    use_ephemeral_cache_only=use_ephemeral_cache_only,
                    lm_config=current_lm_config,
                    reasoning_effort=reasoning_effort,
                )
            except StructuredOutputCoercionFailureException:
                # print("Falling back to backup handler")
                result = self.backup_structured_output_handler.call_sync(
                    messages=messages,
                    model=self.model_name,
                    response_model=response_model,
                    use_ephemeral_cache_only=use_ephemeral_cache_only,
                    lm_config=current_lm_config,
                    reasoning_effort=reasoning_effort,
                )
        else:
            result = self.client._hit_api_sync(
                messages=messages,
                model=self.model_name,
                lm_config=current_lm_config,
                use_ephemeral_cache_only=use_ephemeral_cache_only,
                tools=tools,
                reasoning_effort=reasoning_effort,
            )
        assert isinstance(result.raw_response, str), "Raw response must be a string"
        assert (
            isinstance(result.structured_output, BaseModel)
            or result.structured_output is None
        ), "Structured output must be a Pydantic model or None"
        assert (
            isinstance(result.tool_calls, list) or result.tool_calls is None
        ), "Tool calls must be a list or None"
        return result

    async def respond_async(
        self,
        system_message: Optional[str] = None,
        user_message: Optional[str] = None,
        messages: Optional[List[Dict]] = None,
        images_as_bytes: List[Any] = [],
        response_model: Optional[BaseModel] = None,
        use_ephemeral_cache_only: bool = False,
        tools: Optional[List] = None,
        reasoning_effort: Optional[str] = None,
    ) -> BaseLMResponse:
        # "In respond_async")
        assert (system_message is None) == (
            user_message is None
        ), "Must provide both system_message and user_message or neither"
        assert (
            (messages is None) != (system_message is None)
        ), "Must provide either messages or system_message/user_message pair, but not both"

        assert not (
            response_model and tools
        ), "Cannot provide both response_model and tools"

        current_lm_config = self.lm_config.copy()
        if self.model_name in REASONING_MODELS:
            # Removed logic that set max_tokens based on reasoning_tokens
            # Vendor clients will now receive reasoning_effort directly
            pass

        if messages is None:
            messages = build_messages(
                system_message, user_message, images_as_bytes, self.model_name
            )
        result = None
        if response_model:
            try:
                # print("Trying structured output handler")
                result = await self.structured_output_handler.call_async(
                    messages=messages,
                    model=self.model_name,
                    response_model=response_model,
                    use_ephemeral_cache_only=use_ephemeral_cache_only,
                    lm_config=current_lm_config,
                    reasoning_effort=reasoning_effort,
                )
            except StructuredOutputCoercionFailureException:
                # print("Falling back to backup handler")
                result = await self.backup_structured_output_handler.call_async(
                    messages=messages,
                    model=self.model_name,
                    response_model=response_model,
                    use_ephemeral_cache_only=use_ephemeral_cache_only,
                    lm_config=current_lm_config,
                    reasoning_effort=reasoning_effort,
                )
        else:
            # print("Calling API no response model")
            result = await self.client._hit_api_async(
                messages=messages,
                model=self.model_name,
                lm_config=current_lm_config,
                use_ephemeral_cache_only=use_ephemeral_cache_only,
                tools=tools,
                reasoning_effort=reasoning_effort,
            )
        assert isinstance(result.raw_response, str), "Raw response must be a string"
        assert (
            isinstance(result.structured_output, BaseModel)
            or result.structured_output is None
        ), "Structured output must be a Pydantic model or None"
        assert (
            isinstance(result.tool_calls, list) or result.tool_calls is None
        ), "Tool calls must be a list or None"
        return result


if __name__ == "__main__":
    import asyncio

    # Update json instructions to handle nested pydantic?
    class Thought(BaseModel):
        argument_keys: List[str] = Field(description="The keys of the arguments")
        argument_values: List[str] = Field(
            description="Stringified JSON for the values of the arguments"
        )

    class TestModel(BaseModel):
        emotion: str = Field(description="The emotion expressed")
        concern: str = Field(description="The concern expressed")
        action: str = Field(description="The action to be taken")
        thought: Thought = Field(description="The thought process")

        class Config:
            schema_extra = {"required": ["thought", "emotion", "concern", "action"]}

    lm = LM(
        model_name="gpt-4o-mini",
        formatting_model_name="gpt-4o-mini",
        temperature=1,
        max_retries="Few",
        structured_output_mode="forced_json",
    )
    print(
        asyncio.run(
            lm.respond_async(
                system_message="You are a helpful assistant ",
                user_message="Hello, how are you?",
                images_as_bytes=[],
                response_model=TestModel,
                use_ephemeral_cache_only=False,
            )
        )
    )
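The module's own __main__ block above exercises the async path with a response_model; for completeness, a minimal sketch of the synchronous plain-text path under the same assumptions (the model names are illustrative):

from synth_ai.zyk.lms.core.main import LM

lm = LM(
    model_name="gpt-4o-mini",
    formatting_model_name="gpt-4o-mini",
    temperature=0.2,
)

# Plain-text completion through the synchronous entry point.
result = lm.respond_sync(
    system_message="You are a helpful assistant",
    user_message="Summarize the benefits of response caching in one sentence.",
)
print(result.raw_response)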