synth-ai 0.1.0.dev36__py3-none-any.whl → 0.1.0.dev38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,75 @@
+ import asyncio
+ import pytest
+ from synth_ai.zyk.lms.core.main import LM
+ FORMATTING_MODEL_NAME = "gpt-4o-mini"
+
+ # List of reasoning models to test
+ # Note: Ensure these models are correctly configured and accessible in your environment
+ # And that they are included in REASONING_MODELS in main.py
+ REASONING_MODELS_TO_TEST = [
+     "o4-mini",
+     "claude-3-7-sonnet-latest",
+     "gemini-2.5-pro"
+ ]
+
+ # Define effort levels (adjust if specific models use different terms)
+ EFFORT_LEVELS = ["low", "medium", "high"]
+
+ @pytest.mark.parametrize("model_name", REASONING_MODELS_TO_TEST)
+ @pytest.mark.parametrize("effort", EFFORT_LEVELS)
+ @pytest.mark.asyncio
+ async def test_reasoning_effort_levels(model_name, effort):
+     """
+     Tests that the reasoning_effort parameter is accepted and calls succeed for various models and levels.
+     Note: This test primarily checks for successful API calls across effort levels.
+     Comparing output length or quality based on 'effort' is complex and model-specific.
+     Anthropic's 'thinking' budget might correlate, but OpenAI/others might handle 'effort' differently or ignore it.
+     """
+     print(f"\nTesting model: {model_name} with effort: {effort}")
+     lm = LM(
+         model_name=model_name,
+         formatting_model_name=FORMATTING_MODEL_NAME,
+         temperature=0,
+     )
+
+     system_prompt = "You are a helpful assistant designed to explain complex topics simply."
+     user_prompt = f"Explain the concept of quantum entanglement step by step using a simple analogy. Be concise if effort is low, detailed if high. Current effort: {effort}."
+
+     try:
+         result = await lm.respond_async(
+             system_message=system_prompt,
+             user_message=user_prompt,
+             reasoning_effort=effort,  # Pass the effort level
+         )
+
+         response = result.raw_response
+
+         # Assert call succeeded and response is non-empty
+         assert isinstance(response, str), f"Model {model_name} (effort={effort}) failed. Response type: {type(response)}"
+         assert len(response) > 0, f"Model {model_name} (effort={effort}): Response is empty."
+
+         print(f"  Response length (effort={effort}): {len(response)}")
+         # print(f"  Response snippet: {response[:100]}...")  # Optional: print snippet
+
+     except Exception as e:
+         pytest.fail(f"Model {model_name} (effort={effort}) raised an exception: {e}")
+
+ # Optional: Add a separate test to compare lengths between low and high effort for specific models if needed.
+
+ if __name__ == "__main__":
+     async def main():
+         print("Running effort tests directly...")
+         test_models = REASONING_MODELS_TO_TEST
+         effort_levels_to_run = EFFORT_LEVELS
+
+         all_tasks = []
+         for model in test_models:
+             for effort_level in effort_levels_to_run:
+                 # Create a task for each combination
+                 all_tasks.append(test_reasoning_effort_levels(model, effort_level))
+
+         # Run all tests concurrently (be mindful of rate limits)
+         await asyncio.gather(*all_tasks)
+         print("\nTest run finished.")
+
+     asyncio.run(main())
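The new test drives the reasoning_effort keyword end to end through LM.respond_async. A minimal sketch of the same call path outside pytest, assuming vendor API keys are configured in the environment (model and effort values here are illustrative):

import asyncio
from synth_ai.zyk.lms.core.main import LM

async def demo():
    # Construct the LM exactly as the test does, then pass an effort level.
    lm = LM(model_name="o4-mini", formatting_model_name="gpt-4o-mini", temperature=0)
    result = await lm.respond_async(
        system_message="You are a helpful assistant.",
        user_message="Summarize quantum entanglement in two sentences.",
        reasoning_effort="low",  # "low", "medium", or "high", per EFFORT_LEVELS above
    )
    print(result.raw_response)

asyncio.run(demo())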
@@ -18,6 +18,8 @@ def map_params_to_key(
      temperature: float,
      response_model: Optional[Type[BaseModel]],
      tools: Optional[List] = None,
+     reasoning_effort: Optional[str] = None,
+     max_tokens: Optional[int] = None,
  ) -> str:
      if not all([isinstance(msg["content"], str) for msg in messages]):
          normalized_messages = "".join([str(msg["content"]) for msg in messages])
@@ -26,6 +28,8 @@ def map_params_to_key(
      normalized_model = model
      normalized_temperature = f"{temperature:.2f}"[:4]
      normalized_response_model = str(response_model.schema()) if response_model else ""
+     normalized_reasoning_effort = str(reasoning_effort) if reasoning_effort is not None else ""
+     normalized_max_tokens = str(max_tokens) if max_tokens is not None else ""

      # Normalize tools if present
      normalized_tools = ""
@@ -57,6 +61,8 @@ def map_params_to_key(
              + normalized_temperature
              + normalized_response_model
              + normalized_tools
+             + normalized_reasoning_effort
+             + normalized_max_tokens
          ).encode()
      ).hexdigest()

@@ -83,6 +89,7 @@ class CacheHandler:
          messages: List[Dict[str, Any]],
          lm_config: Dict[str, Any],
          tools: Optional[List] = None,
+         reasoning_effort: Optional[str] = None,
      ) -> Optional[BaseLMResponse]:
          """Hit the cache with the given key."""
          self._validate_messages(messages)
@@ -93,6 +100,8 @@ class CacheHandler:
              lm_config.get("temperature", 0.0),
              lm_config.get("response_model", None),
              tools,
+             reasoning_effort,
+             lm_config.get("max_tokens"),
          )
          if self.use_persistent_store:
              return persistent_cache.hit_cache(
@@ -112,6 +121,7 @@ class CacheHandler:
          lm_config: Dict[str, Any],
          output: BaseLMResponse,
          tools: Optional[List] = None,
+         reasoning_effort: Optional[str] = None,
      ) -> None:
          """Add the given output to the cache."""
          self._validate_messages(messages)
@@ -123,6 +133,8 @@ class CacheHandler:
              lm_config.get("temperature", 0.0),
              lm_config.get("response_model", None),
              tools,
+             reasoning_effort,
+             lm_config.get("max_tokens"),
          )
          if self.use_persistent_store:
              persistent_cache.add_to_cache(key, output)
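These handler changes fold reasoning_effort and max_tokens into the cache key, so responses produced at different effort levels or token limits no longer collide in the cache. A hypothetical, self-contained sketch of the idea (not the package's map_params_to_key, whose leading parameters are elided above):

import hashlib
from typing import Optional

def toy_cache_key(messages_text: str, model: str, temperature: float,
                  reasoning_effort: Optional[str] = None,
                  max_tokens: Optional[int] = None) -> str:
    # Every parameter that can change the response participates in the key.
    parts = [
        messages_text,
        model,
        f"{temperature:.2f}"[:4],
        str(reasoning_effort) if reasoning_effort is not None else "",
        str(max_tokens) if max_tokens is not None else "",
    ]
    return hashlib.sha256("".join(parts).encode()).hexdigest()

# Different effort levels now map to different keys:
assert toy_cache_key("hi", "o4-mini", 0.0, "low") != toy_cache_key("hi", "o4-mini", 0.0, "high")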
@@ -11,7 +11,7 @@ from synth_ai.zyk.lms.core.vendor_clients import (
  from synth_ai.zyk.lms.structured_outputs.handler import StructuredOutputHandler
  from synth_ai.zyk.lms.vendors.base import BaseLMResponse, VendorBase

- REASONING_MODELS = ["deepseek-reasoner", "o1-mini", "o1-preview", "o1", "o3"]
+ REASONING_MODELS = ["deepseek-reasoner", "o1-mini", "o1-preview", "o1", "o3", "o4-mini", "claude-3-7-latest-thinking", "gemini-1.5-pro-latest"]


  def build_messages(
@@ -92,18 +92,33 @@ class LM:
          )
          # print(self.client.__class__)

+         # Determine if the primary model supports forced JSON or specific formatting modes
+         # primary_model_supports_forced_json = self.client.supports_forced_json()
+
+         # Choose the structured output mode based on primary model capability
+         # effective_structured_output_mode = structured_output_mode
+         # if not primary_model_supports_forced_json and structured_output_mode == "forced_json":
+         # # Fallback or adjust if the primary model doesn't support the desired mode
+         # # For simplicity, let's assume we might want to fallback to stringified_json or handle differently
+         # # print(f"Warning: Model {model_name} does not support forced_json. Adjusting strategy.")
+         # effective_structured_output_mode = "stringified_json"  # Example fallback
+
+
          formatting_client = get_client(formatting_model_name, with_formatting=True)

+
          max_retries_dict = {"None": 0, "Few": 2, "Many": 5}
+         # Use the effective mode for the primary handler
          self.structured_output_handler = StructuredOutputHandler(
              self.client,
              formatting_client,
-             structured_output_mode,
+             structured_output_mode,  # Use original mode
              {"max_retries": max_retries_dict.get(max_retries, 2)},
          )
+         # Always have a forced_json backup handler ready
          self.backup_structured_output_handler = StructuredOutputHandler(
-             self.client,
-             formatting_client,
+             self.client,  # This should ideally use a client capable of forced_json if primary isn't
+             formatting_client,  # Formatting client must support forced_json
              "forced_json",
              {"max_retries": max_retries_dict.get(max_retries, 2)},
          )
@@ -121,6 +136,7 @@ class LM:
          response_model: Optional[BaseModel] = None,
          use_ephemeral_cache_only: bool = False,
          tools: Optional[List] = None,
+         reasoning_effort: Optional[str] = None,
      ) -> BaseLMResponse:
          assert (system_message is None) == (
              user_message is None
@@ -131,6 +147,14 @@ class LM:
          assert not (
              response_model and tools
          ), "Cannot provide both response_model and tools"
+
+         current_lm_config = self.lm_config.copy()
+         if self.model_name in REASONING_MODELS:
+             # Removed logic that set max_tokens based on reasoning_tokens
+             # Vendor clients will now receive reasoning_effort directly
+             pass
+
+
          if messages is None:
              messages = build_messages(
                  system_message, user_message, images_as_bytes, self.model_name
@@ -139,28 +163,31 @@ class LM:
          if response_model:
              try:
                  result = self.structured_output_handler.call_sync(
-                     messages,
+                     messages=messages,
                      model=self.model_name,
-                     lm_config=self.lm_config,
                      response_model=response_model,
                      use_ephemeral_cache_only=use_ephemeral_cache_only,
+                     lm_config=current_lm_config,
+                     reasoning_effort=reasoning_effort,
                  )
              except StructuredOutputCoercionFailureException:
                  # print("Falling back to backup handler")
                  result = self.backup_structured_output_handler.call_sync(
-                     messages,
+                     messages=messages,
                      model=self.model_name,
-                     lm_config=self.lm_config,
                      response_model=response_model,
                      use_ephemeral_cache_only=use_ephemeral_cache_only,
+                     lm_config=current_lm_config,
+                     reasoning_effort=reasoning_effort,
                  )
          else:
              result = self.client._hit_api_sync(
                  messages=messages,
                  model=self.model_name,
-                 lm_config=self.lm_config,
+                 lm_config=current_lm_config,
                  use_ephemeral_cache_only=use_ephemeral_cache_only,
                  tools=tools,
+                 reasoning_effort=reasoning_effort,
              )
          assert isinstance(result.raw_response, str), "Raw response must be a string"
          assert (
@@ -181,6 +208,7 @@ class LM:
          response_model: Optional[BaseModel] = None,
          use_ephemeral_cache_only: bool = False,
          tools: Optional[List] = None,
+         reasoning_effort: Optional[str] = None,
      ) -> BaseLMResponse:
          # "In respond_async")
          assert (system_message is None) == (
@@ -193,6 +221,13 @@ class LM:
          assert not (
              response_model and tools
          ), "Cannot provide both response_model and tools"
+
+         current_lm_config = self.lm_config.copy()
+         if self.model_name in REASONING_MODELS:
+             # Removed logic that set max_tokens based on reasoning_tokens
+             # Vendor clients will now receive reasoning_effort directly
+             pass
+
          if messages is None:
              messages = build_messages(
                  system_message, user_message, images_as_bytes, self.model_name
@@ -202,29 +237,32 @@ class LM:
              try:
                  # print("Trying structured output handler")
                  result = await self.structured_output_handler.call_async(
-                     messages,
+                     messages=messages,
                      model=self.model_name,
-                     lm_config=self.lm_config,
                      response_model=response_model,
                      use_ephemeral_cache_only=use_ephemeral_cache_only,
+                     lm_config=current_lm_config,
+                     reasoning_effort=reasoning_effort,
                  )
              except StructuredOutputCoercionFailureException:
                  # print("Falling back to backup handler")
                  result = await self.backup_structured_output_handler.call_async(
-                     messages,
+                     messages=messages,
                      model=self.model_name,
-                     lm_config=self.lm_config,
                      response_model=response_model,
                      use_ephemeral_cache_only=use_ephemeral_cache_only,
+                     lm_config=current_lm_config,
+                     reasoning_effort=reasoning_effort,
                  )
          else:
              # print("Calling API no response model")
              result = await self.client._hit_api_async(
                  messages=messages,
                  model=self.model_name,
-                 lm_config=self.lm_config,
+                 lm_config=current_lm_config,
                  use_ephemeral_cache_only=use_ephemeral_cache_only,
                  tools=tools,
+                 reasoning_effort=reasoning_effort,
              )
          assert isinstance(result.raw_response, str), "Raw response must be a string"
          assert (
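Both respond paths now copy lm_config and forward reasoning_effort to the vendor client or structured-output handler. A short sketch of the structured-output path, assuming a Pydantic response model (the model class here is illustrative):

import asyncio
from pydantic import BaseModel
from synth_ai.zyk.lms.core.main import LM

class Answer(BaseModel):
    summary: str

async def demo_structured():
    lm = LM(model_name="claude-3-7-sonnet-latest", formatting_model_name="gpt-4o-mini", temperature=0)
    result = await lm.respond_async(
        system_message="Answer briefly.",
        user_message="What is quantum entanglement?",
        response_model=Answer,
        reasoning_effort="medium",
    )
    # On success the parsed model is available alongside the raw string response.
    print(result.structured_output)

asyncio.run(demo_structured())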
@@ -429,6 +429,7 @@ class StructuredOutputHandler:
          lm_config: Dict[str, Any] = {},
          reasoning_effort: str = "high",
      ) -> BaseLMResponse:
+         # print("Output handler call sync")
          return self.handler.call_sync(
              messages=messages,
              model=model,
@@ -17,8 +17,8 @@ ANTHROPIC_EXCEPTIONS_TO_RETRY: Tuple[Type[Exception], ...] = (anthropic.APIError


  sonnet_37_budgets = {
-     "high": 4000,
-     "medium": 2000,
+     "high": 8000,
+     "medium": 4000,
      "low": 1000,
  }

@@ -64,7 +64,7 @@ class AnthropicAPI(VendorBase):
          ), "response_model is not supported for standard calls"
          used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
          cache_result = used_cache_handler.hit_managed_cache(
-             model, messages, lm_config=lm_config, tools=tools
+             model, messages, lm_config=lm_config, tools=tools, reasoning_effort=reasoning_effort
          )
          if cache_result:
              return cache_result
@@ -135,7 +135,7 @@ class AnthropicAPI(VendorBase):
          )

          used_cache_handler.add_to_managed_cache(
-             model, messages, lm_config=lm_config, output=lm_response, tools=tools
+             model, messages, lm_config=lm_config, output=lm_response, tools=tools, reasoning_effort=reasoning_effort
          )
          return lm_response

@@ -162,7 +162,7 @@ class AnthropicAPI(VendorBase):
              use_ephemeral_cache_only=use_ephemeral_cache_only
          )
          cache_result = used_cache_handler.hit_managed_cache(
-             model, messages, lm_config=lm_config, tools=tools
+             model, messages, lm_config=lm_config, tools=tools, reasoning_effort=reasoning_effort
          )
          if cache_result:
              return cache_result
@@ -235,7 +235,7 @@ class AnthropicAPI(VendorBase):
          )

          used_cache_handler.add_to_managed_cache(
-             model, messages, lm_config=lm_config, output=lm_response, tools=tools
+             model, messages, lm_config=lm_config, output=lm_response, tools=tools, reasoning_effort=reasoning_effort
          )
          return lm_response

@@ -249,6 +249,17 @@ class AnthropicAPI(VendorBase):
          reasoning_effort: str = "high",
          **vendor_params: Dict[str, Any],
      ) -> BaseLMResponse:
+         used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
+         lm_config = {"temperature": temperature, "response_model": response_model}
+         cache_result = used_cache_handler.hit_managed_cache(
+             model=model,
+             messages=messages,
+             lm_config=lm_config,
+             reasoning_effort=reasoning_effort,
+         )
+         if cache_result:
+             return cache_result
+
          try:
              # First try with Anthropic
              reasoning_effort = vendor_params.get("reasoning_effort", reasoning_effort)
@@ -283,6 +294,13 @@ class AnthropicAPI(VendorBase):
                  structured_output=response_model(**parsed),
                  tool_calls=None,
              )
+             used_cache_handler.add_to_managed_cache(
+                 model=model,
+                 messages=messages,
+                 lm_config=lm_config,
+                 output=lm_response,
+                 reasoning_effort=reasoning_effort,
+             )
              return lm_response
          except (json.JSONDecodeError, pydantic.ValidationError):
              # If Anthropic fails, fallback to OpenAI
@@ -306,6 +324,17 @@ class AnthropicAPI(VendorBase):
          reasoning_effort: str = "high",
          **vendor_params: Dict[str, Any],
      ) -> BaseLMResponse:
+         used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
+         lm_config = {"temperature": temperature, "response_model": response_model}
+         cache_result = used_cache_handler.hit_managed_cache(
+             model=model,
+             messages=messages,
+             lm_config=lm_config,
+             reasoning_effort=reasoning_effort,
+         )
+         if cache_result:
+             return cache_result
+
          try:
              # First try with Anthropic
              reasoning_effort = vendor_params.get("reasoning_effort", reasoning_effort)
@@ -342,6 +371,13 @@ class AnthropicAPI(VendorBase):
                  structured_output=response_model(**parsed),
                  tool_calls=None,
              )
+             used_cache_handler.add_to_managed_cache(
+                 model=model,
+                 messages=messages,
+                 lm_config=lm_config,
+                 output=lm_response,
+                 reasoning_effort=reasoning_effort,
+             )
              return lm_response
          except (json.JSONDecodeError, pydantic.ValidationError):
              # If Anthropic fails, fallback to OpenAI
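Beyond threading reasoning_effort into every managed-cache read and write, this file doubles the high and medium thinking budgets for Claude 3.7 Sonnet. Presumably those budgets feed Anthropic's extended-thinking budget_tokens; the sketch below shows how such a mapping could be applied with the anthropic SDK and is an assumption, not code from this package:

import anthropic

sonnet_37_budgets = {"high": 8000, "medium": 4000, "low": 1000}

def thinking_config(effort: str) -> dict:
    # Map a coarse effort level onto an extended-thinking token budget (assumed usage).
    return {"type": "enabled", "budget_tokens": sonnet_37_budgets[effort]}

client = anthropic.Anthropic()
message = client.messages.create(
    model="claude-3-7-sonnet-latest",
    max_tokens=16000,  # must exceed the thinking budget
    thinking=thinking_config("medium"),
    messages=[{"role": "user", "content": "Explain quantum entanglement simply."}],
)
print(message.content)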
@@ -111,6 +111,9 @@ class GeminiAPI(VendorBase):
          generation_config = {
              "temperature": temperature,
          }
+         # Add max_output_tokens if max_tokens is in lm_config
+         if lm_config and "max_tokens" in lm_config:
+             generation_config["max_output_tokens"] = lm_config["max_tokens"]

          tools_config = None
          if tools:
@@ -167,6 +170,9 @@ class GeminiAPI(VendorBase):
          generation_config = {
              "temperature": temperature,
          }
+         # Add max_output_tokens if max_tokens is in lm_config
+         if lm_config and "max_tokens" in lm_config:
+             generation_config["max_output_tokens"] = lm_config["max_tokens"]

          tools_config = None
          if tools:
@@ -231,7 +237,7 @@ class GeminiAPI(VendorBase):
          ), "response_model is not supported for standard calls"
          used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
          cache_result = used_cache_handler.hit_managed_cache(
-             model, messages, lm_config=lm_config, tools=tools
+             model, messages, lm_config=lm_config, tools=tools, reasoning_effort=reasoning_effort
          )
          if cache_result:
              return cache_result
@@ -250,7 +256,7 @@ class GeminiAPI(VendorBase):
          )

          used_cache_handler.add_to_managed_cache(
-             model, messages, lm_config=lm_config, output=lm_response, tools=tools
+             model, messages, lm_config=lm_config, output=lm_response, tools=tools, reasoning_effort=reasoning_effort
          )
          return lm_response

@@ -276,7 +282,7 @@ class GeminiAPI(VendorBase):
              use_ephemeral_cache_only=use_ephemeral_cache_only
          )
          cache_result = used_cache_handler.hit_managed_cache(
-             model, messages, lm_config=lm_config, tools=tools
+             model, messages, lm_config=lm_config, tools=tools, reasoning_effort=reasoning_effort
          )
          if cache_result:
              return cache_result
@@ -295,6 +301,6 @@ class GeminiAPI(VendorBase):
          )

          used_cache_handler.add_to_managed_cache(
-             model, messages, lm_config=lm_config, output=lm_response, tools=tools
+             model, messages, lm_config=lm_config, output=lm_response, tools=tools, reasoning_effort=reasoning_effort
          )
          return lm_response
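For Gemini, a max_tokens value in lm_config is now translated into the generation_config key max_output_tokens, which is the name the Gemini API expects. A small pure-Python sketch of the resulting config (values illustrative):

lm_config = {"temperature": 0.0, "max_tokens": 1024}

generation_config = {"temperature": lm_config["temperature"]}
if lm_config and "max_tokens" in lm_config:
    # Gemini names this limit max_output_tokens rather than max_tokens.
    generation_config["max_output_tokens"] = lm_config["max_tokens"]

assert generation_config == {"temperature": 0.0, "max_output_tokens": 1024}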
@@ -10,7 +10,7 @@ from pydantic import BaseModel
  from synth_ai.zyk.lms.caching.initialize import get_cache_handler
  from synth_ai.zyk.lms.tools.base import BaseTool
  from synth_ai.zyk.lms.vendors.base import BaseLMResponse
- from synth_ai.zyk.lms.vendors.constants import SPECIAL_BASE_TEMPS
+ from synth_ai.zyk.lms.vendors.constants import SPECIAL_BASE_TEMPS, openai_reasoners
  from synth_ai.zyk.lms.vendors.openai_standard import OpenAIStandard

  OPENAI_EXCEPTIONS_TO_RETRY: Tuple[Type[Exception], ...] = (
@@ -70,21 +70,20 @@ class OpenAIStructuredOutputClient(OpenAIStandard):
              return (
                  cache_result["response"] if type(cache_result) == dict else cache_result
              )
-         if model in ["o3-mini", "o3", "o1-mini", "o1"]:
-             output = await self.async_client.beta.chat.completions.parse(
+         if model in openai_reasoners:
+             output = await self.async_client.chat.completions.create(
                  model=model,
                  messages=messages,
-                 temperature=lm_config.get(
-                     "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
-                 ),
-                 response_format=response_model,
-                 reasoning_effort=reasoning_effort,
+                 temperature=temperature,
+                 tools=tools,
+                 max_completion_tokens=lm_config.get("max_tokens"),
              )
          else:
-             output = await self.async_client.beta.chat.completions.parse(
+             output = await self.async_client.chat.completions.create(
                  model=model,
                  messages=messages,
-                 response_format=response_model,
+                 temperature=temperature,
+                 max_tokens=lm_config.get("max_tokens"),
              )
          # "Output", output)
          api_result = response_model(**json.loads(output.choices[0].message.content))
@@ -126,20 +125,19 @@ class OpenAIStructuredOutputClient(OpenAIStandard):
                  cache_result["response"] if type(cache_result) == dict else cache_result
              )
          if model in ["o3-mini", "o3", "o1-mini", "o1"]:
-             output = self.sync_client.beta.chat.completions.parse(
+             output = self.sync_client.chat.completions.create(
                  model=model,
                  messages=messages,
-                 temperature=lm_config.get(
-                     "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
-                 ),
-                 response_format=response_model,
-                 reasoning_effort=reasoning_effort,
+                 temperature=temperature,
+                 tools=tools,
+                 max_tokens=lm_config.get("max_tokens"),
              )
          else:
-             output = self.sync_client.beta.chat.completions.parse(
+             output = self.sync_client.chat.completions.create(
                  model=model,
                  messages=messages,
-                 response_format=response_model,
+                 temperature=temperature,
+                 max_tokens=lm_config.get("max_tokens"),
              )
          api_result = response_model(**json.loads(output.choices[0].message.content))
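The structured-output client switches from beta.chat.completions.parse to plain chat.completions.create, drops response_format, and parses JSON out of the message content itself; reasoning models get max_completion_tokens while other models keep max_tokens. A hedged sketch of that branching, with openai_reasoners stubbed in since its definition is not shown in this diff:

from openai import AsyncOpenAI

openai_reasoners = ["o3-mini", "o3", "o1-mini", "o1", "o4-mini"]  # assumed contents

async def create_completion(client: AsyncOpenAI, model: str, messages: list,
                            temperature: float, max_tokens: int):
    if model in openai_reasoners:
        # Reasoning models take max_completion_tokens instead of max_tokens.
        return await client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            max_completion_tokens=max_tokens,
        )
    return await client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )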
 
@@ -87,7 +87,7 @@ class OpenAIStandard(VendorBase):
          messages = special_orion_transform(model, messages)
          used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
          cache_result = used_cache_handler.hit_managed_cache(
-             model, messages, lm_config=lm_config, tools=tools
+             model, messages, lm_config=lm_config, tools=tools, reasoning_effort=reasoning_effort
          )
          if cache_result:
              return cache_result
@@ -97,6 +97,9 @@ class OpenAIStandard(VendorBase):
              "model": model,
              "messages": messages,
          }
+         # Add max_tokens if present in lm_config
+         if "max_tokens" in lm_config:
+             api_params["max_completion_tokens"] = lm_config["max_tokens"]

          # Add tools if provided
          if tools:
@@ -142,7 +145,7 @@ class OpenAIStandard(VendorBase):
              tool_calls=tool_calls,
          )
          used_cache_handler.add_to_managed_cache(
-             model, messages, lm_config=lm_config, output=lm_response, tools=tools
+             model, messages, lm_config=lm_config, output=lm_response, tools=tools, reasoning_effort=reasoning_effort
          )
          return lm_response

@@ -170,7 +173,7 @@ class OpenAIStandard(VendorBase):
              use_ephemeral_cache_only=use_ephemeral_cache_only
          )
          cache_result = used_cache_handler.hit_managed_cache(
-             model, messages, lm_config=lm_config, tools=tools
+             model, messages, lm_config=lm_config, tools=tools, reasoning_effort=reasoning_effort
          )
          if cache_result:
              return cache_result
@@ -180,6 +183,9 @@ class OpenAIStandard(VendorBase):
              "model": model,
              "messages": messages,
          }
+         # Add max_tokens if present in lm_config
+         if "max_tokens" in lm_config:
+             api_params["max_tokens"] = lm_config["max_tokens"]

          # Add tools if provided
          if tools:
@@ -224,7 +230,7 @@ class OpenAIStandard(VendorBase):
              tool_calls=tool_calls,
          )
          used_cache_handler.add_to_managed_cache(
-             model, messages, lm_config=lm_config, output=lm_response, tools=tools
+             model, messages, lm_config=lm_config, output=lm_response, tools=tools, reasoning_effort=reasoning_effort
          )
          return lm_response

@@ -242,7 +248,11 @@ class OpenAIStandard(VendorBase):
          used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
          cache_result: Union[BaseLMResponse, None] = (
              used_cache_handler.hit_managed_cache(
-                 model, messages, lm_config=lm_config, tools=tools
+                 model,
+                 messages,
+                 lm_config=lm_config,
+                 tools=tools,
+                 reasoning_effort=reasoning_effort,
              )
          )
          if cache_result is not None:
@@ -285,7 +295,12 @@ class OpenAIStandard(VendorBase):
              tool_calls=tool_calls,
          )
          used_cache_handler.add_to_managed_cache(
-             model, messages, lm_config=lm_config, output=lm_response, tools=tools
+             model,
+             messages,
+             lm_config=lm_config,
+             output=lm_response,
+             tools=tools,
+             reasoning_effort=reasoning_effort,
          )
          return lm_response

@@ -303,7 +318,11 @@ class OpenAIStandard(VendorBase):
          used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
          cache_result: Union[BaseLMResponse, None] = (
              used_cache_handler.hit_managed_cache(
-                 model, messages, lm_config=lm_config, tools=tools
+                 model,
+                 messages,
+                 lm_config=lm_config,
+                 tools=tools,
+                 reasoning_effort=reasoning_effort,
              )
          )
          if cache_result is not None:
@@ -346,6 +365,11 @@ class OpenAIStandard(VendorBase):
              tool_calls=tool_calls,
          )
          used_cache_handler.add_to_managed_cache(
-             model, messages, lm_config=lm_config, output=lm_response, tools=tools
+             model,
+             messages,
+             lm_config=lm_config,
+             output=lm_response,
+             tools=tools,
+             reasoning_effort=reasoning_effort,
          )
          return lm_response
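In the OpenAI-compatible standard client, lm_config["max_tokens"] is forwarded as max_completion_tokens on the sync path but as max_tokens on the async path, and reasoning_effort now rides along with every managed-cache hit and write. A minimal sketch of the parameter assembly, assuming the same lm_config shape used elsewhere in this package:

def build_api_params(model: str, messages: list, lm_config: dict, sync: bool) -> dict:
    api_params = {"model": model, "messages": messages}
    if "max_tokens" in lm_config:
        # Mirrors the diff: the sync path uses max_completion_tokens, the async path max_tokens.
        key = "max_completion_tokens" if sync else "max_tokens"
        api_params[key] = lm_config["max_tokens"]
    return api_params

print(build_api_params("gpt-4o-mini", [], {"max_tokens": 512}, sync=True))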
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: synth-ai
- Version: 0.1.0.dev36
+ Version: 0.1.0.dev38
  Summary: Software for aiding the best and multiplying the will.
  Author: Josh Purtell
  Author-email: Josh Purtell <josh@usesynth.ai>
@@ -8,6 +8,7 @@ public_tests/test_gemini_output.py,sha256=704NCnxNepYjUxJj3eEms6zHRCps2PSaR8A-lc
  public_tests/test_gemini_structured_outputs.py,sha256=yKa3CDVJxE_Vb2BbVROje83Pb35MBusF0Nb-ttWbqS8,4001
  public_tests/test_models.py,sha256=QGevBfBuQzwyKw1ez34igDyJpMTBVOc3meW6yqFE-bM,5853
  public_tests/test_openai_structured_outputs.py,sha256=oIhdZ2QVLmn0LaqBpCP3Qhbn2KHJv633DGn6u9Ousak,3999
+ public_tests/test_reasoning_effort.py,sha256=w4dIiEaEU8gnfAmjrpCC5y-c9w-eH9NzFjwUHe2deyg,3089
  public_tests/test_reasoning_models.py,sha256=Vr4sFRYcrYOBAZMFz2a0fZQqa-WjRwbtwc6lXy6bF4I,2897
  public_tests/test_recursive_structured_outputs.py,sha256=rrqzsU5ExNt-m_wu9j_fkbHiEsAtbKEK66uK5Ub2ojs,6296
  public_tests/test_structured.py,sha256=rftVwvYgMSHkRZM1WUJzga5Uvl9hmc5OpXzBshEXNF0,3740
@@ -23,32 +24,32 @@ synth_ai/zyk/lms/caching/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
  synth_ai/zyk/lms/caching/constants.py,sha256=fPi3x9p-yRdvixMSIyclvmwmwCRliXLXQjEm6dRnG8s,52
  synth_ai/zyk/lms/caching/dbs.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  synth_ai/zyk/lms/caching/ephemeral.py,sha256=pNMG5Rzzp2m0Ln1UYmWxz1qbXwq3iNIrhjYAS0yO3ZE,2370
- synth_ai/zyk/lms/caching/handler.py,sha256=a-4FBxXLWeHCXiGDWP8QU-LPxMAvGbJ_5lUrXnZytn0,4478
+ synth_ai/zyk/lms/caching/handler.py,sha256=4h4Kywf0_-WohE1RxBt4cqPo-kHRjZv-2K50WWO91V4,5050
  synth_ai/zyk/lms/caching/initialize.py,sha256=zZls6RKAax6Z-8oJInGaSg_RPN_fEZ6e_RCX64lMLJw,416
  synth_ai/zyk/lms/caching/persistent.py,sha256=ZaY1A9qhvfNKzcAI9FnwbIrgMKvVeIfb_yCyl3M8dxE,2860
  synth_ai/zyk/lms/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  synth_ai/zyk/lms/core/all.py,sha256=wakK0HhvYRuaQZmxClURyNf3vUkTbm3OABw3TgpMjOQ,1185
  synth_ai/zyk/lms/core/exceptions.py,sha256=K0BVdAzxVIchsvYZAaHEH1GAWBZvpxhFi-SPcJOjyPQ,205
- synth_ai/zyk/lms/core/main.py,sha256=kKxk-1TZQMNXDrLv7qA42fNOsXes-G9kLtNg-LtrpYY,10370
+ synth_ai/zyk/lms/core/main.py,sha256=NNPd4wwpgscFtCCrVPgz6gcrg7kOTSKsBFhldV0kwv0,12502
  synth_ai/zyk/lms/core/vendor_clients.py,sha256=C4ICuczCG2yRpDbrraT0LUoaPFYytuetfJLqhgvGn8A,2785
  synth_ai/zyk/lms/cost/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  synth_ai/zyk/lms/cost/monitor.py,sha256=cSKIvw6WdPZIRubADWxQoh1MdB40T8-jjgfNUeUHIn0,5
  synth_ai/zyk/lms/cost/statefulness.py,sha256=TOsuXL8IjtKOYJ2aJQF8TwJVqn_wQ7AIwJJmdhMye7U,36
  synth_ai/zyk/lms/structured_outputs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- synth_ai/zyk/lms/structured_outputs/handler.py,sha256=BQ0T4HBFXC9qesF8v0lG8MuiOecWm2YEF75nUt1mB_s,16925
+ synth_ai/zyk/lms/structured_outputs/handler.py,sha256=Y7qQ8VReofLKDX6M7L5OXBUmTyHw6bWEfYz0jqvQIZ0,16969
  synth_ai/zyk/lms/structured_outputs/inject.py,sha256=Fy-zDeleRxOZ8ZRM6IuZ6CP2XZnMe4K2PEn4Q9c_KPY,11777
  synth_ai/zyk/lms/structured_outputs/rehabilitate.py,sha256=GuIhzsb7rTvwgn7f9I9omNnXBz5Me_qrtNYcTWzw5_U,7909
  synth_ai/zyk/lms/tools/base.py,sha256=j7wYb1xAvaAm3qVrINphgUhGS-UjZmRpbouseQYgh7A,3228
  synth_ai/zyk/lms/vendors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  synth_ai/zyk/lms/vendors/base.py,sha256=aK4PEtkMLt_o3qD22kW-x3HJUEKdIk06zlH4kX0VkAE,760
  synth_ai/zyk/lms/vendors/constants.py,sha256=3CCq45otD80yaLts5sFHvPgLCQNkcjHkc9cqOQ0zH4Y,320
- synth_ai/zyk/lms/vendors/openai_standard.py,sha256=oii23QtG_sh_V2yFV1ZMF7F0t9Q_mGL8yM_QxZnZ9QA,12091
+ synth_ai/zyk/lms/vendors/openai_standard.py,sha256=A8fRaazdaccFW24De2dDyiH2kdjhf9SAlQhLmAQuZjA,12934
  synth_ai/zyk/lms/vendors/retries.py,sha256=m-WvAiPix9ovnO2S-m53Td5VZDWBVBFuHuSK9--OVxw,38
  synth_ai/zyk/lms/vendors/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- synth_ai/zyk/lms/vendors/core/anthropic_api.py,sha256=vxANYEcU46n6flRJ4y5j4VrSA1ky4EXo8nWgYPLi3HU,13829
- synth_ai/zyk/lms/vendors/core/gemini_api.py,sha256=I1goLy5R8eBLrun2jpnD4o87NlmzWgPrfYaeu9RZN8M,11008
+ synth_ai/zyk/lms/vendors/core/anthropic_api.py,sha256=cgSPFgR9YM8eO1if92fch6pVv0Dxk3xigSXExGSdu-4,15329
+ synth_ai/zyk/lms/vendors/core/gemini_api.py,sha256=gsWudX1RP2a4mjuyWZXT3LUek-UMUYhCfE5TMYxp0nA,11530
  synth_ai/zyk/lms/vendors/core/mistral_api.py,sha256=-EMPBEIoYxxDMxukmcmKL8AGAHPNYe4w-76gsPtmrhk,11860
- synth_ai/zyk/lms/vendors/core/openai_api.py,sha256=QkQqba851EEGf9n5H31-pJ6WexhTZkdPWQap0oGy2Ho,6713
+ synth_ai/zyk/lms/vendors/core/openai_api.py,sha256=GDCHIc0kpCnNPj2oW8RE3Cj2U_HcbXzzA5JV1ArAQlE,6600
  synth_ai/zyk/lms/vendors/local/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  synth_ai/zyk/lms/vendors/local/ollama.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  synth_ai/zyk/lms/vendors/supported/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -56,11 +57,11 @@ synth_ai/zyk/lms/vendors/supported/deepseek.py,sha256=BElW0NGpkSA62wOqzzMtDw8XR3
  synth_ai/zyk/lms/vendors/supported/groq.py,sha256=Fbi7QvhdLx0F-VHO5PY-uIQlPR0bo3C9h1MvIOx8nz0,388
  synth_ai/zyk/lms/vendors/supported/ollama.py,sha256=K30VBFRTd7NYyPmyBVRZS2sm0UB651AHp9i3wd55W64,469
  synth_ai/zyk/lms/vendors/supported/together.py,sha256=Ni_jBqqGPN0PkkY-Ew64s3gNKk51k3FCpLSwlNhKbf0,342
- synth_ai-0.1.0.dev36.dist-info/licenses/LICENSE,sha256=ynhjRQUfqA_RdGRATApfFA_fBAy9cno04sLtLUqxVFM,1069
+ synth_ai-0.1.0.dev38.dist-info/licenses/LICENSE,sha256=ynhjRQUfqA_RdGRATApfFA_fBAy9cno04sLtLUqxVFM,1069
  tests/test_agent.py,sha256=CjPPWuMWC_TzX1DkDald-bbAxgjXE-HPQvFhq2B--5k,22363
  tests/test_recursive_structured_outputs.py,sha256=Ne-9XwnOxN7eSpGbNHOpegR-sRj589I84T6y8Z_4QnA,5781
  tests/test_structured_outputs.py,sha256=J7sfbGZ7OeB5ONIKpcCTymyayNyAdFfGokC1bcUrSx0,3651
- synth_ai-0.1.0.dev36.dist-info/METADATA,sha256=xID7XqBdy-n664hgNBaNRhVTaot5fLUlTBVJiVVqvjw,2702
- synth_ai-0.1.0.dev36.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
- synth_ai-0.1.0.dev36.dist-info/top_level.txt,sha256=5GzJO9j-KbJ_4ppxhmCUa_qdhHM4-9cHHNU76yAI8do,42
- synth_ai-0.1.0.dev36.dist-info/RECORD,,
+ synth_ai-0.1.0.dev38.dist-info/METADATA,sha256=JHm6Ol6dT8wqnCf-Iq3dQJFxII1oFAMFHDkJY9tDm4o,2702
+ synth_ai-0.1.0.dev38.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ synth_ai-0.1.0.dev38.dist-info/top_level.txt,sha256=5GzJO9j-KbJ_4ppxhmCUa_qdhHM4-9cHHNU76yAI8do,42
+ synth_ai-0.1.0.dev38.dist-info/RECORD,,