synth-ai 0.1.0.dev50__py3-none-any.whl → 0.1.0.dev52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. synth_ai/__init__.py +1 -1
  2. synth_ai/zyk/__init__.py +3 -0
  3. synth_ai/zyk/lms/__init__.py +0 -0
  4. synth_ai/zyk/lms/caching/__init__.py +0 -0
  5. synth_ai/zyk/lms/caching/constants.py +1 -0
  6. synth_ai/zyk/lms/caching/dbs.py +0 -0
  7. synth_ai/zyk/lms/caching/ephemeral.py +72 -0
  8. synth_ai/zyk/lms/caching/handler.py +137 -0
  9. synth_ai/zyk/lms/caching/initialize.py +13 -0
  10. synth_ai/zyk/lms/caching/persistent.py +83 -0
  11. synth_ai/zyk/lms/config.py +10 -0
  12. synth_ai/zyk/lms/constants.py +22 -0
  13. synth_ai/zyk/lms/core/__init__.py +0 -0
  14. synth_ai/zyk/lms/core/all.py +47 -0
  15. synth_ai/zyk/lms/core/exceptions.py +9 -0
  16. synth_ai/zyk/lms/core/main.py +268 -0
  17. synth_ai/zyk/lms/core/vendor_clients.py +85 -0
  18. synth_ai/zyk/lms/cost/__init__.py +0 -0
  19. synth_ai/zyk/lms/cost/monitor.py +1 -0
  20. synth_ai/zyk/lms/cost/statefulness.py +1 -0
  21. synth_ai/zyk/lms/structured_outputs/__init__.py +0 -0
  22. synth_ai/zyk/lms/structured_outputs/handler.py +441 -0
  23. synth_ai/zyk/lms/structured_outputs/inject.py +314 -0
  24. synth_ai/zyk/lms/structured_outputs/rehabilitate.py +187 -0
  25. synth_ai/zyk/lms/tools/base.py +118 -0
  26. synth_ai/zyk/lms/vendors/__init__.py +0 -0
  27. synth_ai/zyk/lms/vendors/base.py +31 -0
  28. synth_ai/zyk/lms/vendors/core/__init__.py +0 -0
  29. synth_ai/zyk/lms/vendors/core/anthropic_api.py +365 -0
  30. synth_ai/zyk/lms/vendors/core/gemini_api.py +282 -0
  31. synth_ai/zyk/lms/vendors/core/mistral_api.py +331 -0
  32. synth_ai/zyk/lms/vendors/core/openai_api.py +187 -0
  33. synth_ai/zyk/lms/vendors/local/__init__.py +0 -0
  34. synth_ai/zyk/lms/vendors/local/ollama.py +0 -0
  35. synth_ai/zyk/lms/vendors/openai_standard.py +345 -0
  36. synth_ai/zyk/lms/vendors/retries.py +3 -0
  37. synth_ai/zyk/lms/vendors/supported/__init__.py +0 -0
  38. synth_ai/zyk/lms/vendors/supported/deepseek.py +73 -0
  39. synth_ai/zyk/lms/vendors/supported/groq.py +16 -0
  40. synth_ai/zyk/lms/vendors/supported/ollama.py +14 -0
  41. synth_ai/zyk/lms/vendors/supported/together.py +11 -0
  42. {synth_ai-0.1.0.dev50.dist-info → synth_ai-0.1.0.dev52.dist-info}/METADATA +2 -1
  43. synth_ai-0.1.0.dev52.dist-info/RECORD +46 -0
  44. synth_ai-0.1.0.dev50.dist-info/RECORD +0 -6
  45. {synth_ai-0.1.0.dev50.dist-info → synth_ai-0.1.0.dev52.dist-info}/WHEEL +0 -0
  46. {synth_ai-0.1.0.dev50.dist-info → synth_ai-0.1.0.dev52.dist-info}/licenses/LICENSE +0 -0
  47. {synth_ai-0.1.0.dev50.dist-info → synth_ai-0.1.0.dev52.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ b/synth_ai/zyk/lms/vendors/core/anthropic_api.py
@@ -0,0 +1,365 @@
+import json
+from typing import Any, Dict, List, Optional, Tuple, Type
+
+import anthropic
+import pydantic
+from pydantic import BaseModel
+
+from synth_ai.zyk.lms.caching.initialize import (
+    get_cache_handler,
+)
+from synth_ai.zyk.lms.tools.base import BaseTool
+from synth_ai.zyk.lms.vendors.base import BaseLMResponse, VendorBase
+from synth_ai.zyk.lms.constants import SPECIAL_BASE_TEMPS, CLAUDE_REASONING_MODELS, SONNET_37_BUDGETS
+from synth_ai.zyk.lms.vendors.core.openai_api import OpenAIStructuredOutputClient
+
+ANTHROPIC_EXCEPTIONS_TO_RETRY: Tuple[Type[Exception], ...] = (anthropic.APIError,)
+
+
+class AnthropicAPI(VendorBase):
+    used_for_structured_outputs: bool = True
+    exceptions_to_retry: Tuple = ANTHROPIC_EXCEPTIONS_TO_RETRY
+    sync_client: Any
+    async_client: Any
+
+    def __init__(
+        self,
+        exceptions_to_retry: Tuple[
+            Type[Exception], ...
+        ] = ANTHROPIC_EXCEPTIONS_TO_RETRY,
+        used_for_structured_outputs: bool = False,
+        reasoning_effort: str = "high",
+    ):
+        self.sync_client = anthropic.Anthropic()
+        self.async_client = anthropic.AsyncAnthropic()
+        self.used_for_structured_outputs = used_for_structured_outputs
+        self.exceptions_to_retry = exceptions_to_retry
+        self._openai_fallback = None
+        self.reasoning_effort = reasoning_effort
+
+    # @backoff.on_exception(
+    #     backoff.expo,
+    #     exceptions_to_retry,
+    #     max_tries=BACKOFF_TOLERANCE,
+    #     on_giveup=lambda e: print(e),
+    # )
+    async def _hit_api_async(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        lm_config: Dict[str, Any],
+        use_ephemeral_cache_only: bool = False,
+        reasoning_effort: str = "high",
+        tools: Optional[List[BaseTool]] = None,
+        **vendor_params: Dict[str, Any],
+    ) -> BaseLMResponse:
+        assert (
+            lm_config.get("response_model", None) is None
+        ), "response_model is not supported for standard calls"
+        used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
+        lm_config["reasoning_effort"] = reasoning_effort
+        cache_result = used_cache_handler.hit_managed_cache(
+            model, messages, lm_config=lm_config, tools=tools
+        )
+        if cache_result:
+            return cache_result
+
+        # Common API parameters
+        api_params = {
+            "system": messages[0]["content"],
+            "messages": messages[1:],
+            "model": model,
+            "max_tokens": lm_config.get("max_tokens", 4096),
+            "temperature": lm_config.get(
+                "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
+            ),
+        }
+
+        # Add tools if provided
+        if tools:
+            api_params["tools"] = [tool.to_anthropic_tool() for tool in tools]
+
+        # Only try to add thinking if supported by the SDK
+        try:
+            import inspect
+
+            create_sig = inspect.signature(self.async_client.messages.create)
+            if "thinking" in create_sig.parameters and model in CLAUDE_REASONING_MODELS:
+                if reasoning_effort in ["high", "medium"]:
+                    budget = SONNET_37_BUDGETS[reasoning_effort]
+                    api_params["thinking"] = {
+                        "type": "enabled",
+                        "budget_tokens": budget,
+                    }
+                    api_params["max_tokens"] = budget + 4096
+                    api_params["temperature"] = 1
+        except (ImportError, AttributeError, TypeError):
+            pass
+
+        # Make the API call
+        response = await self.async_client.messages.create(**api_params)
+
+        # Extract text content and tool calls
+        raw_response = ""
+        tool_calls = []
+
+        for content in response.content:
+            if content.type == "text":
+                raw_response += content.text
+            elif content.type == "tool_use":
+                tool_calls.append(
+                    {
+                        "id": content.id,
+                        "type": "function",
+                        "function": {
+                            "name": content.name,
+                            "arguments": json.dumps(content.input),
+                        },
+                    }
+                )
+
+        lm_response = BaseLMResponse(
+            raw_response=raw_response,
+            structured_output=None,
+            tool_calls=tool_calls if tool_calls else None,
+        )
+
+        lm_config["reasoning_effort"] = reasoning_effort
+        used_cache_handler.add_to_managed_cache(
+            model, messages, lm_config=lm_config, output=lm_response, tools=tools
+        )
+        return lm_response
+
+    # @backoff.on_exception(
+    #     backoff.expo,
+    #     exceptions_to_retry,
+    #     max_tries=BACKOFF_TOLERANCE,
+    #     on_giveup=lambda e: print(e),
+    # )
+    def _hit_api_sync(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        lm_config: Dict[str, Any],
+        use_ephemeral_cache_only: bool = False,
+        reasoning_effort: str = "high",
+        tools: Optional[List[BaseTool]] = None,
+        **vendor_params: Dict[str, Any],
+    ) -> BaseLMResponse:
+        assert (
+            lm_config.get("response_model", None) is None
+        ), "response_model is not supported for standard calls"
+        used_cache_handler = get_cache_handler(
+            use_ephemeral_cache_only=use_ephemeral_cache_only
+        )
+        lm_config["reasoning_effort"] = reasoning_effort
+        cache_result = used_cache_handler.hit_managed_cache(
+            model, messages, lm_config=lm_config, tools=tools
+        )
+        if cache_result:
+            return cache_result
+
+        # Common API parameters
+        api_params = {
+            "system": messages[0]["content"],
+            "messages": messages[1:],
+            "model": model,
+            "max_tokens": lm_config.get("max_tokens", 4096),
+            "temperature": lm_config.get(
+                "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
+            ),
+        }
+
+        # Add tools if provided
+        if tools:
+            api_params["tools"] = [tool.to_anthropic_tool() for tool in tools]
+
+        # Only try to add thinking if supported by the SDK
+        try:
+            import inspect
+
+            create_sig = inspect.signature(self.sync_client.messages.create)
+            if "thinking" in create_sig.parameters and model in CLAUDE_REASONING_MODELS:
+                api_params["temperature"] = 1
+                if reasoning_effort in ["high", "medium"]:
+                    budgets = SONNET_37_BUDGETS
+                    budget = budgets[reasoning_effort]
+                    api_params["thinking"] = {
+                        "type": "enabled",
+                        "budget_tokens": budget,
+                    }
+                    api_params["max_tokens"] = budget + 4096
+                    api_params["temperature"] = 1
+        except (ImportError, AttributeError, TypeError):
+            pass
+
+        # Make the API call
+        response = self.sync_client.messages.create(**api_params)
+
+        # Extract text content and tool calls
+        raw_response = ""
+        tool_calls = []
+
+        for content in response.content:
+            if content.type == "text":
+                raw_response += content.text
+            elif content.type == "tool_use":
+                tool_calls.append(
+                    {
+                        "id": content.id,
+                        "type": "function",
+                        "function": {
+                            "name": content.name,
+                            "arguments": json.dumps(content.input),
+                        },
+                    }
+                )
+
+        lm_response = BaseLMResponse(
+            raw_response=raw_response,
+            structured_output=None,
+            tool_calls=tool_calls if tool_calls else None,
+        )
+
+        lm_config["reasoning_effort"] = reasoning_effort
+        used_cache_handler.add_to_managed_cache(
+            model, messages, lm_config=lm_config, output=lm_response, tools=tools
+        )
+        return lm_response
+
+    async def _hit_api_async_structured_output(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        response_model: BaseModel,
+        temperature: float,
+        use_ephemeral_cache_only: bool = False,
+        reasoning_effort: str = "high",
+        **vendor_params: Dict[str, Any],
+    ) -> BaseLMResponse:
+        try:
+            # First try with Anthropic
+            reasoning_effort = vendor_params.get("reasoning_effort", reasoning_effort)
+            if model in CLAUDE_REASONING_MODELS:
+
+                # if reasoning_effort in ["high", "medium"]:
+                budgets = SONNET_37_BUDGETS
+                budget = budgets[reasoning_effort]
+                max_tokens = budget + 4096
+                temperature = 1
+
+                response = await self.async_client.messages.create(
+                    system=messages[0]["content"],
+                    messages=messages[1:],
+                    model=model,
+                    max_tokens=max_tokens,
+                    thinking={"type": "enabled", "budget_tokens": budget},
+                    temperature=temperature,
+                )
+            else:
+                response = await self.async_client.messages.create(
+                    system=messages[0]["content"],
+                    messages=messages[1:],
+                    model=model,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                )
+            result = response.content[0].text
+            parsed = json.loads(result)
+            lm_response = BaseLMResponse(
+                raw_response="",
+                structured_output=response_model(**parsed),
+                tool_calls=None,
+            )
+            return lm_response
+        except (json.JSONDecodeError, pydantic.ValidationError):
+            # If Anthropic fails, fallback to OpenAI
+            if self._openai_fallback is None:
+                self._openai_fallback = OpenAIStructuredOutputClient()
+            return await self._openai_fallback._hit_api_async_structured_output(
+                model="gpt-4o",  # Fallback to GPT-4
+                messages=messages,
+                response_model=response_model,
+                temperature=temperature,
+                use_ephemeral_cache_only=use_ephemeral_cache_only,
+            )
+
+    def _hit_api_sync_structured_output(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        response_model: BaseModel,
+        temperature: float,
+        use_ephemeral_cache_only: bool = False,
+        reasoning_effort: str = "high",
+        **vendor_params: Dict[str, Any],
+    ) -> BaseLMResponse:
+        try:
+            # First try with Anthropic
+            reasoning_effort = vendor_params.get("reasoning_effort", reasoning_effort)
+            import time
+
+            if model in CLAUDE_REASONING_MODELS:
+                if reasoning_effort in ["high", "medium"]:
+                    budgets = SONNET_37_BUDGETS
+                    budget = budgets[reasoning_effort]
+                    max_tokens = budget + 4096
+                    temperature = 1
+                response = self.sync_client.messages.create(
+                    system=messages[0]["content"],
+                    messages=messages[1:],
+                    model=model,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                    thinking={"type": "enabled", "budget_tokens": budget},
+                )
+            else:
+                response = self.sync_client.messages.create(
+                    system=messages[0]["content"],
+                    messages=messages[1:],
+                    model=model,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                )
+            # print("Time taken for API call", time.time() - t)
+            result = response.content[0].text
+            # Try to parse the result as JSON
+            parsed = json.loads(result)
+            lm_response = BaseLMResponse(
+                raw_response="",
+                structured_output=response_model(**parsed),
+                tool_calls=None,
+            )
+            return lm_response
+        except (json.JSONDecodeError, pydantic.ValidationError):
+            # If Anthropic fails, fallback to OpenAI
+            print("WARNING - Falling back to OpenAI - THIS IS SLOW")
+            if self._openai_fallback is None:
+                self._openai_fallback = OpenAIStructuredOutputClient()
+            return self._openai_fallback._hit_api_sync_structured_output(
+                model="gpt-4o",  # Fallback to GPT-4
+                messages=messages,
+                response_model=response_model,
+                temperature=temperature,
+                use_ephemeral_cache_only=use_ephemeral_cache_only,
+            )
+
+    async def _process_call_async(
+        self,
+        messages: List[Dict[str, Any]],
+        model: str,
+        response_model: BaseModel,
+        api_call_method,
+        temperature: float = 0.0,
+        use_ephemeral_cache_only: bool = False,
+        vendor_params: Dict[str, Any] = None,
+    ) -> BaseModel:
+        vendor_params = vendor_params or {}
+        # Each vendor can filter parameters they support
+        return await api_call_method(
+            messages=messages,
+            model=model,
+            temperature=temperature,
+            use_ephemeral_cache_only=use_ephemeral_cache_only,
+            **vendor_params,  # Pass all vendor-specific params
+        )
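For context, a minimal usage sketch of the new Anthropic client (not part of the diff). It assumes ANTHROPIC_API_KEY is set in the environment and uses a placeholder model id, since the actual contents of CLAUDE_REASONING_MODELS and SONNET_37_BUDGETS live in synth_ai/zyk/lms/constants.py and are not shown in this diff. Per the code above, the first message is treated as the system prompt and lm_config must not carry a response_model for standard calls.

from synth_ai.zyk.lms.vendors.core.anthropic_api import AnthropicAPI

client = AnthropicAPI(reasoning_effort="medium")
messages = [
    {"role": "system", "content": "You are a terse assistant."},
    {"role": "user", "content": "Name one prime number greater than 10."},
]
response = client._hit_api_sync(
    model="claude-3-7-sonnet-latest",  # placeholder model id, not taken from the diff
    messages=messages,
    lm_config={"max_tokens": 512},
    use_ephemeral_cache_only=True,  # keep the managed cache in memory for a quick test
)
print(response.raw_response)
print(response.tool_calls)  # None unless tools were passed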
--- /dev/null
+++ b/synth_ai/zyk/lms/vendors/core/gemini_api.py
@@ -0,0 +1,282 @@
+import json
+import logging
+import os
+import warnings
+from typing import Any, Dict, List, Optional, Tuple, Type
+
+from google import genai
+from google.api_core.exceptions import ResourceExhausted
+from google.genai import types
+from synth_ai.zyk.lms.caching.initialize import get_cache_handler
+from synth_ai.zyk.lms.tools.base import BaseTool
+from synth_ai.zyk.lms.vendors.base import BaseLMResponse, VendorBase
+from synth_ai.zyk.lms.constants import (
+    SPECIAL_BASE_TEMPS,
+    GEMINI_REASONING_MODELS,
+    GEMINI_THINKING_BUDGETS,
+)
+from synth_ai.zyk.lms.vendors.retries import BACKOFF_TOLERANCE, backoff
+import logging
+
+
+ALIASES = {
+    "gemini-2.5-flash": "gemini-2.5-flash-preview-04-17",
+}
+
+logger = logging.getLogger(__name__)
+_CLIENT = genai.Client()  # one client for everything
+GEMINI_EXCEPTIONS_TO_RETRY: Tuple[Type[Exception], ...] = (ResourceExhausted,)
+logging.getLogger("google.genai").setLevel(logging.ERROR)
+os.environ["GRPC_VERBOSITY"] = "ERROR"
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+warnings.filterwarnings("ignore")
+
+SAFETY_SETTINGS = {
+    types.HarmCategory.HARM_CATEGORY_HATE_SPEECH: types.HarmBlockThreshold.BLOCK_NONE,
+    types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: types.HarmBlockThreshold.BLOCK_NONE,
+    types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: types.HarmBlockThreshold.BLOCK_NONE,
+    types.HarmCategory.HARM_CATEGORY_HARASSMENT: types.HarmBlockThreshold.BLOCK_NONE,
+}
+
+
+class GeminiAPI(VendorBase):
+    used_for_structured_outputs: bool = True
+    exceptions_to_retry: Tuple[Type[Exception], ...] = GEMINI_EXCEPTIONS_TO_RETRY
+
+    def __init__(
+        self,
+        exceptions_to_retry: Tuple[Type[Exception], ...] = GEMINI_EXCEPTIONS_TO_RETRY,
+        used_for_structured_outputs: bool = False,
+    ):
+        self.used_for_structured_outputs = used_for_structured_outputs
+        self.exceptions_to_retry = exceptions_to_retry
+
+    def get_aliased_model_name(self, model_name: str) -> str:
+        if model_name in ALIASES:
+            return ALIASES[model_name]
+        return model_name
+
+    @staticmethod
+    def _msg_to_contents(messages: List[Dict[str, Any]]) -> List[types.Content]:
+        # contents, sys_instr = [], None
+        contents = []
+        for m in messages:
+            # if m["role"] == "system":
+            #     sys_instr = f"<instructions>\n{m['content']}\n</instructions>"
+            #     continue
+            # text = (sys_instr + "\n" + m["content"]) if sys_instr else m["content"]
+            if m["role"].lower() not in ["user", "assistant"]:
+                continue
+            role = "user" if m["role"] == "user" else "assistant"
+            contents.append(types.Content(role=role, parts=[types.Part.from_text(text=m["content"])]))
+        return contents
+
+    @staticmethod
+    def _tools_to_genai(tools: List[BaseTool]) -> List[types.Tool]:
+        """Convert internal BaseTool → genai Tool."""
+        out: List[types.Tool] = []
+        for t in tools:
+            # Assume t.to_gemini_tool() now correctly returns a FunctionDeclaration
+            # func_decl = t.to_gemini_tool()
+            if isinstance(t, dict):
+                func_decl = t
+            else:
+                func_decl = t.to_gemini_tool()
+            if not isinstance(func_decl, types.FunctionDeclaration):
+                # Or fetch schema parts if to_gemini_tool still returns dict
+                # This depends on BaseTool.to_gemini_tool implementation
+                tool_dict = func_decl  # Assuming it's a dict for now
+                func_decl = types.FunctionDeclaration(
+                    name=tool_dict['name'],
+                    description=tool_dict['description'],
+                    parameters=tool_dict['parameters'],  # Expects OpenAPI-style dict
+                )
+            out.append(types.Tool(function_declarations=[func_decl]))
+        return out
+
+    async def _gen_content_async(
+        self,
+        messages: List[Dict],
+        temperature: float,
+        model_name: str,
+        reasoning_effort: str,
+        tools: Optional[List[BaseTool]],
+        lm_config: Optional[Dict[str, Any]],
+    ) -> Tuple[str, Optional[List[Dict]]]:
+        model_name = self.get_aliased_model_name(model_name)
+        cfg_kwargs: Dict[str, Any] = {"temperature": temperature}
+        if model_name in GEMINI_REASONING_MODELS and reasoning_effort in GEMINI_THINKING_BUDGETS:
+            cfg_kwargs["thinking_config"] = types.ThinkingConfig(
+                thinking_budget=GEMINI_THINKING_BUDGETS[reasoning_effort]
+            )
+
+        if any(m["role"] == "system" for m in messages):
+            cfg_kwargs["system_instruction"] = next(m["content"] for m in messages if m["role"] == "system")
+
+        generation_config = types.GenerateContentConfig(
+            **cfg_kwargs,
+            tool_config=lm_config.get("tool_config") if lm_config else None,
+            tools=self._tools_to_genai(tools) if tools else None
+        )
+        resp = await _CLIENT.aio.models.generate_content(
+            model=model_name,
+            contents=self._msg_to_contents(messages),
+            config=generation_config,
+            # safety_settings=SAFETY_SETTINGS,
+        )
+        return self._extract(resp)
+
+    def _gen_content_sync(
+        self,
+        messages: List[Dict],
+        temperature: float,
+        model_name: str,
+        reasoning_effort: str,
+        tools: Optional[List[BaseTool]],
+        lm_config: Optional[Dict[str, Any]],
+    ) -> Tuple[str, Optional[List[Dict]]]:
+        model_name = self.get_aliased_model_name(model_name)
+        cfg_kwargs: Dict[str, Any] = {"temperature": temperature}
+        if model_name in GEMINI_REASONING_MODELS and reasoning_effort in GEMINI_THINKING_BUDGETS:
+            cfg_kwargs["thinking_config"] = types.ThinkingConfig(
+                thinking_budget=GEMINI_THINKING_BUDGETS[reasoning_effort]
+            )
+        if any(m["role"] == "system" for m in messages):
+            cfg_kwargs["system_instruction"] = next(m["content"] for m in messages if m["role"] == "system")
+        generation_config = types.GenerateContentConfig(
+            **cfg_kwargs,
+            tool_config=lm_config.get("tool_config") if lm_config else None,
+            tools=self._tools_to_genai(tools) if tools else None
+        )
+
+        resp = _CLIENT.models.generate_content(
+            model=model_name,
+            contents=self._msg_to_contents(messages),
+            safety_settings=SAFETY_SETTINGS,
+            config=generation_config,
+        )
+        return self._extract(resp)
+
+    @staticmethod
+    def _extract(response) -> Tuple[str, Optional[List[Dict]]]:
+        # Extract text, handling cases where it might be missing
+        try:
+            text = response.text
+        except ValueError:  # Handle cases where only non-text parts exist
+            text = ""
+
+        calls = []
+        # Access parts through candidates[0].content
+        if response.candidates and response.candidates[0].content:
+            for part in response.candidates[0].content.parts:
+                if part.function_call:
+                    calls.append(
+                        {
+                            "id": f"call_{len(calls) + 1}",
+                            "type": "function",
+                            "function": {
+                                "name": part.function_call.name,
+                                "arguments": json.dumps(dict(part.function_call.args)),
+                            },
+                        }
+                    )
+        return text, calls or None
+
+    @backoff.on_exception(
+        backoff.expo,
+        exceptions_to_retry,
+        max_tries=BACKOFF_TOLERANCE,
+        on_giveup=lambda e: print(e),
+    )
+    async def _hit_api_async(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        lm_config: Dict[str, Any],
+        use_ephemeral_cache_only: bool = False,
+        reasoning_effort: str = "high",
+        tools: Optional[List[BaseTool]] = None,
+    ) -> BaseLMResponse:
+        assert (
+            lm_config.get("response_model", None) is None
+        ), "response_model is not supported for standard calls"
+        used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
+        lm_config["reasoning_effort"] = reasoning_effort
+        cache_result = used_cache_handler.hit_managed_cache(
+            model, messages, lm_config=lm_config, tools=tools
+        )
+        if cache_result:
+            return cache_result
+
+        raw_response, tool_calls = await self._gen_content_async(
+            messages,
+            temperature=lm_config.get("temperature", SPECIAL_BASE_TEMPS.get(model, 0)),
+            reasoning_effort=reasoning_effort,
+            tools=tools,
+            lm_config=lm_config,
+            model_name=model,
+        )
+        if not raw_response:
+            raw_response = ""
+        lm_response = BaseLMResponse(
+            raw_response=raw_response,
+            structured_output=None,
+            tool_calls=tool_calls,
+        )
+
+        lm_config["reasoning_effort"] = reasoning_effort
+        used_cache_handler.add_to_managed_cache(
+            model, messages, lm_config=lm_config, output=lm_response, tools=tools
+        )
+        return lm_response
+
+    @backoff.on_exception(
+        backoff.expo,
+        exceptions_to_retry,
+        max_tries=BACKOFF_TOLERANCE,
+        on_giveup=lambda e: print(e),
+    )
+    def _hit_api_sync(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        lm_config: Dict[str, Any],
+        use_ephemeral_cache_only: bool = False,
+        reasoning_effort: str = "high",
+        tools: Optional[List[BaseTool]] = None,
+    ) -> BaseLMResponse:
+        assert (
+            lm_config.get("response_model", None) is None
+        ), "response_model is not supported for standard calls"
+        used_cache_handler = get_cache_handler(
+            use_ephemeral_cache_only=use_ephemeral_cache_only
+        )
+        lm_config["reasoning_effort"] = reasoning_effort
+        cache_result = used_cache_handler.hit_managed_cache(
+            model, messages, lm_config=lm_config, tools=tools
+        )
+        if cache_result:
+            return cache_result
+
+        raw_response, tool_calls = self._gen_content_sync(
+            messages,
+            temperature=lm_config.get("temperature", SPECIAL_BASE_TEMPS.get(model, 0)),
+            reasoning_effort=reasoning_effort,
+            tools=tools,
+            lm_config=lm_config,
+            model_name=model,
+        )
+        if not raw_response:
+            raw_response = ""
+        lm_response = BaseLMResponse(
+            raw_response=raw_response,
+            structured_output=None,
+            tool_calls=tool_calls,
+        )
+
+        lm_config["reasoning_effort"] = reasoning_effort
+        used_cache_handler.add_to_managed_cache(
+            model, messages, lm_config=lm_config, output=lm_response, tools=tools
+        )
+        return lm_response
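Similarly, a minimal usage sketch of the new Gemini client (not part of the diff). It assumes a Gemini API key is already configured in the environment before import, since the module builds a shared genai.Client() at import time, and it relies on the "gemini-2.5-flash" alias defined in the ALIASES map above; the contents of GEMINI_REASONING_MODELS and GEMINI_THINKING_BUDGETS come from synth_ai/zyk/lms/constants.py and are not shown in this diff.

from synth_ai.zyk.lms.vendors.core.gemini_api import GeminiAPI

client = GeminiAPI()
messages = [
    {"role": "system", "content": "Answer in one word."},  # forwarded as system_instruction
    {"role": "user", "content": "What color is a clear daytime sky?"},
]
response = client._hit_api_sync(
    model="gemini-2.5-flash",  # resolved to the preview model via ALIASES
    messages=messages,
    lm_config={"temperature": 0.2},
    use_ephemeral_cache_only=True,
)
print(response.raw_response)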