vectorvein 0.1.23__py3-none-any.whl → 0.1.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
@@ -2,6 +2,8 @@
 # @Date: 2024-06-17 23:47:49
 import json
 import random
+from functools import cached_property
+from typing import Iterable, Literal, Generator, AsyncGenerator, overload, Any
 
 import httpx
 
@@ -10,11 +12,18 @@ from .utils import cutoff_messages
 from ..types import defaults as defs
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
-from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
+from ..types.llm_parameters import (
+    NotGiven,
+    NOT_GIVEN,
+    ToolParam,
+    ToolChoice,
+    ChatCompletionMessage,
+    ChatCompletionDeltaMessage,
+)
 
 
 class GeminiChatClient(BaseChatClient):
-    DEFAULT_MODEL: str = defs.GEMINI_DEFAULT_MODEL
+    DEFAULT_MODEL: str | None = defs.GEMINI_DEFAULT_MODEL
     BACKEND_NAME: BackendType = BackendType.Gemini
 
     def __init__(
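All three clients now pull NotGiven, NOT_GIVEN, ToolParam and ToolChoice from the package's own ..types.llm_parameters module rather than importing the sentinel from openai._types directly (that old import is removed in the MiniMax and OpenAI-compatible files below). The llm_parameters module itself is not part of this diff; a hedged sketch of what such a re-export could look like, with the aliasing purely assumed:

    # llm_parameters.py (illustrative sketch only; not taken from this diff)
    from openai._types import NotGiven, NOT_GIVEN  # sentinel meaning "argument was not supplied"
    from openai.types.chat import ChatCompletionToolParam as ToolParam
    from openai.types.chat import ChatCompletionToolChoiceOptionParam as ToolChoice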
@@ -39,19 +48,49 @@ class GeminiChatClient(BaseChatClient):
             **kwargs,
         )
 
-    @property
+    @cached_property
     def raw_client(self):
         return self.http_client
 
+    @overload
     def create_completion(
         self,
-        messages: list = list,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> Generator[ChatCompletionDeltaMessage, None, None]:
+        pass
+
+    def create_completion(
+        self,
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools: list | None = None,
-        tool_choice: str | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         **kwargs,
     ):
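The two @overload declarations bind the return type to the literal value of stream, so a type checker resolves a blocking call to ChatCompletionMessage and a streaming call to a generator of ChatCompletionDeltaMessage. A minimal usage sketch; the client construction and the OpenAI-style message dicts are assumptions, not shown in this diff:

    # `client` is assumed to be an already-configured GeminiChatClient instance.
    message = client.create_completion(
        messages=[{"role": "user", "content": "Hello"}], stream=False
    )
    print(message)  # typed as ChatCompletionMessage via the stream=False overload

    for delta in client.create_completion(
        messages=[{"role": "user", "content": "Hello"}], stream=True
    ):
        print(delta)  # ChatCompletionDeltaMessage chunks via the stream=True overload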
@@ -121,14 +160,14 @@ class GeminiChatClient(BaseChatClient):
             params["alt"] = "sse"
 
             def generator():
-                result = {"content": ""}
+                result = {"content": "", "tool_calls": [], "usage": {}}
                 if self.http_client:
                     client = self.http_client
                 else:
                     client = httpx.Client()
                 with client.stream("POST", url, headers=headers, params=params, json=request_body) as response:
                     for chunk in response.iter_lines():
-                        message = {"content": ""}
+                        message = {"content": "", "tool_calls": []}
                         if not chunk.startswith("data:"):
                             continue
                         data = json.loads(chunk[5:])
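The streaming generator now seeds its accumulators with tool_calls and usage in addition to content. The per-line SSE handling itself is unchanged; a standalone sketch of that parsing step, with an illustrative payload:

    import json

    def parse_sse_line(chunk: str) -> dict | None:
        # Mirrors the generator above: ignore non-data lines, strip the "data:" prefix, parse JSON.
        if not chunk.startswith("data:"):
            return None
        return json.loads(chunk[5:])

    print(parse_sse_line('data: {"candidates": []}'))  # {'candidates': []}
    print(parse_sse_line(": keep-alive comment"))      # None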
@@ -197,7 +236,7 @@ class GeminiChatClient(BaseChatClient):
 
 
 class AsyncGeminiChatClient(BaseAsyncChatClient):
-    DEFAULT_MODEL: str = defs.GEMINI_DEFAULT_MODEL
+    DEFAULT_MODEL: str | None = defs.GEMINI_DEFAULT_MODEL
     BACKEND_NAME: BackendType = BackendType.Gemini
 
     def __init__(
@@ -222,19 +261,49 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             **kwargs,
         )
 
-    @property
+    @cached_property
     def raw_client(self):
         return self.http_client
 
+    @overload
+    async def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    async def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
+        pass
+
     async def create_completion(
         self,
-        messages: list = list,
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools: list | None = None,
-        tool_choice: str | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         **kwargs,
     ):
@@ -304,14 +373,14 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             params["alt"] = "sse"
 
             async def generator():
-                result = {"content": ""}
+                result = {"content": "", "tool_calls": [], "usage": {}}
                 if self.http_client:
                     client = self.http_client
                 else:
                     client = httpx.AsyncClient()
                 async with client.stream("POST", url, headers=headers, params=params, json=request_body) as response:
                     async for chunk in response.aiter_lines():
-                        message = {"content": ""}
+                        message = {"content": "", "tool_calls": []}
                         if not chunk.startswith("data:"):
                             continue
                         data = json.loads(chunk[5:])
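The async client mirrors the sync one and returns an async generator when stream=True. A minimal consumption sketch; the configured AsyncGeminiChatClient and the message format are assumptions, not shown here:

    async def stream_reply(client) -> None:
        # Awaiting the call yields the AsyncGenerator selected by the stream=True overload.
        async for delta in await client.create_completion(
            messages=[{"role": "user", "content": "Hi"}], stream=True
        ):
            print(delta)  # ChatCompletionDeltaMessage chunks

    # Run with asyncio.run(stream_reply(client)); client construction is outside this diff.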
@@ -2,16 +2,23 @@
 # @Date: 2024-07-26 14:48:55
 import json
 import random
-
+from functools import cached_property
+from typing import Iterable, Literal, Generator, AsyncGenerator, overload, Any
 import httpx
-from openai._types import NotGiven
 
 from ..settings import settings
 from ..types import defaults as defs
 from .utils import cutoff_messages, get_token_counts
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
-from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
+from ..types.llm_parameters import (
+    NotGiven,
+    NOT_GIVEN,
+    ToolParam,
+    ToolChoice,
+    ChatCompletionMessage,
+    ChatCompletionDeltaMessage,
+)
 
 
 def extract_tool_calls(response):
@@ -37,7 +44,7 @@ def extract_tool_calls(response):
 
 
 class MiniMaxChatClient(BaseChatClient):
-    DEFAULT_MODEL: str = defs.MINIMAX_DEFAULT_MODEL
+    DEFAULT_MODEL: str | None = defs.MINIMAX_DEFAULT_MODEL
     BACKEND_NAME: BackendType = BackendType.MiniMax
 
     def __init__(
@@ -66,19 +73,50 @@ class MiniMaxChatClient(BaseChatClient):
         else:
             self.http_client = httpx.Client()
 
-    @property
+    @cached_property
     def raw_client(self):
         return self.http_client
 
+    @overload
+    def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> Generator[ChatCompletionDeltaMessage, None, None]:
+        pass
+
     def create_completion(
         self,
-        messages: list = list,
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools: list | None = None,
-        tool_choice: str = "auto",
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
        **kwargs,
     ):
         if model is not None:
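In every client, tools and tool_choice now default to the NOT_GIVEN sentinel (the convention used by the openai SDK) rather than None or "auto", which distinguishes "the caller never passed this" from an explicit value. A self-contained illustration of the sentinel idea; vectorvein's real NotGiven comes from ..types.llm_parameters:

    class NotGiven:
        """Illustrative stand-in for the sentinel type."""

        def __bool__(self) -> bool:
            return False  # lets `if tools:` skip parameters that were never supplied

        def __repr__(self) -> str:
            return "NOT_GIVEN"

    NOT_GIVEN = NotGiven()

    def tools_kwargs(tools=NOT_GIVEN) -> dict:
        # Forward `tools` only when the caller actually provided a value (even an empty list).
        return {} if isinstance(tools, NotGiven) else {"tools": tools}

    print(tools_kwargs())          # {}
    print(tools_kwargs(tools=[]))  # {'tools': []}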
@@ -206,7 +244,7 @@ class MiniMaxChatClient(BaseChatClient):
 
 
 class AsyncMiniMaxChatClient(BaseAsyncChatClient):
-    DEFAULT_MODEL: str = defs.MINIMAX_DEFAULT_MODEL
+    DEFAULT_MODEL: str | None = defs.MINIMAX_DEFAULT_MODEL
     BACKEND_NAME: BackendType = BackendType.MiniMax
 
     def __init__(
@@ -235,19 +273,50 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         else:
             self.http_client = httpx.AsyncClient()
 
-    @property
+    @cached_property
     def raw_client(self):
         return self.http_client
 
+    @overload
+    async def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    async def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
+        pass
+
     async def create_completion(
         self,
-        messages: list = list,
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools: list | None = None,
-        tool_choice: str = "auto",
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
        **kwargs,
     ):
         if model is not None:
@@ -3,9 +3,9 @@
 import json
 import random
 from functools import cached_property
+from typing import overload, Generator, AsyncGenerator, Any, Literal, Iterable
 
 import httpx
-from openai._types import NotGiven, NOT_GIVEN
 from openai._streaming import Stream, AsyncStream
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
 from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
@@ -20,11 +20,18 @@ from .utils import (
 from ..settings import settings
 from ..types import defaults as defs
 from ..types.enums import ContextLengthControlType, BackendType
-from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
+from ..types.llm_parameters import (
+    NotGiven,
+    NOT_GIVEN,
+    ToolParam,
+    ToolChoice,
+    ChatCompletionMessage,
+    ChatCompletionDeltaMessage,
+)
 
 
 class OpenAICompatibleChatClient(BaseChatClient):
-    DEFAULT_MODEL: str = ""
+    DEFAULT_MODEL: str | None = ""
     BACKEND_NAME: BackendType
 
     def __init__(
@@ -50,7 +57,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         )
 
     @cached_property
-    def raw_client(self):
+    def raw_client(self) -> OpenAI | AzureOpenAI:
         if self.random_endpoint:
             self.random_endpoint = True
             self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
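raw_client is now a functools.cached_property on every client, so the endpoint picked with random.choice and the constructed SDK client are computed once per instance and reused rather than rebuilt on each access. A small standalone illustration of that caching behaviour:

    import random
    from functools import cached_property

    class Picker:
        @cached_property
        def endpoint(self) -> str:
            # Evaluated on first access only; later accesses return the cached value.
            return random.choice(["https://a.example", "https://b.example"])

    p = Picker()
    assert p.endpoint == p.endpoint  # stable for the lifetime of the instance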
@@ -70,15 +77,46 @@ class OpenAICompatibleChatClient(BaseChatClient):
             http_client=self.http_client,
         )
 
+    @overload
+    def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> Generator[ChatCompletionDeltaMessage, None, None]:
+        pass
+
     def create_completion(
         self,
-        messages: list = list,
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools: list | NotGiven = NOT_GIVEN,
-        tool_choice: str | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -114,29 +152,34 @@ class OpenAICompatibleChatClient(BaseChatClient):
 
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
-            token_counts = get_message_token_counts(messages=messages, tools=tools_params, model=self.model_setting.id)
+            token_counts = get_message_token_counts(messages=messages, tools=tools, model=self.model_setting.id)
             if max_output_tokens is not None:
                 max_tokens = self.model_setting.context_length - token_counts
                 max_tokens = min(max(max_tokens, 1), max_output_tokens)
             else:
                 max_tokens = self.model_setting.context_length - token_counts
 
-        response: ChatCompletion | Stream[ChatCompletionChunk] = self.raw_client.chat.completions.create(
-            model=self.model_setting.id,
-            messages=messages,
-            stream=self.stream,
-            temperature=self.temperature,
-            max_tokens=max_tokens,
-            **tools_params,
-            **kwargs,
-        )
+        if response_format and self.model_setting.response_format_available:
+            self.response_format = {"response_format": response_format}
+        else:
+            self.response_format = {}
 
         if self.stream:
+            stream_response: Stream[ChatCompletionChunk] = self.raw_client.chat.completions.create(
+                model=self.model_setting.id,
+                messages=messages,
+                stream=True,
+                temperature=self.temperature,
+                max_tokens=max_tokens,
+                **self.response_format,
+                **tools_params,
+                **kwargs,
+            )
 
             def generator():
                 full_content = ""
                 result = {}
-                for chunk in response:
+                for chunk in stream_response:
                     if len(chunk.choices) == 0:
                         continue
                     if not chunk.choices[0].delta:
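When max_tokens is not supplied, the remaining context budget is clamped into the range [1, max_output_tokens]. A worked example with made-up numbers:

    # Hypothetical model settings: 16384-token context, 4096-token output cap,
    # and a prompt that get_message_token_counts measures at 15000 tokens.
    context_length, max_output_tokens, token_counts = 16384, 4096, 15000
    max_tokens = min(max(context_length - token_counts, 1), max_output_tokens)
    print(max_tokens)  # 1384: whatever still fits in the context window, capped at 4096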
@@ -163,9 +206,20 @@ class OpenAICompatibleChatClient(BaseChatClient):
 
             return generator()
         else:
+            response: ChatCompletion = self.raw_client.chat.completions.create(
+                model=self.model_setting.id,
+                messages=messages,
+                stream=False,
+                temperature=self.temperature,
+                max_tokens=max_tokens,
+                **self.response_format,
+                **tools_params,
+                **kwargs,
+            )
+
             result = {
                 "content": response.choices[0].message.content,
-                "usage": response.usage.model_dump(),
+                "usage": response.usage.model_dump() if response.usage else None,
             }
             if tools:
                 if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
@@ -184,7 +238,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
 
 
 class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
-    DEFAULT_MODEL: str = ""
+    DEFAULT_MODEL: str | None = ""
     BACKEND_NAME: BackendType
 
     def __init__(
@@ -230,15 +284,46 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             http_client=self.http_client,
         )
 
+    @overload
     async def create_completion(
         self,
-        messages: list = list,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    async def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
+        pass
+
+    async def create_completion(
+        self,
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools: list | NotGiven = NOT_GIVEN,
-        tool_choice: str | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -272,31 +357,36 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         else:
             tools_params = {}
 
+        if response_format and self.model_setting.response_format_available:
+            self.response_format = {"response_format": response_format}
+        else:
+            self.response_format = {}
+
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
-            token_counts = get_message_token_counts(messages=messages, tools=tools_params, model=self.model_setting.id)
+            token_counts = get_message_token_counts(messages=messages, tools=tools, model=self.model_setting.id)
             if max_output_tokens is not None:
                 max_tokens = self.model_setting.context_length - token_counts
                 max_tokens = min(max(max_tokens, 1), max_output_tokens)
             else:
                 max_tokens = self.model_setting.context_length - token_counts
 
-        response: ChatCompletion | AsyncStream[ChatCompletionChunk] = await self.raw_client.chat.completions.create(
-            model=self.model_setting.id,
-            messages=messages,
-            stream=self.stream,
-            temperature=self.temperature,
-            max_tokens=max_tokens,
-            **tools_params,
-            **kwargs,
-        )
-
         if self.stream:
+            stream_response: AsyncStream[ChatCompletionChunk] = await self.raw_client.chat.completions.create(
+                model=self.model_setting.id,
+                messages=messages,
+                stream=self.stream,
+                temperature=self.temperature,
+                max_tokens=max_tokens,
+                **self.response_format,
+                **tools_params,
+                **kwargs,
+            )
 
             async def generator():
                 full_content = ""
                 result = {}
-                async for chunk in response:
+                async for chunk in stream_response:
                     if len(chunk.choices) == 0:
                         continue
                     if not chunk.choices[0].delta:
@@ -323,9 +413,19 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
 
             return generator()
         else:
+            response: ChatCompletion = await self.raw_client.chat.completions.create(
+                model=self.model_setting.id,
+                messages=messages,
+                stream=self.stream,
+                temperature=self.temperature,
+                max_tokens=max_tokens,
+                **self.response_format,
+                **tools_params,
+                **kwargs,
+            )
             result = {
                 "content": response.choices[0].message.content,
-                "usage": response.usage.model_dump(),
+                "usage": response.usage.model_dump() if response.usage else None,
             }
             if tools:
                 if self.model_setting.function_call_available and response.choices[0].message.tool_calls: