vectorvein 0.1.23__py3-none-any.whl → 0.1.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectorvein/chat_clients/anthropic_client.py +175 -56
- vectorvein/chat_clients/base_client.py +92 -15
- vectorvein/chat_clients/gemini_client.py +84 -15
- vectorvein/chat_clients/minimax_client.py +82 -13
- vectorvein/chat_clients/openai_compatible_client.py +136 -36
- vectorvein/chat_clients/utils.py +45 -17
- vectorvein/types/defaults.py +57 -1
- vectorvein/types/llm_parameters.py +24 -3
- {vectorvein-0.1.23.dist-info → vectorvein-0.1.25.dist-info}/METADATA +1 -1
- {vectorvein-0.1.23.dist-info → vectorvein-0.1.25.dist-info}/RECORD +12 -12
- {vectorvein-0.1.23.dist-info → vectorvein-0.1.25.dist-info}/WHEEL +1 -1
- {vectorvein-0.1.23.dist-info → vectorvein-0.1.25.dist-info}/entry_points.txt +0 -0
vectorvein/chat_clients/gemini_client.py

@@ -2,6 +2,8 @@
 # @Date: 2024-06-17 23:47:49
 import json
 import random
+from functools import cached_property
+from typing import Iterable, Literal, Generator, AsyncGenerator, overload, Any
 
 import httpx
 
@@ -10,11 +12,18 @@ from .utils import cutoff_messages
 from ..types import defaults as defs
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
-from ..types.llm_parameters import
+from ..types.llm_parameters import (
+    NotGiven,
+    NOT_GIVEN,
+    ToolParam,
+    ToolChoice,
+    ChatCompletionMessage,
+    ChatCompletionDeltaMessage,
+)
 
 
 class GeminiChatClient(BaseChatClient):
-    DEFAULT_MODEL: str = defs.GEMINI_DEFAULT_MODEL
+    DEFAULT_MODEL: str | None = defs.GEMINI_DEFAULT_MODEL
     BACKEND_NAME: BackendType = BackendType.Gemini
 
     def __init__(
@@ -39,19 +48,49 @@ class GeminiChatClient(BaseChatClient):
             **kwargs,
         )
 
-    @
+    @cached_property
     def raw_client(self):
         return self.http_client
 
+    @overload
     def create_completion(
         self,
-        messages: list
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> Generator[ChatCompletionDeltaMessage, None, None]:
+        pass
+
+    def create_completion(
+        self,
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools:
-        tool_choice:
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         **kwargs,
    ):
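The `@overload` pairs keyed on `stream: Literal[False]` / `stream: Literal[True]` are what give callers a precise return type: a `ChatCompletionMessage` when not streaming, a generator of `ChatCompletionDeltaMessage` when streaming, while the single runtime implementation keeps `stream: bool | None`. A minimal, self-contained sketch of the same pattern (simplified return types and names, not the package's actual signatures):

```python
from typing import Generator, Literal, overload


@overload
def create_completion(stream: Literal[False] = False) -> str: ...
@overload
def create_completion(stream: Literal[True] = True) -> Generator[str, None, None]: ...


def create_completion(stream: bool = False):
    """Single runtime implementation; the overloads above exist only for type checkers."""
    if stream:
        return (piece for piece in ("Hel", "lo"))  # streamed deltas
    return "Hello"  # complete message


full = create_completion(stream=False)   # a checker infers: str
parts = create_completion(stream=True)   # a checker infers: Generator[str, None, None]
print(full, "".join(parts))
```

Because the real implementation still accepts `stream: bool | None = None`, the overloads change only what type checkers see; runtime behavior is unchanged.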
@@ -121,14 +160,14 @@
         params["alt"] = "sse"
 
         def generator():
-            result = {"content": ""}
+            result = {"content": "", "tool_calls": [], "usage": {}}
             if self.http_client:
                 client = self.http_client
             else:
                 client = httpx.Client()
             with client.stream("POST", url, headers=headers, params=params, json=request_body) as response:
                 for chunk in response.iter_lines():
-                    message = {"content": ""}
+                    message = {"content": "", "tool_calls": []}
                     if not chunk.startswith("data:"):
                         continue
                     data = json.loads(chunk[5:])
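Seeding `result` with empty `tool_calls` and `usage` (and each per-chunk `message` with `tool_calls`) lets the SSE generator accumulate function-call fragments and token usage alongside the text. A rough sketch of that accumulation; the chunk dictionaries below are illustrative, not the exact Gemini payload:

```python
# Illustrative chunk shapes: each parsed SSE line yields a partial message.
chunks = [
    {"content": "Hel", "tool_calls": []},
    {"content": "lo", "tool_calls": [{"name": "get_weather"}]},
    {"content": "", "tool_calls": [], "usage": {"total_tokens": 12}},
]

result = {"content": "", "tool_calls": [], "usage": {}}
for message in chunks:
    result["content"] += message.get("content", "")
    result["tool_calls"].extend(message.get("tool_calls", []))
    result["usage"].update(message.get("usage", {}))

print(result)  # text, tool calls, and usage merged into one final message
```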
@@ -197,7 +236,7 @@
 
 
 class AsyncGeminiChatClient(BaseAsyncChatClient):
-    DEFAULT_MODEL: str = defs.GEMINI_DEFAULT_MODEL
+    DEFAULT_MODEL: str | None = defs.GEMINI_DEFAULT_MODEL
     BACKEND_NAME: BackendType = BackendType.Gemini
 
     def __init__(
@@ -222,19 +261,49 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             **kwargs,
         )
 
-    @
+    @cached_property
     def raw_client(self):
         return self.http_client
 
+    @overload
+    async def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    async def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
+        pass
+
     async def create_completion(
         self,
-        messages: list
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools:
-        tool_choice:
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         **kwargs,
     ):
@@ -304,14 +373,14 @@
         params["alt"] = "sse"
 
         async def generator():
-            result = {"content": ""}
+            result = {"content": "", "tool_calls": [], "usage": {}}
             if self.http_client:
                 client = self.http_client
             else:
                 client = httpx.AsyncClient()
             async with client.stream("POST", url, headers=headers, params=params, json=request_body) as response:
                 async for chunk in response.aiter_lines():
-                    message = {"content": ""}
+                    message = {"content": "", "tool_calls": []}
                     if not chunk.startswith("data:"):
                         continue
                     data = json.loads(chunk[5:])
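On the async side the split is the same: `create_completion` is a coroutine, so a caller awaits it and then either holds a complete `ChatCompletionMessage` or iterates an async generator of `ChatCompletionDeltaMessage` items. A hedged usage sketch; how the client is constructed and the exact message shape are assumptions, not shown in this diff:

```python
from vectorvein.chat_clients.gemini_client import AsyncGeminiChatClient


async def demo(client: AsyncGeminiChatClient) -> None:
    # `client` is assumed to be an already-configured instance; construction is out of scope here.
    messages = [{"role": "user", "content": "Hello"}]  # message shape is illustrative

    reply = await client.create_completion(messages=messages, stream=False)
    print(reply)  # one ChatCompletionMessage

    stream = await client.create_completion(messages=messages, stream=True)
    async for delta in stream:  # ChatCompletionDeltaMessage items
        print(delta)
```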
vectorvein/chat_clients/minimax_client.py

@@ -2,16 +2,23 @@
 # @Date: 2024-07-26 14:48:55
 import json
 import random
-
+from functools import cached_property
+from typing import Iterable, Literal, Generator, AsyncGenerator, overload, Any
 import httpx
-from openai._types import NotGiven
 
 from ..settings import settings
 from ..types import defaults as defs
 from .utils import cutoff_messages, get_token_counts
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
-from ..types.llm_parameters import
+from ..types.llm_parameters import (
+    NotGiven,
+    NOT_GIVEN,
+    ToolParam,
+    ToolChoice,
+    ChatCompletionMessage,
+    ChatCompletionDeltaMessage,
+)
 
 
 def extract_tool_calls(response):
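Both clients used to pull `NotGiven` from `openai._types`; the import block above now takes `NotGiven`/`NOT_GIVEN` from `..types.llm_parameters`, so the sentinel lives (or is re-exported) in one place and non-OpenAI backends no longer depend on a private OpenAI module. The point of the sentinel is to distinguish "parameter not supplied" from "parameter explicitly set to None". A minimal sketch of the idea; the sentinel class here is illustrative, not the package's actual definition:

```python
class NotGiven:
    """Sentinel type: marks parameters the caller did not supply at all."""

    def __repr__(self) -> str:
        return "NOT_GIVEN"


NOT_GIVEN = NotGiven()


def create_completion(tools: list | None | NotGiven = NOT_GIVEN) -> str:
    if isinstance(tools, NotGiven):
        return "tools omitted -> leave the field out of the request entirely"
    if tools is None:
        return "caller explicitly disabled tools"
    return f"caller supplied {len(tools)} tool(s)"


print(create_completion())                                 # omitted
print(create_completion(tools=None))                       # explicitly disabled
print(create_completion(tools=[{"name": "get_weather"}]))  # 1 tool supplied
```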
@@ -37,7 +44,7 @@ def extract_tool_calls(response):
 
 
 class MiniMaxChatClient(BaseChatClient):
-    DEFAULT_MODEL: str = defs.MINIMAX_DEFAULT_MODEL
+    DEFAULT_MODEL: str | None = defs.MINIMAX_DEFAULT_MODEL
     BACKEND_NAME: BackendType = BackendType.MiniMax
 
     def __init__(
@@ -66,19 +73,50 @@ class MiniMaxChatClient(BaseChatClient):
         else:
             self.http_client = httpx.Client()
 
-    @
+    @cached_property
     def raw_client(self):
         return self.http_client
 
+    @overload
+    def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> Generator[ChatCompletionDeltaMessage, None, None]:
+        pass
+
     def create_completion(
         self,
-        messages: list
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools:
-        tool_choice:
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -206,7 +244,7 @@ class MiniMaxChatClient(BaseChatClient):
 
 
 class AsyncMiniMaxChatClient(BaseAsyncChatClient):
-    DEFAULT_MODEL: str = defs.MINIMAX_DEFAULT_MODEL
+    DEFAULT_MODEL: str | None = defs.MINIMAX_DEFAULT_MODEL
     BACKEND_NAME: BackendType = BackendType.MiniMax
 
     def __init__(
@@ -235,19 +273,50 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         else:
             self.http_client = httpx.AsyncClient()
 
-    @
+    @cached_property
     def raw_client(self):
         return self.http_client
 
+    @overload
+    async def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    async def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
+        pass
+
     async def create_completion(
         self,
-        messages: list
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools:
-        tool_choice:
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
         **kwargs,
     ):
         if model is not None:
vectorvein/chat_clients/openai_compatible_client.py

@@ -3,9 +3,9 @@
 import json
 import random
 from functools import cached_property
+from typing import overload, Generator, AsyncGenerator, Any, Literal, Iterable
 
 import httpx
-from openai._types import NotGiven, NOT_GIVEN
 from openai._streaming import Stream, AsyncStream
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
 from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
@@ -20,11 +20,18 @@ from .utils import (
 from ..settings import settings
 from ..types import defaults as defs
 from ..types.enums import ContextLengthControlType, BackendType
-from ..types.llm_parameters import
+from ..types.llm_parameters import (
+    NotGiven,
+    NOT_GIVEN,
+    ToolParam,
+    ToolChoice,
+    ChatCompletionMessage,
+    ChatCompletionDeltaMessage,
+)
 
 
 class OpenAICompatibleChatClient(BaseChatClient):
-    DEFAULT_MODEL: str = ""
+    DEFAULT_MODEL: str | None = ""
     BACKEND_NAME: BackendType
 
     def __init__(
@@ -50,7 +57,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         )
 
     @cached_property
-    def raw_client(self):
+    def raw_client(self) -> OpenAI | AzureOpenAI:
         if self.random_endpoint:
             self.random_endpoint = True
             self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
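Annotating `raw_client` as `OpenAI | AzureOpenAI` and exposing it as a `@cached_property` (the Gemini and MiniMax clients gain the same decorator in place of the one the diff viewer truncates) means the random endpoint selection and client construction run once per instance and the result is reused on every later access. A small sketch of that caching behavior, with a stand-in for the real client construction:

```python
import random
from functools import cached_property


class FakeChatClient:
    """Stand-in class: demonstrates only the cached_property behavior, not the real client."""

    def __init__(self, endpoints: list[str]):
        self.endpoints = endpoints

    @cached_property
    def raw_client(self) -> str:
        # Runs once per instance; the randomly chosen endpoint is then reused.
        endpoint = random.choice(self.endpoints)
        print(f"building client for {endpoint}")
        return f"client({endpoint})"


c = FakeChatClient(["https://a.example", "https://b.example"])
assert c.raw_client is c.raw_client  # second access hits the cache, nothing is rebuilt
print(c.raw_client)
```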
@@ -70,15 +77,46 @@ class OpenAICompatibleChatClient(BaseChatClient):
             http_client=self.http_client,
         )
 
+    @overload
+    def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> Generator[ChatCompletionDeltaMessage, None, None]:
+        pass
+
     def create_completion(
         self,
-        messages: list
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools:
-        tool_choice:
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -114,29 +152,34 @@ class OpenAICompatibleChatClient(BaseChatClient):
 
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
-            token_counts = get_message_token_counts(messages=messages, tools=
+            token_counts = get_message_token_counts(messages=messages, tools=tools, model=self.model_setting.id)
             if max_output_tokens is not None:
                 max_tokens = self.model_setting.context_length - token_counts
                 max_tokens = min(max(max_tokens, 1), max_output_tokens)
             else:
                 max_tokens = self.model_setting.context_length - token_counts
 
-
-
-
-
-            temperature=self.temperature,
-            max_tokens=max_tokens,
-            **tools_params,
-            **kwargs,
-        )
+        if response_format and self.model_setting.response_format_available:
+            self.response_format = {"response_format": response_format}
+        else:
+            self.response_format = {}
 
         if self.stream:
+            stream_response: Stream[ChatCompletionChunk] = self.raw_client.chat.completions.create(
+                model=self.model_setting.id,
+                messages=messages,
+                stream=True,
+                temperature=self.temperature,
+                max_tokens=max_tokens,
+                **self.response_format,
+                **tools_params,
+                **kwargs,
+            )
 
             def generator():
                 full_content = ""
                 result = {}
-                for chunk in
+                for chunk in stream_response:
                     if len(chunk.choices) == 0:
                         continue
                     if not chunk.choices[0].delta:
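Two behaviors in this hunk deserve spelling out. First, when the caller gives no `max_tokens`, the budget becomes the model's context length minus the counted prompt tokens, clamped into `[1, max_output_tokens]` when the model declares an output cap. Second, `response_format` is only forwarded when the model reports `response_format_available`, by expanding either `{"response_format": ...}` or an empty dict into the request. A sketch with made-up numbers; the real values come from `model_setting` and the token counter:

```python
# Illustrative numbers, not real model limits.
context_length = 8192
max_output_tokens = 1024
token_counts = 7600  # tokens already consumed by messages + tools

max_tokens = context_length - token_counts               # 592 tokens of window left
max_tokens = min(max(max_tokens, 1), max_output_tokens)  # clamp into [1, max_output_tokens]
print(max_tokens)  # 592; would clamp to 1024 with more room, to 1 with none

# response_format is forwarded only when the model supports it.
response_format = {"type": "json_object"}
response_format_available = False
extra = {"response_format": response_format} if response_format and response_format_available else {}

request_kwargs = dict(model="some-model", stream=True, max_tokens=max_tokens, **extra)
print(request_kwargs)  # no "response_format" key for models that lack support
```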
@@ -163,9 +206,20 @@ class OpenAICompatibleChatClient(BaseChatClient):
 
             return generator()
         else:
+            response: ChatCompletion = self.raw_client.chat.completions.create(
+                model=self.model_setting.id,
+                messages=messages,
+                stream=False,
+                temperature=self.temperature,
+                max_tokens=max_tokens,
+                **self.response_format,
+                **tools_params,
+                **kwargs,
+            )
+
             result = {
                 "content": response.choices[0].message.content,
-                "usage": response.usage.model_dump(),
+                "usage": response.usage.model_dump() if response.usage else None,
             }
             if tools:
                 if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
@@ -184,7 +238,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
 
 
 class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
-    DEFAULT_MODEL: str = ""
+    DEFAULT_MODEL: str | None = ""
     BACKEND_NAME: BackendType
 
     def __init__(
@@ -230,15 +284,46 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             http_client=self.http_client,
         )
 
+    @overload
     async def create_completion(
         self,
-        messages: list
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    async def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
+        pass
+
+    async def create_completion(
+        self,
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools:
-        tool_choice:
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -272,31 +357,36 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         else:
             tools_params = {}
 
+        if response_format and self.model_setting.response_format_available:
+            self.response_format = {"response_format": response_format}
+        else:
+            self.response_format = {}
+
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
-            token_counts = get_message_token_counts(messages=messages, tools=
+            token_counts = get_message_token_counts(messages=messages, tools=tools, model=self.model_setting.id)
             if max_output_tokens is not None:
                 max_tokens = self.model_setting.context_length - token_counts
                 max_tokens = min(max(max_tokens, 1), max_output_tokens)
             else:
                 max_tokens = self.model_setting.context_length - token_counts
 
-        response: ChatCompletion | AsyncStream[ChatCompletionChunk] = await self.raw_client.chat.completions.create(
-            model=self.model_setting.id,
-            messages=messages,
-            stream=self.stream,
-            temperature=self.temperature,
-            max_tokens=max_tokens,
-            **tools_params,
-            **kwargs,
-        )
-
         if self.stream:
+            stream_response: AsyncStream[ChatCompletionChunk] = await self.raw_client.chat.completions.create(
+                model=self.model_setting.id,
+                messages=messages,
+                stream=self.stream,
+                temperature=self.temperature,
+                max_tokens=max_tokens,
+                **self.response_format,
+                **tools_params,
+                **kwargs,
+            )
 
             async def generator():
                 full_content = ""
                 result = {}
-                async for chunk in
+                async for chunk in stream_response:
                     if len(chunk.choices) == 0:
                         continue
                     if not chunk.choices[0].delta:
@@ -323,9 +413,19 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
 
             return generator()
         else:
+            response: ChatCompletion = await self.raw_client.chat.completions.create(
+                model=self.model_setting.id,
+                messages=messages,
+                stream=self.stream,
+                temperature=self.temperature,
+                max_tokens=max_tokens,
+                **self.response_format,
+                **tools_params,
+                **kwargs,
+            )
             result = {
                 "content": response.choices[0].message.content,
-                "usage": response.usage.model_dump(),
+                "usage": response.usage.model_dump() if response.usage else None,
             }
             if tools:
                 if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
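Splitting the old single `create(...)` call into a streaming branch typed `AsyncStream[ChatCompletionChunk]` (or `Stream[...]` in the sync client) and a non-streaming branch typed `ChatCompletion` removes the union type the previous code had to carry, and the new `response.usage.model_dump() if response.usage else None` guard covers OpenAI-compatible servers that omit usage data. A minimal sketch of that guard; the response classes below are stand-ins, not the openai types:

```python
from dataclasses import dataclass


@dataclass
class FakeUsage:
    total_tokens: int

    def model_dump(self) -> dict:
        return {"total_tokens": self.total_tokens}


@dataclass
class FakeResponse:
    content: str
    usage: FakeUsage | None = None  # some OpenAI-compatible servers return no usage


def to_result(response: FakeResponse) -> dict:
    return {
        "content": response.content,
        # The 0.1.25 guard: without it, .model_dump() raises AttributeError when usage is missing.
        "usage": response.usage.model_dump() if response.usage else None,
    }


print(to_result(FakeResponse("hi", FakeUsage(12))))  # usage dict preserved
print(to_result(FakeResponse("hi")))                 # usage falls back to None
```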