vectorvein 0.1.15__py3-none-any.whl → 0.1.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectorvein/chat_clients/__init__.py +2 -1
- vectorvein/chat_clients/anthropic_client.py +93 -91
- vectorvein/chat_clients/base_client.py +12 -0
- vectorvein/chat_clients/openai_compatible_client.py +82 -41
- vectorvein/chat_clients/utils.py +41 -1
- {vectorvein-0.1.15.dist-info → vectorvein-0.1.17.dist-info}/METADATA +1 -1
- {vectorvein-0.1.15.dist-info → vectorvein-0.1.17.dist-info}/RECORD +8 -8
- {vectorvein-0.1.15.dist-info → vectorvein-0.1.17.dist-info}/WHEEL +0 -0
vectorvein/chat_clients/__init__.py
CHANGED
@@ -20,7 +20,7 @@ from .deepseek_client import DeepSeekChatClient, AsyncDeepSeekChatClient
 from ..types import defaults as defs
 from ..types.enums import BackendType, ContextLengthControlType
 from .anthropic_client import AnthropicChatClient, AsyncAnthropicChatClient
-from .utils import format_messages, get_token_counts, ToolCallContentProcessor
+from .utils import format_messages, get_token_counts, get_message_token_counts, ToolCallContentProcessor
 
 
 BackendMap = {
@@ -125,5 +125,6 @@ __all__ = [
     "get_token_counts",
     "create_chat_client",
     "create_async_chat_client",
+    "get_message_token_counts",
     "ToolCallContentProcessor",
 ]
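Net effect of these two hunks: `get_message_token_counts` joins the package's public exports. A minimal usage sketch, assuming a plain OpenAI-style message list (the message content and model name here are illustrative, not from the diff):

```python
from vectorvein.chat_clients import get_message_token_counts

# Rough prompt-size estimate before dispatching a request.
messages = [{"role": "user", "content": "Summarize the release notes."}]
print(get_message_token_counts(messages=messages, model="gpt-4o"))
```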
vectorvein/chat_clients/anthropic_client.py
CHANGED
@@ -21,7 +21,7 @@ from google.auth import _helpers
 
 from ..settings import settings
 from ..types import defaults as defs
-from .utils import cutoff_messages,
+from .utils import cutoff_messages, get_message_token_counts
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
 from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
@@ -112,46 +112,8 @@ class AnthropicChatClient(BaseChatClient):
             **kwargs,
         )
 
-    def create_completion(
-        self,
-        messages: list = list,
-        model: str | None = None,
-        stream: bool | None = None,
-        temperature: float | None = None,
-        max_tokens: int | None = None,
-        tools: list | NotGiven = NOT_GIVEN,
-        tool_choice: str | NotGiven = NOT_GIVEN,
-        **kwargs,
-    ):
-        if model is not None:
-            self.model = model
-        if stream is not None:
-            self.stream = stream
-        if temperature is not None:
-            self.temperature = temperature
-        if isinstance(tools, OpenAINotGiven):
-            tools = NOT_GIVEN
-        if isinstance(tool_choice, OpenAINotGiven):
-            tool_choice = NOT_GIVEN
-
-        self.model_setting = self.backend_settings.models[self.model]
-
-        if messages[0].get("role") == "system":
-            system_prompt = messages[0]["content"]
-            messages = messages[1:]
-        else:
-            system_prompt = ""
-
-        if self.context_length_control == ContextLengthControlType.Latest:
-            messages = cutoff_messages(
-                messages,
-                max_count=self.model_setting.context_length,
-                backend=self.BACKEND_NAME,
-                model=self.model_setting.id,
-            )
-
-        messages = format_messages_alternate(messages)
-
+    @property
+    def raw_client(self):
         if self.random_endpoint:
             self.random_endpoint = True
             self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
@@ -181,7 +143,7 @@ class AnthropicChatClient(BaseChatClient):
             else:
                 base_url = f"{self.endpoint.api_base}{self.endpoint.region}-aiplatform/v1"
 
-            self._client = AnthropicVertex(
+            return AnthropicVertex(
                 region=self.endpoint.region,
                 base_url=base_url,
                 project_id=self.endpoint.credentials.get("quota_project_id"),
@@ -189,25 +151,64 @@ class AnthropicChatClient(BaseChatClient):
                 http_client=self.http_client,
             )
         else:
-            self._client = Anthropic(
+            return Anthropic(
                 api_key=self.endpoint.api_key,
                 base_url=self.endpoint.api_base,
                 http_client=self.http_client,
             )
 
+    def create_completion(
+        self,
+        messages: list = list,
+        model: str | None = None,
+        stream: bool | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: list | NotGiven = NOT_GIVEN,
+        tool_choice: str | NotGiven = NOT_GIVEN,
+        **kwargs,
+    ):
+        if model is not None:
+            self.model = model
+        if stream is not None:
+            self.stream = stream
+        if temperature is not None:
+            self.temperature = temperature
+        if isinstance(tools, OpenAINotGiven):
+            tools = NOT_GIVEN
+        if isinstance(tool_choice, OpenAINotGiven):
+            tool_choice = NOT_GIVEN
+
+        self.model_setting = self.backend_settings.models[self.model]
+
+        if messages[0].get("role") == "system":
+            system_prompt = messages[0]["content"]
+            messages = messages[1:]
+        else:
+            system_prompt = ""
+
+        if self.context_length_control == ContextLengthControlType.Latest:
+            messages = cutoff_messages(
+                messages,
+                max_count=self.model_setting.context_length,
+                backend=self.BACKEND_NAME,
+                model=self.model_setting.id,
+            )
+
+        messages = format_messages_alternate(messages)
+
         tools_params = refactor_tool_use_params(tools) if tools else tools
 
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
+            token_counts = get_message_token_counts(messages=messages, tools=tools_params, model=self.model_setting.id)
             if max_output_tokens is not None:
-                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
                 max_tokens = self.model_setting.context_length - token_counts
                 max_tokens = min(max(max_tokens, 1), max_output_tokens)
             else:
-                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
                 max_tokens = self.model_setting.context_length - token_counts
 
-        response = self._client.messages.create(
+        response = self.raw_client.messages.create(
             model=self.model_setting.id,
             messages=messages,
             system=system_prompt,
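When the caller leaves `max_tokens` unset, the client now derives it from the new message-level counter: whatever context remains after the prompt, clamped to the model's output cap. The arithmetic in isolation, with made-up numbers:

```python
# Illustrative values; the real ones come from the model settings and the
# get_message_token_counts() estimate above.
context_length = 200_000     # model context window
max_output_tokens = 4_096    # model output cap
token_counts = 197_500       # estimated prompt tokens (messages + tools)

max_tokens = context_length - token_counts            # 2_500 tokens left
max_tokens = min(max(max_tokens, 1), max_output_tokens)
assert max_tokens == 2_500
```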
@@ -318,46 +319,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             **kwargs,
         )
 
-    async def create_completion(
-        self,
-        messages: list = list,
-        model: str | None = None,
-        stream: bool | None = None,
-        temperature: float | None = None,
-        max_tokens: int | None = None,
-        tools: list | NotGiven = NOT_GIVEN,
-        tool_choice: str | NotGiven = NOT_GIVEN,
-        **kwargs,
-    ):
-        if model is not None:
-            self.model = model
-        if stream is not None:
-            self.stream = stream
-        if temperature is not None:
-            self.temperature = temperature
-        if isinstance(tools, OpenAINotGiven):
-            tools = NOT_GIVEN
-        if isinstance(tool_choice, OpenAINotGiven):
-            tool_choice = NOT_GIVEN
-
-        self.model_setting = self.backend_settings.models[self.model]
-
-        if messages[0].get("role") == "system":
-            system_prompt = messages[0]["content"]
-            messages = messages[1:]
-        else:
-            system_prompt = ""
-
-        if self.context_length_control == ContextLengthControlType.Latest:
-            messages = cutoff_messages(
-                messages,
-                max_count=self.model_setting.context_length,
-                backend=self.BACKEND_NAME,
-                model=self.model_setting.id,
-            )
-
-        messages = format_messages_alternate(messages)
-
+    @property
+    def raw_client(self):
         if self.random_endpoint:
             self.random_endpoint = True
             self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
@@ -387,7 +350,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             else:
                 base_url = f"{self.endpoint.api_base}{self.endpoint.region}-aiplatform/v1"
 
-            self._client = AsyncAnthropicVertex(
+            return AsyncAnthropicVertex(
                 region=self.endpoint.region,
                 base_url=base_url,
                 project_id=self.endpoint.credentials.get("quota_project_id"),
@@ -395,25 +358,64 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 http_client=self.http_client,
             )
         else:
-            self._client = AsyncAnthropic(
+            return AsyncAnthropic(
                 api_key=self.endpoint.api_key,
                 base_url=self.endpoint.api_base,
                 http_client=self.http_client,
             )
 
+    async def create_completion(
+        self,
+        messages: list = list,
+        model: str | None = None,
+        stream: bool | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: list | NotGiven = NOT_GIVEN,
+        tool_choice: str | NotGiven = NOT_GIVEN,
+        **kwargs,
+    ):
+        if model is not None:
+            self.model = model
+        if stream is not None:
+            self.stream = stream
+        if temperature is not None:
+            self.temperature = temperature
+        if isinstance(tools, OpenAINotGiven):
+            tools = NOT_GIVEN
+        if isinstance(tool_choice, OpenAINotGiven):
+            tool_choice = NOT_GIVEN
+
+        self.model_setting = self.backend_settings.models[self.model]
+
+        if messages[0].get("role") == "system":
+            system_prompt = messages[0]["content"]
+            messages = messages[1:]
+        else:
+            system_prompt = ""
+
+        if self.context_length_control == ContextLengthControlType.Latest:
+            messages = cutoff_messages(
+                messages,
+                max_count=self.model_setting.context_length,
+                backend=self.BACKEND_NAME,
+                model=self.model_setting.id,
+            )
+
+        messages = format_messages_alternate(messages)
+
         tools_params = refactor_tool_use_params(tools) if tools else tools
 
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
+            token_counts = get_message_token_counts(messages=messages, tools=tools_params, model=self.model_setting.id)
             if max_output_tokens is not None:
-                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
                 max_tokens = self.model_setting.context_length - token_counts
                 max_tokens = min(max(max_tokens, 1), max_output_tokens)
             else:
-                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
                 max_tokens = self.model_setting.context_length - token_counts
 
-        response = await self._client.messages.create(
+        response = await self.raw_client.messages.create(
             model=self.model_setting.id,
             messages=messages,
             system=system_prompt,
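The async client mirrors the sync flow: `raw_client` returns `AsyncAnthropic` or `AsyncAnthropicVertex`, and `create_completion` is awaited. A hypothetical end-to-end call; the factory signature and model name are assumptions based on the names exported from `__init__.py`, not something this diff shows:

```python
import asyncio

from vectorvein.chat_clients import create_async_chat_client
from vectorvein.types.enums import BackendType


async def main() -> None:
    # Assumed factory signature; endpoints and credentials come from user settings.
    client = create_async_chat_client(BackendType.Anthropic, model="claude-3-5-sonnet-20240620")
    response = await client.create_completion(
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(response)


asyncio.run(main())
```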
vectorvein/chat_clients/base_client.py
CHANGED
@@ -5,6 +5,8 @@ from typing import Generator, AsyncGenerator, Any
 
 import httpx
 from openai._types import NotGiven, NOT_GIVEN
+from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
+from anthropic import Anthropic, AnthropicVertex, AsyncAnthropic, AsyncAnthropicVertex
 
 from ..settings import settings
 from ..types import defaults as defs
@@ -42,6 +44,11 @@ class BaseChatClient(ABC):
         self.random_endpoint = False
         self.endpoint = settings.get_endpoint(self.endpoint_id)
 
+    @property
+    @abstractmethod
+    def raw_client(self) -> OpenAI | AzureOpenAI | Anthropic | AnthropicVertex:
+        pass
+
     @abstractmethod
     def create_completion(
         self,
@@ -108,6 +115,11 @@ class BaseAsyncChatClient(ABC):
         self.random_endpoint = False
         self.endpoint = settings.get_endpoint(self.endpoint_id)
 
+    @property
+    @abstractmethod
+    def raw_client(self) -> AsyncOpenAI | AsyncAzureOpenAI | AsyncAnthropic | AsyncAnthropicVertex:
+        pass
+
     @abstractmethod
     async def create_completion(
         self,
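Stacking `@property` on top of `@abstractmethod` declares an abstract read-only attribute: a subclass cannot be instantiated until it supplies a concrete `raw_client`. A minimal self-contained illustration of the pattern (toy classes, not vectorvein's):

```python
from abc import ABC, abstractmethod


class Base(ABC):
    @property
    @abstractmethod
    def raw_client(self) -> object:
        """Concrete subclasses must expose the underlying SDK client."""


class Concrete(Base):
    @property
    def raw_client(self) -> object:
        return "an SDK client instance would go here"


print(Concrete().raw_client)  # OK
# Base()  # TypeError: can't instantiate abstract class Base
```

The decorator order matters: `@property` goes on the outside so that `abstractmethod` marks the underlying getter function, which is exactly the order used in the diff.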
vectorvein/chat_clients/openai_compatible_client.py
CHANGED
@@ -2,6 +2,7 @@
 # @Date: 2024-07-26 14:48:55
 import json
 import random
+from functools import cached_property
 
 import httpx
 from openai._types import NotGiven, NOT_GIVEN
@@ -12,7 +13,7 @@ from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from .utils import (
     cutoff_messages,
-    get_token_counts,
+    get_message_token_counts,
     ToolCallContentProcessor,
     generate_tool_use_system_prompt,
 )
@@ -48,6 +49,27 @@ class OpenAICompatibleChatClient(BaseChatClient):
             **kwargs,
         )
 
+    @cached_property
+    def raw_client(self):
+        if self.random_endpoint:
+            self.random_endpoint = True
+            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+            self.endpoint = settings.get_endpoint(self.endpoint_id)
+
+        if self.endpoint.is_azure:
+            return AzureOpenAI(
+                azure_endpoint=self.endpoint.api_base,
+                api_key=self.endpoint.api_key,
+                api_version="2024-08-01-preview",
+                http_client=self.http_client,
+            )
+        else:
+            return OpenAI(
+                api_key=self.endpoint.api_key,
+                base_url=self.endpoint.api_base,
+                http_client=self.http_client,
+            )
+
     def create_completion(
         self,
         messages: list = list,
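Unlike the plain `@property` used for the Anthropic clients, `functools.cached_property` runs its body once per instance and stores the result in the instance `__dict__`, so the random endpoint draw and SDK-client construction happen only on first access. A small demonstration of that caching behavior:

```python
import random
from functools import cached_property


class Client:
    @cached_property
    def raw_client(self) -> int:
        print("constructing client ...")
        return random.randint(0, 999)  # stands in for OpenAI(...)


c = Client()
assert c.raw_client == c.raw_client  # "constructing client ..." prints once
del c.raw_client                     # dropping the cached value forces a rebuild
```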
@@ -68,24 +90,24 @@ class OpenAICompatibleChatClient(BaseChatClient):
 
         self.model_setting = self.backend_settings.models[self.model]
 
-        if self.random_endpoint:
-            self.random_endpoint = True
-            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
-            self.endpoint = settings.get_endpoint(self.endpoint_id)
+        # if self.random_endpoint:
+        #     self.random_endpoint = True
+        #     self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+        #     self.endpoint = settings.get_endpoint(self.endpoint_id)
 
-        if self.endpoint.is_azure:
-            self._client = AzureOpenAI(
-                azure_endpoint=self.endpoint.api_base,
-                api_key=self.endpoint.api_key,
-                api_version="2024-08-01-preview",
-                http_client=self.http_client,
-            )
-        else:
-            self._client = OpenAI(
-                api_key=self.endpoint.api_key,
-                base_url=self.endpoint.api_base,
-                http_client=self.http_client,
-            )
+        # if self.endpoint.is_azure:
+        #     self._client = AzureOpenAI(
+        #         azure_endpoint=self.endpoint.api_base,
+        #         api_key=self.endpoint.api_key,
+        #         api_version="2024-08-01-preview",
+        #         http_client=self.http_client,
+        #     )
+        # else:
+        #     self._client = OpenAI(
+        #         api_key=self.endpoint.api_key,
+        #         base_url=self.endpoint.api_base,
+        #         http_client=self.http_client,
+        #     )
 
         if self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
@@ -111,15 +133,14 @@
 
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
+            token_counts = get_message_token_counts(messages=messages, tools=tools_params, model=self.model_setting.id)
             if max_output_tokens is not None:
-                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
                 max_tokens = self.model_setting.context_length - token_counts
                 max_tokens = min(max(max_tokens, 1), max_output_tokens)
             else:
-                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
                 max_tokens = self.model_setting.context_length - token_counts
 
-        response: ChatCompletion | Stream[ChatCompletionChunk] = self._client.chat.completions.create(
+        response: ChatCompletion | Stream[ChatCompletionChunk] = self.raw_client.chat.completions.create(
             model=self.model_setting.id,
             messages=messages,
             stream=self.stream,
@@ -207,6 +228,27 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             **kwargs,
         )
 
+    @cached_property
+    def raw_client(self):
+        if self.random_endpoint:
+            self.random_endpoint = True
+            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+            self.endpoint = settings.get_endpoint(self.endpoint_id)
+
+        if self.endpoint.is_azure:
+            return AsyncAzureOpenAI(
+                azure_endpoint=self.endpoint.api_base,
+                api_key=self.endpoint.api_key,
+                api_version="2024-08-01-preview",
+                http_client=self.http_client,
+            )
+        else:
+            return AsyncOpenAI(
+                api_key=self.endpoint.api_key,
+                base_url=self.endpoint.api_base,
+                http_client=self.http_client,
+            )
+
     async def create_completion(
         self,
         messages: list = list,
@@ -227,24 +269,24 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
 
         self.model_setting = self.backend_settings.models[self.model]
 
-        if self.random_endpoint:
-            self.random_endpoint = True
-            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
-            self.endpoint = settings.get_endpoint(self.endpoint_id)
+        # if self.random_endpoint:
+        #     self.random_endpoint = True
+        #     self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+        #     self.endpoint = settings.get_endpoint(self.endpoint_id)
 
-        if self.endpoint.is_azure:
-            self._client = AsyncAzureOpenAI(
-                azure_endpoint=self.endpoint.api_base,
-                api_key=self.endpoint.api_key,
-                api_version="2024-08-01-preview",
-                http_client=self.http_client,
-            )
-        else:
-            self._client = AsyncOpenAI(
-                api_key=self.endpoint.api_key,
-                base_url=self.endpoint.api_base,
-                http_client=self.http_client,
-            )
+        # if self.endpoint.is_azure:
+        #     self._client = AsyncAzureOpenAI(
+        #         azure_endpoint=self.endpoint.api_base,
+        #         api_key=self.endpoint.api_key,
+        #         api_version="2024-08-01-preview",
+        #         http_client=self.http_client,
+        #     )
+        # else:
+        #     self._client = AsyncOpenAI(
+        #         api_key=self.endpoint.api_key,
+        #         base_url=self.endpoint.api_base,
+        #         http_client=self.http_client,
+        #     )
 
         if self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
@@ -270,15 +312,14 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
 
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
+            token_counts = get_message_token_counts(messages=messages, tools=tools_params, model=self.model_setting.id)
             if max_output_tokens is not None:
-                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
                 max_tokens = self.model_setting.context_length - token_counts
                 max_tokens = min(max(max_tokens, 1), max_output_tokens)
             else:
-                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
                 max_tokens = self.model_setting.context_length - token_counts
 
-        response: ChatCompletion | AsyncStream[ChatCompletionChunk] = await self._client.chat.completions.create(
+        response: ChatCompletion | AsyncStream[ChatCompletionChunk] = await self.raw_client.chat.completions.create(
             model=self.model_setting.id,
             messages=messages,
             stream=self.stream,
vectorvein/chat_clients/utils.py
CHANGED
@@ -2,7 +2,7 @@
 # @Date: 2024-07-26 14:48:55
 import re
 import json
-
+from math import ceil
 import httpx
 import tiktoken
 from anthropic import Anthropic
@@ -187,6 +187,46 @@ def get_token_counts(text: str | dict, model: str = "") -> int:
     return len(chatgpt_encoding.encode(text))
 
 
+def calculate_image_tokens(width: int, height: int, model: str = "gpt-4o"):
+    if width > 2048 or height > 2048:
+        aspect_ratio = width / height
+        if aspect_ratio > 1:
+            width, height = 2048, int(2048 / aspect_ratio)
+        else:
+            width, height = int(2048 * aspect_ratio), 2048
+
+    if width >= height and height > 768:
+        width, height = int((768 / height) * width), 768
+    elif height > width and width > 768:
+        width, height = 768, int((768 / width) * height)
+
+    tiles_width = ceil(width / 512)
+    tiles_height = ceil(height / 512)
+    total_tokens = 85 + 170 * (tiles_width * tiles_height)
+
+    return total_tokens
+
+
+def get_message_token_counts(messages: list, tools: dict | None = None, model: str = "gpt-4o") -> int:
+    tokens = 0
+    formatted_messages = format_messages(messages, backend=BackendType.OpenAI, native_multimodal=True)
+    for message in formatted_messages:
+        content = message["content"]
+        if isinstance(content, str):
+            tokens += get_token_counts(content, model)
+        elif isinstance(content, list):
+            for item in content:
+                if isinstance(item, dict) and item["type"] == "text":
+                    tokens += get_token_counts(item["text"], model)
+                elif isinstance(item, dict) and item["type"].startswith("image"):
+                    # TODO: Get real image size
+                    tokens += calculate_image_tokens(2048, 2048, model)
+    if tools is not None:
+        tokens += get_token_counts(str(tools), model)
+
+    return tokens
+
+
 def cutoff_messages(
     messages: list,
     max_count: int = 16000,
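`calculate_image_tokens` follows OpenAI's published high-detail image pricing rule for GPT-4-class models: fit the image inside 2048x2048, scale the short side down to 768, then charge 85 base tokens plus 170 per 512-px tile. Working through the 2048x2048 placeholder that the TODO branch currently assumes for every image:

```python
from math import ceil

width = height = 2048  # placeholder size hard-coded in get_message_token_counts
# Short side capped at 768: 2048x2048 -> 768x768.
width, height = int((768 / height) * width), 768
tiles = ceil(width / 512) * ceil(height / 512)  # 2 * 2 = 4 tiles
assert 85 + 170 * tiles == 765  # tokens charged per image
```

Until real dimensions are plumbed through, every image therefore contributes a flat 765 tokens to the estimate.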
{vectorvein-0.1.15.dist-info → vectorvein-0.1.17.dist-info}/RECORD
CHANGED
@@ -1,10 +1,10 @@
-vectorvein-0.1.
-vectorvein-0.1.
+vectorvein-0.1.17.dist-info/METADATA,sha256=TgZP36atynGGP_1BGQNSZhPL5HfsNaex7dAs7ahSRfs,502
+vectorvein-0.1.17.dist-info/WHEEL,sha256=rSwsxJWe3vzyR5HCwjWXQruDgschpei4h_giTm0dJVE,90
 vectorvein/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-vectorvein/chat_clients/__init__.py,sha256=
-vectorvein/chat_clients/anthropic_client.py,sha256=
+vectorvein/chat_clients/__init__.py,sha256=lOGrIEBGN-EoxJ-dF5uMsO6viNCIFIeNL8whDwE6x3g,4657
+vectorvein/chat_clients/anthropic_client.py,sha256=VJQi7cKLbbLkJtmEFN9M41JUaGjwEuJaXIuQLK-3rdw,20332
 vectorvein/chat_clients/baichuan_client.py,sha256=CVMvpgjdrZGv0BWnTOBD-f2ufZ3wq3496wqukumsAr4,526
-vectorvein/chat_clients/base_client.py,sha256=
+vectorvein/chat_clients/base_client.py,sha256=AnzEmNfT4XW4lKcwf8fv2NlSk1EHK0evr4-1EH0KkwI,5018
 vectorvein/chat_clients/deepseek_client.py,sha256=3qWu01NlJAP2N-Ff62d5-CZXZitlizE1fzb20LNetig,526
 vectorvein/chat_clients/gemini_client.py,sha256=W-9Vu-GTE9wxStPznyNR0rBEgDG3LYBu2uQXd4sh1YQ,14425
 vectorvein/chat_clients/groq_client.py,sha256=Uow4pgdmFi93ZQSoOol2-0PhhqkW-S0XuSldvppz5U4,498
@@ -13,9 +13,9 @@ vectorvein/chat_clients/minimax_client.py,sha256=iNq87zWx79g8tGo784c67xUt-YQ4TyM
 vectorvein/chat_clients/mistral_client.py,sha256=1aKSylzBDaLYcFnaBIL4-sXSzWmXfBeON9Q0rq-ziWw,534
 vectorvein/chat_clients/moonshot_client.py,sha256=gbu-6nGxx8uM_U2WlI4Wus881rFRotzHtMSoYOcruGU,526
 vectorvein/chat_clients/openai_client.py,sha256=Nz6tV45pWcsOupxjnsRsGTicbQNJWIZyxuJoJ5DGMpg,527
-vectorvein/chat_clients/openai_compatible_client.py,sha256=
+vectorvein/chat_clients/openai_compatible_client.py,sha256=Rq_VTEddx6bXmr9TDKeI55mGUIos4THpRUgw1h3R4w4,15710
 vectorvein/chat_clients/qwen_client.py,sha256=-ryh-m9PgsO0fc4ulcCmPTy1155J8YUy15uPoJQOHA0,513
-vectorvein/chat_clients/utils.py,sha256=
+vectorvein/chat_clients/utils.py,sha256=EbSodMwLCR8wnzyt2J6iOsL8WvSzlwdu71xZikzNdlg,22483
 vectorvein/chat_clients/yi_client.py,sha256=RNf4CRuPJfixrwLZ3-DEc3t25QDe1mvZeb9sku2f8Bc,484
 vectorvein/chat_clients/zhipuai_client.py,sha256=Ys5DSeLCuedaDXr3PfG1EW2zKXopt-awO2IylWSwY0s,519
 vectorvein/settings/__init__.py,sha256=jVHbhHn1BuMcyfZGXrxWKiI4NdY9wzvYyGMvKYmUtqg,3378
@@ -24,4 +24,4 @@ vectorvein/types/enums.py,sha256=PNK_pTIyjJFy-yAG2PHaMIO1ey3W6fReMCkH8M8VRW4,159
 vectorvein/types/llm_parameters.py,sha256=mmJjJZz4bPRi0nHzYNUNdWsQLHa9lbf3-MNVnU78vaY,3608
 vectorvein/utilities/media_processing.py,sha256=BujciRmw1GMmc3ELRvafL8STcy6r5b2rVnh27-uA7so,2256
 vectorvein/utilities/retry.py,sha256=9ePuJdeUUGx-qMWfaFxmlOvG_lQPwCQ4UB1z3Edlo34,993
-vectorvein-0.1.
+vectorvein-0.1.17.dist-info/RECORD,,

{vectorvein-0.1.15.dist-info → vectorvein-0.1.17.dist-info}/WHEEL
File without changes