vectorvein 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
@@ -19,8 +19,8 @@ from google.auth.transport.requests import Request
  from google.auth import _helpers

  from ..settings import settings
- from .utils import cutoff_messages
  from ..types import defaults as defs
+ from .utils import cutoff_messages, get_token_counts
  from .base_client import BaseChatClient, BaseAsyncChatClient
  from ..types.enums import ContextLengthControlType, BackendType
  from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
@@ -115,7 +115,7 @@ class AnthropicChatClient(BaseChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ):
@@ -189,6 +189,18 @@ class AnthropicChatClient(BaseChatClient):
  base_url=self.endpoint.api_base,
  )

+ tools_params = refactor_tool_use_params(tools) if tools else tools
+
+ if max_tokens is None:
+     max_output_tokens = self.model_setting.max_output_tokens
+     if max_output_tokens is not None:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+         max_tokens = min(max(max_tokens, 1), max_output_tokens)
+     else:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+
  response = self._client.messages.create(
  model=self.model_setting.id,
  messages=messages,
@@ -196,7 +208,7 @@ class AnthropicChatClient(BaseChatClient):
  stream=self.stream,
  temperature=self.temperature,
  max_tokens=max_tokens,
- tools=refactor_tool_use_params(tools) if tools else tools,
+ tools=tools_params,
  tool_choice=tool_choice,
  )

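When no max_tokens is passed, the 0.1.9 clients derive it from the model settings instead of using a fixed default: the prompt (messages plus tool parameters) is counted with get_token_counts, that count is subtracted from the model's context_length, and the result is clamped to max_output_tokens when the model defines one. A rough sketch of the same arithmetic, with purely illustrative numbers rather than values from any real model setting:

    context_length = 200000        # model's total context window
    max_output_tokens = 4096       # per-model output cap; may be None
    token_counts = 1500            # tokens already consumed by messages + tools

    max_tokens = context_length - token_counts   # 198500 tokens of headroom
    if max_output_tokens is not None:
        max_tokens = min(max(max_tokens, 1), max_output_tokens)   # clamps to 4096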
@@ -303,7 +315,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ):
@@ -376,6 +388,19 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
  api_key=self.endpoint.api_key,
  base_url=self.endpoint.api_base,
  )
+
+ tools_params = refactor_tool_use_params(tools) if tools else tools
+
+ if max_tokens is None:
+     max_output_tokens = self.model_setting.max_output_tokens
+     if max_output_tokens is not None:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+         max_tokens = min(max(max_tokens, 1), max_output_tokens)
+     else:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+
  response = await self._client.messages.create(
  model=self.model_setting.id,
  messages=messages,
@@ -383,7 +408,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
  stream=self.stream,
  temperature=self.temperature,
  max_tokens=max_tokens,
- tools=refactor_tool_use_params(tools) if tools else tools,
+ tools=tools_params,
  tool_choice=tool_choice,
  )

@@ -46,7 +46,7 @@ class BaseChatClient(ABC):
  model: str | None = None,
  stream: bool = False,
  temperature: float = 0.7,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
@@ -57,7 +57,7 @@ class BaseChatClient(ABC):
  messages: list,
  model: str | None = None,
  temperature: float = 0.7,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
@@ -107,7 +107,7 @@ class BaseAsyncChatClient(ABC):
  model: str | None = None,
  stream: bool = False,
  temperature: float = 0.7,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
@@ -118,7 +118,7 @@ class BaseAsyncChatClient(ABC):
  messages: list,
  model: str | None = None,
  temperature: float = 0.7,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
@@ -43,7 +43,7 @@ class GeminiChatClient(BaseChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | None = None,
  tool_choice: str | None = None,
  ):
@@ -156,7 +156,16 @@ class GeminiChatClient(BaseChatClient):
  if "text" in part:
      result["content"] += part["text"]
  elif "functionCall" in part:
-     tool_calls.append(part["functionCall"])
+     tool_call = {
+         "index": 0,
+         "id": "call_0",
+         "function": {
+             "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
+             "name": part["functionCall"]["name"],
+         },
+         "type": "function",
+     }
+     tool_calls.append(tool_call)

  if tool_calls:
      result["tool_calls"] = tool_calls
@@ -194,7 +203,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | None = None,
  tool_choice: str | None = None,
  ):
@@ -310,7 +319,16 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
  if "text" in part:
      result["content"] += part["text"]
  elif "functionCall" in part:
-     tool_calls.append(part["functionCall"])
+     tool_call = {
+         "index": 0,
+         "id": "call_0",
+         "function": {
+             "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
+             "name": part["functionCall"]["name"],
+         },
+         "type": "function",
+     }
+     tool_calls.append(tool_call)

  if tool_calls:
      result["tool_calls"] = tool_calls
@@ -7,8 +7,8 @@ import httpx
  from openai._types import NotGiven

  from ..settings import settings
- from .utils import cutoff_messages
  from ..types import defaults as defs
+ from .utils import cutoff_messages, get_token_counts
  from .base_client import BaseChatClient, BaseAsyncChatClient
  from ..types.enums import ContextLengthControlType, BackendType
  from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
@@ -66,7 +66,7 @@ class MiniMaxChatClient(BaseChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2048,
+ max_tokens: int | None = None,
  tools: list | None = None,
  tool_choice: str = "auto",
  ):
@@ -93,7 +93,7 @@ class MiniMaxChatClient(BaseChatClient):
  model=self.model_setting.id,
  )

- if tools is not None:
+ if tools:
      tools_params = {
          "tools": [
              {
@@ -113,6 +113,16 @@ class MiniMaxChatClient(BaseChatClient):
  else:
      tools_params = {}

+ if max_tokens is None:
+     max_output_tokens = self.model_setting.max_output_tokens
+     if max_output_tokens is not None:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+         max_tokens = min(max(max_tokens, 1), max_output_tokens)
+     else:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+
  self.url = self.endpoint.api_base
  self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}

@@ -139,22 +149,19 @@ class MiniMaxChatClient(BaseChatClient):
  for chunk in response.iter_lines():
      if chunk:
          chunk_data = json.loads(chunk[6:])
+         if chunk_data["object"] != "chat.completion.chunk":
+             continue
          tool_calls_params = extract_tool_calls(chunk_data)
          has_tool_calls = True if tool_calls_params else False
          if has_tool_calls:
-             if "usage" not in chunk_data:
-                 continue
-             else:
-                 yield ChatCompletionDeltaMessage(
-                     **{
-                         "content": chunk_data["choices"][0]["message"].get("content"),
-                         "role": "assistant",
-                         **tool_calls_params,
-                     }
-                 )
+             yield ChatCompletionDeltaMessage(
+                 **{
+                     "content": chunk_data["choices"][0]["delta"].get("content"),
+                     "role": "assistant",
+                     **tool_calls_params,
+                 }
+             )
          else:
-             if "usage" in chunk_data:
-                 continue
              yield ChatCompletionDeltaMessage(
                  **{
                      "content": chunk_data["choices"][0]["delta"]["content"],
@@ -211,7 +218,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2048,
+ max_tokens: int | None = None,
  tools: list | None = None,
  tool_choice: str = "auto",
  ):
@@ -238,7 +245,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
  model=self.model_setting.id,
  )

- if tools is not None:
+ if tools:
      tools_params = {
          "tools": [
              {
@@ -256,6 +263,16 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
  else:
      tools_params = {}

+ if max_tokens is None:
+     max_output_tokens = self.model_setting.max_output_tokens
+     if max_output_tokens is not None:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+         max_tokens = min(max(max_tokens, 1), max_output_tokens)
+     else:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+
  self.url = self.endpoint.api_base
  self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}

@@ -283,22 +300,19 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
  async for chunk in response.aiter_lines():
      if chunk:
          chunk_data = json.loads(chunk[6:])
+         if chunk_data["object"] != "chat.completion.chunk":
+             continue
          tool_calls_params = extract_tool_calls(chunk_data)
          has_tool_calls = True if tool_calls_params else False
          if has_tool_calls:
-             if "usage" not in chunk_data:
-                 continue
-             else:
-                 yield ChatCompletionDeltaMessage(
-                     **{
-                         "content": chunk_data["choices"][0]["message"].get("content"),
-                         "role": "assistant",
-                         **tool_calls_params,
-                     }
-                 )
+             yield ChatCompletionDeltaMessage(
+                 **{
+                     "content": chunk_data["choices"][0]["delta"].get("content"),
+                     "role": "assistant",
+                     **tool_calls_params,
+                 }
+             )
          else:
-             if "usage" in chunk_data:
-                 continue
              yield ChatCompletionDeltaMessage(
                  **{
                      "content": chunk_data["choices"][0]["delta"]["content"],
@@ -11,6 +11,7 @@ from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
  from .base_client import BaseChatClient, BaseAsyncChatClient
  from .utils import (
      cutoff_messages,
+     get_token_counts,
      ToolCallContentProcessor,
      generate_tool_use_system_prompt,
  )
@@ -50,7 +51,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ):
@@ -102,6 +103,16 @@ class OpenAICompatibleChatClient(BaseChatClient):
  else:
      tools_params = {}

+ if max_tokens is None:
+     max_output_tokens = self.model_setting.max_output_tokens
+     if max_output_tokens is not None:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+         max_tokens = min(max(max_tokens, 1), max_output_tokens)
+     else:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+
  response: ChatCompletion | Stream[ChatCompletionChunk] = self._client.chat.completions.create(
  model=self.model_setting.id,
  messages=messages,
@@ -122,6 +133,9 @@ class OpenAICompatibleChatClient(BaseChatClient):
  if not chunk.choices[0].delta:
      continue
  if self.model_setting.function_call_available:
+     if chunk.choices[0].delta.tool_calls:
+         for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
+             tool_call.index = index
      yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
  else:
      message = chunk.choices[0].delta.model_dump()
@@ -147,7 +161,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
  if tools:
      if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
          result["tool_calls"] = [
-             tool_call.model_dump() for tool_call in response.choices[0].message.tool_calls
+             {**tool_call.model_dump(), "type": "function"}
+             for tool_call in response.choices[0].message.tool_calls
          ]
      else:
          tool_call_content_processor = ToolCallContentProcessor(result["content"])
@@ -189,7 +204,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ):
@@ -241,6 +256,16 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  else:
      tools_params = {}

+ if max_tokens is None:
+     max_output_tokens = self.model_setting.max_output_tokens
+     if max_output_tokens is not None:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+         max_tokens = min(max(max_tokens, 1), max_output_tokens)
+     else:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+
  response: ChatCompletion | AsyncStream[ChatCompletionChunk] = await self._client.chat.completions.create(
  model=self.model_setting.id,
  messages=messages,
@@ -261,6 +286,9 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  if not chunk.choices[0].delta:
      continue
  if self.model_setting.function_call_available:
+     if chunk.choices[0].delta.tool_calls:
+         for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
+             tool_call.index = index
      yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
  else:
      message = chunk.choices[0].delta.model_dump()
@@ -286,7 +314,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  if tools:
      if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
          result["tool_calls"] = [
-             tool_call.model_dump() for tool_call in response.choices[0].message.tool_calls
+             {**tool_call.model_dump(), "type": "function"}
+             for tool_call in response.choices[0].message.tool_calls
          ]
      else:
          tool_call_content_processor = ToolCallContentProcessor(result["content"])
@@ -3,8 +3,14 @@
  import re
  import json

+ import httpx
  import tiktoken
+ from anthropic import Anthropic
+ from qwen_tokenizer import qwen_tokenizer
+ from deepseek_tokenizer import deepseek_tokenizer

+ from ..settings import settings
+ from ..utilities.retry import Retry
  from ..types.enums import BackendType
  from ..utilities.media_processing import ImageProcessor

@@ -95,10 +101,88 @@ def get_token_counts(text: str | dict, model: str = "") -> int:
  text = str(text)
  if model == "gpt-3.5-turbo":
      return len(chatgpt_encoding.encode(text))
- elif model == "gpt-4o":
+ elif model in ("gpt-4o", "gpt-4o-mini"):
      return len(gpt_4o_encoding.encode(text))
  elif model.startswith("abab"):
-     return int(len(text) / 1.33)
+     model_setting = settings.minimax.models[model]
+     if len(model_setting.endpoints) == 0:
+         return int(len(text) / 1.33)
+     endpoint_id = model_setting.endpoints[0]
+     endpoint = settings.get_endpoint(endpoint_id)
+     tokenize_url = "https://api.minimax.chat/v1/tokenize"
+     headers = {"Authorization": f"Bearer {endpoint.api_key}", "Content-Type": "application/json"}
+     request_body = {
+         "model": model,
+         "tokens_to_generate": 128,
+         "temperature": 0.2,
+         "messages": [
+             {"sender_type": "USER", "text": text},
+         ],
+     }
+
+     _, response = (
+         Retry(httpx.post)
+         .args(url=tokenize_url, headers=headers, json=request_body, timeout=None)
+         .retry_times(5)
+         .sleep_time(10)
+         .run()
+     )
+     response = response.json()
+     return response["segments_num"]
+ elif model in ("moonshot-v1-8k", "moonshot-v1-32k", "moonshot-v1-128k"):
+     model_setting = settings.moonshot.models[model]
+     if len(model_setting.endpoints) == 0:
+         return len(chatgpt_encoding.encode(text))
+     endpoint_id = model_setting.endpoints[0]
+     endpoint = settings.get_endpoint(endpoint_id)
+     tokenize_url = "https://api.moonshot.cn/v1/tokenizers/estimate-token-count"
+     headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
+     request_body = {
+         "model": model,
+         "messages": [
+             {"role": "user", "content": text},
+         ],
+     }
+     _, response = (
+         Retry(httpx.post)
+         .args(url=tokenize_url, headers=headers, json=request_body, timeout=None)
+         .retry_times(5)
+         .sleep_time(10)
+         .run()
+     )
+     response = response.json()
+     return response["data"]["total_tokens"]
+ elif model.startswith("gemini"):
+     model_setting = settings.gemini.models[model]
+     if len(model_setting.endpoints) == 0:
+         return len(chatgpt_encoding.encode(text))
+     endpoint_id = model_setting.endpoints[0]
+     endpoint = settings.get_endpoint(endpoint_id)
+     url = f"{endpoint.api_base}/models/{model_setting.id}:countTokens"
+     params = {"key": endpoint.api_key}
+     request_body = {
+         "contents": {
+             "role": "USER",
+             "parts": [
+                 {"text": "TEXT"},
+             ],
+         },
+     }
+     _, response = (
+         Retry(httpx.post)
+         .args(url, json=request_body, params=params, timeout=None)
+         .retry_times(5)
+         .sleep_time(10)
+         .run()
+     )
+     result = response.json()
+     return result["totalTokens"]
+ elif model.startswith("claude"):
+     return Anthropic().count_tokens(text)
+ elif model.startswith("deepseek"):
+     return len(deepseek_tokenizer.encode(text))
+ elif model.startswith("qwen"):
+     return len(qwen_tokenizer.encode(text))
  else:
      return len(chatgpt_encoding.encode(text))

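get_token_counts now dispatches by model family: MiniMax, Moonshot and Gemini models are counted through their providers' tokenize endpoints (falling back to a local estimate when no endpoint is configured), claude models use the Anthropic SDK's count_tokens, deepseek and qwen models use their tokenizer packages, and everything else falls back to tiktoken encodings. A minimal usage sketch (the prompt text and budget arithmetic are illustrative only):

    from vectorvein.chat_clients.utils import get_token_counts

    prompt_tokens = get_token_counts("How is the weather today?", model="gpt-4o")
    remaining = 128000 - prompt_tokens   # the same subtraction the chat clients perform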
@@ -14,19 +14,19 @@ MODEL_CONTEXT_LENGTH = 32768
  MOONSHOT_MODELS = {
      "moonshot-v1-8k": {
          "id": "moonshot-v1-8k",
-         "context_length": 8000,
+         "context_length": 8192,
          "function_call_available": True,
          "response_format_available": True,
      },
      "moonshot-v1-32k": {
          "id": "moonshot-v1-32k",
-         "context_length": 32000,
+         "context_length": 32768,
          "function_call_available": True,
          "response_format_available": True,
      },
      "moonshot-v1-128k": {
          "id": "moonshot-v1-128k",
-         "context_length": 128000,
+         "context_length": 131072,
          "function_call_available": True,
          "response_format_available": True,
      },
@@ -38,12 +38,14 @@ DEEPSEEK_MODELS = {
      "deepseek-chat": {
          "id": "deepseek-chat",
          "context_length": 128000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
      "deepseek-coder": {
          "id": "deepseek-chat",
          "context_length": 128000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
@@ -85,48 +87,56 @@ QWEN_MODELS = {
      "qwen1.5-1.8b-chat": {
          "id": "qwen1.5-1.8b-chat",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
      "qwen1.5-4b-chat": {
          "id": "qwen1.5-4b-chat",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
      "qwen1.5-7b-chat": {
          "id": "qwen1.5-7b-chat",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
      "qwen1.5-14b-chat": {
          "id": "qwen1.5-14b-chat",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
      "qwen1.5-32b-chat": {
          "id": "qwen1.5-32b-chat",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
      "qwen1.5-72b-chat": {
          "id": "qwen1.5-72b-chat",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
      "qwen1.5-110b-chat": {
          "id": "qwen1.5-110b-chat",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
      "qwen2-72b-instruct": {
          "id": "qwen2-72b-instruct",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
@@ -138,42 +148,49 @@ YI_MODELS = {
      "yi-large": {
          "id": "yi-large",
          "context_length": 32000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": False,
      },
      "yi-large-turbo": {
          "id": "yi-large-turbo",
          "context_length": 16000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": False,
      },
      "yi-large-fc": {
          "id": "yi-large-fc",
          "context_length": 32000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": False,
      },
      "yi-medium": {
          "id": "yi-medium",
          "context_length": 16000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": False,
      },
      "yi-medium-200k": {
          "id": "yi-medium-200k",
          "context_length": 200000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": False,
      },
      "yi-spark": {
          "id": "yi-spark",
          "context_length": 16000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": False,
      },
      "yi-vision": {
          "id": "yi-vision",
          "context_length": 4000,
+         "max_output_tokens": 2000,
          "function_call_available": False,
          "response_format_available": False,
      },
@@ -187,42 +204,56 @@ ZHIPUAI_MODELS = {
          "context_length": 128000,
          "function_call_available": True,
          "response_format_available": False,
+         "max_output_tokens": 4095,
      },
      "glm-4": {
          "id": "glm-4",
          "context_length": 128000,
          "function_call_available": True,
          "response_format_available": False,
+         "max_output_tokens": 4095,
      },
      "glm-4-0520": {
          "id": "glm-4-0520",
          "context_length": 128000,
          "function_call_available": True,
          "response_format_available": False,
+         "max_output_tokens": 4095,
      },
      "glm-4-air": {
          "id": "glm-4-air",
          "context_length": 128000,
          "function_call_available": True,
          "response_format_available": False,
+         "max_output_tokens": 4095,
      },
      "glm-4-airx": {
          "id": "glm-4-airx",
          "context_length": 128000,
          "function_call_available": True,
          "response_format_available": False,
+         "max_output_tokens": 4095,
      },
      "glm-4-flash": {
          "id": "glm-4-flash",
          "context_length": 128000,
          "function_call_available": True,
          "response_format_available": False,
+         "max_output_tokens": 4095,
+     },
+     "glm-4-long": {
+         "id": "glm-4-long",
+         "context_length": 1000000,
+         "function_call_available": True,
+         "response_format_available": False,
+         "max_output_tokens": 4095,
      },
      "glm-4v": {
          "id": "glm-4v",
          "context_length": 2000,
          "function_call_available": False,
          "response_format_available": False,
+         "max_output_tokens": 1024,
      },
  }

@@ -287,34 +318,40 @@ OPENAI_MODELS = {
          "context_length": 16385,
          "function_call_available": True,
          "response_format_available": True,
+         "max_output_tokens": 4096,
      },
      "gpt-4-turbo": {
          "id": "gpt-4-turbo",
          "context_length": 128000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
      "gpt-4": {
          "id": "gpt-4",
          "context_length": 8192,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
      "gpt-4o": {
          "id": "gpt-4o",
          "context_length": 128000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
      "gpt-4o-mini": {
          "id": "gpt-4o-mini",
          "context_length": 128000,
+         "max_output_tokens": 16384,
          "function_call_available": True,
          "response_format_available": True,
      },
      "gpt-4v": {
          "id": "gpt-4v",
          "context_length": 128000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
@@ -326,24 +363,28 @@ ANTHROPIC_MODELS = {
      "claude-3-opus-20240229": {
          "id": "claude-3-opus-20240229",
          "context_length": 200000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
      "claude-3-sonnet-20240229": {
          "id": "claude-3-sonnet-20240229",
          "context_length": 200000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
      "claude-3-haiku-20240307": {
          "id": "claude-3-haiku-20240307",
          "context_length": 200000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
      "claude-3-5-sonnet-20240620": {
          "id": "claude-3-5-sonnet-20240620",
          "context_length": 200000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
@@ -355,24 +396,28 @@ MINIMAX_MODELS = {
      "abab5-chat": {
          "id": "abab5-chat",
          "context_length": 6144,
+         "max_output_tokens": 6144,
          "function_call_available": True,
          "response_format_available": True,
      },
      "abab5.5-chat": {
          "id": "abab5.5-chat",
          "context_length": 16384,
+         "max_output_tokens": 16384,
          "function_call_available": True,
          "response_format_available": True,
      },
      "abab6-chat": {
          "id": "abab6-chat",
          "context_length": 32768,
+         "max_output_tokens": 32768,
          "function_call_available": True,
          "response_format_available": True,
      },
      "abab6.5s-chat": {
          "id": "abab6.5s-chat",
          "context_length": 245760,
+         "max_output_tokens": 245760,
          "function_call_available": True,
          "response_format_available": True,
      },
@@ -0,0 +1,36 @@
+ # @Author: Bi Ying
+ # @Date: 2024-08-14 13:03:10
+ import time
+
+
+ class Retry:
+     def __init__(self, function):
+         self.function = function
+         self.__retry_times = 3
+         self.__sleep_time = 1
+         self.pargs = []
+         self.kwargs = {}
+
+     def args(self, *args, **kwargs):
+         self.pargs = args
+         self.kwargs = kwargs
+         return self
+
+     def retry_times(self, retry_times: int):
+         self.__retry_times = retry_times
+         return self
+
+     def sleep_time(self, sleep_time):
+         self.__sleep_time = sleep_time
+         return self
+
+     def run(self):
+         try_times = 0
+         while try_times < self.__retry_times:
+             try:
+                 return True, self.function(*self.pargs, **self.kwargs)
+             except Exception as e:
+                 print(f"{self.function.__name__} 函数出错:{e}")
+                 try_times += 1
+                 time.sleep(self.__sleep_time)
+         return False, None
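The new Retry helper is a small fluent wrapper used by the token-counting code above: configure the callable, its arguments, the retry count and the sleep interval, then call run(), which returns a (success, result) tuple and swallows exceptions up to the retry limit. A minimal usage sketch mirroring the httpx.post calls in utils.py (the URL and payload are placeholders):

    import httpx
    from vectorvein.utilities.retry import Retry

    success, response = (
        Retry(httpx.post)
        .args("https://example.com/tokenize", json={"text": "hello"}, timeout=None)
        .retry_times(3)
        .sleep_time(5)
        .run()
    )
    if success:
        print(response.json())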
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: vectorvein
- Version: 0.1.7
+ Version: 0.1.9
  Summary: Default template for PDM package
  Author-Email: Anderson <andersonby@163.com>
  License: MIT
@@ -11,6 +11,8 @@ Requires-Dist: httpx>=0.27.0
  Requires-Dist: anthropic[vertex]>=0.31.2
  Requires-Dist: pydantic>=2.8.2
  Requires-Dist: Pillow>=10.4.0
+ Requires-Dist: deepseek-tokenizer>=0.1.0
+ Requires-Dist: qwen-tokenizer>=0.1.0
  Description-Content-Type: text/markdown

  # vectorvein
@@ -1,25 +1,26 @@
- vectorvein-0.1.7.dist-info/METADATA,sha256=C_3UCv_92cL58DnFKRMs-1GbEbzGIdDPlNt-X4aIKF4,423
- vectorvein-0.1.7.dist-info/WHEEL,sha256=rSwsxJWe3vzyR5HCwjWXQruDgschpei4h_giTm0dJVE,90
+ vectorvein-0.1.9.dist-info/METADATA,sha256=AlikMRU7DLdZ6gZMohsL1X6NiuAWP3jGV0tE-uZkhNo,501
+ vectorvein-0.1.9.dist-info/WHEEL,sha256=rSwsxJWe3vzyR5HCwjWXQruDgschpei4h_giTm0dJVE,90
  vectorvein/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  vectorvein/chat_clients/__init__.py,sha256=5j7W--jr-l2cqDJp38uXYvkydDK0rnzm7MYGSACHKmU,3976
- vectorvein/chat_clients/anthropic_client.py,sha256=pkk0yPH05WTOnKOAXkm60ZZw1kdT8pCydNkHx7QArh4,18707
- vectorvein/chat_clients/base_client.py,sha256=GjPUAjgd_36-lHr8QXvz2X_-ApjbmAD3KZRjmntJ65U,4247
+ vectorvein/chat_clients/anthropic_client.py,sha256=JjigSUsIn06ixIEjnOJhVbcMqy2_MAL3iVUlDFAFMW4,20008
+ vectorvein/chat_clients/base_client.py,sha256=wMXpQ1L1KDb2Hg6va3H3GmcVeQB6r6sh7F4IS0DBQWI,4275
  vectorvein/chat_clients/deepseek_client.py,sha256=3qWu01NlJAP2N-Ff62d5-CZXZitlizE1fzb20LNetig,526
- vectorvein/chat_clients/gemini_client.py,sha256=IpkfcqVF38f4kAFnlHyisn4vkiMeM4cICyS4XDD0jJE,12787
+ vectorvein/chat_clients/gemini_client.py,sha256=IHcBHTSHkj3f962S5L7Ga-XA-96sq8quIDRZpoqvGss,13653
  vectorvein/chat_clients/groq_client.py,sha256=Uow4pgdmFi93ZQSoOol2-0PhhqkW-S0XuSldvppz5U4,498
  vectorvein/chat_clients/local_client.py,sha256=55nOsxzqUf79q3Y14MKROA71zxhsT7p7FsDZ89rts2M,422
- vectorvein/chat_clients/minimax_client.py,sha256=toe4mFYHphHnPQpOpegaL5n2OxTUu5TJVju61j7hgBw,12100
+ vectorvein/chat_clients/minimax_client.py,sha256=uomp3DyTBmDXQtCmRiYp1VIIOFoVZ9_oyM3-j4JO7go,13000
  vectorvein/chat_clients/mistral_client.py,sha256=1aKSylzBDaLYcFnaBIL4-sXSzWmXfBeON9Q0rq-ziWw,534
  vectorvein/chat_clients/moonshot_client.py,sha256=gbu-6nGxx8uM_U2WlI4Wus881rFRotzHtMSoYOcruGU,526
  vectorvein/chat_clients/openai_client.py,sha256=Nz6tV45pWcsOupxjnsRsGTicbQNJWIZyxuJoJ5DGMpg,527
- vectorvein/chat_clients/openai_compatible_client.py,sha256=sN9fq8yZ0XKSVg0eQZDWUE-awrkkJA2lEdMG-WENnUg,11951
+ vectorvein/chat_clients/openai_compatible_client.py,sha256=fvg--wFwnFEEhLGS9_u1XzNhtkkDUf4_rq6zYKwnOuI,13738
  vectorvein/chat_clients/qwen_client.py,sha256=-ryh-m9PgsO0fc4ulcCmPTy1155J8YUy15uPoJQOHA0,513
- vectorvein/chat_clients/utils.py,sha256=tAQwfydj46sMxSHeaeOWXrTUY2q0h7482NbvZjbNz9A,17637
+ vectorvein/chat_clients/utils.py,sha256=mnAew2Ie3nQHdEyDLKuJvXkQ5QdcSAJ6SpYk5JPbR1Q,20888
  vectorvein/chat_clients/yi_client.py,sha256=RNf4CRuPJfixrwLZ3-DEc3t25QDe1mvZeb9sku2f8Bc,484
  vectorvein/chat_clients/zhipuai_client.py,sha256=Ys5DSeLCuedaDXr3PfG1EW2zKXopt-awO2IylWSwY0s,519
  vectorvein/settings/__init__.py,sha256=4mpccT7eZC3yI1vVnVViW4wHBnDEH9D2R5EsIP34VgU,3218
- vectorvein/types/defaults.py,sha256=Mg-Mj3_eBzKZn1N8x1V2GqyaYgLD13i-NdSYdQC28X4,11437
+ vectorvein/types/defaults.py,sha256=ANIYL0W0bxl2IBxvtkS_WlS_qMQQwpi5TKRdLxdk47M,13027
  vectorvein/types/enums.py,sha256=vzOenCnRlFXBwPh-lfFhjGfM-6yfDj7wZColHODqocI,1550
  vectorvein/types/llm_parameters.py,sha256=nBjStC2zndTY__yhD2WFXB09taxEhDLE3OHA6MICfgE,3494
  vectorvein/utilities/media_processing.py,sha256=BujciRmw1GMmc3ELRvafL8STcy6r5b2rVnh27-uA7so,2256
- vectorvein-0.1.7.dist-info/RECORD,,
+ vectorvein/utilities/retry.py,sha256=9ePuJdeUUGx-qMWfaFxmlOvG_lQPwCQ4UB1z3Edlo34,993
+ vectorvein-0.1.9.dist-info/RECORD,,