vectorvein 0.1.88__tar.gz → 0.1.90__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vectorvein-0.1.88 → vectorvein-0.1.90}/PKG-INFO +6 -1
- {vectorvein-0.1.88 → vectorvein-0.1.90}/pyproject.toml +13 -1
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/anthropic_client.py +4 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/base_client.py +121 -2
- vectorvein-0.1.90/src/vectorvein/chat_clients/gemini_client.py +13 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/openai_compatible_client.py +4 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/utils.py +34 -116
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/settings/__init__.py +30 -1
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/types/defaults.py +30 -6
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/types/llm_parameters.py +4 -1
- vectorvein-0.1.90/src/vectorvein/utilities/rate_limiter.py +312 -0
- vectorvein-0.1.88/src/vectorvein/chat_clients/gemini_client.py +0 -527
- {vectorvein-0.1.88 → vectorvein-0.1.90}/README.md +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/__init__.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/__init__.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/baichuan_client.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/groq_client.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/local_client.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/minimax_client.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/mistral_client.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/openai_client.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/py.typed +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/qwen_client.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/stepfun_client.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/xai_client.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/yi_client.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/py.typed +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/server/token_server.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/settings/py.typed +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/types/enums.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/types/exception.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/types/py.typed +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/utilities/media_processing.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/utilities/retry.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/graph/edge.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/graph/node.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/graph/port.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/graph/workflow.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/__init__.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/audio_generation.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/control_flows.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/file_processing.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/image_generation.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/llms.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/media_editing.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/media_processing.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/output.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/relational_db.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/text_processing.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/tools.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/triggers.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/vector_db.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/video_generation.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/web_crawlers.py +0 -0
- {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/utils/json_to_code.py +0 -0
{vectorvein-0.1.88 → vectorvein-0.1.90}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.88
+Version: 0.1.90
 Summary: VectorVein python SDK
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
@@ -14,9 +14,14 @@ Requires-Dist: Pillow>=10.4.0
 Requires-Dist: deepseek-tokenizer>=0.1.0
 Requires-Dist: qwen-tokenizer>=0.2.0
 Requires-Dist: google-auth>=2.35.0
+Requires-Dist: diskcache>=5.0.0
 Provides-Extra: server
 Requires-Dist: fastapi; extra == "server"
 Requires-Dist: uvicorn; extra == "server"
+Provides-Extra: redis
+Requires-Dist: redis; extra == "redis"
+Provides-Extra: diskcache
+Requires-Dist: diskcache; extra == "diskcache"
 Description-Content-Type: text/markdown
 
 # vectorvein
{vectorvein-0.1.88 → vectorvein-0.1.90}/pyproject.toml
RENAMED
@@ -12,12 +12,13 @@ dependencies = [
     "deepseek-tokenizer>=0.1.0",
     "qwen-tokenizer>=0.2.0",
     "google-auth>=2.35.0",
+    "diskcache>=5.0.0",
 ]
 description = "VectorVein python SDK"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.1.88"
+version = "0.1.90"
 
 [project.license]
 text = "MIT"
@@ -27,6 +28,12 @@ server = [
     "fastapi",
     "uvicorn",
 ]
+redis = [
+    "redis",
+]
+diskcache = [
+    "diskcache",
+]
 
 [build-system]
 build-backend = "pdm.backend"
@@ -45,3 +52,8 @@ excludes = [
 [tool.pdm.scripts.test]
 cmd = "python"
 env_file = ".env"
+
+[dependency-groups]
+dev = [
+    "types-redis>=4.6.0.20241004",
+]
{vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/anthropic_client.py
RENAMED
@@ -434,6 +434,8 @@ class AnthropicChatClient(BaseChatClient):
         else:
             max_tokens = self.model_setting.context_length - token_counts
 
+        self._acquire_rate_limit(self.endpoint, self.model, messages)
+
         if self.stream:
             stream_response = raw_client.messages.create(
                 model=self.model_id,
@@ -824,6 +826,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         else:
             max_tokens = self.model_setting.context_length - token_counts
 
+        await self._acquire_rate_limit(self.endpoint, self.model, messages)
+
         if self.stream:
             stream_response = await raw_client.messages.create(
                 model=self.model_id,
{vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/base_client.py
RENAMED
@@ -1,7 +1,8 @@
-# @Author: Bi Ying
-# @Date: 2024-07-26 14:48:55
+import time
 import random
+import asyncio
 from abc import ABC, abstractmethod
+from collections import defaultdict
 from functools import cached_property
 from typing import Generator, AsyncGenerator, Any, overload, Literal, Iterable
 
@@ -29,6 +30,8 @@ from ..types.llm_parameters import (
     ChatCompletionDeltaMessage,
     ChatCompletionStreamOptionsParam,
 )
+from ..utilities.rate_limiter import SyncMemoryRateLimiter, SyncRedisRateLimiter, SyncDiskCacheRateLimiter
+from ..utilities.rate_limiter import AsyncMemoryRateLimiter, AsyncRedisRateLimiter, AsyncDiskCacheRateLimiter
 
 
 class BaseChatClient(ABC):
@@ -59,11 +62,65 @@ class BaseChatClient(ABC):
 
         self.backend_settings = settings.get_backend(self.BACKEND_NAME)
 
+        self.rate_limiter = self._init_rate_limiter()
+        self.active_requests = defaultdict(int)
+        self.rpm = None
+        self.tpm = None
+        self.concurrent_requests = None
+
         if endpoint_id:
             self.endpoint_id = endpoint_id
             self.random_endpoint = False
             self.endpoint = settings.get_endpoint(self.endpoint_id)
 
+    def _init_rate_limiter(self):
+        if not settings.rate_limit:
+            return None
+        if not settings.rate_limit.enabled:
+            return None
+
+        if settings.rate_limit.backend == "memory":
+            return SyncMemoryRateLimiter()
+        elif settings.rate_limit.backend == "redis":
+            if not settings.rate_limit.redis:
+                raise ValueError("Redis settings must be provided if Redis backend is selected.")
+            return SyncRedisRateLimiter(
+                host=settings.rate_limit.redis.host,
+                port=settings.rate_limit.redis.port,
+                db=settings.rate_limit.redis.db,
+            )
+        elif settings.rate_limit.backend == "diskcache":
+            if not settings.rate_limit.diskcache:
+                raise ValueError("Diskcache settings must be provided if Diskcache backend is selected.")
+            return SyncDiskCacheRateLimiter(
+                cache_dir=settings.rate_limit.diskcache.cache_dir,
+            )
+        return None
+
+    def _acquire_rate_limit(self, endpoint: EndpointSetting | None, model: str, messages: list):
+        if endpoint is None:
+            return
+
+        key = f"{endpoint.id}:{model}"
+
+        # Get rate limit parameters
+        # Priority: parameters in model.endpoints > parameters in endpoint > default parameters
+        rpm = self.rpm or endpoint.rpm or (settings.rate_limit.default_rpm if settings.rate_limit else 60)
+        tpm = self.tpm or endpoint.tpm or (settings.rate_limit.default_tpm if settings.rate_limit else 1000000)
+
+        while self.rate_limiter:
+            allowed, wait_time = self.rate_limiter.check_limit(key, rpm, tpm, self._estimate_request_tokens(messages))
+            if allowed:
+                break
+            time.sleep(wait_time)
+
+    def _estimate_request_tokens(self, messages: list) -> int:
+        """Roughly estimate the number of tokens in the request"""
+        tokens = 0
+        for message in messages:
+            tokens += int(len(message.get("content", "")) * 0.6)
+        return tokens
+
     def set_model_id_by_endpoint_id(self, endpoint_id: str):
         for endpoint_option in self.backend_settings.models[self.model].endpoints:
             if isinstance(endpoint_option, dict) and endpoint_id == endpoint_option["endpoint_id"]:
@@ -79,6 +136,9 @@ class BaseChatClient(ABC):
             if isinstance(endpoint, dict):
                 self.endpoint_id = endpoint["endpoint_id"]
                 self.model_id = endpoint["model_id"]
+                self.rpm = endpoint.get("rpm", None)
+                self.tpm = endpoint.get("tpm", None)
+                self.concurrent_requests = endpoint.get("concurrent_requests", None)
             else:
                 self.endpoint_id = endpoint
             self.endpoint = settings.get_endpoint(self.endpoint_id)
@@ -236,11 +296,67 @@ class BaseAsyncChatClient(ABC):
 
         self.backend_settings = settings.get_backend(self.BACKEND_NAME)
 
+        self.rate_limiter = self._init_rate_limiter()
+        self.active_requests = defaultdict(int)
+        self.rpm = None
+        self.tpm = None
+        self.concurrent_requests = None
+
         if endpoint_id:
             self.endpoint_id = endpoint_id
             self.random_endpoint = False
             self.endpoint = settings.get_endpoint(self.endpoint_id)
 
+    def _init_rate_limiter(self):
+        if not settings.rate_limit:
+            return None
+        if not settings.rate_limit.enabled:
+            return None
+
+        if settings.rate_limit.backend == "memory":
+            return AsyncMemoryRateLimiter()
+        elif settings.rate_limit.backend == "redis":
+            if not settings.rate_limit.redis:
+                raise ValueError("Redis settings must be provided if Redis backend is selected.")
+            return AsyncRedisRateLimiter(
+                host=settings.rate_limit.redis.host,
+                port=settings.rate_limit.redis.port,
+                db=settings.rate_limit.redis.db,
+            )
+        elif settings.rate_limit.backend == "diskcache":
+            if not settings.rate_limit.diskcache:
+                raise ValueError("Diskcache settings must be provided if Diskcache backend is selected.")
+            return AsyncDiskCacheRateLimiter(
+                cache_dir=settings.rate_limit.diskcache.cache_dir,
+            )
+        return None
+
+    async def _acquire_rate_limit(self, endpoint: EndpointSetting | None, model: str, messages: list):
+        if endpoint is None:
+            return
+
+        key = f"{endpoint.id}:{model}"
+
+        # Get rate limit parameters
+        # Priority: parameters in model.endpoints > parameters in endpoint > default parameters
+        rpm = self.rpm or endpoint.rpm or (settings.rate_limit.default_rpm if settings.rate_limit else 60)
+        tpm = self.tpm or endpoint.tpm or (settings.rate_limit.default_tpm if settings.rate_limit else 1000000)
+
+        while self.rate_limiter:
+            allowed, wait_time = await self.rate_limiter.check_limit(
+                key, rpm, tpm, self._estimate_request_tokens(messages)
+            )
+            if allowed:
+                break
+            await asyncio.sleep(wait_time)
+
+    def _estimate_request_tokens(self, messages: list) -> int:
+        """Roughly estimate the number of tokens in the request"""
+        tokens = 0
+        for message in messages:
+            tokens += int(len(message.get("content", "")) * 0.6)
+        return tokens
+
     def set_model_id_by_endpoint_id(self, endpoint_id: str):
         for endpoint_option in self.backend_settings.models[self.model].endpoints:
             if isinstance(endpoint_option, dict) and endpoint_id == endpoint_option["endpoint_id"]:
@@ -256,6 +372,9 @@ class BaseAsyncChatClient(ABC):
             if isinstance(endpoint, dict):
                 self.endpoint_id = endpoint["endpoint_id"]
                 self.model_id = endpoint["model_id"]
+                self.rpm = endpoint.get("rpm", None)
+                self.tpm = endpoint.get("tpm", None)
+                self.concurrent_requests = endpoint.get("concurrent_requests", None)
             else:
                 self.endpoint_id = endpoint
             self.endpoint = settings.get_endpoint(self.endpoint_id)
vectorvein-0.1.90/src/vectorvein/chat_clients/gemini_client.py
@@ -0,0 +1,13 @@
+from ..types.enums import BackendType
+from ..types.defaults import GEMINI_DEFAULT_MODEL
+from .openai_compatible_client import OpenAICompatibleChatClient, AsyncOpenAICompatibleChatClient
+
+
+class GeminiChatClient(OpenAICompatibleChatClient):
+    DEFAULT_MODEL = GEMINI_DEFAULT_MODEL
+    BACKEND_NAME = BackendType.Gemini
+
+
+class AsyncGeminiChatClient(AsyncOpenAICompatibleChatClient):
+    DEFAULT_MODEL = GEMINI_DEFAULT_MODEL
+    BACKEND_NAME = BackendType.Gemini
{vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/openai_compatible_client.py
RENAMED
@@ -212,6 +212,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
         else:
             _stream_options_params = {}
 
+        self._acquire_rate_limit(self.endpoint, self.model, messages)
+
         if self.stream:
             stream_response = raw_client.chat.completions.create(
                 model=self.model_id,
@@ -538,6 +540,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         else:
             max_tokens = self.model_setting.context_length - token_counts - 64
 
+        await self._acquire_rate_limit(self.endpoint, self.model, messages)
+
         if self.stream:
             stream_response = await raw_client.chat.completions.create(
                 model=self.model_id,
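As in the Anthropic client, both OpenAI-compatible clients now call `_acquire_rate_limit` immediately before creating the completion, blocking (or awaiting) until the shared limiter grants the request. The real backends live in the new `utilities/rate_limiter.py`, which is not shown in this diff; the sketch below only illustrates the `check_limit(key, rpm, tpm, tokens) -> (allowed, wait_seconds)` contract that the acquisition loop in `base_client.py` relies on:

```python
import time
from collections import deque


class SlidingWindowLimiter:
    """Illustrative in-memory limiter; not the SDK's implementation."""

    def __init__(self) -> None:
        self._events: dict[str, deque[tuple[float, int]]] = {}

    def check_limit(self, key: str, rpm: int, tpm: int, tokens: int) -> tuple[bool, float]:
        now = time.time()
        window = self._events.setdefault(key, deque())
        # Drop requests that have left the 60-second window.
        while window and now - window[0][0] > 60:
            window.popleft()
        used_tokens = sum(t for _, t in window)
        if len(window) < rpm and used_tokens + tokens <= tpm:
            window.append((now, tokens))
            return True, 0.0
        # Suggest waiting until the oldest request expires.
        wait = 60 - (now - window[0][0]) if window else 1.0
        return False, max(wait, 0.1)
```

A sliding one-minute window like this is one simple way to honor both a requests-per-minute and a tokens-per-minute budget with a single data structure keyed by `f"{endpoint.id}:{model}"`.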
{vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/utils.py
RENAMED
@@ -83,20 +83,6 @@ class ToolCallContentProcessor:
         return {}
 
 
-def get_assistant_role_key(backend: BackendType) -> str:
-    if backend == BackendType.Gemini:
-        return "model"
-    else:
-        return "assistant"
-
-
-def get_content_key(backend: BackendType) -> str:
-    if backend == BackendType.Gemini:
-        return "parts"
-    else:
-        return "content"
-
-
 def convert_type(value, value_type):
     if value_type == "string":
         return str(value)
@@ -141,9 +127,9 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         text = str(text)
     if model == "gpt-3.5-turbo":
         return len(get_gpt_35_encoding().encode(text))
-    elif model
+    elif model.startswith(("gpt-4o", "o1-")):
         return len(get_gpt_4o_encoding().encode(text))
-    elif model.startswith("abab"):
+    elif model.startswith(("abab", "MiniMax")):
         model_setting = settings.minimax.models[model]
         if len(model_setting.endpoints) == 0:
             return int(len(text) / 1.33)
@@ -201,10 +187,6 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         result = response.json()
         return result["data"]["total_tokens"]
     elif model.startswith("gemini"):
-        # TODO: gemini-exp-1206 is not supported yet; use gemini-1.5-flash instead
-        if model in ("gemini-exp-1206", "gemini-2.0-flash-exp", "gemini-2.0-flash-thinking-exp-1219"):
-            model = "gemini-1.5-flash"
-
         model_setting = settings.gemini.models[model]
         if len(model_setting.endpoints) == 0:
             return len(get_gpt_35_encoding().encode(text))
@@ -213,7 +195,12 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
             endpoint_id = endpoint_id["endpoint_id"]
         endpoint = settings.get_endpoint(endpoint_id)
 
-
+        api_base = (
+            endpoint.api_base.removesuffix("/openai/")
+            if endpoint.api_base
+            else "https://generativelanguage.googleapis.com/v1beta"
+        )
+        base_url = f"{api_base}/models/{model_setting.id}:countTokens"
         params = {"key": endpoint.api_key}
         request_body = {
             "contents": {
@@ -304,7 +291,7 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         endpoint = settings.get_endpoint(endpoint_id)
         if model not in ("glm-4-plus", "glm-4-long", "glm-4-0520", "glm-4-air", "glm-4-flash"):
             model = "glm-4-plus"
-        tokenize_url = f"{endpoint.api_base}/tokenizer"
+        tokenize_url = f"{endpoint.api_base or 'https://open.bigmodel.cn/api/paas/v4'}/tokenizer"
         headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
         request_body = {
             "model": model,
@@ -395,7 +382,7 @@ def cutoff_messages(
         return messages
 
     messages_length = 0
-    content_key =
+    content_key = "content"
 
     # First check for and keep the leading system message (if any)
     system_message = None
@@ -440,21 +427,14 @@
                 continue
             if index == 0:
                 # If a single message already exceeds the limit, truncate its content and keep the trailing part
-
-
-
-
-
-
-
-
-                    content = message[content_key][max_count - messages_length :]
-                    return system_message + [
-                        {
-                            "role": message["role"],
-                            content_key: content,
-                        }
-                    ]
+                content = message[content_key][max_count - messages_length :]
+                return system_message + [
+                    {
+                        "role": message["role"],
+                        content_key: content,
+                    }
+                ]
+
             return system_message + messages[-index:]
     return system_message + messages
 
@@ -477,13 +457,6 @@ def format_image_message(image: str, backend: BackendType = BackendType.OpenAI)
                 "data": image_processor.base64_image,
             },
         }
-    elif backend == BackendType.Gemini:
-        return {
-            "inline_data": {
-                "mime_type": image_processor.mime_type,
-                "data": image_processor.base64_image,
-            }
-        }
     else:
         return {
             "type": "image_url",
@@ -495,7 +468,7 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
     formatted_messages = []
 
     # Tool call message
-    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi):
+    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi, BackendType.Gemini):
         tool_call_message = {
             "content": None,
             "role": "assistant",
@@ -524,20 +497,6 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
         }
         if content:
            tool_call_message["content"].insert(0, {"type": "text", "text": content})
-    elif backend == BackendType.Gemini:
-        tool_call_message = {
-            "role": "model",
-            "parts": [
-                {
-                    "functionCall": {
-                        "name": message["metadata"]["selected_workflow"]["function_name"],
-                        "args": message["metadata"]["selected_workflow"]["params"],
-                    }
-                },
-            ],
-        }
-        if content:
-            tool_call_message["parts"].insert(0, {"text": content})
     else:
         tool_call_message = {
             "content": json.dumps(
@@ -552,7 +511,7 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
     formatted_messages.append(tool_call_message)
 
     # Tool call result message
-    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi):
+    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi, BackendType.Gemini):
         tool_call_result_message = {
             "role": "tool",
             "tool_call_id": message["metadata"]["selected_workflow"]["tool_call_id"],
@@ -570,21 +529,6 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
                 }
             ],
         }
-    elif backend == BackendType.Gemini:
-        tool_call_result_message = {
-            "role": "function",
-            "parts": [
-                {
-                    "functionResponse": {
-                        "name": message["metadata"]["selected_workflow"]["function_name"],
-                        "response": {
-                            "name": message["metadata"]["selected_workflow"]["function_name"],
-                            "content": message["metadata"].get("workflow_result", ""),
-                        },
-                    }
-                }
-            ],
-        }
     else:
         tool_call_result_message = {
             "role": "user",
@@ -598,7 +542,7 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
         }
     formatted_messages.append(tool_call_result_message)
 
-    if content and backend not in (BackendType.Mistral, BackendType.Anthropic
+    if content and backend not in (BackendType.Mistral, BackendType.Anthropic):
         formatted_messages.append({"role": "assistant", "content": content})
 
     return formatted_messages
@@ -608,21 +552,7 @@ def transform_from_openai_message(message: ChatCompletionMessageParam, backend:
     role = message.get("role", "user")
     content = message.get("content", "")
 
-    if backend == BackendType.Gemini:
-        if isinstance(content, list):
-            parts = []
-            for item in content:
-                if isinstance(item, str):
-                    parts.append({"text": item})
-                elif isinstance(item, dict) and "type" in item:
-                    if item["type"] == "image":
-                        parts.append({"image": item["image"]})
-                    elif item["type"] == "text":
-                        parts.append({"text": item["text"]})
-            return {"role": "user" if role == "user" else "model", "parts": parts}
-        else:
-            return {"role": "user" if role == "user" else "model", "parts": [{"text": content}]}
-    elif backend == BackendType.Anthropic:
+    if backend == BackendType.Anthropic:
         if isinstance(content, list):
             formatted_content = []
             for item in content:
@@ -663,7 +593,7 @@ def format_messages(
         # Handle messages in VectorVein format
         content = message["content"]["text"]
         if message["content_type"] == "TXT":
-            role = "user" if message["author_type"] == "U" else
+            role = "user" if message["author_type"] == "U" else "assistant"
             formatted_message = format_text_message(
                 content, role, message.get("attachments", []), backend, native_multimodal
             )
@@ -693,31 +623,19 @@ def format_text_message(
         content += "\n".join([f"- {attachment}" for attachment in attachments])
 
     if native_multimodal and has_images:
-
-
-
-
-
-
-
-
-                "role": role,
-                "content": [
-                    {"type": "text", "text": content},
-                    *[
-                        format_image_message(image=attachment, backend=backend)
-                        for attachment in attachments
-                        if attachment.lower().endswith(images_extensions)
+        return {
+            "role": role,
+            "content": [
+                {"type": "text", "text": content},
+                *[
+                    format_image_message(image=attachment, backend=backend)
+                    for attachment in attachments
+                    if attachment.lower().endswith(images_extensions)
                 ],
-
+            ],
+        }
     else:
-
-        return {"role": role, "parts": [{"text": content}]}
-    elif backend == BackendType.Anthropic:
-        return {"role": role, "content": content}
-    else:
-        return {"role": role, "content": content}
+        return {"role": role, "content": content}
 
 
 def generate_tool_use_system_prompt(tools: list | str, format_type: str = "json") -> str:
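With the Gemini-specific formatting helpers removed, token counting is the main Gemini-aware path left in `utils.py`, and the local `tiktoken` branches now also cover `gpt-4o*`/`o1-*` and `MiniMax*` model names. A small usage sketch (the model names are just examples; hosted backends such as MiniMax, Gemini, or ZhipuAI generally go through the provider's tokenizer API when an endpoint with credentials is configured):

```python
from vectorvein.chat_clients.utils import get_token_counts

# These two branches use local tiktoken encodings, so no endpoint is needed.
print(get_token_counts("Hello, VectorVein!", model="gpt-3.5-turbo"))
print(get_token_counts("Hello, VectorVein!", model="gpt-4o-mini"))
```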
{vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/settings/__init__.py
RENAMED
@@ -1,6 +1,6 @@
 # @Author: Bi Ying
 # @Date: 2024-07-27 00:30:56
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Literal
 
 from pydantic import BaseModel, Field
 
@@ -9,6 +9,26 @@ from ..types.enums import BackendType
 from ..types.llm_parameters import BackendSettings, EndpointSetting
 
 
+class RedisConfig(BaseModel):
+    host: str = "localhost"
+    port: int = 6379
+    db: int = 0
+
+
+class DiskCacheConfig(BaseModel):
+    cache_dir: str = ".rate_limit_cache"
+
+
+class RateLimitConfig(BaseModel):
+    enabled: bool = False
+
+    backend: Literal["memory", "redis", "diskcache"] = "memory"
+    redis: Optional[RedisConfig] = Field(default=None)
+    diskcache: Optional[DiskCacheConfig] = Field(default=None)
+    default_rpm: int = 60
+    default_tpm: int = 1000000
+
+
 class Server(BaseModel):
     host: str
     port: int
@@ -20,6 +40,7 @@ class Settings(BaseModel):
         default_factory=list, description="Available endpoints for the LLM service."
     )
     token_server: Optional[Server] = Field(default=None, description="Token server address. Format: host:port")
+    rate_limit: Optional[RateLimitConfig] = Field(default=None, description="Rate limit settings.")
 
     anthropic: BackendSettings = Field(default_factory=BackendSettings, description="Anthropic models settings.")
     deepseek: BackendSettings = Field(default_factory=BackendSettings, description="Deepseek models settings.")
@@ -63,6 +84,14 @@ class Settings(BaseModel):
             else:
                 data[model_type] = BackendSettings(models=default_models)
 
+        for endpoint in data.get("endpoints", []):
+            if not endpoint.get("api_base"):
+                continue
+            api_base = endpoint["api_base"]
+            if api_base.startswith("https://generativelanguage.googleapis.com/v1beta"):
+                if not api_base.endswith("openai/"):
+                    endpoint["api_base"] = api_base.strip("/") + "/openai/"
+
         super().__init__(**data)
 
     def load(self, settings_dict: Dict):
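`Settings` gains an optional `rate_limit` block and, at construction time, rewrites Gemini endpoints pointing at `https://generativelanguage.googleapis.com/v1beta` onto the `/openai/` compatibility path, which is what lets the new `gemini_client.py` reuse the OpenAI-compatible client. A minimal sketch of a settings payload exercising both (the endpoint fields other than `api_base`, and the exact shape `EndpointSetting` expects, are assumptions not shown in this diff):

```python
from vectorvein.settings import Settings  # config models shown above

config = {
    "endpoints": [
        {
            "id": "gemini-default",          # assumed EndpointSetting fields
            "api_key": "YOUR_GEMINI_KEY",
            # Rewritten by Settings.__init__ above to ".../v1beta/openai/".
            "api_base": "https://generativelanguage.googleapis.com/v1beta",
        }
    ],
    "rate_limit": {
        "enabled": True,
        "backend": "diskcache",              # "memory" | "redis" | "diskcache"
        "diskcache": {"cache_dir": ".rate_limit_cache"},
        "default_rpm": 60,
        "default_tpm": 1_000_000,
    },
}

settings = Settings(**config)
endpoint = settings.get_endpoint("gemini-default")
assert endpoint.api_base.endswith("/v1beta/openai/")
assert settings.rate_limit and settings.rate_limit.backend == "diskcache"
```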