vectorvein 0.1.88__tar.gz → 0.1.90__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. {vectorvein-0.1.88 → vectorvein-0.1.90}/PKG-INFO +6 -1
  2. {vectorvein-0.1.88 → vectorvein-0.1.90}/pyproject.toml +13 -1
  3. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/anthropic_client.py +4 -0
  4. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/base_client.py +121 -2
  5. vectorvein-0.1.90/src/vectorvein/chat_clients/gemini_client.py +13 -0
  6. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/openai_compatible_client.py +4 -0
  7. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/utils.py +34 -116
  8. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/settings/__init__.py +30 -1
  9. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/types/defaults.py +30 -6
  10. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/types/llm_parameters.py +4 -1
  11. vectorvein-0.1.90/src/vectorvein/utilities/rate_limiter.py +312 -0
  12. vectorvein-0.1.88/src/vectorvein/chat_clients/gemini_client.py +0 -527
  13. {vectorvein-0.1.88 → vectorvein-0.1.90}/README.md +0 -0
  14. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/__init__.py +0 -0
  15. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/__init__.py +0 -0
  16. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/baichuan_client.py +0 -0
  17. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
  18. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/groq_client.py +0 -0
  19. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/local_client.py +0 -0
  20. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/minimax_client.py +0 -0
  21. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/mistral_client.py +0 -0
  22. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
  23. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/openai_client.py +0 -0
  24. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/py.typed +0 -0
  25. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/qwen_client.py +0 -0
  26. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/stepfun_client.py +0 -0
  27. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/xai_client.py +0 -0
  28. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/yi_client.py +0 -0
  29. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
  30. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/py.typed +0 -0
  31. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/server/token_server.py +0 -0
  32. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/settings/py.typed +0 -0
  33. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/types/enums.py +0 -0
  34. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/types/exception.py +0 -0
  35. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/types/py.typed +0 -0
  36. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/utilities/media_processing.py +0 -0
  37. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/utilities/retry.py +0 -0
  38. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/graph/edge.py +0 -0
  39. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/graph/node.py +0 -0
  40. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/graph/port.py +0 -0
  41. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/graph/workflow.py +0 -0
  42. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/__init__.py +0 -0
  43. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/audio_generation.py +0 -0
  44. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/control_flows.py +0 -0
  45. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/file_processing.py +0 -0
  46. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/image_generation.py +0 -0
  47. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/llms.py +0 -0
  48. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/media_editing.py +0 -0
  49. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/media_processing.py +0 -0
  50. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/output.py +0 -0
  51. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/relational_db.py +0 -0
  52. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/text_processing.py +0 -0
  53. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/tools.py +0 -0
  54. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/triggers.py +0 -0
  55. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/vector_db.py +0 -0
  56. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/video_generation.py +0 -0
  57. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/nodes/web_crawlers.py +0 -0
  58. {vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/workflow/utils/json_to_code.py +0 -0

{vectorvein-0.1.88 → vectorvein-0.1.90}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.88
+Version: 0.1.90
 Summary: VectorVein python SDK
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
@@ -14,9 +14,14 @@ Requires-Dist: Pillow>=10.4.0
 Requires-Dist: deepseek-tokenizer>=0.1.0
 Requires-Dist: qwen-tokenizer>=0.2.0
 Requires-Dist: google-auth>=2.35.0
+Requires-Dist: diskcache>=5.0.0
 Provides-Extra: server
 Requires-Dist: fastapi; extra == "server"
 Requires-Dist: uvicorn; extra == "server"
+Provides-Extra: redis
+Requires-Dist: redis; extra == "redis"
+Provides-Extra: diskcache
+Requires-Dist: diskcache; extra == "diskcache"
 Description-Content-Type: text/markdown

 # vectorvein

{vectorvein-0.1.88 → vectorvein-0.1.90}/pyproject.toml
@@ -12,12 +12,13 @@ dependencies = [
     "deepseek-tokenizer>=0.1.0",
     "qwen-tokenizer>=0.2.0",
     "google-auth>=2.35.0",
+    "diskcache>=5.0.0",
 ]
 description = "VectorVein python SDK"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.1.88"
+version = "0.1.90"

 [project.license]
 text = "MIT"
@@ -27,6 +28,12 @@ server = [
     "fastapi",
     "uvicorn",
 ]
+redis = [
+    "redis",
+]
+diskcache = [
+    "diskcache",
+]

 [build-system]
 build-backend = "pdm.backend"
@@ -45,3 +52,8 @@ excludes = [
 [tool.pdm.scripts.test]
 cmd = "python"
 env_file = ".env"
+
+[dependency-groups]
+dev = [
+    "types-redis>=4.6.0.20241004",
+]

{vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/anthropic_client.py
@@ -434,6 +434,8 @@ class AnthropicChatClient(BaseChatClient):
             else:
                 max_tokens = self.model_setting.context_length - token_counts

+        self._acquire_rate_limit(self.endpoint, self.model, messages)
+
         if self.stream:
             stream_response = raw_client.messages.create(
                 model=self.model_id,
@@ -824,6 +826,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             else:
                 max_tokens = self.model_setting.context_length - token_counts

+        await self._acquire_rate_limit(self.endpoint, self.model, messages)
+
         if self.stream:
             stream_response = await raw_client.messages.create(
                 model=self.model_id,

{vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/base_client.py
@@ -1,7 +1,8 @@
-# @Author: Bi Ying
-# @Date: 2024-07-26 14:48:55
+import time
 import random
+import asyncio
 from abc import ABC, abstractmethod
+from collections import defaultdict
 from functools import cached_property
 from typing import Generator, AsyncGenerator, Any, overload, Literal, Iterable

@@ -29,6 +30,8 @@ from ..types.llm_parameters import (
     ChatCompletionDeltaMessage,
     ChatCompletionStreamOptionsParam,
 )
+from ..utilities.rate_limiter import SyncMemoryRateLimiter, SyncRedisRateLimiter, SyncDiskCacheRateLimiter
+from ..utilities.rate_limiter import AsyncMemoryRateLimiter, AsyncRedisRateLimiter, AsyncDiskCacheRateLimiter


 class BaseChatClient(ABC):
@@ -59,11 +62,65 @@

         self.backend_settings = settings.get_backend(self.BACKEND_NAME)

+        self.rate_limiter = self._init_rate_limiter()
+        self.active_requests = defaultdict(int)
+        self.rpm = None
+        self.tpm = None
+        self.concurrent_requests = None
+
         if endpoint_id:
             self.endpoint_id = endpoint_id
             self.random_endpoint = False
             self.endpoint = settings.get_endpoint(self.endpoint_id)

+    def _init_rate_limiter(self):
+        if not settings.rate_limit:
+            return None
+        if not settings.rate_limit.enabled:
+            return None
+
+        if settings.rate_limit.backend == "memory":
+            return SyncMemoryRateLimiter()
+        elif settings.rate_limit.backend == "redis":
+            if not settings.rate_limit.redis:
+                raise ValueError("Redis settings must be provided if Redis backend is selected.")
+            return SyncRedisRateLimiter(
+                host=settings.rate_limit.redis.host,
+                port=settings.rate_limit.redis.port,
+                db=settings.rate_limit.redis.db,
+            )
+        elif settings.rate_limit.backend == "diskcache":
+            if not settings.rate_limit.diskcache:
+                raise ValueError("Diskcache settings must be provided if Diskcache backend is selected.")
+            return SyncDiskCacheRateLimiter(
+                cache_dir=settings.rate_limit.diskcache.cache_dir,
+            )
+        return None
+
+    def _acquire_rate_limit(self, endpoint: EndpointSetting | None, model: str, messages: list):
+        if endpoint is None:
+            return
+
+        key = f"{endpoint.id}:{model}"
+
+        # Get rate limit parameters
+        # Priority: parameters in model.endpoints > parameters in endpoint > default parameters
+        rpm = self.rpm or endpoint.rpm or (settings.rate_limit.default_rpm if settings.rate_limit else 60)
+        tpm = self.tpm or endpoint.tpm or (settings.rate_limit.default_tpm if settings.rate_limit else 1000000)
+
+        while self.rate_limiter:
+            allowed, wait_time = self.rate_limiter.check_limit(key, rpm, tpm, self._estimate_request_tokens(messages))
+            if allowed:
+                break
+            time.sleep(wait_time)
+
+    def _estimate_request_tokens(self, messages: list) -> int:
+        """Roughly estimate the number of tokens in the request"""
+        tokens = 0
+        for message in messages:
+            tokens += int(len(message.get("content", "")) * 0.6)
+        return tokens
+
     def set_model_id_by_endpoint_id(self, endpoint_id: str):
         for endpoint_option in self.backend_settings.models[self.model].endpoints:
             if isinstance(endpoint_option, dict) and endpoint_id == endpoint_option["endpoint_id"]:
@@ -79,6 +136,9 @@ class BaseChatClient(ABC):
         if isinstance(endpoint, dict):
             self.endpoint_id = endpoint["endpoint_id"]
             self.model_id = endpoint["model_id"]
+            self.rpm = endpoint.get("rpm", None)
+            self.tpm = endpoint.get("tpm", None)
+            self.concurrent_requests = endpoint.get("concurrent_requests", None)
         else:
             self.endpoint_id = endpoint
             self.endpoint = settings.get_endpoint(self.endpoint_id)
@@ -236,11 +296,67 @@ class BaseAsyncChatClient(ABC):

         self.backend_settings = settings.get_backend(self.BACKEND_NAME)

+        self.rate_limiter = self._init_rate_limiter()
+        self.active_requests = defaultdict(int)
+        self.rpm = None
+        self.tpm = None
+        self.concurrent_requests = None
+
         if endpoint_id:
             self.endpoint_id = endpoint_id
             self.random_endpoint = False
             self.endpoint = settings.get_endpoint(self.endpoint_id)

+    def _init_rate_limiter(self):
+        if not settings.rate_limit:
+            return None
+        if not settings.rate_limit.enabled:
+            return None
+
+        if settings.rate_limit.backend == "memory":
+            return AsyncMemoryRateLimiter()
+        elif settings.rate_limit.backend == "redis":
+            if not settings.rate_limit.redis:
+                raise ValueError("Redis settings must be provided if Redis backend is selected.")
+            return AsyncRedisRateLimiter(
+                host=settings.rate_limit.redis.host,
+                port=settings.rate_limit.redis.port,
+                db=settings.rate_limit.redis.db,
+            )
+        elif settings.rate_limit.backend == "diskcache":
+            if not settings.rate_limit.diskcache:
+                raise ValueError("Diskcache settings must be provided if Diskcache backend is selected.")
+            return AsyncDiskCacheRateLimiter(
+                cache_dir=settings.rate_limit.diskcache.cache_dir,
+            )
+        return None
+
+    async def _acquire_rate_limit(self, endpoint: EndpointSetting | None, model: str, messages: list):
+        if endpoint is None:
+            return
+
+        key = f"{endpoint.id}:{model}"
+
+        # Get rate limit parameters
+        # Priority: parameters in model.endpoints > parameters in endpoint > default parameters
+        rpm = self.rpm or endpoint.rpm or (settings.rate_limit.default_rpm if settings.rate_limit else 60)
+        tpm = self.tpm or endpoint.tpm or (settings.rate_limit.default_tpm if settings.rate_limit else 1000000)
+
+        while self.rate_limiter:
+            allowed, wait_time = await self.rate_limiter.check_limit(
+                key, rpm, tpm, self._estimate_request_tokens(messages)
+            )
+            if allowed:
+                break
+            await asyncio.sleep(wait_time)
+
+    def _estimate_request_tokens(self, messages: list) -> int:
+        """Roughly estimate the number of tokens in the request"""
+        tokens = 0
+        for message in messages:
+            tokens += int(len(message.get("content", "")) * 0.6)
+        return tokens
+
     def set_model_id_by_endpoint_id(self, endpoint_id: str):
         for endpoint_option in self.backend_settings.models[self.model].endpoints:
             if isinstance(endpoint_option, dict) and endpoint_id == endpoint_option["endpoint_id"]:
@@ -256,6 +372,9 @@ class BaseAsyncChatClient(ABC):
         if isinstance(endpoint, dict):
             self.endpoint_id = endpoint["endpoint_id"]
             self.model_id = endpoint["model_id"]
+            self.rpm = endpoint.get("rpm", None)
+            self.tpm = endpoint.get("tpm", None)
+            self.concurrent_requests = endpoint.get("concurrent_requests", None)
         else:
             self.endpoint_id = endpoint
             self.endpoint = settings.get_endpoint(self.endpoint_id)
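
The new rate limiting in BaseChatClient / BaseAsyncChatClient is driven entirely by settings: _init_rate_limiter chooses a backend from settings.rate_limit, and _acquire_rate_limit resolves rpm/tpm with the priority model-endpoint entry > endpoint > rate-limit defaults. A minimal sketch of such a configuration follows; the field names come from RateLimitConfig, EndpointSetting, and the endpoint dicts read above, while the endpoint id, API key, model entry, and the assumption that settings.load accepts the same dictionary shape as the Settings constructor are illustrative, not taken from this diff.

```python
# Hypothetical configuration sketch -- field names mirror RateLimitConfig and the
# per-endpoint keys read in base_client.py; ids, keys, and the model entry are placeholders.
from vectorvein.settings import settings  # assumes the module-level settings singleton

settings.load(
    {
        "endpoints": [
            # endpoint-level rpm/tpm apply when the model entry does not override them
            {"id": "openai-main", "api_base": "https://api.openai.com/v1", "api_key": "sk-placeholder", "rpm": 120, "tpm": 200000},
        ],
        "openai": {
            "models": {
                "gpt-4o": {
                    "id": "gpt-4o",
                    # a per-model endpoint entry takes priority over the endpoint and the defaults
                    "endpoints": [{"endpoint_id": "openai-main", "model_id": "gpt-4o", "rpm": 60, "tpm": 100000}],
                }
            }
        },
        # RateLimitConfig: enabled, backend ("memory" | "redis" | "diskcache"), default_rpm, default_tpm
        "rate_limit": {"enabled": True, "backend": "memory", "default_rpm": 60, "default_tpm": 1000000},
    }
)
```

With the default enabled = False, _init_rate_limiter returns None, the `while self.rate_limiter` loop never runs, and request behaviour is unchanged.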

vectorvein-0.1.90/src/vectorvein/chat_clients/gemini_client.py
@@ -0,0 +1,13 @@
+from ..types.enums import BackendType
+from ..types.defaults import GEMINI_DEFAULT_MODEL
+from .openai_compatible_client import OpenAICompatibleChatClient, AsyncOpenAICompatibleChatClient
+
+
+class GeminiChatClient(OpenAICompatibleChatClient):
+    DEFAULT_MODEL = GEMINI_DEFAULT_MODEL
+    BACKEND_NAME = BackendType.Gemini
+
+
+class AsyncGeminiChatClient(AsyncOpenAICompatibleChatClient):
+    DEFAULT_MODEL = GEMINI_DEFAULT_MODEL
+    BACKEND_NAME = BackendType.Gemini

{vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/openai_compatible_client.py
@@ -212,6 +212,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
         else:
             _stream_options_params = {}

+        self._acquire_rate_limit(self.endpoint, self.model, messages)
+
         if self.stream:
             stream_response = raw_client.chat.completions.create(
                 model=self.model_id,
@@ -538,6 +540,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             else:
                 max_tokens = self.model_setting.context_length - token_counts - 64

+        await self._acquire_rate_limit(self.endpoint, self.model, messages)
+
         if self.stream:
             stream_response = await raw_client.chat.completions.create(
                 model=self.model_id,

{vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/chat_clients/utils.py
@@ -83,20 +83,6 @@ class ToolCallContentProcessor:
         return {}


-def get_assistant_role_key(backend: BackendType) -> str:
-    if backend == BackendType.Gemini:
-        return "model"
-    else:
-        return "assistant"
-
-
-def get_content_key(backend: BackendType) -> str:
-    if backend == BackendType.Gemini:
-        return "parts"
-    else:
-        return "content"
-
-
 def convert_type(value, value_type):
     if value_type == "string":
         return str(value)
@@ -141,9 +127,9 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
     text = str(text)
     if model == "gpt-3.5-turbo":
         return len(get_gpt_35_encoding().encode(text))
-    elif model in ("gpt-4o", "gpt-4o-mini"):
+    elif model.startswith(("gpt-4o", "o1-")):
         return len(get_gpt_4o_encoding().encode(text))
-    elif model.startswith("abab"):
+    elif model.startswith(("abab", "MiniMax")):
         model_setting = settings.minimax.models[model]
         if len(model_setting.endpoints) == 0:
             return int(len(text) / 1.33)
@@ -201,10 +187,6 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         result = response.json()
         return result["data"]["total_tokens"]
     elif model.startswith("gemini"):
-        # TODO: gemini-exp-1206 is not supported yet; use gemini-1.5-flash instead
-        if model in ("gemini-exp-1206", "gemini-2.0-flash-exp", "gemini-2.0-flash-thinking-exp-1219"):
-            model = "gemini-1.5-flash"
-
         model_setting = settings.gemini.models[model]
         if len(model_setting.endpoints) == 0:
             return len(get_gpt_35_encoding().encode(text))
@@ -213,7 +195,12 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
             endpoint_id = endpoint_id["endpoint_id"]
         endpoint = settings.get_endpoint(endpoint_id)

-        base_url = f"{endpoint.api_base}/models/{model_setting.id}:countTokens"
+        api_base = (
+            endpoint.api_base.removesuffix("/openai/")
+            if endpoint.api_base
+            else "https://generativelanguage.googleapis.com/v1beta"
+        )
+        base_url = f"{api_base}/models/{model_setting.id}:countTokens"
         params = {"key": endpoint.api_key}
         request_body = {
             "contents": {
@@ -304,7 +291,7 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         endpoint = settings.get_endpoint(endpoint_id)
         if model not in ("glm-4-plus", "glm-4-long", "glm-4-0520", "glm-4-air", "glm-4-flash"):
             model = "glm-4-plus"
-        tokenize_url = f"{endpoint.api_base}/tokenizer"
+        tokenize_url = f"{endpoint.api_base or 'https://open.bigmodel.cn/api/paas/v4'}/tokenizer"
         headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
         request_body = {
             "model": model,
@@ -395,7 +382,7 @@ def cutoff_messages(
         return messages

     messages_length = 0
-    content_key = get_content_key(backend)
+    content_key = "content"

     # First, check for and keep the leading system message (if any)
     system_message = None
@@ -440,21 +427,14 @@
             continue
         if index == 0:
             # If a single message already exceeds the limit, truncate it and keep the last part of its content
-            if backend == BackendType.Gemini:
-                return system_message + [
-                    {
-                        "role": message["role"],
-                        content_key: [{"text": message[content_key][-max_count:]}],
-                    }
-                ]
-            else:
-                content = message[content_key][max_count - messages_length :]
-                return system_message + [
-                    {
-                        "role": message["role"],
-                        content_key: content,
-                    }
-                ]
+            content = message[content_key][max_count - messages_length :]
+            return system_message + [
+                {
+                    "role": message["role"],
+                    content_key: content,
+                }
+            ]
+
         return system_message + messages[-index:]
     return system_message + messages

@@ -477,13 +457,6 @@ def format_image_message(image: str, backend: BackendType = BackendType.OpenAI)
                 "data": image_processor.base64_image,
             },
         }
-    elif backend == BackendType.Gemini:
-        return {
-            "inline_data": {
-                "mime_type": image_processor.mime_type,
-                "data": image_processor.base64_image,
-            }
-        }
     else:
         return {
             "type": "image_url",
@@ -495,7 +468,7 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
     formatted_messages = []

     # Tool call message
-    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi):
+    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi, BackendType.Gemini):
         tool_call_message = {
             "content": None,
             "role": "assistant",
@@ -524,20 +497,6 @@
         }
         if content:
             tool_call_message["content"].insert(0, {"type": "text", "text": content})
-    elif backend == BackendType.Gemini:
-        tool_call_message = {
-            "role": "model",
-            "parts": [
-                {
-                    "functionCall": {
-                        "name": message["metadata"]["selected_workflow"]["function_name"],
-                        "args": message["metadata"]["selected_workflow"]["params"],
-                    }
-                },
-            ],
-        }
-        if content:
-            tool_call_message["parts"].insert(0, {"text": content})
     else:
         tool_call_message = {
             "content": json.dumps(
@@ -552,7 +511,7 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
     formatted_messages.append(tool_call_message)

     # Tool call result message
-    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi):
+    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi, BackendType.Gemini):
         tool_call_result_message = {
             "role": "tool",
             "tool_call_id": message["metadata"]["selected_workflow"]["tool_call_id"],
@@ -570,21 +529,6 @@
                 }
             ],
         }
-    elif backend == BackendType.Gemini:
-        tool_call_result_message = {
-            "role": "function",
-            "parts": [
-                {
-                    "functionResponse": {
-                        "name": message["metadata"]["selected_workflow"]["function_name"],
-                        "response": {
-                            "name": message["metadata"]["selected_workflow"]["function_name"],
-                            "content": message["metadata"].get("workflow_result", ""),
-                        },
-                    }
-                }
-            ],
-        }
     else:
         tool_call_result_message = {
             "role": "user",
@@ -598,7 +542,7 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
         }
     formatted_messages.append(tool_call_result_message)

-    if content and backend not in (BackendType.Mistral, BackendType.Anthropic, BackendType.Gemini):
+    if content and backend not in (BackendType.Mistral, BackendType.Anthropic):
         formatted_messages.append({"role": "assistant", "content": content})

     return formatted_messages
@@ -608,21 +552,7 @@ def transform_from_openai_message(message: ChatCompletionMessageParam, backend:
     role = message.get("role", "user")
     content = message.get("content", "")

-    if backend == BackendType.Gemini:
-        if isinstance(content, list):
-            parts = []
-            for item in content:
-                if isinstance(item, str):
-                    parts.append({"text": item})
-                elif isinstance(item, dict) and "type" in item:
-                    if item["type"] == "image":
-                        parts.append({"image": item["image"]})
-                    elif item["type"] == "text":
-                        parts.append({"text": item["text"]})
-            return {"role": "user" if role == "user" else "model", "parts": parts}
-        else:
-            return {"role": "user" if role == "user" else "model", "parts": [{"text": content}]}
-    elif backend == BackendType.Anthropic:
+    if backend == BackendType.Anthropic:
         if isinstance(content, list):
             formatted_content = []
             for item in content:
@@ -663,7 +593,7 @@ def format_messages(
         # Handle messages in the VectorVein format
         content = message["content"]["text"]
         if message["content_type"] == "TXT":
-            role = "user" if message["author_type"] == "U" else get_assistant_role_key(backend)
+            role = "user" if message["author_type"] == "U" else "assistant"
             formatted_message = format_text_message(
                 content, role, message.get("attachments", []), backend, native_multimodal
             )
@@ -693,31 +623,19 @@ def format_text_message(
     content += "\n".join([f"- {attachment}" for attachment in attachments])

     if native_multimodal and has_images:
-        if backend == BackendType.Gemini:
-            parts = [{"text": content}]
-            for attachment in attachments:
-                if attachment.lower().endswith(images_extensions):
-                    parts.append(format_image_message(image=attachment, backend=backend))
-            return {"role": role, "parts": parts}
-        else:
-            return {
-                "role": role,
-                "content": [
-                    {"type": "text", "text": content},
-                    *[
-                        format_image_message(image=attachment, backend=backend)
-                        for attachment in attachments
-                        if attachment.lower().endswith(images_extensions)
-                    ],
+        return {
+            "role": role,
+            "content": [
+                {"type": "text", "text": content},
+                *[
+                    format_image_message(image=attachment, backend=backend)
+                    for attachment in attachments
+                    if attachment.lower().endswith(images_extensions)
                 ],
-            }
+            ],
+        }
     else:
-        if backend == BackendType.Gemini:
-            return {"role": role, "parts": [{"text": content}]}
-        elif backend == BackendType.Anthropic:
-            return {"role": role, "content": content}
-        else:
-            return {"role": role, "content": content}
+        return {"role": role, "content": content}


 def generate_tool_use_system_prompt(tools: list | str, format_type: str = "json") -> str:
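
The Gemini branch of get_token_counts now pairs with the endpoint normalization added to Settings.__init__ in the next file: chat requests use the OpenAI-compatible .../v1beta/openai/ base, while the native countTokens REST endpoint needs the plain v1beta base, hence removesuffix("/openai/"). A small sketch of the resulting URL (the model id is illustrative):

```python
# Sketch of the countTokens URL derivation in get_token_counts (values are illustrative).
api_base = "https://generativelanguage.googleapis.com/v1beta/openai/"  # normalized form, see settings/__init__.py below
native_base = api_base.removesuffix("/openai/")
url = f"{native_base}/models/gemini-1.5-flash:countTokens"  # the "key" query parameter carries endpoint.api_key
# url == "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:countTokens"
```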

{vectorvein-0.1.88 → vectorvein-0.1.90}/src/vectorvein/settings/__init__.py
@@ -1,6 +1,6 @@
 # @Author: Bi Ying
 # @Date: 2024-07-27 00:30:56
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Literal

 from pydantic import BaseModel, Field

@@ -9,6 +9,26 @@ from ..types.enums import BackendType
 from ..types.llm_parameters import BackendSettings, EndpointSetting


+class RedisConfig(BaseModel):
+    host: str = "localhost"
+    port: int = 6379
+    db: int = 0
+
+
+class DiskCacheConfig(BaseModel):
+    cache_dir: str = ".rate_limit_cache"
+
+
+class RateLimitConfig(BaseModel):
+    enabled: bool = False
+
+    backend: Literal["memory", "redis", "diskcache"] = "memory"
+    redis: Optional[RedisConfig] = Field(default=None)
+    diskcache: Optional[DiskCacheConfig] = Field(default=None)
+    default_rpm: int = 60
+    default_tpm: int = 1000000
+
+
 class Server(BaseModel):
     host: str
     port: int
@@ -20,6 +40,7 @@ class Settings(BaseModel):
         default_factory=list, description="Available endpoints for the LLM service."
     )
     token_server: Optional[Server] = Field(default=None, description="Token server address. Format: host:port")
+    rate_limit: Optional[RateLimitConfig] = Field(default=None, description="Rate limit settings.")

     anthropic: BackendSettings = Field(default_factory=BackendSettings, description="Anthropic models settings.")
     deepseek: BackendSettings = Field(default_factory=BackendSettings, description="Deepseek models settings.")
@@ -63,6 +84,14 @@ class Settings(BaseModel):
             else:
                 data[model_type] = BackendSettings(models=default_models)

+        for endpoint in data.get("endpoints", []):
+            if not endpoint.get("api_base"):
+                continue
+            api_base = endpoint["api_base"]
+            if api_base.startswith("https://generativelanguage.googleapis.com/v1beta"):
+                if not api_base.endswith("openai/"):
+                    endpoint["api_base"] = api_base.strip("/") + "/openai/"
+
         super().__init__(**data)

     def load(self, settings_dict: Dict):
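
RateLimitConfig supports three backends, which line up with the new optional extras declared in pyproject.toml (vectorvein[redis] and vectorvein[diskcache]; the in-memory backend needs no extra). A minimal sketch of the three variants, using only the fields and defaults defined above:

```python
# Sketch of the three rate-limit backend configurations (defaults shown explicitly).
from vectorvein.settings import RateLimitConfig, RedisConfig, DiskCacheConfig

memory_cfg = RateLimitConfig(enabled=True, backend="memory", default_rpm=60, default_tpm=1000000)
redis_cfg = RateLimitConfig(enabled=True, backend="redis", redis=RedisConfig(host="localhost", port=6379, db=0))
disk_cfg = RateLimitConfig(enabled=True, backend="diskcache", diskcache=DiskCacheConfig(cache_dir=".rate_limit_cache"))
```

Selecting "redis" or "diskcache" without the corresponding redis/diskcache block raises a ValueError in _init_rate_limiter, as shown in the base_client.py diff above.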