vectorvein 0.1.88__py3-none-any.whl → 0.1.89__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectorvein/chat_clients/anthropic_client.py +4 -0
- vectorvein/chat_clients/base_client.py +121 -2
- vectorvein/chat_clients/gemini_client.py +9 -523
- vectorvein/chat_clients/openai_compatible_client.py +4 -0
- vectorvein/chat_clients/utils.py +34 -116
- vectorvein/settings/__init__.py +30 -1
- vectorvein/types/defaults.py +30 -6
- vectorvein/types/llm_parameters.py +4 -1
- vectorvein/utilities/rate_limiter.py +312 -0
- {vectorvein-0.1.88.dist-info → vectorvein-0.1.89.dist-info}/METADATA +6 -1
- {vectorvein-0.1.88.dist-info → vectorvein-0.1.89.dist-info}/RECORD +13 -12
- {vectorvein-0.1.88.dist-info → vectorvein-0.1.89.dist-info}/WHEEL +0 -0
- {vectorvein-0.1.88.dist-info → vectorvein-0.1.89.dist-info}/entry_points.txt +0 -0
vectorvein/chat_clients/utils.py
CHANGED
@@ -83,20 +83,6 @@ class ToolCallContentProcessor:
         return {}
 
 
-def get_assistant_role_key(backend: BackendType) -> str:
-    if backend == BackendType.Gemini:
-        return "model"
-    else:
-        return "assistant"
-
-
-def get_content_key(backend: BackendType) -> str:
-    if backend == BackendType.Gemini:
-        return "parts"
-    else:
-        return "content"
-
-
 def convert_type(value, value_type):
     if value_type == "string":
         return str(value)
@@ -141,9 +127,9 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
     text = str(text)
     if model == "gpt-3.5-turbo":
         return len(get_gpt_35_encoding().encode(text))
-    elif model.startswith("gpt-4o"):
+    elif model.startswith(("gpt-4o", "o1-")):
         return len(get_gpt_4o_encoding().encode(text))
-    elif model.startswith("abab"):
+    elif model.startswith(("abab", "MiniMax")):
         model_setting = settings.minimax.models[model]
         if len(model_setting.endpoints) == 0:
             return int(len(text) / 1.33)
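The widened `startswith` tuples route `o1-*` models to the same tokenizer as `gpt-4o`, and `MiniMax`-prefixed models to the MiniMax token-count path. A minimal sketch of the gpt-4o/o1 branch with tiktoken (the helper name here is invented; `get_gpt_4o_encoding` in the diff presumably wraps the same `o200k_base` encoding):

```python
import tiktoken

def count_openai_tokens(text: str, model: str) -> int:
    # gpt-4o and the o1 family share OpenAI's o200k_base encoding;
    # older chat models fall back to cl100k_base
    if model.startswith(("gpt-4o", "o1-")):
        return len(tiktoken.get_encoding("o200k_base").encode(text))
    return len(tiktoken.get_encoding("cl100k_base").encode(text))

print(count_openai_tokens("hello world", "o1-preview"))  # e.g. 2
```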
@@ -201,10 +187,6 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         result = response.json()
         return result["data"]["total_tokens"]
     elif model.startswith("gemini"):
-        # TODO: gemini-exp-1206 is not yet supported; fall back to gemini-1.5-flash
-        if model in ("gemini-exp-1206", "gemini-2.0-flash-exp", "gemini-2.0-flash-thinking-exp-1219"):
-            model = "gemini-1.5-flash"
-
         model_setting = settings.gemini.models[model]
         if len(model_setting.endpoints) == 0:
             return len(get_gpt_35_encoding().encode(text))
@@ -213,7 +195,12 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
             endpoint_id = endpoint_id["endpoint_id"]
         endpoint = settings.get_endpoint(endpoint_id)
 
-        base_url = f"{endpoint.api_base}/models/{model_setting.id}:countTokens"
+        api_base = (
+            endpoint.api_base.removesuffix("/openai/")
+            if endpoint.api_base
+            else "https://generativelanguage.googleapis.com/v1beta"
+        )
+        base_url = f"{api_base}/models/{model_setting.id}:countTokens"
         params = {"key": endpoint.api_key}
         request_body = {
             "contents": {
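The new `api_base` handling falls back to Google's public v1beta base when an endpoint defines none, and strips a trailing `/openai/` (the OpenAI-compatibility path the settings loader appends, see below) so the native `:countTokens` route can be reached. A hedged sketch of the resulting request, assuming httpx and a placeholder key:

```python
import httpx

# Default base when the endpoint defines no api_base
api_base = "https://generativelanguage.googleapis.com/v1beta"
url = f"{api_base}/models/gemini-2.0-flash:countTokens"

resp = httpx.post(
    url,
    params={"key": "YOUR_API_KEY"},  # placeholder, not a real key
    json={"contents": {"role": "user", "parts": [{"text": "Hello, Gemini"}]}},
)
print(resp.json().get("totalTokens"))  # countTokens responds with a totalTokens field
```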
@@ -304,7 +291,7 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         endpoint = settings.get_endpoint(endpoint_id)
         if model not in ("glm-4-plus", "glm-4-long", "glm-4-0520", "glm-4-air", "glm-4-flash"):
             model = "glm-4-plus"
-        tokenize_url = f"{endpoint.api_base}/tokenizer"
+        tokenize_url = f"{endpoint.api_base or 'https://open.bigmodel.cn/api/paas/v4'}/tokenizer"
         headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
         request_body = {
             "model": model,
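The ZhiPu branch gets the same kind of fallback: endpoints without an explicit `api_base` now default to the official bigmodel.cn base. A sketch of the resulting tokenizer call (httpx and the message payload are assumptions; only the URL handling appears in the hunk):

```python
import httpx

api_base = None  # an endpoint that does not set api_base
tokenize_url = f"{api_base or 'https://open.bigmodel.cn/api/paas/v4'}/tokenizer"

resp = httpx.post(
    tokenize_url,
    headers={"Content-Type": "application/json", "Authorization": "Bearer YOUR_API_KEY"},
    json={"model": "glm-4-plus", "messages": [{"role": "user", "content": "Hello"}]},
)
```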
@@ -395,7 +382,7 @@ def cutoff_messages(
         return messages
 
     messages_length = 0
-    content_key = get_content_key(backend)
+    content_key = "content"
 
     # First check for and keep the leading system message (if any)
     system_message = None
@@ -440,21 +427,14 @@ def cutoff_messages(
                 continue
             if index == 0:
                 # If a single message already exceeds the limit, truncate it and keep only its trailing part
-
-
-
-
-
-
-
-
-                    content = message[content_key][max_count - messages_length :]
-                    return system_message + [
-                        {
-                            "role": message["role"],
-                            content_key: content,
-                        }
-                    ]
+                content = message[content_key][max_count - messages_length :]
+                return system_message + [
+                    {
+                        "role": message["role"],
+                        content_key: content,
+                    }
+                ]
+
         return system_message + messages[-index:]
     return system_message + messages
 
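With the backend-specific branch gone, the truncation path is a single slice. `max_count - messages_length` goes negative once the budget is exhausted, so the slice keeps only the trailing characters of the first over-long message; a toy illustration with made-up numbers:

```python
content = "0123456789"  # the over-long message body
max_count = 6           # total budget
messages_length = 8     # length already consumed by newer messages

# 6 - 8 == -2, so the negative index keeps the last two characters
truncated = content[max_count - messages_length:]
assert truncated == "89"
```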
@@ -477,13 +457,6 @@ def format_image_message(image: str, backend: BackendType = BackendType.OpenAI)
                 "data": image_processor.base64_image,
             },
         }
-    elif backend == BackendType.Gemini:
-        return {
-            "inline_data": {
-                "mime_type": image_processor.mime_type,
-                "data": image_processor.base64_image,
-            }
-        }
     else:
         return {
             "type": "image_url",
@@ -495,7 +468,7 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
     formatted_messages = []
 
     # Tool-call message
-    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi):
+    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi, BackendType.Gemini):
         tool_call_message = {
             "content": None,
             "role": "assistant",
@@ -524,20 +497,6 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
         }
         if content:
             tool_call_message["content"].insert(0, {"type": "text", "text": content})
-    elif backend == BackendType.Gemini:
-        tool_call_message = {
-            "role": "model",
-            "parts": [
-                {
-                    "functionCall": {
-                        "name": message["metadata"]["selected_workflow"]["function_name"],
-                        "args": message["metadata"]["selected_workflow"]["params"],
-                    }
-                },
-            ],
-        }
-        if content:
-            tool_call_message["parts"].insert(0, {"text": content})
     else:
         tool_call_message = {
             "content": json.dumps(
@@ -552,7 +511,7 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
     formatted_messages.append(tool_call_message)
 
     # Tool-call result message
-    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi):
+    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi, BackendType.Gemini):
         tool_call_result_message = {
             "role": "tool",
             "tool_call_id": message["metadata"]["selected_workflow"]["tool_call_id"],
@@ -570,21 +529,6 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
                 }
             ],
         }
-    elif backend == BackendType.Gemini:
-        tool_call_result_message = {
-            "role": "function",
-            "parts": [
-                {
-                    "functionResponse": {
-                        "name": message["metadata"]["selected_workflow"]["function_name"],
-                        "response": {
-                            "name": message["metadata"]["selected_workflow"]["function_name"],
-                            "content": message["metadata"].get("workflow_result", ""),
-                        },
-                    }
-                }
-            ],
-        }
     else:
         tool_call_result_message = {
             "role": "user",
@@ -598,7 +542,7 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
         }
     formatted_messages.append(tool_call_result_message)
 
-    if content and backend not in (BackendType.Mistral, BackendType.Anthropic, BackendType.Gemini):
+    if content and backend not in (BackendType.Mistral, BackendType.Anthropic):
         formatted_messages.append({"role": "assistant", "content": content})
 
     return formatted_messages
@@ -608,21 +552,7 @@ def transform_from_openai_message(message: ChatCompletionMessageParam, backend:
     role = message.get("role", "user")
     content = message.get("content", "")
 
-    if backend == BackendType.Gemini:
-        if isinstance(content, list):
-            parts = []
-            for item in content:
-                if isinstance(item, str):
-                    parts.append({"text": item})
-                elif isinstance(item, dict) and "type" in item:
-                    if item["type"] == "image":
-                        parts.append({"image": item["image"]})
-                    elif item["type"] == "text":
-                        parts.append({"text": item["text"]})
-            return {"role": "user" if role == "user" else "model", "parts": parts}
-        else:
-            return {"role": "user" if role == "user" else "model", "parts": [{"text": content}]}
-    elif backend == BackendType.Anthropic:
+    if backend == BackendType.Anthropic:
         if isinstance(content, list):
             formatted_content = []
             for item in content:
@@ -663,7 +593,7 @@ def format_messages(
     # Handle VectorVein-format messages
     content = message["content"]["text"]
     if message["content_type"] == "TXT":
-        role = "user" if message["author_type"] == "U" else get_assistant_role_key(backend)
+        role = "user" if message["author_type"] == "U" else "assistant"
         formatted_message = format_text_message(
             content, role, message.get("attachments", []), backend, native_multimodal
         )
@@ -693,31 +623,19 @@ def format_text_message(
     content += "\n".join([f"- {attachment}" for attachment in attachments])
 
     if native_multimodal and has_images:
-
-
-
-
-
-
-
-
-                "role": role,
-                "content": [
-                    {"type": "text", "text": content},
-                    *[
-                        format_image_message(image=attachment, backend=backend)
-                        for attachment in attachments
-                        if attachment.lower().endswith(images_extensions)
-                    ],
+        return {
+            "role": role,
+            "content": [
+                {"type": "text", "text": content},
+                *[
+                    format_image_message(image=attachment, backend=backend)
+                    for attachment in attachments
+                    if attachment.lower().endswith(images_extensions)
                 ],
-            }
+            ],
+        }
     else:
-        if backend == BackendType.Gemini:
-            return {"role": role, "parts": [{"text": content}]}
-        elif backend == BackendType.Anthropic:
-            return {"role": role, "content": content}
-        else:
-            return {"role": role, "content": content}
+        return {"role": role, "content": content}
 
 
 def generate_tool_use_system_prompt(tools: list | str, format_type: str = "json") -> str:
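The net effect of this file's changes: every Gemini-specific `model`/`parts`/`function` branch is gone, and `BackendType.Gemini` now shares the OpenAI-style message shapes (made possible by routing Gemini endpoints through the `/openai/` compatibility path, see the settings change below). Illustratively, a workflow tool round-trip for Gemini would now be emitted roughly like this; the `tool_calls` entry follows the OpenAI convention, and the ids and names here are invented:

```python
tool_call_message = {
    "role": "assistant",
    "content": None,
    "tool_calls": [
        {
            "id": "call_0",  # invented tool_call_id
            "type": "function",
            "function": {"name": "my_workflow", "arguments": "{}"},
        }
    ],
}
tool_call_result_message = {
    "role": "tool",
    "tool_call_id": "call_0",
    "content": "workflow output",  # invented result payload
}
```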
vectorvein/settings/__init__.py
CHANGED
@@ -1,6 +1,6 @@
 # @Author: Bi Ying
 # @Date: 2024-07-27 00:30:56
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Literal
 
 from pydantic import BaseModel, Field
 
@@ -9,6 +9,26 @@ from ..types.enums import BackendType
 from ..types.llm_parameters import BackendSettings, EndpointSetting
 
 
+class RedisConfig(BaseModel):
+    host: str = "localhost"
+    port: int = 6379
+    db: int = 0
+
+
+class DiskCacheConfig(BaseModel):
+    cache_dir: str = ".rate_limit_cache"
+
+
+class RateLimitConfig(BaseModel):
+    enabled: bool = False
+
+    backend: Literal["memory", "redis", "diskcache"] = "memory"
+    redis: Optional[RedisConfig] = Field(default=None)
+    diskcache: Optional[DiskCacheConfig] = Field(default=None)
+    default_rpm: int = 60
+    default_tpm: int = 1000000
+
+
 class Server(BaseModel):
     host: str
     port: int
@@ -20,6 +40,7 @@ class Settings(BaseModel):
         default_factory=list, description="Available endpoints for the LLM service."
     )
     token_server: Optional[Server] = Field(default=None, description="Token server address. Format: host:port")
+    rate_limit: Optional[RateLimitConfig] = Field(default=None, description="Rate limit settings.")
 
     anthropic: BackendSettings = Field(default_factory=BackendSettings, description="Anthropic models settings.")
     deepseek: BackendSettings = Field(default_factory=BackendSettings, description="Deepseek models settings.")
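A hedged sketch of how the new `rate_limit` block might be supplied (keys mirror the models added above; the values are examples, and the import path assumes the classes stay in `vectorvein.settings`):

```python
from vectorvein.settings import RateLimitConfig  # assumed import path

config = RateLimitConfig(
    enabled=True,
    backend="redis",  # one of "memory", "redis", "diskcache"
    redis={"host": "localhost", "port": 6379, "db": 0},
    default_rpm=60,         # fallback requests-per-minute
    default_tpm=1_000_000,  # fallback tokens-per-minute
)
print(config.redis.host if config.redis else "no redis")
```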
@@ -63,6 +84,14 @@ class Settings(BaseModel):
         else:
             data[model_type] = BackendSettings(models=default_models)
 
+        for endpoint in data.get("endpoints", []):
+            if not endpoint.get("api_base"):
+                continue
+            api_base = endpoint["api_base"]
+            if api_base.startswith("https://generativelanguage.googleapis.com/v1beta"):
+                if not api_base.endswith("openai/"):
+                    endpoint["api_base"] = api_base.strip("/") + "/openai/"
+
         super().__init__(**data)
 
     def load(self, settings_dict: Dict):
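The new loop canonicalizes Gemini endpoints onto the OpenAI-compatibility path, which the token counter in utils.py later strips back off with `removesuffix("/openai/")`. The rewrite in isolation:

```python
api_base = "https://generativelanguage.googleapis.com/v1beta"
if api_base.startswith("https://generativelanguage.googleapis.com/v1beta"):
    if not api_base.endswith("openai/"):
        api_base = api_base.strip("/") + "/openai/"

assert api_base == "https://generativelanguage.googleapis.com/v1beta/openai/"
```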
vectorvein/types/defaults.py
CHANGED
@@ -13,6 +13,7 @@ ENDPOINT_TPM: Final[int] = 300000
 MODEL_CONTEXT_LENGTH: Final[int] = 32768
 
 # Moonshot models
+MOONSHOT_DEFAULT_MODEL: Final[str] = "moonshot-v1-8k"
 MOONSHOT_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "moonshot-v1-8k": {
         "id": "moonshot-v1-8k",
@@ -33,9 +34,9 @@ MOONSHOT_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "response_format_available": True,
     },
 }
-MOONSHOT_DEFAULT_MODEL: Final[str] = "moonshot-v1-8k"
 
 # Deepseek models
+DEEPSEEK_DEFAULT_MODEL: Final[str] = "deepseek-chat"
 DEEPSEEK_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "deepseek-chat": {
         "id": "deepseek-chat",
@@ -52,9 +53,9 @@ DEEPSEEK_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "response_format_available": False,
     },
 }
-DEEPSEEK_DEFAULT_MODEL: Final[str] = "deepseek-chat"
 
 # Baichuan models
+BAICHUAN_DEFAULT_MODEL: Final[str] = "Baichuan3-Turbo"
 BAICHUAN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "Baichuan4": {
         "id": "Baichuan4",
@@ -92,7 +93,6 @@ BAICHUAN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "response_format_available": False,
     },
 }
-BAICHUAN_DEFAULT_MODEL: Final[str] = "Baichuan3-Turbo"
 
 # Groq models
 GROQ_DEFAULT_MODEL: Final[str] = "llama3-70b-8192"
@@ -617,7 +617,7 @@ ANTHROPIC_MODELS: Final[Dict[str, Dict[str, Any]]] = {
 }
 
 # Minimax models
-MINIMAX_DEFAULT_MODEL: Final[str] = "
+MINIMAX_DEFAULT_MODEL: Final[str] = "MiniMax-Text-01"
 MINIMAX_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "abab5-chat": {
         "id": "abab5-chat",
@@ -664,7 +664,7 @@ MINIMAX_MODELS: Final[Dict[str, Dict[str, Any]]] = {
 }
 
 # Gemini models
-GEMINI_DEFAULT_MODEL: Final[str] = "gemini-
+GEMINI_DEFAULT_MODEL: Final[str] = "gemini-2.0-flash"
 GEMINI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "gemini-1.5-pro": {
         "id": "gemini-1.5-pro",
@@ -674,6 +674,14 @@ GEMINI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "response_format_available": True,
         "native_multimodal": True,
     },
+    "gemini-2.0-pro-exp-02-05": {
+        "id": "gemini-2.0-pro-exp-02-05",
+        "context_length": 2097152,
+        "max_output_tokens": 8192,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
     "gemini-1.5-flash": {
         "id": "gemini-1.5-flash",
         "context_length": 1048576,
@@ -682,6 +690,22 @@ GEMINI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "response_format_available": True,
         "native_multimodal": True,
     },
+    "gemini-2.0-flash": {
+        "id": "gemini-2.0-flash",
+        "context_length": 1048576,
+        "max_output_tokens": 8192,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
+    "gemini-2.0-flash-lite-preview-02-05": {
+        "id": "gemini-2.0-flash-lite-preview-02-05",
+        "context_length": 1048576,
+        "max_output_tokens": 8192,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
     "gemini-2.0-flash-exp": {
         "id": "gemini-2.0-flash-exp",
         "context_length": 1048576,
@@ -819,7 +843,7 @@ STEPFUN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
 }
 
 
-XAI_DEFAULT_MODEL: Final[str] = "grok-
+XAI_DEFAULT_MODEL: Final[str] = "grok-2-latest"
 XAI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "grok-beta": {
         "id": "grok-beta",
vectorvein/types/llm_parameters.py
CHANGED
@@ -1,6 +1,6 @@
 # @Author: Bi Ying
 # @Date: 2024-07-26 23:48:04
-from typing import List, Dict, Optional, Union, Iterable
+from typing import List, Dict, Optional, Union, Iterable, NotRequired
 from typing_extensions import TypedDict  # Required by pydantic under Python < 3.12
 
 from pydantic import BaseModel, Field
|
@@ -24,6 +24,9 @@ from . import defaults as defs
|
|
24
24
|
class EndpointOptionDict(TypedDict):
|
25
25
|
endpoint_id: str
|
26
26
|
model_id: str
|
27
|
+
rpm: NotRequired[int]
|
28
|
+
tpm: NotRequired[int]
|
29
|
+
concurrent_requests: NotRequired[int]
|
27
30
|
|
28
31
|
|
29
32
|
class EndpointSetting(BaseModel):
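With the new optional keys, an endpoint option for a model can carry per-endpoint throughput hints for the new rate limiter, while plain `{"endpoint_id", "model_id"}` dicts stay valid because `NotRequired` keys may be omitted. An example entry (values invented; the import path is an assumption):

```python
from vectorvein.types.llm_parameters import EndpointOptionDict  # assumed import path

option: EndpointOptionDict = {
    "endpoint_id": "gemini-endpoint-1",  # invented endpoint id
    "model_id": "gemini-2.0-flash",
    "rpm": 120,                # requests per minute for this endpoint
    "tpm": 500_000,            # tokens per minute for this endpoint
    "concurrent_requests": 4,  # parallel request cap
}
```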
|