vectorvein 0.1.88__py3-none-any.whl → 0.1.89__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that public registry.
@@ -83,20 +83,6 @@ class ToolCallContentProcessor:
         return {}
 
 
-def get_assistant_role_key(backend: BackendType) -> str:
-    if backend == BackendType.Gemini:
-        return "model"
-    else:
-        return "assistant"
-
-
-def get_content_key(backend: BackendType) -> str:
-    if backend == BackendType.Gemini:
-        return "parts"
-    else:
-        return "content"
-
-
 def convert_type(value, value_type):
     if value_type == "string":
         return str(value)
@@ -141,9 +127,9 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         text = str(text)
     if model == "gpt-3.5-turbo":
         return len(get_gpt_35_encoding().encode(text))
-    elif model in ("gpt-4o", "gpt-4o-mini"):
+    elif model.startswith(("gpt-4o", "o1-")):
        return len(get_gpt_4o_encoding().encode(text))
-    elif model.startswith("abab"):
+    elif model.startswith(("abab", "MiniMax")):
         model_setting = settings.minimax.models[model]
         if len(model_setting.endpoints) == 0:
             return int(len(text) / 1.33)
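The token-counting branches above now match on model-name prefixes instead of exact names, so newer variants such as gpt-4o-2024-11-20, o1-mini, or MiniMax-Text-01 pick up the right tokenizer automatically. A minimal sketch of the same dispatch, assuming tiktoken is installed (the package's own get_gpt_4o_encoding() helper presumably wraps a comparable encoding):

```python
# Illustrative sketch only, not the package's implementation.
# Assumes tiktoken is installed; "o200k_base" is the encoding used by the GPT-4o family.
import tiktoken

def rough_token_count(text: str, model: str) -> int:
    if model.startswith(("gpt-4o", "o1-")):
        return len(tiktoken.get_encoding("o200k_base").encode(text))
    if model.startswith(("abab", "MiniMax")):
        # Same fallback heuristic as above when no MiniMax endpoint is configured.
        return int(len(text) / 1.33)
    return len(tiktoken.get_encoding("cl100k_base").encode(text))

print(rough_token_count("hello world", "gpt-4o-mini"))
```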
@@ -201,10 +187,6 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         result = response.json()
         return result["data"]["total_tokens"]
     elif model.startswith("gemini"):
-        # TODO: gemini-exp-1206 is not supported yet; use gemini-1.5-flash instead
-        if model in ("gemini-exp-1206", "gemini-2.0-flash-exp", "gemini-2.0-flash-thinking-exp-1219"):
-            model = "gemini-1.5-flash"
-
         model_setting = settings.gemini.models[model]
         if len(model_setting.endpoints) == 0:
             return len(get_gpt_35_encoding().encode(text))
@@ -213,7 +195,12 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
             endpoint_id = endpoint_id["endpoint_id"]
         endpoint = settings.get_endpoint(endpoint_id)
 
-        base_url = f"{endpoint.api_base}/models/{model_setting.id}:countTokens"
+        api_base = (
+            endpoint.api_base.removesuffix("/openai/")
+            if endpoint.api_base
+            else "https://generativelanguage.googleapis.com/v1beta"
+        )
+        base_url = f"{api_base}/models/{model_setting.id}:countTokens"
         params = {"key": endpoint.api_key}
         request_body = {
             "contents": {
@@ -304,7 +291,7 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         endpoint = settings.get_endpoint(endpoint_id)
         if model not in ("glm-4-plus", "glm-4-long", "glm-4-0520", "glm-4-air", "glm-4-flash"):
             model = "glm-4-plus"
-        tokenize_url = f"{endpoint.api_base}/tokenizer"
+        tokenize_url = f"{endpoint.api_base or 'https://open.bigmodel.cn/api/paas/v4'}/tokenizer"
         headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
         request_body = {
             "model": model,
@@ -395,7 +382,7 @@ def cutoff_messages(
         return messages
 
     messages_length = 0
-    content_key = get_content_key(backend)
+    content_key = "content"
 
     # First, check for and keep the first system message (if any)
     system_message = None
@@ -440,21 +427,14 @@ def cutoff_messages(
             continue
         if index == 0:
             # If a single message already exceeds the limit, truncate its content and keep only the last part
-            if backend == BackendType.Gemini:
-                return system_message + [
-                    {
-                        "role": message["role"],
-                        content_key: [{"text": message[content_key][-max_count:]}],
-                    }
-                ]
-            else:
-                content = message[content_key][max_count - messages_length :]
-                return system_message + [
-                    {
-                        "role": message["role"],
-                        content_key: content,
-                    }
-                ]
+            content = message[content_key][max_count - messages_length :]
+            return system_message + [
+                {
+                    "role": message["role"],
+                    content_key: content,
+                }
+            ]
+
         return system_message + messages[-index:]
     return system_message + messages
 
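With the Gemini special case removed, cutoff_messages truncates an over-long message the same way for every backend: keep any leading system message, then keep a tail slice of the offending message's content. A toy illustration of the slice (character counts standing in for token counts):

```python
# Toy illustration only; real counts come from get_token_counts().
system_message = [{"role": "system", "content": "You are helpful."}]
message = {"role": "user", "content": "x" * 5000}
max_count, messages_length = 1000, 0

content = message["content"][max_count - messages_length:]  # tail slice of the content
truncated = system_message + [{"role": message["role"], "content": content}]
print(len(truncated[1]["content"]))  # 4000 in this toy case
```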
@@ -477,13 +457,6 @@ def format_image_message(image: str, backend: BackendType = BackendType.OpenAI)
                 "data": image_processor.base64_image,
             },
         }
-    elif backend == BackendType.Gemini:
-        return {
-            "inline_data": {
-                "mime_type": image_processor.mime_type,
-                "data": image_processor.base64_image,
-            }
-        }
     else:
         return {
             "type": "image_url",
@@ -495,7 +468,7 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
     formatted_messages = []
 
     # Tool call message
-    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi):
+    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi, BackendType.Gemini):
         tool_call_message = {
             "content": None,
             "role": "assistant",
@@ -524,20 +497,6 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
         }
         if content:
             tool_call_message["content"].insert(0, {"type": "text", "text": content})
-    elif backend == BackendType.Gemini:
-        tool_call_message = {
-            "role": "model",
-            "parts": [
-                {
-                    "functionCall": {
-                        "name": message["metadata"]["selected_workflow"]["function_name"],
-                        "args": message["metadata"]["selected_workflow"]["params"],
-                    }
-                },
-            ],
-        }
-        if content:
-            tool_call_message["parts"].insert(0, {"text": content})
     else:
         tool_call_message = {
             "content": json.dumps(
@@ -552,7 +511,7 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
     formatted_messages.append(tool_call_message)
 
     # Tool call result message
-    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi):
+    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi, BackendType.Gemini):
         tool_call_result_message = {
             "role": "tool",
             "tool_call_id": message["metadata"]["selected_workflow"]["tool_call_id"],
@@ -570,21 +529,6 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
                 }
             ],
         }
-    elif backend == BackendType.Gemini:
-        tool_call_result_message = {
-            "role": "function",
-            "parts": [
-                {
-                    "functionResponse": {
-                        "name": message["metadata"]["selected_workflow"]["function_name"],
-                        "response": {
-                            "name": message["metadata"]["selected_workflow"]["function_name"],
-                            "content": message["metadata"].get("workflow_result", ""),
-                        },
-                    }
-                }
-            ],
-        }
     else:
         tool_call_result_message = {
             "role": "user",
@@ -598,7 +542,7 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
         }
     formatted_messages.append(tool_call_result_message)
 
-    if content and backend not in (BackendType.Mistral, BackendType.Anthropic, BackendType.Gemini):
+    if content and backend not in (BackendType.Mistral, BackendType.Anthropic):
         formatted_messages.append({"role": "assistant", "content": content})
 
     return formatted_messages
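Net effect of the changes to format_workflow_messages: Gemini now takes the same OpenAI-style tool-calling path as OpenAI, ZhiPuAI, Mistral and Yi, rather than building functionCall/functionResponse parts. For reference, a sketch of the message pair that branch produces (values are placeholders, not taken from the package):

```python
# Placeholder values; the shapes follow the OpenAI-compatible branch kept in this diff.
tool_call_message = {
    "role": "assistant",
    "content": None,
    "tool_calls": [
        {
            "id": "call_123",  # hypothetical tool_call_id
            "type": "function",
            "function": {"name": "run_workflow", "arguments": "{}"},
        }
    ],
}
tool_call_result_message = {
    "role": "tool",
    "tool_call_id": "call_123",
    "content": "workflow result text",
}
```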
@@ -608,21 +552,7 @@ def transform_from_openai_message(message: ChatCompletionMessageParam, backend:
     role = message.get("role", "user")
     content = message.get("content", "")
 
-    if backend == BackendType.Gemini:
-        if isinstance(content, list):
-            parts = []
-            for item in content:
-                if isinstance(item, str):
-                    parts.append({"text": item})
-                elif isinstance(item, dict) and "type" in item:
-                    if item["type"] == "image":
-                        parts.append({"image": item["image"]})
-                    elif item["type"] == "text":
-                        parts.append({"text": item["text"]})
-            return {"role": "user" if role == "user" else "model", "parts": parts}
-        else:
-            return {"role": "user" if role == "user" else "model", "parts": [{"text": content}]}
-    elif backend == BackendType.Anthropic:
+    if backend == BackendType.Anthropic:
         if isinstance(content, list):
             formatted_content = []
             for item in content:
@@ -663,7 +593,7 @@ def format_messages(
             # Handle messages in VectorVein format
             content = message["content"]["text"]
             if message["content_type"] == "TXT":
-                role = "user" if message["author_type"] == "U" else get_assistant_role_key(backend)
+                role = "user" if message["author_type"] == "U" else "assistant"
                 formatted_message = format_text_message(
                     content, role, message.get("attachments", []), backend, native_multimodal
                 )
@@ -693,31 +623,19 @@ def format_text_message(
         content += "\n".join([f"- {attachment}" for attachment in attachments])
 
     if native_multimodal and has_images:
-        if backend == BackendType.Gemini:
-            parts = [{"text": content}]
-            for attachment in attachments:
-                if attachment.lower().endswith(images_extensions):
-                    parts.append(format_image_message(image=attachment, backend=backend))
-            return {"role": role, "parts": parts}
-        else:
-            return {
-                "role": role,
-                "content": [
-                    {"type": "text", "text": content},
-                    *[
-                        format_image_message(image=attachment, backend=backend)
-                        for attachment in attachments
-                        if attachment.lower().endswith(images_extensions)
-                    ],
+        return {
+            "role": role,
+            "content": [
+                {"type": "text", "text": content},
+                *[
+                    format_image_message(image=attachment, backend=backend)
+                    for attachment in attachments
+                    if attachment.lower().endswith(images_extensions)
                 ],
-            }
+            ],
+        }
     else:
-        if backend == BackendType.Gemini:
-            return {"role": role, "parts": [{"text": content}]}
-        elif backend == BackendType.Anthropic:
-            return {"role": role, "content": content}
-        else:
-            return {"role": role, "content": content}
+        return {"role": role, "content": content}
 
 
 def generate_tool_use_system_prompt(tools: list | str, format_type: str = "json") -> str:
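After this simplification, format_text_message returns the same OpenAI-style shape for every backend: a plain content string for text-only messages, or a content list mixing one text part with image parts when the model is natively multimodal. Roughly (illustrative values; the image-part shape is assumed from the OpenAI-style branch of format_image_message):

```python
# Illustrative message shapes only.
text_only = {"role": "user", "content": "Describe the attached chart."}

multimodal = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Describe the attached chart."},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},  # assumed shape
    ],
}
```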
@@ -1,6 +1,6 @@
 # @Author: Bi Ying
 # @Date: 2024-07-27 00:30:56
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Literal
 
 from pydantic import BaseModel, Field
 
@@ -9,6 +9,26 @@ from ..types.enums import BackendType
 from ..types.llm_parameters import BackendSettings, EndpointSetting
 
 
+class RedisConfig(BaseModel):
+    host: str = "localhost"
+    port: int = 6379
+    db: int = 0
+
+
+class DiskCacheConfig(BaseModel):
+    cache_dir: str = ".rate_limit_cache"
+
+
+class RateLimitConfig(BaseModel):
+    enabled: bool = False
+
+    backend: Literal["memory", "redis", "diskcache"] = "memory"
+    redis: Optional[RedisConfig] = Field(default=None)
+    diskcache: Optional[DiskCacheConfig] = Field(default=None)
+    default_rpm: int = 60
+    default_tpm: int = 1000000
+
+
 class Server(BaseModel):
     host: str
     port: int
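The new RateLimitConfig (with its RedisConfig / DiskCacheConfig sub-models) is wired into Settings through the rate_limit field added in the next hunk. A hedged example of a settings fragment that enables it, using only the fields declared above:

```python
# Hypothetical settings fragment; field names come from the models added above.
settings_dict = {
    "endpoints": [],
    "rate_limit": {
        "enabled": True,
        "backend": "redis",  # one of "memory", "redis", "diskcache"
        "redis": {"host": "localhost", "port": 6379, "db": 0},
        "default_rpm": 60,
        "default_tpm": 1_000_000,
    },
}
```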
@@ -20,6 +40,7 @@ class Settings(BaseModel):
         default_factory=list, description="Available endpoints for the LLM service."
     )
     token_server: Optional[Server] = Field(default=None, description="Token server address. Format: host:port")
+    rate_limit: Optional[RateLimitConfig] = Field(default=None, description="Rate limit settings.")
 
     anthropic: BackendSettings = Field(default_factory=BackendSettings, description="Anthropic models settings.")
     deepseek: BackendSettings = Field(default_factory=BackendSettings, description="Deepseek models settings.")
@@ -63,6 +84,14 @@ class Settings(BaseModel):
             else:
                 data[model_type] = BackendSettings(models=default_models)
 
+        for endpoint in data.get("endpoints", []):
+            if not endpoint.get("api_base"):
+                continue
+            api_base = endpoint["api_base"]
+            if api_base.startswith("https://generativelanguage.googleapis.com/v1beta"):
+                if not api_base.endswith("openai/"):
+                    endpoint["api_base"] = api_base.strip("/") + "/openai/"
+
         super().__init__(**data)
 
     def load(self, settings_dict: Dict):
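This constructor hook rewrites a Gemini endpoint configured with the bare Google API base into its OpenAI-compatibility form before validation. A quick sketch of the rewrite rule in isolation:

```python
# Sketch of the normalization applied in __init__ above.
endpoint = {"api_base": "https://generativelanguage.googleapis.com/v1beta"}

api_base = endpoint["api_base"]
if api_base.startswith("https://generativelanguage.googleapis.com/v1beta") and not api_base.endswith("openai/"):
    endpoint["api_base"] = api_base.strip("/") + "/openai/"

print(endpoint["api_base"])
# https://generativelanguage.googleapis.com/v1beta/openai/
```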
@@ -13,6 +13,7 @@ ENDPOINT_TPM: Final[int] = 300000
 MODEL_CONTEXT_LENGTH: Final[int] = 32768
 
 # Moonshot models
+MOONSHOT_DEFAULT_MODEL: Final[str] = "moonshot-v1-8k"
 MOONSHOT_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "moonshot-v1-8k": {
         "id": "moonshot-v1-8k",
@@ -33,9 +34,9 @@ MOONSHOT_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "response_format_available": True,
     },
 }
-MOONSHOT_DEFAULT_MODEL: Final[str] = "moonshot-v1-8k"
 
 # Deepseek models
+DEEPSEEK_DEFAULT_MODEL: Final[str] = "deepseek-chat"
 DEEPSEEK_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "deepseek-chat": {
         "id": "deepseek-chat",
@@ -52,9 +53,9 @@ DEEPSEEK_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "response_format_available": False,
     },
 }
-DEEPSEEK_DEFAULT_MODEL: Final[str] = "deepseek-chat"
 
 # Baichuan models
+BAICHUAN_DEFAULT_MODEL: Final[str] = "Baichuan3-Turbo"
 BAICHUAN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "Baichuan4": {
         "id": "Baichuan4",
@@ -92,7 +93,6 @@ BAICHUAN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "response_format_available": False,
     },
 }
-BAICHUAN_DEFAULT_MODEL: Final[str] = "Baichuan3-Turbo"
 
 # Groq models
 GROQ_DEFAULT_MODEL: Final[str] = "llama3-70b-8192"
@@ -617,7 +617,7 @@ ANTHROPIC_MODELS: Final[Dict[str, Dict[str, Any]]] = {
 }
 
 # Minimax models
-MINIMAX_DEFAULT_MODEL: Final[str] = "abab6.5s-chat"
+MINIMAX_DEFAULT_MODEL: Final[str] = "MiniMax-Text-01"
 MINIMAX_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "abab5-chat": {
         "id": "abab5-chat",
@@ -664,7 +664,7 @@ MINIMAX_MODELS: Final[Dict[str, Dict[str, Any]]] = {
 }
 
 # Gemini models
-GEMINI_DEFAULT_MODEL: Final[str] = "gemini-1.5-pro"
+GEMINI_DEFAULT_MODEL: Final[str] = "gemini-2.0-flash"
 GEMINI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "gemini-1.5-pro": {
         "id": "gemini-1.5-pro",
@@ -674,6 +674,14 @@ GEMINI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "response_format_available": True,
         "native_multimodal": True,
     },
+    "gemini-2.0-pro-exp-02-05": {
+        "id": "gemini-2.0-pro-exp-02-05",
+        "context_length": 2097152,
+        "max_output_tokens": 8192,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
     "gemini-1.5-flash": {
         "id": "gemini-1.5-flash",
         "context_length": 1048576,
@@ -682,6 +690,22 @@ GEMINI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "response_format_available": True,
         "native_multimodal": True,
     },
+    "gemini-2.0-flash": {
+        "id": "gemini-2.0-flash",
+        "context_length": 1048576,
+        "max_output_tokens": 8192,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
+    "gemini-2.0-flash-lite-preview-02-05": {
+        "id": "gemini-2.0-flash-lite-preview-02-05",
+        "context_length": 1048576,
+        "max_output_tokens": 8192,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
     "gemini-2.0-flash-exp": {
         "id": "gemini-2.0-flash-exp",
         "context_length": 1048576,
@@ -819,7 +843,7 @@ STEPFUN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
 }
 
 
-XAI_DEFAULT_MODEL: Final[str] = "grok-beta"
+XAI_DEFAULT_MODEL: Final[str] = "grok-2-latest"
 XAI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "grok-beta": {
         "id": "grok-beta",
@@ -1,6 +1,6 @@
 # @Author: Bi Ying
 # @Date: 2024-07-26 23:48:04
-from typing import List, Dict, Optional, Union, Iterable
+from typing import List, Dict, Optional, Union, Iterable, NotRequired
 from typing_extensions import TypedDict  # Required by pydantic under Python < 3.12
 
 from pydantic import BaseModel, Field
@@ -24,6 +24,9 @@ from . import defaults as defs
 class EndpointOptionDict(TypedDict):
     endpoint_id: str
     model_id: str
+    rpm: NotRequired[int]
+    tpm: NotRequired[int]
+    concurrent_requests: NotRequired[int]
 
 
 class EndpointSetting(BaseModel):
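Because rpm, tpm and concurrent_requests are NotRequired, existing endpoint-option dicts keep validating while per-endpoint limits become opt-in. For example (values are illustrative):

```python
# Illustrative endpoint options; only endpoint_id and model_id are required keys.
minimal = {"endpoint_id": "my-endpoint", "model_id": "gpt-4o-mini"}

with_limits = {
    "endpoint_id": "my-endpoint",
    "model_id": "gpt-4o-mini",
    "rpm": 120,  # requests per minute
    "tpm": 2_000_000,  # tokens per minute
    "concurrent_requests": 4,
}
```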