vectorvein 0.1.93__tar.gz → 0.1.94__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vectorvein-0.1.93 → vectorvein-0.1.94}/PKG-INFO +1 -1
- {vectorvein-0.1.93 → vectorvein-0.1.94}/pyproject.toml +1 -1
- vectorvein-0.1.94/src/vectorvein/chat_clients/minimax_client.py +13 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/openai_compatible_client.py +32 -2
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/utils.py +4 -1
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/types/defaults.py +24 -44
- vectorvein-0.1.93/src/vectorvein/chat_clients/minimax_client.py +0 -548
- {vectorvein-0.1.93 → vectorvein-0.1.94}/README.md +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/__init__.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/__init__.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/anthropic_client.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/baichuan_client.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/base_client.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/gemini_client.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/groq_client.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/local_client.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/mistral_client.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/openai_client.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/py.typed +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/qwen_client.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/stepfun_client.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/xai_client.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/yi_client.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/py.typed +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/server/token_server.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/settings/__init__.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/settings/py.typed +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/types/enums.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/types/exception.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/types/llm_parameters.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/types/py.typed +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/utilities/media_processing.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/utilities/rate_limiter.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/utilities/retry.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/graph/edge.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/graph/node.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/graph/port.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/graph/workflow.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/__init__.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/audio_generation.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/control_flows.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/file_processing.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/image_generation.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/llms.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/media_editing.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/media_processing.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/output.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/relational_db.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/text_processing.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/tools.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/triggers.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/vector_db.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/video_generation.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/nodes/web_crawlers.py +0 -0
- {vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/workflow/utils/json_to_code.py +0 -0
vectorvein-0.1.94/src/vectorvein/chat_clients/minimax_client.py
ADDED
@@ -0,0 +1,13 @@
+from ..types.enums import BackendType
+from ..types.defaults import MINIMAX_DEFAULT_MODEL
+from .openai_compatible_client import OpenAICompatibleChatClient, AsyncOpenAICompatibleChatClient
+
+
+class MiniMaxChatClient(OpenAICompatibleChatClient):
+    DEFAULT_MODEL = MINIMAX_DEFAULT_MODEL
+    BACKEND_NAME = BackendType.MiniMax
+
+
+class AsyncMiniMaxChatClient(AsyncOpenAICompatibleChatClient):
+    DEFAULT_MODEL = MINIMAX_DEFAULT_MODEL
+    BACKEND_NAME = BackendType.MiniMax
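The new module above replaces the hand-rolled HTTP client (deleted at the bottom of this diff) with thin subclasses of the OpenAI-compatible clients, so MiniMax is now driven through the same code path as every other backend. A minimal usage sketch, assuming backend settings and endpoints are already configured elsewhere; the model name, prompt, and the `.content` attribute access are illustrative assumptions, not taken from this diff:

```python
from vectorvein.chat_clients.minimax_client import MiniMaxChatClient

# Hypothetical example: "abab6.5s-chat" and the message are placeholders,
# and ChatCompletionMessage is assumed to expose a .content attribute.
client = MiniMaxChatClient(model="abab6.5s-chat", stream=False)
message = client.create_completion(
    messages=[{"role": "user", "content": "Hello"}],
)
print(message.content)
```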
{vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/openai_compatible_client.py
RENAMED
@@ -181,7 +181,21 @@ class OpenAICompatibleChatClient(BaseChatClient):
 
         if tools:
             if self.model_setting.function_call_available:
-                tools_params = dict(tools=tools, tool_choice=tool_choice)
+                _tools = tools
+                if self.BACKEND_NAME.value == BackendType.MiniMax.value:  # MiniMax just has to be different
+                    _tools = []
+                    for tool in tools:
+                        _tools.append(
+                            {
+                                "type": "function",
+                                "function": {
+                                    "name": tool["function"]["name"],
+                                    "description": tool["function"].get("description", ""),
+                                    "parameters": json.dumps(tool["function"].get("parameters", {})),
+                                },
+                            }
+                        )
+                tools_params = dict(tools=_tools, tool_choice=tool_choice)
             else:
                 tools_str = json.dumps(tools, ensure_ascii=False, indent=None)
                 additional_system_prompt = generate_tool_use_system_prompt(tools=tools_str)
@@ -256,6 +270,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
                 if chunk.choices[0].delta.tool_calls:
                     for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
                         tool_call.index = index
+                        tool_call.type = "function"  # another issue caused by MiniMax's non-standard behavior
                     yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump(), usage=usage)
                 else:
                     message = chunk.choices[0].delta.model_dump()
@@ -509,7 +524,21 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
 
         if tools:
             if self.model_setting.function_call_available:
-                tools_params = dict(tools=tools, tool_choice=tool_choice)
+                _tools = tools
+                if self.BACKEND_NAME.value == BackendType.MiniMax.value:
+                    _tools = []
+                    for tool in tools:
+                        _tools.append(
+                            {
+                                "type": "function",
+                                "function": {
+                                    "name": tool["function"]["name"],
+                                    "description": tool["function"].get("description", ""),
+                                    "parameters": json.dumps(tool["function"].get("parameters", {})),
+                                },
+                            }
+                        )
+                tools_params = dict(tools=_tools, tool_choice=tool_choice)
             else:
                 tools_str = json.dumps(tools, ensure_ascii=False, indent=None)
                 additional_system_prompt = generate_tool_use_system_prompt(tools=tools_str)
@@ -584,6 +613,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 if chunk.choices[0].delta.tool_calls:
                     for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
                         tool_call.index = index
+                        tool_call.type = "function"
                     yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump(), usage=usage)
                 else:
                     message = chunk.choices[0].delta.model_dump()
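Both the sync and async hunks above apply the same special case for MiniMax: OpenAI-style tool definitions are passed through unchanged for every other backend, while for MiniMax the `function.parameters` object must be re-serialized as a JSON string. A standalone sketch of that conversion; the helper name and the example tool definition are illustrative, not part of the package API:

```python
import json


def to_minimax_tools(tools):
    """Sketch of the conversion applied when BACKEND_NAME is MiniMax:
    identical to the OpenAI tool schema except that "parameters" becomes
    a JSON string instead of a nested object."""
    converted = []
    for tool in tools:
        converted.append(
            {
                "type": "function",
                "function": {
                    "name": tool["function"]["name"],
                    "description": tool["function"].get("description", ""),
                    "parameters": json.dumps(tool["function"].get("parameters", {})),
                },
            }
        )
    return converted


# Illustrative OpenAI-style tool definition (placeholder, not from the diff)
openai_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Look up the weather for a city",
        "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
    },
}
print(to_minimax_tools([openai_tool])[0]["function"]["parameters"])  # prints a JSON string
```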
{vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/chat_clients/utils.py
RENAMED
@@ -311,10 +311,13 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         result = response.json()
         return result["usage"]["prompt_tokens"]
     else:
-        return len(
+        return len(get_gpt_4o_encoding().encode(text))
 
 
 def calculate_image_tokens(width: int, height: int, model: str = "gpt-4o"):
+    if model.startswith("moonshot"):
+        return 1024
+
     if width > 2048 or height > 2048:
         aspect_ratio = width / height
         if aspect_ratio > 1:
{vectorvein-0.1.93 → vectorvein-0.1.94}/src/vectorvein/types/defaults.py
RENAMED
@@ -33,6 +33,28 @@ MOONSHOT_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "function_call_available": True,
         "response_format_available": True,
     },
+    "moonshot-v1-8k-vision-preview": {
+        "id": "moonshot-v1-8k-vision-preview",
+        "context_length": 8192,
+        "max_output_tokens": 4096,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
+    "moonshot-v1-32k-vision-preview": {
+        "id": "moonshot-v1-32k-vision-preview",
+        "context_length": 32768,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
+    "moonshot-v1-128k-vision-preview": {
+        "id": "moonshot-v1-128k-vision-preview",
+        "context_length": 131072,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
 }
 
 # Deepseek models
@@ -293,13 +315,6 @@ QWEN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
 # Yi models
 YI_DEFAULT_MODEL: Final[str] = "yi-lightning"
 YI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
-    "yi-large": {
-        "id": "yi-large",
-        "context_length": 32000,
-        "max_output_tokens": 4096,
-        "function_call_available": False,
-        "response_format_available": False,
-    },
     "yi-lightning": {
         "id": "yi-lightning",
         "context_length": 16000,
@@ -307,44 +322,9 @@ YI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "function_call_available": False,
         "response_format_available": False,
     },
-    "yi-
-        "id": "yi-
+    "yi-vision-v2": {
+        "id": "yi-vision-v2",
         "context_length": 16000,
-        "max_output_tokens": 4096,
-        "function_call_available": False,
-        "response_format_available": False,
-    },
-    "yi-large-fc": {
-        "id": "yi-large-fc",
-        "context_length": 32000,
-        "max_output_tokens": 4096,
-        "function_call_available": True,
-        "response_format_available": False,
-    },
-    "yi-medium": {
-        "id": "yi-medium",
-        "context_length": 16000,
-        "max_output_tokens": 4096,
-        "function_call_available": False,
-        "response_format_available": False,
-    },
-    "yi-medium-200k": {
-        "id": "yi-medium-200k",
-        "context_length": 200000,
-        "max_output_tokens": 4096,
-        "function_call_available": False,
-        "response_format_available": False,
-    },
-    "yi-spark": {
-        "id": "yi-spark",
-        "context_length": 16000,
-        "max_output_tokens": 4096,
-        "function_call_available": False,
-        "response_format_available": False,
-    },
-    "yi-vision": {
-        "id": "yi-vision",
-        "context_length": 4000,
         "max_output_tokens": 2000,
         "function_call_available": False,
         "response_format_available": False,
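The defaults tables above are plain dictionaries keyed by model name; the chat clients read per-model limits and capability flags from them (for example `context_length`, `function_call_available`, and the `native_multimodal` flag added for the Moonshot vision-preview entries). A small illustrative lookup against the new entries, values taken from this diff:

```python
from vectorvein.types.defaults import MOONSHOT_MODELS

# Illustrative only: inspect one of the newly added vision-preview entries.
model = MOONSHOT_MODELS["moonshot-v1-32k-vision-preview"]
print(model["context_length"])           # 32768
print(model.get("native_multimodal"))    # True
print(model["function_call_available"])  # True
```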
vectorvein-0.1.93/src/vectorvein/chat_clients/minimax_client.py
DELETED
@@ -1,548 +0,0 @@
-# @Author: Bi Ying
-# @Date: 2024-07-26 14:48:55
-import json
-from functools import cached_property
-from typing import Iterable, Literal, Generator, AsyncGenerator, overload, Any
-
-import httpx
-
-from ..types import defaults as defs
-from .utils import cutoff_messages, get_token_counts
-from .base_client import BaseChatClient, BaseAsyncChatClient
-from ..types.enums import ContextLengthControlType, BackendType
-from ..types.llm_parameters import (
-    NotGiven,
-    NOT_GIVEN,
-    ToolParam,
-    ToolChoice,
-    ChatCompletionMessage,
-    ChatCompletionDeltaMessage,
-    ChatCompletionStreamOptionsParam,
-)
-
-
-def extract_tool_calls(response):
-    try:
-        message = response["choices"][0].get("delta") or response["choices"][0].get("message", {})
-        tool_calls = message.get("tool_calls")
-        if tool_calls:
-            return {
-                "tool_calls": [
-                    {
-                        "index": index,
-                        "id": tool_call["id"],
-                        "function": tool_call["function"],
-                        "type": "function",
-                    }
-                    for index, tool_call in enumerate(tool_calls)
-                ]
-            }
-        else:
-            return {}
-    except Exception:
-        return {}
-
-
-class MiniMaxChatClient(BaseChatClient):
-    DEFAULT_MODEL: str = defs.MINIMAX_DEFAULT_MODEL
-    BACKEND_NAME: BackendType = BackendType.MiniMax
-
-    def __init__(
-        self,
-        model: str = defs.MINIMAX_DEFAULT_MODEL,
-        stream: bool = True,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
-        random_endpoint: bool = True,
-        endpoint_id: str = "",
-        http_client: httpx.Client | None = None,
-        backend_name: str | None = None,
-    ):
-        super().__init__(
-            model,
-            stream,
-            temperature,
-            context_length_control,
-            random_endpoint,
-            endpoint_id,
-            http_client,
-            backend_name,
-        )
-        self.model_id = None
-        self.endpoint = None
-
-    @cached_property
-    def raw_client(self):
-        self.endpoint, self.model_id = self._set_endpoint()
-        if not self.http_client:
-            self.http_client = httpx.Client(timeout=300, proxy=self.endpoint.proxy)
-        return self.http_client
-
-    @overload
-    def create_completion(
-        self,
-        *,
-        messages: list,
-        model: str | None = None,
-        stream: Literal[False] = False,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ) -> ChatCompletionMessage:
-        pass
-
-    @overload
-    def create_completion(
-        self,
-        *,
-        messages: list,
-        model: str | None = None,
-        stream: Literal[True],
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ) -> Generator[ChatCompletionDeltaMessage, None, None]:
-        pass
-
-    @overload
-    def create_completion(
-        self,
-        *,
-        messages: list,
-        model: str | None = None,
-        stream: bool,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
-        pass
-
-    def create_completion(
-        self,
-        messages: list,
-        model: str | None = None,
-        stream: Literal[False] | Literal[True] = False,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ):
-        if model is not None:
-            self.model = model
-        if stream is not None:
-            self.stream = stream
-        if temperature is not None:
-            self.temperature = temperature
-        if isinstance(tool_choice, NotGiven):
-            tool_choice = "auto"
-
-        self.model_setting = self.backend_settings.models[self.model]
-        if self.model_id is None:
-            self.model_id = self.model_setting.id
-
-        self.endpoint, self.model_id = self._set_endpoint()
-
-        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
-            messages = cutoff_messages(
-                messages,
-                max_count=self.model_setting.context_length,
-                backend=self.BACKEND_NAME,
-                model=self.model_setting.id,
-            )
-
-        if tools:
-            tools_params = {
-                "tools": [
-                    {
-                        "type": "function",
-                        "function": {
-                            "name": tool["function"]["name"],
-                            "description": tool["function"].get("description", ""),
-                            "parameters": json.dumps(
-                                tool["function"].get("parameters", {})
-                            ),  # insists on being different: parameters is a string
-                        },
-                    }
-                    for tool in tools
-                ],
-                "tool_choice": tool_choice,
-            }
-        else:
-            tools_params = {}
-
-        if top_p:
-            top_p_params = {"top_p": top_p}
-        else:
-            top_p_params = {}
-
-        temperature_params = {}
-        if temperature:
-            temperature_params = {"temperature": temperature}
-
-        if max_tokens is None:
-            max_output_tokens = self.model_setting.max_output_tokens
-            if max_output_tokens is not None:
-                token_counts = get_token_counts(
-                    text={"messages": messages, "tools_params": tools_params},
-                    model=self.model,
-                    use_token_server_first=True,
-                )
-                max_tokens = self.model_setting.context_length - token_counts
-                max_tokens = min(max(max_tokens, 1), max_output_tokens)
-            else:
-                token_counts = get_token_counts(
-                    text={"messages": messages, "tools_params": tools_params},
-                    model=self.model,
-                    use_token_server_first=True,
-                )
-                max_tokens = self.model_setting.context_length - token_counts
-
-        self.url = self.endpoint.api_base or "https://api.minimax.chat/v1/text/chatcompletion_v2"
-        self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}
-
-        request_body = {
-            "model": self.model_id,
-            "messages": messages,
-            "max_tokens": max_tokens,
-            "stream": self.stream,
-            "mask_sensitive_info": False,
-            **temperature_params,
-            **top_p_params,
-            **tools_params,
-            **kwargs,
-        }
-
-        raw_client = self.raw_client
-
-        if self.stream:
-
-            def generator():
-                with raw_client.stream(
-                    "POST",
-                    url=self.url,
-                    headers=self.headers,
-                    json=request_body,
-                    timeout=300,
-                ) as response:
-                    for chunk in response.iter_lines():
-                        if chunk:
-                            chunk_data = json.loads(chunk[6:])
-                            if chunk_data["object"] != "chat.completion.chunk":
-                                continue
-                            tool_calls_params = extract_tool_calls(chunk_data)
-                            has_tool_calls = True if tool_calls_params else False
-                            if has_tool_calls:
-                                yield ChatCompletionDeltaMessage(
-                                    **{
-                                        "content": chunk_data["choices"][0]["delta"].get("content"),
-                                        "role": "assistant",
-                                        **tool_calls_params,
-                                    }
-                                )
-                            else:
-                                yield ChatCompletionDeltaMessage(
-                                    **{
-                                        "content": chunk_data["choices"][0]["delta"]["content"],
-                                        "role": "assistant",
-                                    }
-                                )
-
-            return generator()
-        else:
-            response = raw_client.post(
-                url=self.url,
-                headers=self.headers,
-                json=request_body,
-                timeout=300,
-            )
-            result = response.json()
-            tool_calls_params = extract_tool_calls(result)
-            return ChatCompletionMessage(
-                **{
-                    "content": result["choices"][0]["message"].get("content"),
-                    "usage": {
-                        "prompt_tokens": 0,
-                        "completion_tokens": result["usage"]["total_tokens"],
-                        "total_tokens": result["usage"]["total_tokens"],
-                    },
-                    "role": "assistant",
-                    **tool_calls_params,
-                }
-            )
-
-
-class AsyncMiniMaxChatClient(BaseAsyncChatClient):
-    DEFAULT_MODEL: str = defs.MINIMAX_DEFAULT_MODEL
-    BACKEND_NAME: BackendType = BackendType.MiniMax
-
-    def __init__(
-        self,
-        model: str = defs.MINIMAX_DEFAULT_MODEL,
-        stream: bool = True,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
-        random_endpoint: bool = True,
-        endpoint_id: str = "",
-        http_client: httpx.AsyncClient | None = None,
-        backend_name: str | None = None,
-    ):
-        super().__init__(
-            model,
-            stream,
-            temperature,
-            context_length_control,
-            random_endpoint,
-            endpoint_id,
-            http_client,
-            backend_name,
-        )
-        self.model_id = None
-        self.endpoint = None
-
-    @cached_property
-    def raw_client(self):
-        self.endpoint, self.model_id = self._set_endpoint()
-        if not self.http_client:
-            self.http_client = httpx.AsyncClient(timeout=300, proxy=self.endpoint.proxy)
-        return self.http_client
-
-    @overload
-    async def create_completion(
-        self,
-        *,
-        messages: list,
-        model: str | None = None,
-        stream: Literal[False] = False,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ) -> ChatCompletionMessage:
-        pass
-
-    @overload
-    async def create_completion(
-        self,
-        *,
-        messages: list,
-        model: str | None = None,
-        stream: Literal[True],
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
-        pass
-
-    @overload
-    async def create_completion(
-        self,
-        *,
-        messages: list,
-        model: str | None = None,
-        stream: bool,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
-        pass
-
-    async def create_completion(
-        self,
-        *,
-        messages: list,
-        model: str | None = None,
-        stream: Literal[False] | Literal[True] = False,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ):
-        if model is not None:
-            self.model = model
-        if stream is not None:
-            self.stream = stream
-        if temperature is not None:
-            self.temperature = temperature
-        if isinstance(tool_choice, NotGiven):
-            tool_choice = "auto"
-
-        self.model_setting = self.backend_settings.models[self.model]
-        if self.model_id is None:
-            self.model_id = self.model_setting.id
-
-        self.endpoint, self.model_id = self._set_endpoint()
-
-        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
-            messages = cutoff_messages(
-                messages,
-                max_count=self.model_setting.context_length,
-                backend=self.BACKEND_NAME,
-                model=self.model_setting.id,
-            )
-
-        if tools:
-            tools_params = {
-                "tools": [
-                    {
-                        "type": "function",
-                        "function": {
-                            "name": tool["function"]["name"],
-                            "description": tool["function"].get("description", ""),
-                            "parameters": json.dumps(tool["function"].get("parameters", {})),
-                        },
-                    }
-                    for tool in tools
-                ],
-                "tool_choice": tool_choice,
-            }
-        else:
-            tools_params = {}
-
-        if top_p:
-            top_p_params = {"top_p": top_p}
-        else:
-            top_p_params = {}
-
-        temperature_params = {}
-        if temperature:
-            temperature_params = {"temperature": temperature}
-
-        if max_tokens is None:
-            max_output_tokens = self.model_setting.max_output_tokens
-            if max_output_tokens is not None:
-                token_counts = get_token_counts(
-                    text={"messages": messages, "tools_params": tools_params},
-                    model=self.model,
-                    use_token_server_first=True,
-                )
-                max_tokens = self.model_setting.context_length - token_counts
-                max_tokens = min(max(max_tokens, 1), max_output_tokens)
-            else:
-                token_counts = get_token_counts(
-                    text={"messages": messages, "tools_params": tools_params},
-                    model=self.model,
-                    use_token_server_first=True,
-                )
-                max_tokens = self.model_setting.context_length - token_counts
-
-        self.url = self.endpoint.api_base or "https://api.minimax.chat/v1/text/chatcompletion_v2"
-        self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}
-
-        request_body = {
-            "model": self.model_id,
-            "messages": messages,
-            "max_tokens": max_tokens,
-            "stream": self.stream,
-            "mask_sensitive_info": False,
-            **temperature_params,
-            **top_p_params,
-            **tools_params,
-            **kwargs,
-        }
-
-        raw_client = self.raw_client
-
-        if self.stream:
-
-            async def generator():
-                async with raw_client.stream(
-                    "POST",
-                    url=self.url,
-                    headers=self.headers,
-                    json=request_body,
-                    timeout=300,
-                ) as response:
-                    has_tool_calls = False
-                    async for chunk in response.aiter_lines():
-                        if chunk:
-                            chunk_data = json.loads(chunk[6:])
-                            if chunk_data["object"] != "chat.completion.chunk":
-                                continue
-                            tool_calls_params = extract_tool_calls(chunk_data)
-                            has_tool_calls = True if tool_calls_params else False
-                            if has_tool_calls:
-                                yield ChatCompletionDeltaMessage(
-                                    **{
-                                        "content": chunk_data["choices"][0]["delta"].get("content"),
-                                        "role": "assistant",
-                                        **tool_calls_params,
-                                    }
-                                )
-                            else:
-                                yield ChatCompletionDeltaMessage(
-                                    **{
-                                        "content": chunk_data["choices"][0]["delta"]["content"],
-                                        "role": "assistant",
-                                    }
-                                )
-
-            return generator()
-        else:
-            response = await raw_client.post(
-                url=self.url,
-                headers=self.headers,
-                json=request_body,
-                timeout=300,
-            )
-            result = response.json()
-            tool_calls_params = extract_tool_calls(result)
-            return ChatCompletionMessage(
-                **{
-                    "content": result["choices"][0]["message"].get("content"),
-                    "usage": {
-                        "prompt_tokens": 0,
-                        "completion_tokens": result["usage"]["total_tokens"],
-                        "total_tokens": result["usage"]["total_tokens"],
-                    },
-                    "role": "assistant",
-                    **tool_calls_params,
-                }
-            )
-
-    async def __aexit__(self, exc_type, exc, tb):
-        await self.http_client.aclose()