xinference 1.9.1__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (34) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +415 -1
  3. xinference/constants.py +2 -0
  4. xinference/core/supervisor.py +29 -1
  5. xinference/model/audio/core.py +5 -0
  6. xinference/model/audio/kokoro.py +1 -1
  7. xinference/model/audio/kokoro_zh.py +124 -0
  8. xinference/model/audio/model_spec.json +20 -0
  9. xinference/model/embedding/sentence_transformers/core.py +4 -4
  10. xinference/model/embedding/vllm/core.py +7 -1
  11. xinference/model/image/model_spec.json +2 -3
  12. xinference/model/llm/core.py +10 -0
  13. xinference/model/llm/llama_cpp/core.py +1 -0
  14. xinference/model/llm/llm_family.json +40 -20
  15. xinference/model/llm/llm_family.py +1 -0
  16. xinference/model/llm/mlx/core.py +52 -33
  17. xinference/model/llm/sglang/core.py +2 -44
  18. xinference/model/llm/tool_parsers/__init__.py +58 -0
  19. xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
  20. xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +128 -0
  21. xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
  22. xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
  23. xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
  24. xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
  25. xinference/model/llm/transformers/core.py +1 -1
  26. xinference/model/llm/utils.py +127 -45
  27. xinference/model/llm/vllm/core.py +2 -61
  28. xinference/types.py +105 -2
  29. {xinference-1.9.1.dist-info → xinference-1.10.0.dist-info}/METADATA +7 -3
  30. {xinference-1.9.1.dist-info → xinference-1.10.0.dist-info}/RECORD +34 -26
  31. {xinference-1.9.1.dist-info → xinference-1.10.0.dist-info}/WHEEL +0 -0
  32. {xinference-1.9.1.dist-info → xinference-1.10.0.dist-info}/entry_points.txt +0 -0
  33. {xinference-1.9.1.dist-info → xinference-1.10.0.dist-info}/licenses/LICENSE +0 -0
  34. {xinference-1.9.1.dist-info → xinference-1.10.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,58 @@
1
+ from functools import wraps
2
+ from typing import Any, Callable, Dict, Type
3
+
4
# Process-wide lookup table: parser name -> registered tool parser class.
TOOL_PARSERS: Dict[str, Type[Any]] = {}


def register_tool_parser(name: str):
    """
    Build a class decorator that records a tool parser in ``TOOL_PARSERS``.

    The decorated class is stored under ``name`` when the class statement
    runs and is handed back untouched. Registering another class under a
    name that is already present replaces the earlier entry.

    Args:
        name (str): Key to store the class under in ``TOOL_PARSERS``
            (for example a model family name such as "qwen" or "glm4").

    Returns:
        Callable: A decorator that takes a class and returns that same class.

    Example:
        @register_tool_parser("qwen")
        class QwenToolParser(ToolParser):
            def extract_tool_calls(self, model_output: str):
                ...
    """

    def _register(parser_cls: Type[Any]) -> Type[Any]:
        """Store ``parser_cls`` under ``name`` and return it unchanged."""
        TOOL_PARSERS[name] = parser_cls
        return parser_cls

    return _register
48
+
49
+
50
+ # Import all tool parser modules to trigger decorator registration
51
+ # This ensures all tool parsers are automatically registered when this module is imported
52
+ from . import (
53
+ deepseek_r1_tool_parser,
54
+ deepseek_v3_tool_parser,
55
+ glm4_tool_parser,
56
+ llama3_tool_parser,
57
+ qwen_tool_parser,
58
+ )
@@ -0,0 +1,33 @@
1
class ToolParser:
    """
    Base class for model-specific tool-call parsers.

    It is not meant to be used directly: both methods raise
    ``NotImplementedError`` and are expected to be overridden by derived
    classes.
    """

    def extract_tool_calls(self, model_output: str):
        """
        Extract tool calls from a complete model-generated string.

        Intended for non-streaming responses, where the entire model response
        is available before anything is sent to the client.

        Args:
            model_output (str): The complete output produced by the model.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        # Name the concrete class so the message points at the subclass that
        # is missing the override rather than at this base class.
        raise NotImplementedError(
            f"{type(self).__name__}.extract_tool_calls has not been implemented!"
        )

    def extract_tool_calls_streaming(
        self, previous_text, current_text: str, delta_text: str
    ):
        """
        Extract tool calls from an incomplete (streaming) response.

        Args:
            previous_text: Text received before the current chunk.
            current_text (str): Text accumulated so far.
            delta_text (str): Text newly added by the current chunk.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            f"{type(self).__name__}.extract_tool_calls_streaming has not been "
            "implemented!"
        )
@@ -0,0 +1,128 @@
1
+ import json
2
+ import logging
3
+ import re
4
+ from typing import Any, List, Optional, Tuple
5
+
6
+ from . import register_tool_parser
7
+ from .abstract_tool_parser import ToolParser
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
@register_tool_parser("deepseek-r1")
class DeepseekR1ToolParser(ToolParser):
    """
    Tool parser implementation for DeepSeek R1 model.

    A tool call is recognised as a ``tool call begin`` marker, the word
    ``function``, a ``tool sep`` marker, the function name on the rest of the
    line, a fenced ``json`` block holding the arguments, and a
    ``tool call end`` marker.
    """

    def __init__(self):
        """
        Initialize the DeepSeek R1 tool parser.
        """
        super().__init__()
        # Every bar of a marker sits inside a character class so it is matched
        # literally. A bare ``|`` would split the whole pattern into
        # alternatives and make stray ">" characters match as empty tool calls.
        # Both the ASCII bar and the full-width bar (U+FF5C) are accepted.
        # NOTE(review): DeepSeek special tokens are believed to use the
        # full-width form - confirm against the tokenizer in use.
        self.tool_calls_regex = (
            r"<[|｜]tool▁call▁begin[|｜]>function<[|｜]tool▁sep[|｜]>([^\n]+)\n"
            r"```json\n(.*?)\n```<[|｜]tool▁call▁end[|｜]>"
        )

    def extract_tool_calls(
        self, model_output: str
    ) -> List[Tuple[Optional[str], Optional[str], Optional[dict]]]:
        """
        Extract tool calls from complete model output.

        Every match of ``self.tool_calls_regex`` yields one entry. The JSON
        body is decoded into the arguments; a body that is not valid JSON, or
        that does not decode to an object, is returned as raw content instead.
        Identical entries are reported once, in first-seen order.

        Args:
            model_output (str): The complete output string from the model.

        Returns:
            List[Tuple[Optional[str], Optional[str], Optional[dict]]]:
                A list of tuples where each tuple contains:
                - content (str or None): Raw text if parsing failed, else None
                - function_name (str or None): Name of the function to call
                - arguments (dict or None): Parsed function arguments
                When nothing matches, the single entry
                ``(model_output, None, None)`` is returned.

        Example:
            For a model output of
            ``<|tool▁call▁begin|>function<|tool▁sep|>get_current_weather``
            followed by a fenced ``json`` block holding
            ``{"location": "上海", "unit": "celsius"}`` and the
            ``<|tool▁call▁end|>`` marker, the result is
            ``[(None, 'get_current_weather',
            {'location': '上海', 'unit': 'celsius'})]``.
        """
        matches = re.findall(self.tool_calls_regex, model_output, re.DOTALL)
        if not matches:
            # No tool calls found, return the original output as content
            return [(model_output, None, None)]

        seen = set()
        results: List[Tuple[Optional[str], Optional[str], Optional[dict]]] = []

        for func_name, raw_json in matches:
            arguments = None
            try:
                arguments = json.loads(raw_json)
            except json.JSONDecodeError as e:
                logger.warning(
                    "Failed to parse tool call JSON: %s, error: %s", raw_json, e
                )
            else:
                if not isinstance(arguments, dict):
                    logger.warning(
                        "Tool call arguments are not a JSON object: %s", raw_json
                    )
                    arguments = None

            tool_call_tuple: Tuple[Optional[str], Optional[str], Optional[dict]]
            if arguments is None:
                # Unusable arguments: hand the raw text back as content.
                tool_call_tuple = (raw_json, None, None)
                dedup_key = ("raw", raw_json)
            else:
                tool_call_tuple = (None, func_name, arguments)
                # A canonical JSON dump is hashable even when the arguments
                # contain nested lists or objects, and ignores key order.
                dedup_key = (
                    "call",
                    func_name,
                    json.dumps(arguments, sort_keys=True),
                )

            # Add to results if not already seen
            if dedup_key not in seen:
                seen.add(dedup_key)
                results.append(tool_call_tuple)

        return results

    def extract_tool_calls_streaming(
        self, previous_text: List[str], current_text: str, delta_text: str
    ) -> Optional[Any]:
        """
        Extract tool calls from streaming output.

        Not supported by this parser; the method always raises.

        Args:
            previous_text (List[str]): Previous text chunks from the stream.
            current_text (str): Current accumulated text.
            delta_text (str): New text delta in this chunk.

        Raises:
            NotImplementedError: Always raised as streaming is not supported.

        Note:
            Use extract_tool_calls() with complete output instead.
        """
        raise NotImplementedError(
            "Streaming support for tool calls is available only when using "
            "Qwen models with vLLM backend or GLM4-chat models without vLLM backend. "
            "DeepSeek R1 does not support streaming tool call extraction."
        )
@@ -0,0 +1,145 @@
1
+ import json
2
+ import logging
3
+ import re
4
+ from typing import Any, Dict, List, Optional, Tuple
5
+
6
+ from . import register_tool_parser
7
+ from .abstract_tool_parser import ToolParser
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
@register_tool_parser("deepseek-v3")
class DeepseekV3ToolParser(ToolParser):
    """
    Tool parser implementation for DeepSeek V3 model.

    Tool calls are looked for inside fenced ``json`` code blocks; each block
    is expected to hold an object with ``name`` and ``parameters`` keys.
    """

    def __init__(self):
        """
        Initialize the DeepSeek V3 tool parser.
        """
        super().__init__()
        # Regex pattern to match JSON code blocks. Kept as a plain string;
        # it is handed to the ``re`` module functions at each use.
        self.tool_calls_regex = r"\s*```json\s*(.*?)\s*```"

    def _parse_json_function_call(
        self,
        function_call_str: str,
    ) -> str:
        """
        Return the body of the first fenced ``json`` block in the input.

        Args:
            function_call_str (str): The function call string to parse.

        Returns:
            str: The text inside the first fenced block, or the original
            string when no block is found.
        """
        # The pattern is a str, so it must go through re.search; calling
        # ``.search`` on it directly raises AttributeError.
        match = re.search(self.tool_calls_regex, function_call_str, re.DOTALL)
        if match:
            return match.group(1)
        return function_call_str

    def extract_tool_calls(
        self, model_output: str
    ) -> List[Tuple[Optional[str], Optional[str], Optional[Dict[str, Any]]]]:
        """
        Extract tool calls from complete model output.

        Each fenced ``json`` block is decoded and its ``name`` and
        ``parameters`` entries become one tool call. A block that is not valid
        JSON, or that lacks either key, is returned as raw content instead.
        Identical entries are reported once, in first-seen order.

        Args:
            model_output (str): The complete output string from the model.

        Returns:
            List[Tuple[Optional[str], Optional[str], Optional[Dict[str, Any]]]]:
                A list of tuples where each tuple contains:
                - content (str or None): Raw text if parsing failed, else None
                - function_name (str or None): Name of the function to call
                - parameters (dict or None): Function parameters
                When no block is found, the single entry
                ``(model_output, None, None)`` is returned.

        Example:
            A fenced ``json`` block holding
            ``{"name": "get_weather", "parameters": {"location": "Beijing"}}``
            yields ``[(None, 'get_weather', {'location': 'Beijing'})]``.
        """
        matches = re.findall(self.tool_calls_regex, model_output, re.DOTALL)

        if not matches:
            # No tool calls found, return the original output as content
            return [(model_output, None, None)]

        seen = set()
        results: List[Tuple[Optional[str], Optional[str], Optional[Dict[str, Any]]]] = (
            []
        )

        for raw_json in matches:
            tool_call_tuple: Tuple[
                Optional[str], Optional[str], Optional[Dict[str, Any]]
            ]
            try:
                func_and_args = json.loads(raw_json)
                name = func_and_args["name"]
                parameters = func_and_args["parameters"]
            except (json.JSONDecodeError, KeyError, TypeError) as e:
                # Not JSON, not an object, or missing a required key:
                # hand the raw text back as content.
                logger.warning(
                    "Failed to parse tool call JSON: %s, error: %r", raw_json, e
                )
                tool_call_tuple = (raw_json, None, None)
                dedup_key = ("raw", raw_json)
            else:
                tool_call_tuple = (None, name, parameters)
                # Key on the full call (values included, key order ignored)
                # so calls that differ only in argument values are both kept.
                dedup_key = (
                    "call",
                    json.dumps([name, parameters], sort_keys=True),
                )

            # Add to results if not already seen
            if dedup_key not in seen:
                seen.add(dedup_key)
                results.append(tool_call_tuple)

        return results

    def extract_tool_calls_streaming(
        self, previous_text: List[str], current_text: str, delta_text: str
    ) -> Optional[Any]:
        """
        Extract tool calls from streaming output.

        Not supported by this parser; the method always raises.

        Args:
            previous_text (List[str]): Previous text chunks from the stream.
            current_text (str): Current accumulated text.
            delta_text (str): New text delta in this chunk.

        Raises:
            NotImplementedError: Always raised as streaming is not supported.
        """
        raise NotImplementedError(
            "Streaming support for tool calls is available only when using "
            "Qwen models with vLLM backend or GLM4-chat models without vLLM backend. "
            "DeepSeek V3 does not support streaming tool call extraction."
        )
@@ -0,0 +1,123 @@
1
+ import json
2
+ import logging
3
+ from typing import Any, Dict, List, Optional, Tuple
4
+
5
+ from . import register_tool_parser
6
+ from .abstract_tool_parser import ToolParser
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
@register_tool_parser("glm4")
class Glm4ToolParser(ToolParser):
    """
    Tool parser implementation for GLM4 model.

    The extraction methods act on an already-structured dict carrying
    ``name`` and ``arguments`` keys; any other input is passed through as
    plain content.
    """

    def __init__(self):
        """
        Initialize the GLM4 tool parser.
        """
        super().__init__()
        # Regex pattern to match JSON code blocks
        self.tool_calls_regex = r"\s*```json\s*(.*?)\s*```"

    def _parse_json_function_call(
        self,
        function_call_str: str,
    ) -> str:
        """
        Return the body of the first fenced ``json`` block in the input.

        Args:
            function_call_str (str): The function call string to parse.

        Returns:
            str: The text inside the first fenced block, or the original
            string when no block is found.
        """
        # Imported here because this module's top-level imports do not
        # include ``re``.
        import re

        # The pattern is a str, so it must go through re.search; calling
        # ``.search`` on it directly raises AttributeError.
        match = re.search(self.tool_calls_regex, function_call_str, re.DOTALL)
        if match:
            return match.group(1)
        return function_call_str

    def _parse_output(
        self, output: Any
    ) -> Tuple[Optional[str], Optional[str], Optional[Any]]:
        """Turn one model output into a (content, name, arguments) tuple."""
        if not isinstance(output, dict):
            return (str(output), None, None)
        try:
            name = output["name"]
            arguments = output["arguments"]
        except KeyError:
            logger.error("Can't parse glm output: %s", output)
            return (str(output), None, None)
        try:
            arguments = json.loads(arguments)
        except (TypeError, ValueError, RecursionError):
            # Not a JSON document (or not a string at all): keep the
            # arguments exactly as they were given.
            pass
        return (None, name, arguments)

    def extract_tool_calls(
        self, model_output: str
    ) -> List[Tuple[Optional[str], Optional[str], Optional[Dict[str, Any]]]]:
        """
        Extract tool calls from complete model output.

        Args:
            model_output (str): The model output. Despite the annotation, a
                tool call is only recognised when this is a dict with
                ``name`` and ``arguments`` keys.

        Returns:
            List[Tuple[Optional[str], Optional[str], Optional[Dict[str, Any]]]]:
                A single-element list whose tuple contains:
                - content (str or None): ``str(model_output)`` when no tool
                  call was recognised, else None
                - function_name (str or None): Name of the function to call
                - arguments (dict or None): ``arguments`` decoded from JSON
                  when possible, otherwise passed through as given

        Example:
            ``{"name": "get_weather", "arguments": '{"location": "Beijing"}'}``
            yields ``[(None, 'get_weather', {'location': 'Beijing'})]``.
        """
        return [self._parse_output(model_output)]

    def extract_tool_calls_streaming(
        self, previous_text: List[str], current_text: str, delta_text: str
    ) -> Optional[Any]:
        """
        Extract a tool call from streaming output.

        Only ``current_text`` is inspected, and it is handled exactly like
        the input of extract_tool_calls(). The result is a single tuple
        rather than a list. This method does not raise for unsupported input.

        Args:
            previous_text (List[str]): Previous text chunks from the stream.
            current_text (str): Current accumulated text (or a dict carrying
                ``name`` and ``arguments``).
            delta_text (str): New text delta in this chunk.

        Returns:
            A ``(content, function_name, arguments)`` tuple.
        """
        return self._parse_output(current_text)
@@ -0,0 +1,77 @@
1
+ import logging
2
+ from typing import Any, Dict, List, Optional, Tuple
3
+
4
+ from . import register_tool_parser
5
+ from .abstract_tool_parser import ToolParser
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
@register_tool_parser("llama3")
class Llama3ToolParser(ToolParser):
    """
    Tool parser implementation for Llama3 model.

    The model output is expected to be a dictionary literal with ``name``
    and ``parameters`` keys. It is parsed as data only and never executed.
    """

    def __init__(self):
        """
        Initialize the Llama3 tool parser.
        """
        super().__init__()

    def extract_tool_calls(
        self, model_output: str
    ) -> List[Tuple[Optional[str], Optional[str], Optional[Dict[str, Any]]]]:
        """
        Extract tool calls from complete model output.

        The output is first read as a Python literal and, failing that, as
        JSON (which additionally covers ``true``, ``false`` and ``null``).

        Args:
            model_output (str): The complete output string from the model.

        Returns:
            List[Tuple[Optional[str], Optional[str], Optional[Dict[str, Any]]]]:
                A single-element list whose tuple contains:
                - content (str or None): The original output if parsing
                  failed, None if successful
                - function_name (str or None): Name of the function to call
                - parameters (dict or None): Function parameters
        """
        # Function-scope imports: this module's top-level imports only cover
        # ``logging`` and ``typing``.
        import ast
        import json

        if not isinstance(model_output, str):
            return [(model_output, None, None)]

        # SECURITY: model output is untrusted text. eval() would execute
        # arbitrary expressions in it (builtins stay reachable even with
        # empty globals), so only literal syntax is accepted here.
        text = model_output.strip()
        try:
            data = ast.literal_eval(text)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            try:
                data = json.loads(text)
            except (ValueError, RecursionError):
                return [(model_output, None, None)]

        try:
            return [(None, data["name"], data["parameters"])]
        except (KeyError, TypeError):
            # Parsed value is not a mapping or lacks a required key.
            return [(model_output, None, None)]

    def extract_tool_calls_streaming(
        self, previous_text: List[str], current_text: str, delta_text: str
    ) -> Optional[Any]:
        """
        Extract tool calls from streaming output.

        Not supported by this parser; the method always raises.

        Args:
            previous_text (List[str]): Previous text chunks from the stream.
            current_text (str): Current accumulated text.
            delta_text (str): New text delta in this chunk.

        Raises:
            NotImplementedError: Always raised as streaming is not supported.

        Note:
            Use extract_tool_calls() with complete output instead.
        """
        raise NotImplementedError(
            "Streaming support for tool calls is available only when using "
            "Qwen models with vLLM backend or GLM4-chat models without vLLM backend. "
            "Llama3 does not support streaming tool call extraction."
        )