xinference 1.5.0.post2__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +107 -11
- xinference/client/restful/restful_client.py +51 -11
- xinference/constants.py +5 -1
- xinference/core/media_interface.py +758 -0
- xinference/core/model.py +49 -9
- xinference/core/supervisor.py +1 -1
- xinference/core/utils.py +1 -1
- xinference/core/worker.py +33 -39
- xinference/deploy/cmdline.py +17 -0
- xinference/deploy/utils.py +0 -3
- xinference/model/audio/__init__.py +16 -27
- xinference/model/audio/core.py +2 -1
- xinference/model/audio/cosyvoice.py +4 -2
- xinference/model/audio/model_spec.json +63 -46
- xinference/model/audio/model_spec_modelscope.json +31 -14
- xinference/model/embedding/__init__.py +16 -24
- xinference/model/image/__init__.py +15 -25
- xinference/model/llm/__init__.py +40 -115
- xinference/model/llm/core.py +29 -6
- xinference/model/llm/llama_cpp/core.py +30 -347
- xinference/model/llm/llm_family.json +1674 -2203
- xinference/model/llm/llm_family.py +71 -7
- xinference/model/llm/llm_family_csghub.json +0 -32
- xinference/model/llm/llm_family_modelscope.json +1838 -2016
- xinference/model/llm/llm_family_openmind_hub.json +19 -325
- xinference/model/llm/lmdeploy/core.py +7 -2
- xinference/model/llm/mlx/core.py +23 -7
- xinference/model/llm/reasoning_parser.py +281 -5
- xinference/model/llm/sglang/core.py +39 -11
- xinference/model/llm/transformers/chatglm.py +9 -2
- xinference/model/llm/transformers/cogagent.py +10 -12
- xinference/model/llm/transformers/cogvlm2.py +6 -3
- xinference/model/llm/transformers/cogvlm2_video.py +3 -6
- xinference/model/llm/transformers/core.py +58 -60
- xinference/model/llm/transformers/deepseek_v2.py +4 -2
- xinference/model/llm/transformers/deepseek_vl.py +10 -4
- xinference/model/llm/transformers/deepseek_vl2.py +9 -4
- xinference/model/llm/transformers/gemma3.py +4 -5
- xinference/model/llm/transformers/glm4v.py +3 -21
- xinference/model/llm/transformers/glm_edge_v.py +3 -20
- xinference/model/llm/transformers/intern_vl.py +3 -6
- xinference/model/llm/transformers/internlm2.py +1 -1
- xinference/model/llm/transformers/minicpmv25.py +4 -2
- xinference/model/llm/transformers/minicpmv26.py +5 -3
- xinference/model/llm/transformers/omnilmm.py +1 -1
- xinference/model/llm/transformers/opt.py +1 -1
- xinference/model/llm/transformers/ovis2.py +302 -0
- xinference/model/llm/transformers/qwen-omni.py +8 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +5 -1
- xinference/model/llm/transformers/qwen_vl.py +5 -2
- xinference/model/llm/utils.py +96 -45
- xinference/model/llm/vllm/core.py +108 -24
- xinference/model/llm/vllm/distributed_executor.py +8 -7
- xinference/model/llm/vllm/xavier/allocator.py +1 -1
- xinference/model/llm/vllm/xavier/block_manager.py +1 -1
- xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
- xinference/model/llm/vllm/xavier/executor.py +1 -1
- xinference/model/llm/vllm/xavier/test/test_xavier.py +2 -11
- xinference/model/rerank/__init__.py +13 -24
- xinference/model/video/__init__.py +15 -25
- xinference/model/video/core.py +3 -3
- xinference/model/video/diffusers.py +157 -13
- xinference/model/video/model_spec.json +100 -0
- xinference/model/video/model_spec_modelscope.json +104 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
- xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
- xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
- xinference/thirdparty/cosyvoice/bin/train.py +7 -2
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
- xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
- xinference/thirdparty/cosyvoice/cli/model.py +140 -155
- xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
- xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
- xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
- xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
- xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
- xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
- xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
- xinference/thirdparty/cosyvoice/utils/common.py +1 -1
- xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
- xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
- xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
- xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
- xinference/types.py +2 -71
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
- xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
- xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
- xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
- xinference/web/ui/src/locales/en.json +7 -4
- xinference/web/ui/src/locales/zh.json +7 -4
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/METADATA +56 -36
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/RECORD +120 -121
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/WHEEL +1 -1
- xinference/core/image_interface.py +0 -377
- xinference/model/llm/transformers/compression.py +0 -258
- xinference/model/llm/transformers/yi_vl.py +0 -239
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
- xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
- xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
- xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
- /xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/__init__.py
CHANGED
|
@@ -57,7 +57,7 @@ from .llm_family import (
|
|
|
57
57
|
|
|
58
58
|
def check_format_with_engine(model_format, engine):
|
|
59
59
|
# only llama-cpp-python support and only support ggufv2
|
|
60
|
-
if model_format in ["ggufv2"] and engine
|
|
60
|
+
if model_format in ["ggufv2"] and engine not in ["llama.cpp", "vLLM"]:
|
|
61
61
|
return False
|
|
62
62
|
if model_format not in ["ggufv2"] and engine == "llama.cpp":
|
|
63
63
|
return False
|
|
@@ -128,8 +128,38 @@ def register_custom_model():
|
|
|
128
128
|
warnings.warn(f"{user_defined_llm_dir}/{f} has error, {e}")
|
|
129
129
|
|
|
130
130
|
|
|
131
|
+
def load_model_family_from_json(json_filename, target_families):
|
|
132
|
+
json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), json_filename)
|
|
133
|
+
for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
|
|
134
|
+
model_spec = LLMFamilyV1.parse_obj(json_obj)
|
|
135
|
+
target_families.append(model_spec)
|
|
136
|
+
|
|
137
|
+
# register chat_template
|
|
138
|
+
if (
|
|
139
|
+
"chat" in model_spec.model_ability
|
|
140
|
+
and isinstance(model_spec.chat_template, str)
|
|
141
|
+
and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
|
|
142
|
+
):
|
|
143
|
+
# note that the key is the model name,
|
|
144
|
+
# since there are multiple representations of the same prompt style name in json.
|
|
145
|
+
if model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE:
|
|
146
|
+
BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
|
|
147
|
+
"chat_template": model_spec.chat_template,
|
|
148
|
+
"stop_token_ids": model_spec.stop_token_ids,
|
|
149
|
+
"stop": model_spec.stop,
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
# register model family
|
|
153
|
+
if "chat" in model_spec.model_ability:
|
|
154
|
+
BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
|
|
155
|
+
else:
|
|
156
|
+
BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
|
|
157
|
+
if "tools" in model_spec.model_ability:
|
|
158
|
+
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
|
|
159
|
+
|
|
160
|
+
|
|
131
161
|
def _install():
|
|
132
|
-
from .llama_cpp.core import LlamaCppChatModel, LlamaCppModel, XllamaCppModel
|
|
162
|
+
from .llama_cpp.core import XllamaCppModel
|
|
133
163
|
from .lmdeploy.core import LMDeployChatModel, LMDeployModel
|
|
134
164
|
from .mlx.core import MLXChatModel, MLXModel, MLXVisionModel
|
|
135
165
|
from .sglang.core import SGLANGChatModel, SGLANGModel, SGLANGVisionModel
|
|
@@ -147,13 +177,12 @@ def _install():
|
|
|
147
177
|
from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
|
|
148
178
|
from .transformers.glm4v import Glm4VModel
|
|
149
179
|
from .transformers.glm_edge_v import GlmEdgeVModel
|
|
150
|
-
from .transformers.internlm2 import Internlm2PytorchChatModel
|
|
151
180
|
from .transformers.minicpmv25 import MiniCPMV25Model
|
|
152
181
|
from .transformers.minicpmv26 import MiniCPMV26Model
|
|
153
182
|
from .transformers.opt import OptPytorchModel
|
|
183
|
+
from .transformers.ovis2 import Ovis2ChatModel
|
|
154
184
|
from .transformers.qwen2_audio import Qwen2AudioChatModel
|
|
155
185
|
from .transformers.qwen_vl import QwenVLChatModel
|
|
156
|
-
from .transformers.yi_vl import YiVLChatModel
|
|
157
186
|
from .vllm.core import VLLMChatModel, VLLMModel, VLLMVisionModel
|
|
158
187
|
|
|
159
188
|
try:
|
|
@@ -167,8 +196,6 @@ def _install():
|
|
|
167
196
|
# register llm classes.
|
|
168
197
|
LLAMA_CLASSES.extend(
|
|
169
198
|
[
|
|
170
|
-
LlamaCppChatModel,
|
|
171
|
-
LlamaCppModel,
|
|
172
199
|
XllamaCppModel,
|
|
173
200
|
]
|
|
174
201
|
)
|
|
@@ -180,10 +207,8 @@ def _install():
|
|
|
180
207
|
[
|
|
181
208
|
ChatglmPytorchChatModel,
|
|
182
209
|
PytorchChatModel,
|
|
183
|
-
Internlm2PytorchChatModel,
|
|
184
210
|
QwenVLChatModel,
|
|
185
211
|
Qwen2AudioChatModel,
|
|
186
|
-
YiVLChatModel,
|
|
187
212
|
DeepSeekVLChatModel,
|
|
188
213
|
DeepSeekVL2ChatModel,
|
|
189
214
|
PytorchModel,
|
|
@@ -199,6 +224,7 @@ def _install():
|
|
|
199
224
|
CogAgentChatModel,
|
|
200
225
|
Gemma3TextChatModel,
|
|
201
226
|
Gemma3ChatModel,
|
|
227
|
+
Ovis2ChatModel,
|
|
202
228
|
]
|
|
203
229
|
)
|
|
204
230
|
if OmniLMMModel: # type: ignore
|
|
@@ -212,115 +238,14 @@ def _install():
|
|
|
212
238
|
SUPPORTED_ENGINES["MLX"] = MLX_CLASSES
|
|
213
239
|
SUPPORTED_ENGINES["LMDEPLOY"] = LMDEPLOY_CLASSES
|
|
214
240
|
|
|
215
|
-
|
|
216
|
-
|
|
241
|
+
load_model_family_from_json("llm_family.json", BUILTIN_LLM_FAMILIES)
|
|
242
|
+
load_model_family_from_json(
|
|
243
|
+
"llm_family_modelscope.json", BUILTIN_MODELSCOPE_LLM_FAMILIES
|
|
217
244
|
)
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
BUILTIN_LLM_FAMILIES.append(model_spec)
|
|
221
|
-
|
|
222
|
-
# register chat_template
|
|
223
|
-
if "chat" in model_spec.model_ability and isinstance(
|
|
224
|
-
model_spec.chat_template, str
|
|
225
|
-
):
|
|
226
|
-
# note that the key is the model name,
|
|
227
|
-
# since there are multiple representations of the same prompt style name in json.
|
|
228
|
-
BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
|
|
229
|
-
"chat_template": model_spec.chat_template,
|
|
230
|
-
"stop_token_ids": model_spec.stop_token_ids,
|
|
231
|
-
"stop": model_spec.stop,
|
|
232
|
-
}
|
|
233
|
-
# register model family
|
|
234
|
-
if "chat" in model_spec.model_ability:
|
|
235
|
-
BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
|
|
236
|
-
else:
|
|
237
|
-
BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
|
|
238
|
-
if "tools" in model_spec.model_ability:
|
|
239
|
-
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
|
|
240
|
-
|
|
241
|
-
modelscope_json_path = os.path.join(
|
|
242
|
-
os.path.dirname(os.path.abspath(__file__)), "llm_family_modelscope.json"
|
|
245
|
+
load_model_family_from_json(
|
|
246
|
+
"llm_family_openmind_hub.json", BUILTIN_OPENMIND_HUB_LLM_FAMILIES
|
|
243
247
|
)
|
|
244
|
-
|
|
245
|
-
model_spec = LLMFamilyV1.parse_obj(json_obj)
|
|
246
|
-
BUILTIN_MODELSCOPE_LLM_FAMILIES.append(model_spec)
|
|
247
|
-
|
|
248
|
-
# register prompt style, in case that we have something missed
|
|
249
|
-
# if duplicated with huggingface json, keep it as the huggingface style
|
|
250
|
-
if (
|
|
251
|
-
"chat" in model_spec.model_ability
|
|
252
|
-
and isinstance(model_spec.chat_template, str)
|
|
253
|
-
and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
|
|
254
|
-
):
|
|
255
|
-
BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
|
|
256
|
-
"chat_template": model_spec.chat_template,
|
|
257
|
-
"stop_token_ids": model_spec.stop_token_ids,
|
|
258
|
-
"stop": model_spec.stop,
|
|
259
|
-
}
|
|
260
|
-
# register model family
|
|
261
|
-
if "chat" in model_spec.model_ability:
|
|
262
|
-
BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
|
|
263
|
-
else:
|
|
264
|
-
BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
|
|
265
|
-
if "tools" in model_spec.model_ability:
|
|
266
|
-
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
|
|
267
|
-
|
|
268
|
-
openmind_hub_json_path = os.path.join(
|
|
269
|
-
os.path.dirname(os.path.abspath(__file__)), "llm_family_openmind_hub.json"
|
|
270
|
-
)
|
|
271
|
-
for json_obj in json.load(
|
|
272
|
-
codecs.open(openmind_hub_json_path, "r", encoding="utf-8")
|
|
273
|
-
):
|
|
274
|
-
model_spec = LLMFamilyV1.parse_obj(json_obj)
|
|
275
|
-
BUILTIN_OPENMIND_HUB_LLM_FAMILIES.append(model_spec)
|
|
276
|
-
|
|
277
|
-
# register prompt style, in case that we have something missed
|
|
278
|
-
# if duplicated with huggingface json, keep it as the huggingface style
|
|
279
|
-
|
|
280
|
-
if (
|
|
281
|
-
"chat" in model_spec.model_ability
|
|
282
|
-
and isinstance(model_spec.chat_template, str)
|
|
283
|
-
and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
|
|
284
|
-
):
|
|
285
|
-
BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
|
|
286
|
-
"chat_template": model_spec.chat_template,
|
|
287
|
-
"stop_token_ids": model_spec.stop_token_ids,
|
|
288
|
-
"stop": model_spec.stop,
|
|
289
|
-
}
|
|
290
|
-
# register model family
|
|
291
|
-
if "chat" in model_spec.model_ability:
|
|
292
|
-
BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
|
|
293
|
-
else:
|
|
294
|
-
BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
|
|
295
|
-
if "tools" in model_spec.model_ability:
|
|
296
|
-
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
|
|
297
|
-
|
|
298
|
-
csghub_json_path = os.path.join(
|
|
299
|
-
os.path.dirname(os.path.abspath(__file__)), "llm_family_csghub.json"
|
|
300
|
-
)
|
|
301
|
-
for json_obj in json.load(codecs.open(csghub_json_path, "r", encoding="utf-8")):
|
|
302
|
-
model_spec = LLMFamilyV1.parse_obj(json_obj)
|
|
303
|
-
BUILTIN_CSGHUB_LLM_FAMILIES.append(model_spec)
|
|
304
|
-
|
|
305
|
-
# register prompt style, in case that we have something missed
|
|
306
|
-
# if duplicated with huggingface json, keep it as the huggingface style
|
|
307
|
-
if (
|
|
308
|
-
"chat" in model_spec.model_ability
|
|
309
|
-
and isinstance(model_spec.chat_template, str)
|
|
310
|
-
and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
|
|
311
|
-
):
|
|
312
|
-
BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
|
|
313
|
-
"chat_template": model_spec.chat_template,
|
|
314
|
-
"stop_token_ids": model_spec.stop_token_ids,
|
|
315
|
-
"stop": model_spec.stop,
|
|
316
|
-
}
|
|
317
|
-
# register model family
|
|
318
|
-
if "chat" in model_spec.model_ability:
|
|
319
|
-
BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
|
|
320
|
-
else:
|
|
321
|
-
BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
|
|
322
|
-
if "tools" in model_spec.model_ability:
|
|
323
|
-
BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
|
|
248
|
+
load_model_family_from_json("llm_family_csghub.json", BUILTIN_CSGHUB_LLM_FAMILIES)
|
|
324
249
|
|
|
325
250
|
for llm_specs in [
|
|
326
251
|
BUILTIN_LLM_FAMILIES,
|
xinference/model/llm/core.py
CHANGED
|
@@ -17,6 +17,7 @@ import inspect
|
|
|
17
17
|
import logging
|
|
18
18
|
import os
|
|
19
19
|
import platform
|
|
20
|
+
import warnings
|
|
20
21
|
from abc import abstractmethod
|
|
21
22
|
from collections import defaultdict
|
|
22
23
|
from functools import lru_cache
|
|
@@ -65,6 +66,11 @@ class LLM(abc.ABC):
|
|
|
65
66
|
if kwargs:
|
|
66
67
|
raise ValueError(f"Unrecognized keyword arguments: {kwargs}")
|
|
67
68
|
|
|
69
|
+
@classmethod
|
|
70
|
+
@abstractmethod
|
|
71
|
+
def check_lib(cls) -> bool:
|
|
72
|
+
raise NotImplementedError
|
|
73
|
+
|
|
68
74
|
@staticmethod
|
|
69
75
|
def _is_darwin_and_apple_silicon():
|
|
70
76
|
return platform.system() == "Darwin" and platform.processor() == "arm"
|
|
@@ -117,16 +123,33 @@ class LLM(abc.ABC):
|
|
|
117
123
|
@classmethod
|
|
118
124
|
def match(
|
|
119
125
|
cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
|
|
126
|
+
) -> bool:
|
|
127
|
+
if not cls.check_lib():
|
|
128
|
+
return False
|
|
129
|
+
return cls.match_json(llm_family, llm_spec, quantization)
|
|
130
|
+
|
|
131
|
+
@classmethod
|
|
132
|
+
@abstractmethod
|
|
133
|
+
def match_json(
|
|
134
|
+
cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
|
|
120
135
|
) -> bool:
|
|
121
136
|
raise NotImplementedError
|
|
122
137
|
|
|
123
|
-
def prepare_parse_reasoning_content(
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
138
|
+
def prepare_parse_reasoning_content(
|
|
139
|
+
self, reasoning_content: bool, enable_thinking: bool = True
|
|
140
|
+
):
|
|
141
|
+
if "hybrid" not in self.model_family.model_ability and not enable_thinking:
|
|
142
|
+
enable_thinking = True
|
|
143
|
+
warnings.warn(
|
|
144
|
+
"enable_thinking cannot be disabled for non hybrid model, will be ignored"
|
|
129
145
|
)
|
|
146
|
+
# Initialize reasoning parser if model has reasoning ability
|
|
147
|
+
self.reasoning_parser = ReasoningParser( # type: ignore
|
|
148
|
+
reasoning_content,
|
|
149
|
+
self.model_family.reasoning_start_tag, # type: ignore
|
|
150
|
+
self.model_family.reasoning_end_tag, # type: ignore
|
|
151
|
+
enable_thinking=enable_thinking,
|
|
152
|
+
)
|
|
130
153
|
|
|
131
154
|
|
|
132
155
|
class LLMDescription(ModelDescription):
|