xinference 1.5.0.post2__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (137)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +107 -11
  3. xinference/client/restful/restful_client.py +51 -11
  4. xinference/constants.py +5 -1
  5. xinference/core/media_interface.py +758 -0
  6. xinference/core/model.py +49 -9
  7. xinference/core/supervisor.py +1 -1
  8. xinference/core/utils.py +1 -1
  9. xinference/core/worker.py +33 -39
  10. xinference/deploy/cmdline.py +17 -0
  11. xinference/deploy/utils.py +0 -3
  12. xinference/model/audio/__init__.py +16 -27
  13. xinference/model/audio/core.py +2 -1
  14. xinference/model/audio/cosyvoice.py +4 -2
  15. xinference/model/audio/model_spec.json +63 -46
  16. xinference/model/audio/model_spec_modelscope.json +31 -14
  17. xinference/model/embedding/__init__.py +16 -24
  18. xinference/model/image/__init__.py +15 -25
  19. xinference/model/llm/__init__.py +40 -115
  20. xinference/model/llm/core.py +29 -6
  21. xinference/model/llm/llama_cpp/core.py +30 -347
  22. xinference/model/llm/llm_family.json +1674 -2203
  23. xinference/model/llm/llm_family.py +71 -7
  24. xinference/model/llm/llm_family_csghub.json +0 -32
  25. xinference/model/llm/llm_family_modelscope.json +1838 -2016
  26. xinference/model/llm/llm_family_openmind_hub.json +19 -325
  27. xinference/model/llm/lmdeploy/core.py +7 -2
  28. xinference/model/llm/mlx/core.py +23 -7
  29. xinference/model/llm/reasoning_parser.py +281 -5
  30. xinference/model/llm/sglang/core.py +39 -11
  31. xinference/model/llm/transformers/chatglm.py +9 -2
  32. xinference/model/llm/transformers/cogagent.py +10 -12
  33. xinference/model/llm/transformers/cogvlm2.py +6 -3
  34. xinference/model/llm/transformers/cogvlm2_video.py +3 -6
  35. xinference/model/llm/transformers/core.py +58 -60
  36. xinference/model/llm/transformers/deepseek_v2.py +4 -2
  37. xinference/model/llm/transformers/deepseek_vl.py +10 -4
  38. xinference/model/llm/transformers/deepseek_vl2.py +9 -4
  39. xinference/model/llm/transformers/gemma3.py +4 -5
  40. xinference/model/llm/transformers/glm4v.py +3 -21
  41. xinference/model/llm/transformers/glm_edge_v.py +3 -20
  42. xinference/model/llm/transformers/intern_vl.py +3 -6
  43. xinference/model/llm/transformers/internlm2.py +1 -1
  44. xinference/model/llm/transformers/minicpmv25.py +4 -2
  45. xinference/model/llm/transformers/minicpmv26.py +5 -3
  46. xinference/model/llm/transformers/omnilmm.py +1 -1
  47. xinference/model/llm/transformers/opt.py +1 -1
  48. xinference/model/llm/transformers/ovis2.py +302 -0
  49. xinference/model/llm/transformers/qwen-omni.py +8 -1
  50. xinference/model/llm/transformers/qwen2_audio.py +3 -1
  51. xinference/model/llm/transformers/qwen2_vl.py +5 -1
  52. xinference/model/llm/transformers/qwen_vl.py +5 -2
  53. xinference/model/llm/utils.py +96 -45
  54. xinference/model/llm/vllm/core.py +108 -24
  55. xinference/model/llm/vllm/distributed_executor.py +8 -7
  56. xinference/model/llm/vllm/xavier/allocator.py +1 -1
  57. xinference/model/llm/vllm/xavier/block_manager.py +1 -1
  58. xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
  59. xinference/model/llm/vllm/xavier/executor.py +1 -1
  60. xinference/model/llm/vllm/xavier/test/test_xavier.py +2 -11
  61. xinference/model/rerank/__init__.py +13 -24
  62. xinference/model/video/__init__.py +15 -25
  63. xinference/model/video/core.py +3 -3
  64. xinference/model/video/diffusers.py +157 -13
  65. xinference/model/video/model_spec.json +100 -0
  66. xinference/model/video/model_spec_modelscope.json +104 -0
  67. xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
  68. xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
  69. xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
  70. xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
  71. xinference/thirdparty/cosyvoice/bin/train.py +7 -2
  72. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
  73. xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
  74. xinference/thirdparty/cosyvoice/cli/model.py +140 -155
  75. xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
  76. xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
  77. xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
  78. xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
  79. xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
  80. xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
  81. xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
  82. xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
  83. xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
  84. xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
  85. xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
  86. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
  87. xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
  88. xinference/thirdparty/cosyvoice/utils/common.py +1 -1
  89. xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
  90. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
  91. xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
  92. xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
  93. xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
  94. xinference/types.py +2 -71
  95. xinference/web/ui/build/asset-manifest.json +6 -6
  96. xinference/web/ui/build/index.html +1 -1
  97. xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
  98. xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
  99. xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
  100. xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
  101. xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
  102. xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
  103. xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
  104. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
  105. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
  106. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
  107. xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
  108. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
  109. xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
  110. xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
  111. xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
  112. xinference/web/ui/src/locales/en.json +7 -4
  113. xinference/web/ui/src/locales/zh.json +7 -4
  114. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/METADATA +56 -36
  115. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/RECORD +120 -121
  116. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/WHEEL +1 -1
  117. xinference/core/image_interface.py +0 -377
  118. xinference/model/llm/transformers/compression.py +0 -258
  119. xinference/model/llm/transformers/yi_vl.py +0 -239
  120. xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
  121. xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
  122. xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
  123. xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
  124. xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
  125. xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
  126. xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
  127. xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
  128. xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
  129. xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
  130. xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
  131. xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
  132. xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
  133. xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
  134. /xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
  135. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/entry_points.txt +0 -0
  136. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/licenses/LICENSE +0 -0
  137. {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/top_level.txt +0 -0
@@ -57,7 +57,7 @@ from .llm_family import (
 
 def check_format_with_engine(model_format, engine):
     # only llama-cpp-python support and only support ggufv2
-    if model_format in ["ggufv2"] and engine != "llama.cpp":
+    if model_format in ["ggufv2"] and engine not in ["llama.cpp", "vLLM"]:
         return False
     if model_format not in ["ggufv2"] and engine == "llama.cpp":
         return False
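
For illustration, a self-contained sketch of the updated check and its effect. The trailing return True is an assumption, since this hunk does not show the rest of the function; the visible change is that GGUF (ggufv2) weights are no longer rejected when the vLLM engine is selected.

def check_format_with_engine(model_format, engine):
    # ggufv2 weights are accepted by llama.cpp and, as of 1.6.0, vLLM
    if model_format in ["ggufv2"] and engine not in ["llama.cpp", "vLLM"]:
        return False
    if model_format not in ["ggufv2"] and engine == "llama.cpp":
        return False
    return True  # assumed fall-through for all other combinations

print(check_format_with_engine("ggufv2", "vLLM"))          # True (newly allowed)
print(check_format_with_engine("ggufv2", "Transformers"))  # False
print(check_format_with_engine("pytorch", "llama.cpp"))    # False
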
@@ -128,8 +128,38 @@ def register_custom_model():
             warnings.warn(f"{user_defined_llm_dir}/{f} has error, {e}")
 
 
+def load_model_family_from_json(json_filename, target_families):
+    json_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), json_filename)
+    for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
+        model_spec = LLMFamilyV1.parse_obj(json_obj)
+        target_families.append(model_spec)
+
+        # register chat_template
+        if (
+            "chat" in model_spec.model_ability
+            and isinstance(model_spec.chat_template, str)
+            and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
+        ):
+            # note that the key is the model name,
+            # since there are multiple representations of the same prompt style name in json.
+            if model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE:
+                BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
+                    "chat_template": model_spec.chat_template,
+                    "stop_token_ids": model_spec.stop_token_ids,
+                    "stop": model_spec.stop,
+                }
+
+        # register model family
+        if "chat" in model_spec.model_ability:
+            BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
+        else:
+            BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
+        if "tools" in model_spec.model_ability:
+            BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
+
+
 def _install():
-    from .llama_cpp.core import LlamaCppChatModel, LlamaCppModel, XllamaCppModel
+    from .llama_cpp.core import XllamaCppModel
     from .lmdeploy.core import LMDeployChatModel, LMDeployModel
     from .mlx.core import MLXChatModel, MLXModel, MLXVisionModel
     from .sglang.core import SGLANGChatModel, SGLANGModel, SGLANGVisionModel
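
The new load_model_family_from_json helper centralizes the per-hub loading and registration logic that a later hunk (-212,115) removes from _install. A toy sketch of the de-duplication rule it keeps, using plain dicts instead of real LLMFamilyV1 objects (the model name and template strings below are made up): the first hub to register a model name, normally the Hugging Face JSON because it is loaded first, keeps its chat template.

# Toy registry standing in for BUILTIN_LLM_PROMPT_STYLE; illustrative only.
PROMPT_STYLE = {}

def register_prompt_style(spec):
    # mirrors the guard in load_model_family_from_json: only register a chat
    # template if no earlier hub has already registered this model name
    if "chat" in spec["model_ability"] and spec["model_name"] not in PROMPT_STYLE:
        PROMPT_STYLE[spec["model_name"]] = {"chat_template": spec["chat_template"]}

register_prompt_style(
    {"model_name": "demo-chat", "model_ability": ["chat"], "chat_template": "hf-template"}
)
register_prompt_style(
    {"model_name": "demo-chat", "model_ability": ["chat"], "chat_template": "modelscope-template"}
)
print(PROMPT_STYLE["demo-chat"]["chat_template"])  # -> hf-template
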
@@ -147,13 +177,12 @@ def _install():
     from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
     from .transformers.glm4v import Glm4VModel
     from .transformers.glm_edge_v import GlmEdgeVModel
-    from .transformers.internlm2 import Internlm2PytorchChatModel
     from .transformers.minicpmv25 import MiniCPMV25Model
     from .transformers.minicpmv26 import MiniCPMV26Model
     from .transformers.opt import OptPytorchModel
+    from .transformers.ovis2 import Ovis2ChatModel
     from .transformers.qwen2_audio import Qwen2AudioChatModel
     from .transformers.qwen_vl import QwenVLChatModel
-    from .transformers.yi_vl import YiVLChatModel
     from .vllm.core import VLLMChatModel, VLLMModel, VLLMVisionModel
 
     try:
@@ -167,8 +196,6 @@ def _install():
     # register llm classes.
     LLAMA_CLASSES.extend(
         [
-            LlamaCppChatModel,
-            LlamaCppModel,
             XllamaCppModel,
         ]
     )
@@ -180,10 +207,8 @@ def _install():
         [
             ChatglmPytorchChatModel,
             PytorchChatModel,
-            Internlm2PytorchChatModel,
             QwenVLChatModel,
             Qwen2AudioChatModel,
-            YiVLChatModel,
             DeepSeekVLChatModel,
             DeepSeekVL2ChatModel,
             PytorchModel,
@@ -199,6 +224,7 @@ def _install():
             CogAgentChatModel,
             Gemma3TextChatModel,
             Gemma3ChatModel,
+            Ovis2ChatModel,
         ]
     )
     if OmniLMMModel:  # type: ignore
@@ -212,115 +238,14 @@ def _install():
     SUPPORTED_ENGINES["MLX"] = MLX_CLASSES
     SUPPORTED_ENGINES["LMDEPLOY"] = LMDEPLOY_CLASSES
 
-    json_path = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "llm_family.json"
+    load_model_family_from_json("llm_family.json", BUILTIN_LLM_FAMILIES)
+    load_model_family_from_json(
+        "llm_family_modelscope.json", BUILTIN_MODELSCOPE_LLM_FAMILIES
     )
-    for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
-        model_spec = LLMFamilyV1.parse_obj(json_obj)
-        BUILTIN_LLM_FAMILIES.append(model_spec)
-
-        # register chat_template
-        if "chat" in model_spec.model_ability and isinstance(
-            model_spec.chat_template, str
-        ):
-            # note that the key is the model name,
-            # since there are multiple representations of the same prompt style name in json.
-            BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
-                "chat_template": model_spec.chat_template,
-                "stop_token_ids": model_spec.stop_token_ids,
-                "stop": model_spec.stop,
-            }
-        # register model family
-        if "chat" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
-        else:
-            BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
-        if "tools" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
-
-    modelscope_json_path = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "llm_family_modelscope.json"
+    load_model_family_from_json(
+        "llm_family_openmind_hub.json", BUILTIN_OPENMIND_HUB_LLM_FAMILIES
     )
-    for json_obj in json.load(codecs.open(modelscope_json_path, "r", encoding="utf-8")):
-        model_spec = LLMFamilyV1.parse_obj(json_obj)
-        BUILTIN_MODELSCOPE_LLM_FAMILIES.append(model_spec)
-
-        # register prompt style, in case that we have something missed
-        # if duplicated with huggingface json, keep it as the huggingface style
-        if (
-            "chat" in model_spec.model_ability
-            and isinstance(model_spec.chat_template, str)
-            and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
-        ):
-            BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
-                "chat_template": model_spec.chat_template,
-                "stop_token_ids": model_spec.stop_token_ids,
-                "stop": model_spec.stop,
-            }
-        # register model family
-        if "chat" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
-        else:
-            BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
-        if "tools" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
-
-    openmind_hub_json_path = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "llm_family_openmind_hub.json"
-    )
-    for json_obj in json.load(
-        codecs.open(openmind_hub_json_path, "r", encoding="utf-8")
-    ):
-        model_spec = LLMFamilyV1.parse_obj(json_obj)
-        BUILTIN_OPENMIND_HUB_LLM_FAMILIES.append(model_spec)
-
-        # register prompt style, in case that we have something missed
-        # if duplicated with huggingface json, keep it as the huggingface style
-
-        if (
-            "chat" in model_spec.model_ability
-            and isinstance(model_spec.chat_template, str)
-            and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
-        ):
-            BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
-                "chat_template": model_spec.chat_template,
-                "stop_token_ids": model_spec.stop_token_ids,
-                "stop": model_spec.stop,
-            }
-        # register model family
-        if "chat" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
-        else:
-            BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
-        if "tools" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
-
-    csghub_json_path = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "llm_family_csghub.json"
-    )
-    for json_obj in json.load(codecs.open(csghub_json_path, "r", encoding="utf-8")):
-        model_spec = LLMFamilyV1.parse_obj(json_obj)
-        BUILTIN_CSGHUB_LLM_FAMILIES.append(model_spec)
-
-        # register prompt style, in case that we have something missed
-        # if duplicated with huggingface json, keep it as the huggingface style
-        if (
-            "chat" in model_spec.model_ability
-            and isinstance(model_spec.chat_template, str)
-            and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
-        ):
-            BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
-                "chat_template": model_spec.chat_template,
-                "stop_token_ids": model_spec.stop_token_ids,
-                "stop": model_spec.stop,
-            }
-        # register model family
-        if "chat" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
-        else:
-            BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
-        if "tools" in model_spec.model_ability:
-            BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
+    load_model_family_from_json("llm_family_csghub.json", BUILTIN_CSGHUB_LLM_FAMILIES)
 
     for llm_specs in [
         BUILTIN_LLM_FAMILIES,
@@ -17,6 +17,7 @@ import inspect
 import logging
 import os
 import platform
+import warnings
 from abc import abstractmethod
 from collections import defaultdict
 from functools import lru_cache
@@ -65,6 +66,11 @@ class LLM(abc.ABC):
         if kwargs:
             raise ValueError(f"Unrecognized keyword arguments: {kwargs}")
 
+    @classmethod
+    @abstractmethod
+    def check_lib(cls) -> bool:
+        raise NotImplementedError
+
     @staticmethod
     def _is_darwin_and_apple_silicon():
         return platform.system() == "Darwin" and platform.processor() == "arm"
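
check_lib is a new abstract hook; together with match_json in the next hunk it separates "is the backend library installed" from "does this spec fit the backend". A hypothetical subclass might implement it roughly as below (a sketch only; the class name, package name, and format rules are illustrative assumptions, not the actual 1.6.0 implementations):

import importlib.util

class SomeEngineModel:
    """Hypothetical backend class illustrating the new check_lib / match_json split."""

    _lib = "vllm"  # assumed backend package name, for illustration only

    @classmethod
    def check_lib(cls) -> bool:
        # cheap probe run before any spec matching: is the package importable?
        return importlib.util.find_spec(cls._lib) is not None

    @classmethod
    def match_json(cls, llm_family, llm_spec, quantization) -> bool:
        # engine-specific rules about format/quantization would live here
        return llm_spec.model_format in ("pytorch", "gptq", "awq")

print(SomeEngineModel.check_lib())  # False unless vllm happens to be installed
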
@@ -117,16 +123,33 @@ class LLM(abc.ABC):
     @classmethod
     def match(
         cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
+    ) -> bool:
+        if not cls.check_lib():
+            return False
+        return cls.match_json(llm_family, llm_spec, quantization)
+
+    @classmethod
+    @abstractmethod
+    def match_json(
+        cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
     ) -> bool:
         raise NotImplementedError
 
-    def prepare_parse_reasoning_content(self, reasoning_content):
-        # Initialize reasoning parser if model has reasoning ability
-        if "reasoning" in self.model_family.model_ability and reasoning_content:
-            self.reasoning_parser = ReasoningParser(
-                self.model_family.reasoning_start_tag,
-                self.model_family.reasoning_end_tag,
+    def prepare_parse_reasoning_content(
+        self, reasoning_content: bool, enable_thinking: bool = True
+    ):
+        if "hybrid" not in self.model_family.model_ability and not enable_thinking:
+            enable_thinking = True
+            warnings.warn(
+                "enable_thinking cannot be disabled for non hybrid model, will be ignored"
            )
+        # Initialize reasoning parser if model has reasoning ability
+        self.reasoning_parser = ReasoningParser(  # type: ignore
+            reasoning_content,
+            self.model_family.reasoning_start_tag,  # type: ignore
+            self.model_family.reasoning_end_tag,  # type: ignore
+            enable_thinking=enable_thinking,
+        )
 
 
 class LLMDescription(ModelDescription):
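
The reworked prepare_parse_reasoning_content now always builds a ReasoningParser and adds an enable_thinking switch that only models with the "hybrid" ability may turn off. A standalone toy reproduction of just that guard (not the real method, which also wires in the family's reasoning start and end tags):

import warnings

def resolve_enable_thinking(model_ability, enable_thinking=True):
    # mirrors the new guard: non-hybrid models cannot disable thinking
    if "hybrid" not in model_ability and not enable_thinking:
        warnings.warn(
            "enable_thinking cannot be disabled for non hybrid model, will be ignored"
        )
        return True
    return enable_thinking

print(resolve_enable_thinking(["chat", "reasoning"], enable_thinking=False))            # True, with a warning
print(resolve_enable_thinking(["chat", "reasoning", "hybrid"], enable_thinking=False))  # False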