xinference 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59)
  1. xinference/_compat.py +1 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +4 -0
  4. xinference/core/model.py +23 -3
  5. xinference/core/supervisor.py +6 -0
  6. xinference/core/worker.py +54 -11
  7. xinference/model/llm/__init__.py +4 -2
  8. xinference/model/llm/core.py +1 -0
  9. xinference/model/llm/llama_cpp/core.py +6 -1
  10. xinference/model/llm/llm_family.json +117 -1
  11. xinference/model/llm/llm_family_modelscope.json +125 -1
  12. xinference/model/llm/reasoning_parser.py +3 -3
  13. xinference/model/llm/sglang/core.py +111 -13
  14. xinference/model/llm/transformers/core.py +1 -0
  15. xinference/model/llm/transformers/deepseek_vl.py +1 -1
  16. xinference/model/llm/transformers/deepseek_vl2.py +287 -0
  17. xinference/model/llm/utils.py +26 -14
  18. xinference/model/llm/vllm/core.py +149 -8
  19. xinference/model/llm/vllm/distributed_executor.py +314 -0
  20. xinference/model/rerank/core.py +16 -11
  21. xinference/thirdparty/deepseek_vl2/__init__.py +31 -0
  22. xinference/thirdparty/deepseek_vl2/models/__init__.py +26 -0
  23. xinference/thirdparty/deepseek_vl2/models/configuration_deepseek.py +210 -0
  24. xinference/thirdparty/deepseek_vl2/models/conversation.py +310 -0
  25. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek.py +1975 -0
  26. xinference/thirdparty/deepseek_vl2/models/modeling_deepseek_vl_v2.py +697 -0
  27. xinference/thirdparty/deepseek_vl2/models/processing_deepseek_vl_v2.py +675 -0
  28. xinference/thirdparty/deepseek_vl2/models/siglip_vit.py +661 -0
  29. xinference/thirdparty/deepseek_vl2/serve/__init__.py +0 -0
  30. xinference/thirdparty/deepseek_vl2/serve/app_modules/__init__.py +0 -0
  31. xinference/thirdparty/deepseek_vl2/serve/app_modules/gradio_utils.py +83 -0
  32. xinference/thirdparty/deepseek_vl2/serve/app_modules/overwrites.py +81 -0
  33. xinference/thirdparty/deepseek_vl2/serve/app_modules/presets.py +115 -0
  34. xinference/thirdparty/deepseek_vl2/serve/app_modules/utils.py +333 -0
  35. xinference/thirdparty/deepseek_vl2/serve/assets/Kelpy-Codos.js +100 -0
  36. xinference/thirdparty/deepseek_vl2/serve/assets/avatar.png +0 -0
  37. xinference/thirdparty/deepseek_vl2/serve/assets/custom.css +355 -0
  38. xinference/thirdparty/deepseek_vl2/serve/assets/custom.js +22 -0
  39. xinference/thirdparty/deepseek_vl2/serve/assets/favicon.ico +0 -0
  40. xinference/thirdparty/deepseek_vl2/serve/assets/simsun.ttc +0 -0
  41. xinference/thirdparty/deepseek_vl2/serve/inference.py +197 -0
  42. xinference/thirdparty/deepseek_vl2/utils/__init__.py +18 -0
  43. xinference/thirdparty/deepseek_vl2/utils/io.py +80 -0
  44. xinference/web/ui/build/asset-manifest.json +3 -3
  45. xinference/web/ui/build/index.html +1 -1
  46. xinference/web/ui/build/static/js/{main.3cea968e.js → main.5ca4eea1.js} +3 -3
  47. xinference/web/ui/build/static/js/main.5ca4eea1.js.map +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/0f0967acaec5df1d45b80010949c258d64297ebbb0f44b8bb3afcbd45c6f0ec4.json +1 -0
  49. xinference/web/ui/node_modules/.cache/babel-loader/68249645124f37d01eef83b1d897e751f895bea919b6fb466f907c1f87cebc84.json +1 -0
  50. {xinference-1.4.0.dist-info → xinference-1.4.1.dist-info}/METADATA +4 -4
  51. {xinference-1.4.0.dist-info → xinference-1.4.1.dist-info}/RECORD +56 -31
  52. xinference/web/ui/build/static/js/main.3cea968e.js.map +0 -1
  53. xinference/web/ui/node_modules/.cache/babel-loader/7f59e45e3f268ab8a4788b6fb024cf8dab088736dff22f5a3a39c122a83ab930.json +0 -1
  54. xinference/web/ui/node_modules/.cache/babel-loader/dcd60488509450bfff37bfff56de2c096d51de17dd00ec60d4db49c8b483ada1.json +0 -1
  55. /xinference/web/ui/build/static/js/{main.3cea968e.js.LICENSE.txt → main.5ca4eea1.js.LICENSE.txt} +0 -0
  56. {xinference-1.4.0.dist-info → xinference-1.4.1.dist-info}/LICENSE +0 -0
  57. {xinference-1.4.0.dist-info → xinference-1.4.1.dist-info}/WHEEL +0 -0
  58. {xinference-1.4.0.dist-info → xinference-1.4.1.dist-info}/entry_points.txt +0 -0
  59. {xinference-1.4.0.dist-info → xinference-1.4.1.dist-info}/top_level.txt +0 -0
xinference/_compat.py CHANGED
@@ -102,6 +102,7 @@ class CreateChatCompletionOpenAI(BaseModel):
  frequency_penalty: Optional[float]
  logit_bias: Optional[Dict[str, int]]
  logprobs: Optional[bool]
+ max_completion_tokens: Optional[int]
  max_tokens: Optional[int]
  n: Optional[int]
  parallel_tool_calls: Optional[bool]
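The new `max_completion_tokens` field tracks OpenAI's successor to `max_tokens` for chat completions, so newer OpenAI SDKs can call Xinference without tripping request validation. A minimal client-side sketch, assuming a server on the default port and a placeholder model uid:

    # Hedged sketch: exercises the new max_completion_tokens field through
    # Xinference's OpenAI-compatible endpoint; URL and model uid are placeholders.
    import openai

    client = openai.OpenAI(base_url="http://127.0.0.1:9997/v1", api_key="none")
    resp = client.chat.completions.create(
        model="my-llm",  # hypothetical model uid
        messages=[{"role": "user", "content": "Hello"}],
        max_completion_tokens=128,  # forwarded as max_tokens server-side
    )
    print(resp.choices[0].message.content)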
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json

  version_json = '''
  {
- "date": "2025-03-21T14:33:52+0800",
+ "date": "2025-04-03T21:26:30+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "ac88d425e3d5fc12166e22c4032286327871f5f2",
- "version": "1.4.0"
+ "full-revisionid": "23260be3b917e7a2e8381927721ed3de815c0a99",
+ "version": "1.4.1"
  }
  ''' # END VERSION_JSON

xinference/api/restful_api.py CHANGED
@@ -1952,6 +1952,7 @@ class RESTfulAPI(CancelMixin):
  "logit_bias",
  "logit_bias_type",
  "user",
+ "max_completion_tokens",
  }

  raw_kwargs = {k: v for k, v in raw_body.items() if k not in exclude}
@@ -1964,6 +1965,9 @@ class RESTfulAPI(CancelMixin):
  if body.max_tokens is None:
  kwargs["max_tokens"] = max_tokens_field.default

+ if body.max_completion_tokens is not None:
+ kwargs["max_tokens"] = body.max_completion_tokens
+
  if body.logit_bias is not None:
  raise HTTPException(status_code=501, detail="Not implemented")
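Taken together with the `_compat.py` change, the handler now resolves the token budget in three steps, with the newer field winning when both are present. A rough sketch of the resulting precedence (names follow the diff; the surrounding request-parsing code is elided):

    # Sketch of the precedence implemented above.
    kwargs["max_tokens"] = body.max_tokens                 # 1. explicit max_tokens
    if body.max_tokens is None:
        kwargs["max_tokens"] = max_tokens_field.default    # 2. server default
    if body.max_completion_tokens is not None:
        kwargs["max_tokens"] = body.max_completion_tokens  # 3. newer field wins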
xinference/core/model.py CHANGED
@@ -185,7 +185,7 @@ class ModelActor(xo.StatelessActor, CancelMixin):
  )

  if hasattr(self._model, "stop") and callable(self._model.stop):
- self._model.stop()
+ await asyncio.to_thread(self._model.stop)

  if isinstance(self._model, LLMVLLMModel):
  if self._transfer_ref is not None:
@@ -284,6 +284,8 @@ class ModelActor(xo.StatelessActor, CancelMixin):
  async def __post_create__(self):
  self._loop = asyncio.get_running_loop()

+ logger.debug("Starting ModelActor at %s, uid: %s", self.address, self.uid)
+
  self._handle_pending_requests_task = asyncio.create_task(
  self._handle_pending_requests()
  )
@@ -463,7 +465,9 @@ class ModelActor(xo.StatelessActor, CancelMixin):
  while True:
  i += 1
  try:
- self._model.load()
+ if hasattr(self._model, "set_loop"):
+ self._model.set_loop(asyncio.get_running_loop())
+ await asyncio.to_thread(self._model.load)
  if hasattr(self._model, "driver_info"):
  self._driver_info = self._model.driver_info
  break
@@ -490,7 +494,23 @@ class ModelActor(xo.StatelessActor, CancelMixin):

  async def wait_for_load(self):
  if hasattr(self._model, "wait_for_load"):
- self._model.wait_for_load()
+ await asyncio.to_thread(self._model.wait_for_load)
+
+ def need_create_pools(self):
+ return getattr(self._model, "need_create_pools", False)
+
+ def set_pool_addresses(self, pool_addresses: List[str]):
+ if hasattr(self._model, "set_pool_addresses"):
+ self._model.set_pool_addresses(pool_addresses)
+
+ def get_pool_addresses(self) -> Optional[List[str]]:
+ if hasattr(self._model, "get_pool_addresses"):
+ return self._model.get_pool_addresses()
+ return None
+
+ def set_worker_addresses(self, shard: int, worker_addresses: List[str]):
+ if hasattr(self._model, "set_worker_addresses"):
+ self._model.set_worker_addresses(shard, worker_addresses)

  def model_uid(self):
  return (
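`load`, `stop`, and `wait_for_load` are blocking calls, so routing them through `asyncio.to_thread` keeps the actor's event loop free to serve other requests while weights load or a backend shuts down. A self-contained sketch of the pattern (the `slow_load` function is illustrative):

    import asyncio
    import time

    def slow_load() -> str:
        # Stands in for a blocking model.load().
        time.sleep(2)
        return "weights ready"

    async def main() -> None:
        # The loop stays responsive: this sleep completes while
        # slow_load() runs in a worker thread.
        heartbeat = asyncio.create_task(asyncio.sleep(0.1))
        result = await asyncio.to_thread(slow_load)
        await heartbeat
        print(result)

    asyncio.run(main())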
xinference/core/supervisor.py CHANGED
@@ -1097,6 +1097,7 @@ class SupervisorActor(xo.StatelessActor):
  xavier_config=xavier_config,
  **kwargs,
  )
+ await worker_ref.wait_for_load(_replica_model_uid)
  self._replica_model_uid_to_worker[_replica_model_uid] = worker_ref
  return subpool_address

@@ -1242,6 +1243,11 @@ class SupervisorActor(xo.StatelessActor):
  available_workers.append(worker_ip)

  async def _launch_model():
+ # Validate n_worker; reject requests that ask for more workers than are available.
+ if n_worker > len(available_workers):
+ raise ValueError(
+ "n_worker cannot be larger than the number of available workers."
+ )
  try:
  for _idx, rep_model_uid in enumerate(
  iter_replica_model_uid(model_uid, replica)
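With this guard, a launch that requests more shards than there are live workers fails fast with a clear error instead of failing partway through. A hedged client-side sketch (the model name is illustrative, and the exact `launch_model` signature should be checked against the installed client):

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")  # placeholder endpoint
    # With only two workers alive, asking for three now raises
    # "n_worker cannot be larger than the number of available workers."
    client.launch_model(
        model_name="qwen2.5-instruct",  # illustrative model
        model_engine="vllm",
        n_worker=3,
    )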
xinference/core/worker.py CHANGED
@@ -874,7 +874,7 @@ class WorkerActor(xo.StatelessActor):
  subpool_address, devices = await self._create_subpool(
  model_uid, model_type, n_gpu=n_gpu, gpu_idx=gpu_idx
  )
-
+ all_subpool_addresses = [subpool_address]
  try:
  xavier_config: Optional[Dict] = kwargs.pop("xavier_config", None)
  if xavier_config is not None:
@@ -885,7 +885,7 @@ class WorkerActor(xo.StatelessActor):
  # add a few kwargs
  model_kwargs.update(
  dict(
- address=self.address,
+ address=subpool_address,
  n_worker=n_worker,
  shard=shard,
  driver_info=driver_info,
@@ -923,11 +923,28 @@ class WorkerActor(xo.StatelessActor):
  shard=shard,
  driver_info=driver_info,
  )
+ if await model_ref.need_create_pools() and (
+ len(devices) > 1 or n_worker > 1  # type: ignore
+ ):
+ coros = []
+ env_name = get_available_device_env_name() or "CUDA_VISIBLE_DEVICES"
+ env_value = ",".join(devices)
+ for device in devices:
+ coros.append(
+ self._main_pool.append_sub_pool(
+ env={env_name: env_value},
+ start_method=self._get_start_method(),
+ )
+ )
+ pool_addresses = await asyncio.gather(*coros)
+ all_subpool_addresses.extend(pool_addresses)
+ await model_ref.set_pool_addresses(pool_addresses)
  await model_ref.load()
  except:
  logger.error(f"Failed to load model {model_uid}", exc_info=True)
  self.release_devices(model_uid=model_uid)
- await self._main_pool.remove_sub_pool(subpool_address)
+ for addr in all_subpool_addresses:
+ await self._main_pool.remove_sub_pool(addr)
  raise
  self._model_uid_to_model[model_uid] = model_ref
  self._model_uid_to_model_spec[model_uid] = model_description
@@ -994,15 +1011,36 @@ class WorkerActor(xo.StatelessActor):
  if model_ref is None:
  logger.debug("Model not found, uid: %s", model_uid)

+ pool_addresses = None
+ if model_ref is not None:
+ try:
+ # Pool addresses exist only if model.need_create_pools() was true.
+ pool_addresses = await model_ref.get_pool_addresses()
+ except Exception as e:
+ # The process may already be gone; ignore the failure.
+ logger.debug("Failed to get pool addresses, error: %s", e)
+
  try:
- await xo.destroy_actor(model_ref)
+ logger.debug("Start to destroy model actor: %s", model_ref)
+ coro = xo.destroy_actor(model_ref)
+ await asyncio.wait_for(coro, timeout=5)
  except Exception as e:
  logger.debug(
  "Destroy model actor failed, model uid: %s, error: %s", model_uid, e
  )
  try:
+ to_remove_addresses = []
  subpool_address = self._model_uid_to_addr[model_uid]
- await self._main_pool.remove_sub_pool(subpool_address, force=True)
+ to_remove_addresses.append(subpool_address)
+ if pool_addresses:
+ to_remove_addresses.extend(pool_addresses)
+ logger.debug("Remove sub pools: %s", to_remove_addresses)
+ coros = []
+ for to_remove_addr in to_remove_addresses:
+ coros.append(
+ self._main_pool.remove_sub_pool(to_remove_addr, force=True)
+ )
+ await asyncio.gather(*coros)
  except Exception as e:
  logger.debug(
  "Remove sub pool failed, model uid: %s, error: %s", model_uid, e
@@ -1204,18 +1242,23 @@ class WorkerActor(xo.StatelessActor):
  model_ref = self._model_uid_to_model[rep_model_uid]
  await model_ref.start_transfer_for_vllm(rank_addresses)

- @log_async(logger=logger, level=logging.INFO)
- async def launch_rank0_model(
- self, rep_model_uid: str, xavier_config: Dict[str, Any]
- ) -> Tuple[str, int]:
- from ..model.llm.vllm.xavier.collective_manager import Rank0ModelActor
-
+ @staticmethod
+ def _get_start_method():
  if os.name != "nt" and platform.system() != "Darwin":
  # Linux
  start_method = "forkserver"
  else:
  # Windows and macOS
  start_method = "spawn"
+ return start_method
+
+ @log_async(logger=logger, level=logging.INFO)
+ async def launch_rank0_model(
+ self, rep_model_uid: str, xavier_config: Dict[str, Any]
+ ) -> Tuple[str, int]:
+ from ..model.llm.vllm.xavier.collective_manager import Rank0ModelActor
+
+ start_method = self._get_start_method()
  subpool_address = await self._main_pool.append_sub_pool(
  start_method=start_method
  )
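Factoring the platform check into `_get_start_method` lets the model subpool and the new per-device pools pick the same multiprocessing start method. A standalone sketch of the same selection logic:

    import multiprocessing as mp
    import os
    import platform

    def pick_start_method() -> str:
        # Mirrors the helper above: forkserver on Linux,
        # spawn on Windows and macOS. The name is illustrative.
        if os.name != "nt" and platform.system() != "Darwin":
            return "forkserver"
        return "spawn"

    if __name__ == "__main__":
        ctx = mp.get_context(pick_start_method())
        p = ctx.Process(target=print, args=("child started",))
        p.start()
        p.join()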
xinference/model/llm/__init__.py CHANGED
@@ -132,7 +132,7 @@ def _install():
  from .llama_cpp.core import LlamaCppChatModel, LlamaCppModel, XllamaCppModel
  from .lmdeploy.core import LMDeployChatModel, LMDeployModel
  from .mlx.core import MLXChatModel, MLXModel, MLXVisionModel
- from .sglang.core import SGLANGChatModel, SGLANGModel
+ from .sglang.core import SGLANGChatModel, SGLANGModel, SGLANGVisionModel
  from .transformers.chatglm import ChatglmPytorchChatModel
  from .transformers.cogagent import CogAgentChatModel
  from .transformers.cogvlm2 import CogVLM2Model
@@ -143,6 +143,7 @@ def _install():
  DeepSeekV2PytorchModel,
  )
  from .transformers.deepseek_vl import DeepSeekVLChatModel
+ from .transformers.deepseek_vl2 import DeepSeekVL2ChatModel
  from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
  from .transformers.glm4v import Glm4VModel
  from .transformers.glm_edge_v import GlmEdgeVModel
@@ -173,7 +174,7 @@ def _install():
  XllamaCppModel,
  ]
  )
- SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel])
+ SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel, SGLANGVisionModel])
  VLLM_CLASSES.extend([VLLMModel, VLLMChatModel, VLLMVisionModel])
  MLX_CLASSES.extend([MLXModel, MLXChatModel, MLXVisionModel])
  LMDEPLOY_CLASSES.extend([LMDeployModel, LMDeployChatModel])
@@ -187,6 +188,7 @@ def _install():
  Qwen2AudioChatModel,
  YiVLChatModel,
  DeepSeekVLChatModel,
+ DeepSeekVL2ChatModel,
  InternVLChatModel,
  PytorchModel,
  CogVLM2Model,
xinference/model/llm/core.py CHANGED
@@ -54,6 +54,7 @@ class LLM(abc.ABC):
  **kwargs,
  ):
  self.model_uid, self.rep_id = parse_replica_model_uid(replica_model_uid)
+ self.raw_model_uid = replica_model_uid
  self.model_family = model_family
  self.model_spec = model_spec
  self.quantization = quantization
xinference/model/llm/llama_cpp/core.py CHANGED
@@ -302,7 +302,12 @@ class XllamaCppModel(LLM, ChatModelMixin):
  while (r := q.get()) is not _Done:
  if type(r) is _Error:
  raise Exception("Got error in chat stream: %s", r.msg)
- yield r
+ # Get valid keys (O(1) lookup)
+ chunk_keys = ChatCompletionChunk.__annotations__
+ # The chunk may contain additional keys (e.g., system_fingerprint),
+ # which might not conform to OpenAI/DeepSeek formats.
+ # Filter out keys that are not part of ChatCompletionChunk.
+ yield {key: r[key] for key in chunk_keys if key in r}

  return self._to_chat_completion_chunks(
  _to_iterator(), self.reasoning_parser
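Filtering a raw chunk down to a TypedDict's declared keys is a compact way to drop fields that downstream parsers do not expect. A minimal sketch with a made-up chunk type and payload:

    from typing import TypedDict

    class ChatChunk(TypedDict):
        # Illustrative stand-in for ChatCompletionChunk.
        id: str
        choices: list

    raw = {"id": "chunk-1", "choices": [], "system_fingerprint": "fp_123"}
    # __annotations__ is a dict, so each membership test is O(1).
    clean = {k: raw[k] for k in ChatChunk.__annotations__ if k in raw}
    print(clean)  # {'id': 'chunk-1', 'choices': []}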
xinference/model/llm/llm_family.json CHANGED
@@ -7561,7 +7561,7 @@
  "model_id":"Qwen/Qwen2-VL-7B-Instruct",
  "model_revision":"6010982c1010c3b222fa98afc81575f124aa9bd6"
  },
- {
+ {
  "model_format":"gptq",
  "model_size_in_billions":7,
  "quantizations":[
@@ -7672,6 +7672,14 @@
  ],
  "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
  },
+ {
+ "model_format":"pytorch",
+ "model_size_in_billions":32,
+ "quantizations":[
+ "none"
+ ],
+ "model_id":"Qwen/Qwen2.5-VL-32B-Instruct"
+ },
  {
  "model_format":"pytorch",
  "model_size_in_billions":72,
@@ -7696,6 +7704,14 @@
  ],
  "model_id":"Qwen/Qwen2.5-VL-7B-Instruct-AWQ"
  },
+ {
+ "model_format":"awq",
+ "model_size_in_billions":32,
+ "quantizations":[
+ "Int4"
+ ],
+ "model_id":"Qwen/Qwen2.5-VL-32B-Instruct-AWQ"
+ },
  {
  "model_format":"awq",
  "model_size_in_billions":72,
@@ -10758,5 +10774,105 @@
  "stop": [
  "<|im_end|>"
  ]
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "fin-r1",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Fin-R1 is a large language model specifically designed for the field of financial reasoning",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "SUFE-AIFLM-Lab/Fin-R1"
+ },
+ {
+ "model_format":"gptq",
+ "model_size_in_billions":7,
+ "quantizations":[
+ "Int4",
+ "Int8"
+ ],
+ "model_id":"JunHowie/Fin-R1-GPTQ-{quantization}"
+ },
+ {
+ "model_format":"fp8",
+ "model_size_in_billions":7,
+ "quantizations":[
+ "FP8"
+ ],
+ "model_id":"JunHowie/Fin-R1-FP8-Dynamic"
+ }
+ ],
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+ "stop_token_ids": [
+ 151643,
+ 151644,
+ 151645
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|im_start|>",
+ "<|im_end|>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 4096,
+ "model_name": "deepseek-vl2",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "DeepSeek-VL2, an advanced series of large Mixture-of-Experts (MoE) Vision-Language Models that significantly improves upon its predecessor, DeepSeek-VL. DeepSeek-VL2 demonstrates superior capabilities across various tasks, including but not limited to visual question answering, optical character recognition, document/table/chart understanding, and visual grounding.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/deepseek-vl2"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 16,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/deepseek-vl2-small"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 3,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/deepseek-vl2-tiny"
+ }
+ ],
+ "chat_template": "",
+ "stop_token_ids": [
+ 1
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ]
  }
  ]
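Once registered, the new entries are launchable by name like any other built-in family. A hedged sketch using the Python client (placeholder endpoint; the spec values match the deepseek-vl2-tiny entry above, but the exact `launch_model` signature should be checked against the installed client):

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")  # placeholder endpoint
    model_uid = client.launch_model(
        model_name="deepseek-vl2",
        model_engine="transformers",
        model_format="pytorch",
        model_size_in_billions=3,  # the "tiny" spec registered above
        quantization="none",
    )
    model = client.get_model(model_uid)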
xinference/model/llm/llm_family_modelscope.json CHANGED
@@ -5399,6 +5399,15 @@
  "model_hub": "modelscope",
  "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
  },
+ {
+ "model_format":"pytorch",
+ "model_size_in_billions":32,
+ "quantizations":[
+ "none"
+ ],
+ "model_hub": "modelscope",
+ "model_id":"Qwen/Qwen2.5-VL-32B-Instruct"
+ },
  {
  "model_format":"pytorch",
  "model_size_in_billions":72,
@@ -5423,9 +5432,18 @@
  "quantizations":[
  "Int4"
  ],
- "model_hub": "awq",
+ "model_hub": "modelscope",
  "model_id":"Qwen/Qwen2.5-VL-7B-Instruct-AWQ"
  },
+ {
+ "model_format":"awq",
+ "model_size_in_billions":32,
+ "quantizations":[
+ "Int4"
+ ],
+ "model_hub": "modelscope",
+ "model_id":"Qwen/Qwen2.5-VL-32B-Instruct-AWQ"
+ },
  {
  "model_format":"pytorch",
  "model_size_in_billions":72,
@@ -8420,5 +8438,111 @@
  "stop": [
  "<|im_end|>"
  ]
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "fin-r1",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Fin-R1 is a large language model specifically designed for the field of financial reasoning",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "AI-ModelScope/Fin-R1",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4",
+ "Int8"
+ ],
+ "model_id": "JunHowie/Fin-R1-GPTQ-{quantization}",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "fp8",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "FP8"
+ ],
+ "model_id": "JunHowie/Fin-R1-FP8-Dynamic",
+ "model_hub": "modelscope"
+ }
+ ],
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+ "stop_token_ids": [
+ 151643,
+ 151644,
+ 151645
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|im_start|>",
+ "<|im_end|>"
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 4096,
+ "model_name": "deepseek-vl2",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "vision"
+ ],
+ "model_description": "DeepSeek-VL2, an advanced series of large Mixture-of-Experts (MoE) Vision-Language Models that significantly improves upon its predecessor, DeepSeek-VL. DeepSeek-VL2 demonstrates superior capabilities across various tasks, including but not limited to visual question answering, optical character recognition, document/table/chart understanding, and visual grounding.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/deepseek-vl2",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 16,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/deepseek-vl2-small",
+ "model_hub": "modelscope"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 3,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/deepseek-vl2-tiny",
+ "model_hub": "modelscope"
+ }
+ ],
+ "chat_template": "",
+ "stop_token_ids": [
+ 1
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ]
  }
  ]
xinference/model/llm/reasoning_parser.py CHANGED
@@ -43,7 +43,7 @@ class ReasoningParser:
  reasoning_content = delta_text[:end_idx]
  content = delta_text[end_idx + len(self.reasoning_end_tag) :]
  delta["reasoning_content"] = reasoning_content
- if content is not None:
+ if content:
  delta["content"] = content
  else:
  delta["content"] = None
@@ -71,7 +71,7 @@ class ReasoningParser:
  ]
  content = delta_text[end_idx + len(self.reasoning_end_tag) :]
  delta["reasoning_content"] = reasoning_content
- if content is not None:
+ if content:
  delta["content"] = content
  else:
  delta["content"] = None
@@ -93,7 +93,7 @@ class ReasoningParser:
  reasoning_content = delta_text[:end_idx]
  content = delta_text[end_idx + len(self.reasoning_end_tag) :]
  delta["reasoning_content"] = reasoning_content
- if content is not None:
+ if content:
  delta["content"] = content
  else:
  delta["content"] = None
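The old `if content is not None:` test was always true, because slicing a string never returns None; with the fix, a delta that ends exactly at the closing tag now reports `content: None` instead of an empty string. A tiny repro of the difference (tag and text are made up):

    # Slicing never returns None, so the old "is not None" branch always
    # fired, emitting content="" for deltas ending at the closing tag.
    reasoning_end_tag = "</think>"
    delta_text = "some reasoning</think>"
    end_idx = delta_text.find(reasoning_end_tag)
    content = delta_text[end_idx + len(reasoning_end_tag):]  # == ""

    delta = {"reasoning_content": delta_text[:end_idx]}
    delta["content"] = content if content else None  # new behavior: None
    print(delta)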