xinference 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_compat.py +1 -0
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +4 -0
- xinference/core/model.py +23 -3
- xinference/core/supervisor.py +6 -0
- xinference/core/worker.py +54 -11
- xinference/model/llm/__init__.py +4 -2
- xinference/model/llm/core.py +1 -0
- xinference/model/llm/llama_cpp/core.py +6 -1
- xinference/model/llm/llm_family.json +117 -1
- xinference/model/llm/llm_family_modelscope.json +125 -1
- xinference/model/llm/reasoning_parser.py +3 -3
- xinference/model/llm/sglang/core.py +111 -13
- xinference/model/llm/transformers/core.py +1 -0
- xinference/model/llm/transformers/deepseek_vl.py +1 -1
- xinference/model/llm/transformers/deepseek_vl2.py +287 -0
- xinference/model/llm/utils.py +26 -14
- xinference/model/llm/vllm/core.py +149 -8
- xinference/model/llm/vllm/distributed_executor.py +314 -0
- xinference/model/rerank/core.py +16 -11
- xinference/thirdparty/deepseek_vl2/__init__.py +31 -0
- xinference/thirdparty/deepseek_vl2/models/__init__.py +26 -0
- xinference/thirdparty/deepseek_vl2/models/configuration_deepseek.py +210 -0
- xinference/thirdparty/deepseek_vl2/models/conversation.py +310 -0
- xinference/thirdparty/deepseek_vl2/models/modeling_deepseek.py +1975 -0
- xinference/thirdparty/deepseek_vl2/models/modeling_deepseek_vl_v2.py +697 -0
- xinference/thirdparty/deepseek_vl2/models/processing_deepseek_vl_v2.py +675 -0
- xinference/thirdparty/deepseek_vl2/models/siglip_vit.py +661 -0
- xinference/thirdparty/deepseek_vl2/serve/__init__.py +0 -0
- xinference/thirdparty/deepseek_vl2/serve/app_modules/__init__.py +0 -0
- xinference/thirdparty/deepseek_vl2/serve/app_modules/gradio_utils.py +83 -0
- xinference/thirdparty/deepseek_vl2/serve/app_modules/overwrites.py +81 -0
- xinference/thirdparty/deepseek_vl2/serve/app_modules/presets.py +115 -0
- xinference/thirdparty/deepseek_vl2/serve/app_modules/utils.py +333 -0
- xinference/thirdparty/deepseek_vl2/serve/assets/Kelpy-Codos.js +100 -0
- xinference/thirdparty/deepseek_vl2/serve/assets/avatar.png +0 -0
- xinference/thirdparty/deepseek_vl2/serve/assets/custom.css +355 -0
- xinference/thirdparty/deepseek_vl2/serve/assets/custom.js +22 -0
- xinference/thirdparty/deepseek_vl2/serve/assets/favicon.ico +0 -0
- xinference/thirdparty/deepseek_vl2/serve/assets/simsun.ttc +0 -0
- xinference/thirdparty/deepseek_vl2/serve/inference.py +197 -0
- xinference/thirdparty/deepseek_vl2/utils/__init__.py +18 -0
- xinference/thirdparty/deepseek_vl2/utils/io.py +80 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.3cea968e.js → main.5ca4eea1.js} +3 -3
- xinference/web/ui/build/static/js/main.5ca4eea1.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0f0967acaec5df1d45b80010949c258d64297ebbb0f44b8bb3afcbd45c6f0ec4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/68249645124f37d01eef83b1d897e751f895bea919b6fb466f907c1f87cebc84.json +1 -0
- {xinference-1.4.0.dist-info → xinference-1.4.1.dist-info}/METADATA +4 -4
- {xinference-1.4.0.dist-info → xinference-1.4.1.dist-info}/RECORD +56 -31
- xinference/web/ui/build/static/js/main.3cea968e.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/7f59e45e3f268ab8a4788b6fb024cf8dab088736dff22f5a3a39c122a83ab930.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/dcd60488509450bfff37bfff56de2c096d51de17dd00ec60d4db49c8b483ada1.json +0 -1
- /xinference/web/ui/build/static/js/{main.3cea968e.js.LICENSE.txt → main.5ca4eea1.js.LICENSE.txt} +0 -0
- {xinference-1.4.0.dist-info → xinference-1.4.1.dist-info}/LICENSE +0 -0
- {xinference-1.4.0.dist-info → xinference-1.4.1.dist-info}/WHEEL +0 -0
- {xinference-1.4.0.dist-info → xinference-1.4.1.dist-info}/entry_points.txt +0 -0
- {xinference-1.4.0.dist-info → xinference-1.4.1.dist-info}/top_level.txt +0 -0
xinference/_compat.py
CHANGED
@@ -102,6 +102,7 @@ class CreateChatCompletionOpenAI(BaseModel):
     frequency_penalty: Optional[float]
     logit_bias: Optional[Dict[str, int]]
     logprobs: Optional[bool]
+    max_completion_tokens: Optional[int]
     max_tokens: Optional[int]
     n: Optional[int]
     parallel_tool_calls: Optional[bool]
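In practice the new field only has to exist on the pydantic request model for validation to accept it. A minimal stand-in sketch, assuming pydantic v2; this toy model is not the real CreateChatCompletionOpenAI, which declares many more fields:

# Toy stand-in for the request schema; only the token-limit fields are shown.
from typing import Optional

from pydantic import BaseModel


class CreateChatCompletionOpenAI(BaseModel):
    max_completion_tokens: Optional[int] = None  # field added in this release
    max_tokens: Optional[int] = None


# A body using OpenAI's newer parameter name now validates instead of erroring.
print(CreateChatCompletionOpenAI(max_completion_tokens=16).max_completion_tokens)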
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
-    "date": "2025-
+    "date": "2025-04-03T21:26:30+0800",
     "dirty": false,
     "error": null,
-    "full-revisionid": "
-    "version": "1.4.0"
+    "full-revisionid": "23260be3b917e7a2e8381927721ed3de815c0a99",
+    "version": "1.4.1"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py
CHANGED
@@ -1952,6 +1952,7 @@ class RESTfulAPI(CancelMixin):
             "logit_bias",
             "logit_bias_type",
             "user",
+            "max_completion_tokens",
         }
 
         raw_kwargs = {k: v for k, v in raw_body.items() if k not in exclude}
@@ -1964,6 +1965,9 @@ class RESTfulAPI(CancelMixin):
         if body.max_tokens is None:
             kwargs["max_tokens"] = max_tokens_field.default
 
+        if body.max_completion_tokens is not None:
+            kwargs["max_tokens"] = body.max_completion_tokens
+
         if body.logit_bias is not None:
             raise HTTPException(status_code=501, detail="Not implemented")
 
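Together with the _compat.py change, the server now understands OpenAI's newer max_completion_tokens name and maps it onto the internal max_tokens kwarg. A hedged client-side sketch; the base_url and model uid are placeholders, and a running Xinference server is assumed:

# Requires a running Xinference server with an OpenAI-compatible endpoint.
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:9997/v1", api_key="not-needed")
resp = client.chat.completions.create(
    model="my-chat-model",  # hypothetical launched model uid
    messages=[{"role": "user", "content": "Say hi in five words."}],
    # Newer OpenAI SDKs send max_completion_tokens; the hunk above copies it
    # into kwargs["max_tokens"] on the server side.
    max_completion_tokens=16,
)
print(resp.choices[0].message.content)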
xinference/core/model.py
CHANGED
@@ -185,7 +185,7 @@ class ModelActor(xo.StatelessActor, CancelMixin):
         )
 
         if hasattr(self._model, "stop") and callable(self._model.stop):
-            self._model.stop()
+            await asyncio.to_thread(self._model.stop)
 
         if isinstance(self._model, LLMVLLMModel):
             if self._transfer_ref is not None:
@@ -284,6 +284,8 @@ class ModelActor(xo.StatelessActor, CancelMixin):
     async def __post_create__(self):
         self._loop = asyncio.get_running_loop()
 
+        logger.debug("Starting ModelActor at %s, uid: %s", self.address, self.uid)
+
         self._handle_pending_requests_task = asyncio.create_task(
             self._handle_pending_requests()
         )
@@ -463,7 +465,9 @@ class ModelActor(xo.StatelessActor, CancelMixin):
         while True:
             i += 1
             try:
-                self._model.load()
+                if hasattr(self._model, "set_loop"):
+                    self._model.set_loop(asyncio.get_running_loop())
+                await asyncio.to_thread(self._model.load)
                 if hasattr(self._model, "driver_info"):
                     self._driver_info = self._model.driver_info
                 break
@@ -490,7 +494,23 @@ class ModelActor(xo.StatelessActor, CancelMixin):
 
     async def wait_for_load(self):
         if hasattr(self._model, "wait_for_load"):
-            self._model.wait_for_load()
+            await asyncio.to_thread(self._model.wait_for_load)
+
+    def need_create_pools(self):
+        return getattr(self._model, "need_create_pools", False)
+
+    def set_pool_addresses(self, pool_addresses: List[str]):
+        if hasattr(self._model, "set_pool_addresses"):
+            self._model.set_pool_addresses(pool_addresses)
+
+    def get_pool_addresses(self) -> Optional[List[str]]:
+        if hasattr(self._model, "get_pool_addresses"):
+            return self._model.get_pool_addresses()
+        return None
+
+    def set_worker_addresses(self, shard: int, worker_addresses: List[str]):
+        if hasattr(self._model, "set_worker_addresses"):
+            self._model.set_worker_addresses(shard, worker_addresses)
 
     def model_uid(self):
         return (
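The recurring pattern in these hunks is moving blocking model calls (load, stop, wait_for_load) off the actor's event loop with asyncio.to_thread. A self-contained sketch of that pattern; the Model class here is a stand-in, not the real model interface:

import asyncio
import time


class Model:
    def load(self) -> str:
        time.sleep(2)  # stands in for slow, blocking model loading
        return "loaded"


async def main() -> None:
    model = Model()
    # A bare model.load() would block the event loop for the full two
    # seconds; to_thread runs it on a worker thread so the actor can keep
    # answering other messages meanwhile.
    print(await asyncio.to_thread(model.load))


asyncio.run(main())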
xinference/core/supervisor.py
CHANGED
@@ -1097,6 +1097,7 @@ class SupervisorActor(xo.StatelessActor):
                 xavier_config=xavier_config,
                 **kwargs,
             )
+            await worker_ref.wait_for_load(_replica_model_uid)
             self._replica_model_uid_to_worker[_replica_model_uid] = worker_ref
             return subpool_address
 
@@ -1242,6 +1243,11 @@ class SupervisorActor(xo.StatelessActor):
                 available_workers.append(worker_ip)
 
         async def _launch_model():
+            # Validation of n_worker, intercept if it is greater than the available workers.
+            if n_worker > len(available_workers):
+                raise ValueError(
+                    "n_worker cannot be larger than the number of available workers."
+                )
             try:
                 for _idx, rep_model_uid in enumerate(
                     iter_replica_model_uid(model_uid, replica)
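The first hunk changes launch ordering: the supervisor now awaits wait_for_load before recording the replica, so a model only becomes routable once it has actually finished loading. A toy sketch of that guarantee; wait_for_load here is a stand-in coroutine, not the real actor method:

import asyncio

registry: dict[str, str] = {}


async def wait_for_load(uid: str) -> None:
    await asyncio.sleep(0.1)  # stands in for the model finishing loading


async def launch(uid: str, worker: str) -> None:
    await wait_for_load(uid)  # block until the model is actually usable
    registry[uid] = worker    # only now does routing see the replica


asyncio.run(launch("my-model-0", "worker-a"))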
xinference/core/worker.py
CHANGED
@@ -874,7 +874,7 @@ class WorkerActor(xo.StatelessActor):
        subpool_address, devices = await self._create_subpool(
            model_uid, model_type, n_gpu=n_gpu, gpu_idx=gpu_idx
        )
-
+        all_subpool_addresses = [subpool_address]
        try:
            xavier_config: Optional[Dict] = kwargs.pop("xavier_config", None)
            if xavier_config is not None:
@@ -885,7 +885,7 @@
            # add a few kwargs
            model_kwargs.update(
                dict(
-                    address=
+                    address=subpool_address,
                    n_worker=n_worker,
                    shard=shard,
                    driver_info=driver_info,
@@ -923,11 +923,28 @@
                shard=shard,
                driver_info=driver_info,
            )
+            if await model_ref.need_create_pools() and (
+                len(devices) > 1 or n_worker > 1  # type: ignore
+            ):
+                coros = []
+                env_name = get_available_device_env_name() or "CUDA_VISIBLE_DEVICES"
+                env_value = ",".join(devices)
+                for device in devices:
+                    coros.append(
+                        self._main_pool.append_sub_pool(
+                            env={env_name: env_value},
+                            start_method=self._get_start_method(),
+                        )
+                    )
+                pool_addresses = await asyncio.gather(*coros)
+                all_subpool_addresses.extend(pool_addresses)
+                await model_ref.set_pool_addresses(pool_addresses)
            await model_ref.load()
        except:
            logger.error(f"Failed to load model {model_uid}", exc_info=True)
            self.release_devices(model_uid=model_uid)
-
+            for addr in all_subpool_addresses:
+                await self._main_pool.remove_sub_pool(addr)
            raise
        self._model_uid_to_model[model_uid] = model_ref
        self._model_uid_to_model_spec[model_uid] = model_description
@@ -994,15 +1011,36 @@
        if model_ref is None:
            logger.debug("Model not found, uid: %s", model_uid)
 
+        pool_addresses = None
+        if model_ref is not None:
+            try:
+                # pool addresses if model.need_create_pools()
+                pool_addresses = await model_ref.get_pool_addresses()
+            except Exception as e:
+                # process may disappear, we just ignore it.
+                logger.debug("Fail to get pool addresses, error: %s", e)
+
        try:
-
+            logger.debug("Start to destroy model actor: %s", model_ref)
+            coro = xo.destroy_actor(model_ref)
+            await asyncio.wait_for(coro, timeout=5)
        except Exception as e:
            logger.debug(
                "Destroy model actor failed, model uid: %s, error: %s", model_uid, e
            )
        try:
+            to_remove_addresses = []
            subpool_address = self._model_uid_to_addr[model_uid]
-
+            to_remove_addresses.append(subpool_address)
+            if pool_addresses:
+                to_remove_addresses.extend(pool_addresses)
+            logger.debug("Remove sub pools: %s", to_remove_addresses)
+            coros = []
+            for to_remove_addr in to_remove_addresses:
+                coros.append(
+                    self._main_pool.remove_sub_pool(to_remove_addr, force=True)
+                )
+            await asyncio.gather(*coros)
        except Exception as e:
            logger.debug(
                "Remove sub pool failed, model uid: %s, error: %s", model_uid, e
@@ -1204,18 +1242,23 @@
            model_ref = self._model_uid_to_model[rep_model_uid]
            await model_ref.start_transfer_for_vllm(rank_addresses)
 
-    @log_async(logger=logger, level=logging.INFO)
-    async def launch_rank0_model(
-        self, rep_model_uid: str, xavier_config: Dict[str, Any]
-    ) -> Tuple[str, int]:
-        from ..model.llm.vllm.xavier.collective_manager import Rank0ModelActor
-
+    @staticmethod
+    def _get_start_method():
        if os.name != "nt" and platform.system() != "Darwin":
            # Linux
            start_method = "forkserver"
        else:
            # Windows and macOS
            start_method = "spawn"
+        return start_method
+
+    @log_async(logger=logger, level=logging.INFO)
+    async def launch_rank0_model(
+        self, rep_model_uid: str, xavier_config: Dict[str, Any]
+    ) -> Tuple[str, int]:
+        from ..model.llm.vllm.xavier.collective_manager import Rank0ModelActor
+
+        start_method = self._get_start_method()
        subpool_address = await self._main_pool.append_sub_pool(
            start_method=start_method
        )
xinference/model/llm/__init__.py
CHANGED
@@ -132,7 +132,7 @@ def _install():
     from .llama_cpp.core import LlamaCppChatModel, LlamaCppModel, XllamaCppModel
     from .lmdeploy.core import LMDeployChatModel, LMDeployModel
     from .mlx.core import MLXChatModel, MLXModel, MLXVisionModel
-    from .sglang.core import SGLANGChatModel, SGLANGModel
+    from .sglang.core import SGLANGChatModel, SGLANGModel, SGLANGVisionModel
     from .transformers.chatglm import ChatglmPytorchChatModel
     from .transformers.cogagent import CogAgentChatModel
     from .transformers.cogvlm2 import CogVLM2Model
@@ -143,6 +143,7 @@ def _install():
         DeepSeekV2PytorchModel,
     )
     from .transformers.deepseek_vl import DeepSeekVLChatModel
+    from .transformers.deepseek_vl2 import DeepSeekVL2ChatModel
     from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
     from .transformers.glm4v import Glm4VModel
     from .transformers.glm_edge_v import GlmEdgeVModel
@@ -173,7 +174,7 @@ def _install():
             XllamaCppModel,
         ]
     )
-    SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel])
+    SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel, SGLANGVisionModel])
     VLLM_CLASSES.extend([VLLMModel, VLLMChatModel, VLLMVisionModel])
     MLX_CLASSES.extend([MLXModel, MLXChatModel, MLXVisionModel])
     LMDEPLOY_CLASSES.extend([LMDeployModel, LMDeployChatModel])
@@ -187,6 +188,7 @@ def _install():
            Qwen2AudioChatModel,
            YiVLChatModel,
            DeepSeekVLChatModel,
+            DeepSeekVL2ChatModel,
            InternVLChatModel,
            PytorchModel,
            CogVLM2Model,
xinference/model/llm/llama_cpp/core.py
CHANGED
@@ -302,7 +302,12 @@ class XllamaCppModel(LLM, ChatModelMixin):
             while (r := q.get()) is not _Done:
                 if type(r) is _Error:
                     raise Exception("Got error in chat stream: %s", r.msg)
-
+                # Get valid keys (O(1) lookup)
+                chunk_keys = ChatCompletionChunk.__annotations__
+                # The chunk may contain additional keys (e.g., system_fingerprint),
+                # which might not conform to OpenAI/DeepSeek formats.
+                # Filter out keys that are not part of ChatCompletionChunk.
+                yield {key: r[key] for key in chunk_keys if key in r}
 
         return self._to_chat_completion_chunks(
             _to_iterator(), self.reasoning_parser
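The filter leans on the fact that a TypedDict's __annotations__ is a dict of its declared keys, so membership tests are O(1). A standalone sketch; the raw chunk below is invented, not real llama.cpp output:

from typing import TypedDict


class ChatCompletionChunk(TypedDict):
    id: str
    model: str
    choices: list


raw = {"id": "c1", "model": "demo", "choices": [], "system_fingerprint": "fp"}
chunk_keys = ChatCompletionChunk.__annotations__  # declared keys as a dict
clean = {key: raw[key] for key in chunk_keys if key in raw}
print(clean)  # {'id': 'c1', 'model': 'demo', 'choices': []}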
xinference/model/llm/llm_family.json
CHANGED
@@ -7561,7 +7561,7 @@
       "model_id":"Qwen/Qwen2-VL-7B-Instruct",
       "model_revision":"6010982c1010c3b222fa98afc81575f124aa9bd6"
     },
-
+    {
       "model_format":"gptq",
       "model_size_in_billions":7,
       "quantizations":[
@@ -7672,6 +7672,14 @@
       ],
       "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
     },
+    {
+      "model_format":"pytorch",
+      "model_size_in_billions":32,
+      "quantizations":[
+        "none"
+      ],
+      "model_id":"Qwen/Qwen2.5-VL-32B-Instruct"
+    },
     {
       "model_format":"pytorch",
       "model_size_in_billions":72,
@@ -7696,6 +7704,14 @@
       ],
       "model_id":"Qwen/Qwen2.5-VL-7B-Instruct-AWQ"
     },
+    {
+      "model_format":"awq",
+      "model_size_in_billions":32,
+      "quantizations":[
+        "Int4"
+      ],
+      "model_id":"Qwen/Qwen2.5-VL-32B-Instruct-AWQ"
+    },
     {
       "model_format":"awq",
       "model_size_in_billions":72,
@@ -10758,5 +10774,105 @@
     "stop": [
       "<|im_end|>"
     ]
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "fin-r1",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Fin-R1 is a large language model specifically designed for the field of financial reasoning",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "SUFE-AIFLM-Lab/Fin-R1"
+      },
+      {
+        "model_format":"gptq",
+        "model_size_in_billions":7,
+        "quantizations":[
+          "Int4",
+          "Int8"
+        ],
+        "model_id":"JunHowie/Fin-R1-GPTQ-{quantization}"
+      },
+      {
+        "model_format":"fp8",
+        "model_size_in_billions":7,
+        "quantizations":[
+          "FP8"
+        ],
+        "model_id":"JunHowie/Fin-R1-FP8-Dynamic"
+      }
+    ],
+    "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+    "stop_token_ids": [
+      151643,
+      151644,
+      151645
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|im_start|>",
+      "<|im_end|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "deepseek-vl2",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "DeepSeek-VL2, an advanced series of large Mixture-of-Experts (MoE) Vision-Language Models that significantly improves upon its predecessor, DeepSeek-VL. DeepSeek-VL2 demonstrates superior capabilities across various tasks, including but not limited to visual question answering, optical character recognition, document/table/chart understanding, and visual grounding.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-vl2"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 16,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-vl2-small"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 3,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-vl2-tiny"
+      }
+    ],
+    "chat_template": "",
+    "stop_token_ids": [
+      1
+    ],
+    "stop": [
+      "<|end▁of▁sentence|>"
+    ]
   }
 ]
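Once registered, the new entries can be launched by name. A hedged sketch with the Python client; the endpoint is a placeholder and a running server with downloadable weights is assumed, while model_name and model_size_in_billions come from the spec above:

from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # hypothetical endpoint
uid = client.launch_model(
    model_name="deepseek-vl2",
    model_format="pytorch",
    model_size_in_billions=3,  # the deepseek-vl2-tiny spec
)
model = client.get_model(uid)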
xinference/model/llm/llm_family_modelscope.json
CHANGED
@@ -5399,6 +5399,15 @@
       "model_hub": "modelscope",
       "model_id":"Qwen/Qwen2.5-VL-7B-Instruct"
     },
+    {
+      "model_format":"pytorch",
+      "model_size_in_billions":32,
+      "quantizations":[
+        "none"
+      ],
+      "model_hub": "modelscope",
+      "model_id":"Qwen/Qwen2.5-VL-32B-Instruct"
+    },
     {
       "model_format":"pytorch",
       "model_size_in_billions":72,
@@ -5423,9 +5432,18 @@
       "quantizations":[
         "Int4"
       ],
-      "model_hub": "
+      "model_hub": "modelscope",
       "model_id":"Qwen/Qwen2.5-VL-7B-Instruct-AWQ"
     },
+    {
+      "model_format":"awq",
+      "model_size_in_billions":32,
+      "quantizations":[
+        "Int4"
+      ],
+      "model_hub": "modelscope",
+      "model_id":"Qwen/Qwen2.5-VL-32B-Instruct-AWQ"
+    },
     {
       "model_format":"pytorch",
       "model_size_in_billions":72,
@@ -8420,5 +8438,111 @@
     "stop": [
       "<|im_end|>"
     ]
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "fin-r1",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Fin-R1 is a large language model specifically designed for the field of financial reasoning",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "AI-ModelScope/Fin-R1",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "JunHowie/Fin-R1-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "fp8",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "FP8"
+        ],
+        "model_id": "JunHowie/Fin-R1-FP8-Dynamic",
+        "model_hub": "modelscope"
+      }
+    ],
+    "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+    "stop_token_ids": [
+      151643,
+      151644,
+      151645
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|im_start|>",
+      "<|im_end|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "deepseek-vl2",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "DeepSeek-VL2, an advanced series of large Mixture-of-Experts (MoE) Vision-Language Models that significantly improves upon its predecessor, DeepSeek-VL. DeepSeek-VL2 demonstrates superior capabilities across various tasks, including but not limited to visual question answering, optical character recognition, document/table/chart understanding, and visual grounding.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-vl2",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 16,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-vl2-small",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 3,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "deepseek-ai/deepseek-vl2-tiny",
+        "model_hub": "modelscope"
+      }
+    ],
+    "chat_template": "",
+    "stop_token_ids": [
+      1
+    ],
+    "stop": [
+      "<|end▁of▁sentence|>"
+    ]
   }
 ]
xinference/model/llm/reasoning_parser.py
CHANGED
@@ -43,7 +43,7 @@ class ReasoningParser:
            reasoning_content = delta_text[:end_idx]
            content = delta_text[end_idx + len(self.reasoning_end_tag) :]
            delta["reasoning_content"] = reasoning_content
-            if content
+            if content:
                delta["content"] = content
            else:
                delta["content"] = None
@@ -71,7 +71,7 @@ class ReasoningParser:
            ]
            content = delta_text[end_idx + len(self.reasoning_end_tag) :]
            delta["reasoning_content"] = reasoning_content
-            if content
+            if content:
                delta["content"] = content
            else:
                delta["content"] = None
@@ -93,7 +93,7 @@ class ReasoningParser:
            reasoning_content = delta_text[:end_idx]
            content = delta_text[end_idx + len(self.reasoning_end_tag) :]
            delta["reasoning_content"] = reasoning_content
-            if content
+            if content:
                delta["content"] = content
            else:
                delta["content"] = None
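The fix is a plain truthiness check: when nothing follows the closing reasoning tag, content should surface as None rather than an empty string. A standalone sketch of the split, with an illustrative tag:

reasoning_end_tag = "</think>"
delta_text = "hidden chain of thought</think>"

end_idx = delta_text.find(reasoning_end_tag)
delta = {"reasoning_content": delta_text[:end_idx]}
content = delta_text[end_idx + len(reasoning_end_tag):]
if content:  # empty string -> None, matching the fixed branch
    delta["content"] = content
else:
    delta["content"] = None
print(delta)  # {'reasoning_content': 'hidden chain of thought', 'content': None}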