xinference 1.5.1__py3-none-any.whl → 1.6.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +97 -8
- xinference/client/restful/restful_client.py +51 -11
- xinference/core/media_interface.py +758 -0
- xinference/core/model.py +49 -9
- xinference/core/worker.py +31 -37
- xinference/deploy/utils.py +0 -3
- xinference/model/audio/__init__.py +16 -27
- xinference/model/audio/core.py +1 -0
- xinference/model/audio/cosyvoice.py +4 -2
- xinference/model/audio/model_spec.json +20 -3
- xinference/model/audio/model_spec_modelscope.json +18 -1
- xinference/model/embedding/__init__.py +16 -24
- xinference/model/image/__init__.py +15 -25
- xinference/model/llm/__init__.py +37 -110
- xinference/model/llm/core.py +15 -6
- xinference/model/llm/llama_cpp/core.py +25 -353
- xinference/model/llm/llm_family.json +613 -89
- xinference/model/llm/llm_family.py +9 -1
- xinference/model/llm/llm_family_modelscope.json +540 -90
- xinference/model/llm/mlx/core.py +6 -3
- xinference/model/llm/reasoning_parser.py +281 -5
- xinference/model/llm/sglang/core.py +16 -3
- xinference/model/llm/transformers/chatglm.py +2 -2
- xinference/model/llm/transformers/cogagent.py +1 -1
- xinference/model/llm/transformers/cogvlm2.py +1 -1
- xinference/model/llm/transformers/core.py +9 -3
- xinference/model/llm/transformers/glm4v.py +1 -1
- xinference/model/llm/transformers/minicpmv26.py +1 -1
- xinference/model/llm/transformers/qwen-omni.py +6 -0
- xinference/model/llm/transformers/qwen_vl.py +1 -1
- xinference/model/llm/utils.py +68 -45
- xinference/model/llm/vllm/core.py +38 -18
- xinference/model/llm/vllm/xavier/test/test_xavier.py +1 -10
- xinference/model/rerank/__init__.py +13 -24
- xinference/model/video/__init__.py +15 -25
- xinference/model/video/core.py +3 -3
- xinference/model/video/diffusers.py +133 -16
- xinference/model/video/model_spec.json +54 -0
- xinference/model/video/model_spec_modelscope.json +56 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
- xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
- xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
- xinference/thirdparty/cosyvoice/bin/train.py +7 -2
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
- xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
- xinference/thirdparty/cosyvoice/cli/model.py +140 -155
- xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
- xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
- xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
- xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
- xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
- xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
- xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
- xinference/thirdparty/cosyvoice/utils/common.py +1 -1
- xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
- xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
- xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
- xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
- xinference/types.py +0 -71
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
- xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
- xinference/web/ui/src/locales/en.json +6 -4
- xinference/web/ui/src/locales/zh.json +6 -4
- {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/METADATA +59 -39
- {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/RECORD +87 -87
- {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/WHEEL +1 -1
- xinference/core/image_interface.py +0 -377
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
- xinference/web/ui/build/static/js/main.91e77b5c.js +0 -3
- xinference/web/ui/build/static/js/main.91e77b5c.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5e6edb0fb87e3798f142e9abf8dd2dc46bab33a60d31dff525797c0c99887097.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/6087820be1bd5c02c42dff797e7df365448ef35ab26dd5d6bd33e967e05cbfd4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
- /xinference/web/ui/build/static/js/{main.91e77b5c.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
- {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/entry_points.txt +0 -0
- {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.5.1.dist-info → xinference-1.6.0.post1.dist-info}/top_level.txt +0 -0
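Most of the hunks below add new ModelScope model specs to xinference/model/llm/llm_family_modelscope.json (Qwen2.5-VL-32B MLX, Qwen2.5-Omni-3B, HuatuoGPT-o1, DianJin-R1, XiYanSQL-QwenCoder-2504, skywork-or1, plus AWQ/GPTQ/MLX variants of Qwen3). As a rough orientation for what such a spec means in practice, the following sketch launches one of the newly listed MLX specs through the RESTful client (whose restful_client.py also changed in this release). It is illustrative only: the endpoint, the "MLX" engine label, and the exact launch_model keyword arguments are assumptions to verify against the installed client, not something taken from this diff.

# Hypothetical usage sketch, not part of the diff: map a spec's fields
# (model_format, model_size_in_billions, quantization) onto a launch call.
from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")  # assumed local supervisor address

model_uid = client.launch_model(
    model_name="qwen3",          # family name; assumed to match the spec's model_name
    model_engine="MLX",          # assumed engine label for the "mlx" model_format
    model_format="mlx",
    model_size_in_billions=8,    # the Qwen3-8B MLX spec added in this release
    quantization="4bit",         # one of the quantizations listed in the new spec
)
print("launched:", model_uid)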
xinference/model/llm/llm_family_modelscope.json

@@ -4384,6 +4384,18 @@
         "model_hub": "modelscope",
         "model_id": "mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
       },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "mlx-community/Qwen2.5-VL-32B-Instruct-{quantization}"
+      },
       {
         "model_format": "mlx",
         "model_size_in_billions": 72,
@@ -4424,6 +4436,15 @@
     ],
     "model_description": "Qwen2.5-Omni: the new flagship end-to-end multimodal model in the Qwen series.",
     "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 3,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "Qwen/Qwen2.5-Omni-3B"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 7,
@@ -5867,6 +5888,140 @@
       "<|im_end|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "HuatuoGPT-o1-Qwen2.5",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "HuatuoGPT-o1 is a medical LLM designed for advanced medical reasoning. It generates a complex thought process, reflecting and refining its reasoning, before providing a final response.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "FreedomIntelligence/HuatuoGPT-o1-7B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "FreedomIntelligence/HuatuoGPT-o1-72B",
+        "model_hub": "modelscope"
+      }
+    ],
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+    "stop_token_ids": [
+      151643,
+      151644,
+      151645
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|im_start|>",
+      "<|im_end|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "HuatuoGPT-o1-LLaMA-3.1",
+    "model_lang": [
+      "en"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "HuatuoGPT-o1 is a medical LLM designed for advanced medical reasoning. It generates a complex thought process, reflecting and refining its reasoning, before providing a final response.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "FreedomIntelligence/HuatuoGPT-o1-8B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "FreedomIntelligence/HuatuoGPT-o1-70B",
+        "model_hub": "modelscope"
+      }
+    ],
"chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
+    "stop_token_ids": [
+      128001,
+      128008,
+      128009
+    ],
+    "stop": [
+      "<|end_of_text|>",
+      "<|eot_id|>",
+      "<|eom_id|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "DianJin-R1",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "Tongyi DianJin is a financial intelligence solution platform built by Alibaba Cloud, dedicated to providing financial business developers with a convenient artificial intelligence application development environment.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "DianJin/DianJin-R1-7B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "DianJin/DianJin-R1-32B",
+        "model_hub": "modelscope"
+      }
+    ],
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+    "stop_token_ids": [
+      151643,
+      151644,
+      151645
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|im_start|>",
+      "<|im_end|>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 32768,
@@ -6217,6 +6372,51 @@
       "<|im_end|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "XiYanSQL-QwenCoder-2504",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "The XiYanSQL-QwenCoder models, as multi-dialect SQL base models, demonstrating robust SQL generation capabilities.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "XGenerationLab/XiYanSQL-QwenCoder-7B-2504",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "XGenerationLab/XiYanSQL-QwenCoder-32B-2504",
+        "model_hub": "modelscope"
+      }
+    ],
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+    "stop_token_ids": [
+      151643,
+      151644,
+      151645
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|im_start|>",
+      "<|im_end|>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 32768,
@@ -6333,40 +6533,40 @@
         "model_format": "ggufv2",
         "model_size_in_billions": 32,
         "quantizations": [
-          "
-          "
-          "
-          "
-          "
-          "
-          "
-          "
-          "
+          "BF16",
+          "IQ4_NL",
+          "IQ4_XS",
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "UD-IQ1_M",
+          "UD-IQ1_S",
+          "UD-IQ2_M",
+          "UD-IQ2_XXS",
+          "UD-IQ3_XXS",
+          "UD-Q2_K_XL",
+          "UD-Q3_K_XL",
+          "UD-Q4_K_XL",
+          "UD-Q5_K_XL",
+          "UD-Q6_K_XL",
+          "UD-Q8_K_XL",
+          "Q8_0"
         ],
         "quantization_parts": {
-          "
-          "00001-of-
-          "00002-of-
-            "00003-of-00017",
-            "00004-of-00017",
-            "00005-of-00017",
-            "00006-of-00017",
-            "00007-of-00017",
-            "00008-of-00017",
-            "00009-of-00017",
-            "00010-of-00017",
-            "00011-of-00017",
-            "00012-of-00017",
-            "00013-of-00017",
-            "00014-of-00017",
-            "00015-of-00017",
-            "00016-of-00017",
-            "00017-of-00017"
+          "BF16": [
+            "00001-of-00002",
+            "00002-of-00002"
           ]
         },
-        "model_id": "
-        "model_file_name_template": "
-        "model_file_name_split_template": "
+        "model_id": "unsloth/QwQ-32B-GGUF",
+        "model_file_name_template": "QwQ-32B-{quantization}.gguf",
+        "model_file_name_split_template": "BF16/QwQ-32B-{quantization}-{part}.gguf",
         "model_hub": "modelscope"
       }
     ],
@@ -7295,6 +7495,7 @@
       {
         "model_format": "pytorch",
         "model_size_in_billions": 27,
+        "activated_size_in_billions": "4_5",
         "quantizations": [
           "none"
         ],
@@ -7304,6 +7505,7 @@
       {
         "model_format": "pytorch",
         "model_size_in_billions": 16,
+        "activated_size_in_billions": "2_8",
         "quantizations": [
           "none"
         ],
@@ -7313,6 +7515,7 @@
       {
         "model_format": "pytorch",
         "model_size_in_billions": 3,
+        "activated_size_in_billions": 1,
        "quantizations": [
           "none"
         ],
@@ -7523,121 +7726,121 @@
     }
   },
   {
-    "version":1,
-    "context_length":32768,
-    "model_name":"Ovis2",
-    "model_lang":[
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "Ovis2",
+    "model_lang": [
       "en",
       "zh"
     ],
-    "model_ability":[
+    "model_ability": [
       "chat",
       "vision"
     ],
-    "model_description":"Ovis (Open VISion) is a novel Multimodal Large Language Model (MLLM) architecture, designed to structurally align visual and textual embeddings.",
-    "model_specs":[
+    "model_description": "Ovis (Open VISion) is a novel Multimodal Large Language Model (MLLM) architecture, designed to structurally align visual and textual embeddings.",
+    "model_specs": [
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":1,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 1,
+        "quantizations": [
           "none"
         ],
-        "model_id":"AIDC-AI/Ovis2-1B",
+        "model_id": "AIDC-AI/Ovis2-1B",
         "model_hub": "modelscope"
       },
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":2,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 2,
+        "quantizations": [
           "none"
         ],
-        "model_id":"AIDC-AI/Ovis2-2B",
+        "model_id": "AIDC-AI/Ovis2-2B",
         "model_hub": "modelscope"
       },
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":4,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 4,
+        "quantizations": [
           "none"
         ],
-        "model_id":"AIDC-AI/Ovis2-4B",
+        "model_id": "AIDC-AI/Ovis2-4B",
         "model_hub": "modelscope"
       },
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":8,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
           "none"
         ],
-        "model_id":"AIDC-AI/Ovis2-8B",
+        "model_id": "AIDC-AI/Ovis2-8B",
         "model_hub": "modelscope"
       },
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":16,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 16,
+        "quantizations": [
           "none"
         ],
-        "model_id":"AIDC-AI/Ovis2-16B",
+        "model_id": "AIDC-AI/Ovis2-16B",
        "model_hub": "modelscope"
       },
       {
-        "model_format":"pytorch",
-        "model_size_in_billions":34,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
           "none"
         ],
-        "model_id":"AIDC-AI/Ovis2-34B",
+        "model_id": "AIDC-AI/Ovis2-34B",
         "model_hub": "modelscope"
       },
       {
-        "model_format":"gptq",
-        "model_size_in_billions":2,
-        "quantizations":[
+        "model_format": "gptq",
+        "model_size_in_billions": 2,
+        "quantizations": [
           "Int4"
         ],
-        "model_id":"AIDC-AI/Ovis2-2B-GPTQ-{quantization}",
+        "model_id": "AIDC-AI/Ovis2-2B-GPTQ-{quantization}",
         "model_hub": "modelscope"
       },
       {
-        "model_format":"gptq",
-        "model_size_in_billions":4,
-        "quantizations":[
+        "model_format": "gptq",
+        "model_size_in_billions": 4,
+        "quantizations": [
           "Int4"
         ],
-        "model_id":"AIDC-AI/Ovis2-4B-GPTQ-{quantization}",
+        "model_id": "AIDC-AI/Ovis2-4B-GPTQ-{quantization}",
         "model_hub": "modelscope"
       },
       {
-        "model_format":"gptq",
-        "model_size_in_billions":8,
-        "quantizations":[
+        "model_format": "gptq",
+        "model_size_in_billions": 8,
+        "quantizations": [
           "Int4"
         ],
-        "model_id":"AIDC-AI/Ovis2-8B-GPTQ-{quantization}",
+        "model_id": "AIDC-AI/Ovis2-8B-GPTQ-{quantization}",
         "model_hub": "modelscope"
       },
       {
-        "model_format":"gptq",
-        "model_size_in_billions":16,
-        "quantizations":[
+        "model_format": "gptq",
+        "model_size_in_billions": 16,
+        "quantizations": [
           "Int4"
         ],
-        "model_id":"AIDC-AI/Ovis2-16B-GPTQ-{quantization}",
+        "model_id": "AIDC-AI/Ovis2-16B-GPTQ-{quantization}",
         "model_hub": "modelscope"
       },
       {
-        "model_format":"gptq",
-        "model_size_in_billions":34,
-        "quantizations":[
+        "model_format": "gptq",
+        "model_size_in_billions": 34,
+        "quantizations": [
           "Int4",
           "Int8"
         ],
-        "model_id":"AIDC-AI/Ovis2-34B-GPTQ-{quantization}",
+        "model_id": "AIDC-AI/Ovis2-34B-GPTQ-{quantization}",
         "model_hub": "modelscope"
       }
     ],
-    "chat_template":
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
     "stop_token_ids": [
       151645,
       151643
@@ -7769,6 +7972,68 @@
       "<|im_end|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "skywork-or1",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "We release the final version of Skywork-OR1 (Open Reasoner 1) series of models, including",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "Skywork/Skywork-OR1-32B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Int8",
+          "Int4"
+        ],
+        "model_id": "JunHowie/Skywork-OR1-32B-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "Skywork/Skywork-OR1-7B",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int8",
+          "Int4"
+        ],
+        "model_id": "JunHowie/Skywork-OR1-7B-GPTQ-{quantization}",
+        "model_hub": "modelscope"
+      }
+    ],
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+    "stop_token_ids": [
+      151643
+    ],
+    "stop": [
+      "<|end▁of▁sentence|>"
+    ],
+    "reasoning_start_tag": "<think>",
+    "reasoning_end_tag": "</think>"
+  },
   {
     "version": 1,
     "context_length": 40960,
@@ -7780,6 +8045,7 @@
     "model_ability": [
       "chat",
       "reasoning",
+      "hybrid",
       "tools"
     ],
     "model_description": "Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support",
@@ -7806,10 +8072,31 @@
         "model_format": "gptq",
         "model_size_in_billions": "0_6",
         "quantizations": [
-          "Int4",
           "Int8"
         ],
-        "model_id": "
+        "model_id": "Qwen/Qwen3-0.6B-GPTQ-Int8",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "0_6",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "JunHowie/Qwen3-0.6B-GPTQ-Int4",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": "0_6",
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/Qwen3-0.6B-{quantization}",
         "model_hub": "modelscope"
       },
       {
@@ -7867,10 +8154,31 @@
         "model_format": "gptq",
         "model_size_in_billions": "1_7",
         "quantizations": [
-          "Int4",
           "Int8"
         ],
-        "model_id": "
+        "model_id": "Qwen/Qwen3-1.7B-GPTQ-Int8",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "1_7",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "JunHowie/Qwen3-1.7B-GPTQ-Int4",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": "1_7",
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/Qwen3-1.7B-{quantization}",
         "model_hub": "modelscope"
       },
       {
@@ -7924,6 +8232,15 @@
         "model_id": "Qwen/Qwen3-4B-FP8",
         "model_hub": "modelscope"
       },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen3-4B-AWQ",
+        "model_hub": "modelscope"
+      },
       {
         "model_format": "gptq",
         "model_size_in_billions": 4,
@@ -7934,6 +8251,19 @@
         "model_id": "JunHowie/Qwen3-4B-GPTQ-{quantization}",
         "model_hub": "modelscope"
       },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/Qwen3-4B-{quantization}",
+        "model_hub": "modelscope"
+      },
       {
         "model_format": "ggufv2",
         "model_size_in_billions": 4,
@@ -7985,6 +8315,15 @@
         "model_id": "Qwen/Qwen3-8B-FP8",
         "model_hub": "modelscope"
       },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen3-8B-AWQ",
+        "model_hub": "modelscope"
+      },
       {
         "model_format": "gptq",
         "model_size_in_billions": 8,
@@ -7995,6 +8334,19 @@
         "model_id": "JunHowie/Qwen3-8B-GPTQ-{quantization}",
         "model_hub": "modelscope"
       },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/Qwen3-8B-{quantization}",
+        "model_hub": "modelscope"
+      },
       {
         "model_format": "ggufv2",
         "model_size_in_billions": 8,
@@ -8046,6 +8398,15 @@
         "model_id": "Qwen/Qwen3-14B-FP8",
         "model_hub": "modelscope"
       },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen3-14B-AWQ",
+        "model_hub": "modelscope"
+      },
       {
         "model_format": "gptq",
         "model_size_in_billions": 14,
@@ -8056,6 +8417,19 @@
         "model_id": "JunHowie/Qwen3-14B-GPTQ-{quantization}",
         "model_hub": "modelscope"
       },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/Qwen3-14B-{quantization}",
+        "model_hub": "modelscope"
+      },
       {
         "model_format": "ggufv2",
         "model_size_in_billions": 14,
@@ -8114,10 +8488,32 @@
         "model_size_in_billions": 30,
         "activated_size_in_billions": 3,
         "quantizations": [
-          "Int4",
           "Int8"
         ],
-        "model_id": "JunHowie/Qwen3-30B-A3B-GPTQ-
+        "model_id": "JunHowie/Qwen3-30B-A3B-GPTQ-Int8",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 30,
+        "activated_size_in_billions": 3,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen3-30B-A3B-GPTQ-Int4",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 30,
+        "activated_size_in_billions": 3,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/Qwen3-30B-A3B-{quantization}",
         "model_hub": "modelscope"
       },
       {
@@ -8179,6 +8575,15 @@
         "model_id": "Qwen/Qwen3-32B-FP8",
         "model_hub": "modelscope"
       },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen3-32B-AWQ",
+        "model_hub": "modelscope"
+      },
       {
         "model_format": "gptq",
         "model_size_in_billions": 32,
@@ -8189,6 +8594,18 @@
         "model_id": "JunHowie/Qwen3-32B-GPTQ-{quantization}",
         "model_hub": "modelscope"
       },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/Qwen3-32B-{quantization}",
+        "model_hub": "modelscope"
+      },
       {
         "model_format": "ggufv2",
         "model_size_in_billions": 32,
@@ -8236,7 +8653,7 @@
         "quantizations": [
           "none"
         ],
-        "model_id": "Qwen/Qwen3-235B",
+        "model_id": "Qwen/Qwen3-235B-A22B",
         "model_hub": "modelscope"
       },
       {
@@ -8246,7 +8663,39 @@
         "quantizations": [
           "fp8"
         ],
-        "model_id": "Qwen/Qwen3-235B-FP8",
+        "model_id": "Qwen/Qwen3-235B-A22B-FP8",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 235,
+        "activated_size_in_billions": 22,
+        "quantizations": [
+          "Int8"
+        ],
+        "model_id": "tclf90/Qwen3-235B-A22B-GPTQ-Int8",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 235,
+        "activated_size_in_billions": 22,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen3-235B-A22B-GPTQ-Int4",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 235,
+        "activated_size_in_billions": 22,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "8bit"
+        ],
+        "model_id": "mlx-community/Qwen3-235B-A22B-{quantization}",
         "model_hub": "modelscope"
       },
       {
@@ -8362,6 +8811,7 @@
     "virtualenv": {
       "packages": [
         "transformers>=4.51.0",
+        "mlx-lm>=0.24.0 ; sys_platform=='darwin'",
        "numpy==1.26.4"
      ]
    }
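The QwQ-32B GGUF spec changed above introduces model_file_name_template, model_file_name_split_template, and a quantization_parts map. As a rough sketch of how these fields appear to combine into concrete shard file names, the snippet below expands them with plain string formatting; the expected_files helper is illustrative only and is not the function xinference itself uses.

# Illustrative only: expand the file-name templates from the QwQ-32B GGUF spec above.
file_name_template = "QwQ-32B-{quantization}.gguf"
split_template = "BF16/QwQ-32B-{quantization}-{part}.gguf"
quantization_parts = {"BF16": ["00001-of-00002", "00002-of-00002"]}

def expected_files(quantization: str) -> list[str]:
    parts = quantization_parts.get(quantization)
    if not parts:
        # single-file quantizations such as "Q4_K_M" use the plain template
        return [file_name_template.format(quantization=quantization)]
    # multi-part quantizations enumerate one file per shard listed in quantization_parts
    return [split_template.format(quantization=quantization, part=p) for p in parts]

print(expected_files("Q4_K_M"))
# ['QwQ-32B-Q4_K_M.gguf']
print(expected_files("BF16"))
# ['BF16/QwQ-32B-BF16-00001-of-00002.gguf', 'BF16/QwQ-32B-BF16-00002-of-00002.gguf']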