xinference 1.11.0.post1__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic.
- xinference/__init__.py +8 -0
- xinference/_version.py +3 -3
- xinference/api/oauth2/utils.py +26 -5
- xinference/core/model.py +1 -10
- xinference/device_utils.py +11 -1
- xinference/model/embedding/model_spec.json +70 -0
- xinference/model/image/core.py +20 -10
- xinference/model/image/model_spec.json +55 -3
- xinference/model/image/ocr/__init__.py +5 -0
- xinference/model/image/ocr/deepseek_ocr.py +958 -0
- xinference/model/llm/core.py +2 -0
- xinference/model/llm/llama_cpp/core.py +2 -0
- xinference/model/llm/llm_family.json +319 -6
- xinference/model/llm/lmdeploy/core.py +2 -0
- xinference/model/llm/sglang/core.py +2 -0
- xinference/model/llm/transformers/core.py +2 -0
- xinference/model/llm/transformers/multimodal/qwen-omni.py +60 -11
- xinference/model/llm/transformers/multimodal/qwen2_vl.py +2 -2
- xinference/model/llm/vllm/core.py +2 -0
- xinference/model/rerank/model_spec.json +368 -252
- xinference/model/rerank/sentence_transformers/core.py +10 -2
- xinference/thirdparty/indextts/gpt/transformers_generation_utils.py +71 -5
- xinference/thirdparty/indextts/gpt/transformers_gpt2.py +51 -1
- xinference/ui/gradio/media_interface.py +469 -4
- xinference/ui/gradio/utils/__init__.py +19 -0
- xinference/ui/gradio/utils/latex.py +342 -0
- xinference/ui/web/ui/build/asset-manifest.json +3 -3
- xinference/ui/web/ui/build/index.html +1 -1
- xinference/ui/web/ui/build/static/js/{main.e4d9a9e1.js → main.87d6859b.js} +3 -3
- xinference/ui/web/ui/build/static/js/{main.e4d9a9e1.js.map → main.87d6859b.js.map} +1 -1
- xinference/ui/web/ui/node_modules/.cache/babel-loader/412a6b414a8267c7a349d9beda4593cdf218abf32edaaf339e6a230df40397b8.json +1 -0
- {xinference-1.11.0.post1.dist-info → xinference-1.12.0.dist-info}/METADATA +10 -11
- {xinference-1.11.0.post1.dist-info → xinference-1.12.0.dist-info}/RECORD +38 -35
- xinference/ui/web/ui/node_modules/.cache/babel-loader/bb4e8722d2d41d87f1fce3661bc8937bffe9448e231fc5f0462630849e851592.json +0 -1
- /xinference/ui/web/ui/build/static/js/{main.e4d9a9e1.js.LICENSE.txt → main.87d6859b.js.LICENSE.txt} +0 -0
- {xinference-1.11.0.post1.dist-info → xinference-1.12.0.dist-info}/WHEEL +0 -0
- {xinference-1.11.0.post1.dist-info → xinference-1.12.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.11.0.post1.dist-info → xinference-1.12.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.11.0.post1.dist-info → xinference-1.12.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.json
CHANGED
@@ -22085,6 +22085,208 @@
             "model_id": "cpatonn-mirror/Qwen3-VL-30B-A3B-Instruct-AWQ-{quantization}"
           }
         }
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "model_src": {
+          "huggingface": {
+            "quantizations": [
+              "none"
+            ],
+            "model_id": "Qwen/Qwen3-VL-32B-Instruct"
+          },
+          "modelscope": {
+            "quantizations": [
+              "none"
+            ],
+            "model_id": "Qwen/Qwen3-VL-32B-Instruct"
+          }
+        }
+      },
+      {
+        "model_format": "fp8",
+        "model_size_in_billions": 32,
+        "model_src": {
+          "huggingface": {
+            "quantizations": [
+              "fp8"
+            ],
+            "model_id": "Qwen/Qwen3-VL-32B-Instruct-FP8"
+          },
+          "modelscope": {
+            "quantizations": [
+              "fp8"
+            ],
+            "model_id": "Qwen/Qwen3-VL-32B-Instruct-FP8"
+          }
+        }
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 32,
+        "model_src": {
+          "huggingface": {
+            "quantizations": [
+              "Int4"
+            ],
+            "model_id": "QuantTrio/Qwen3-VL-32B-Instruct-AWQ"
+          },
+          "modelscope": {
+            "quantizations": [
+              "Int4"
+            ],
+            "model_id": "tclf90/Qwen3-VL-32B-Instruct-AWQ"
+          }
+        }
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "model_src": {
+          "huggingface": {
+            "quantizations": [
+              "none"
+            ],
+            "model_id": "Qwen/Qwen3-VL-8B-Instruct"
+          },
+          "modelscope": {
+            "quantizations": [
+              "none"
+            ],
+            "model_id": "Qwen/Qwen3-VL-8B-Instruct"
+          }
+        }
+      },
+      {
+        "model_format": "fp8",
+        "model_size_in_billions": 8,
+        "model_src": {
+          "huggingface": {
+            "quantizations": [
+              "fp8"
+            ],
+            "model_id": "Qwen/Qwen3-VL-8B-Instruct-FP8"
+          },
+          "modelscope": {
+            "quantizations": [
+              "fp8"
+            ],
+            "model_id": "Qwen/Qwen3-VL-8B-Instruct-FP8"
+          }
+        }
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "model_src": {
+          "huggingface": {
+            "quantizations": [
+              "4bit",
+              "8bit"
+            ],
+            "model_id": "cpatonn/Qwen3-VL-8B-Instruct-AWQ-{quantization}"
+          },
+          "modelscope": {
+            "quantizations": [
+              "4bit",
+              "8bit"
+            ],
+            "model_id": "cpatonn-mirror/Qwen3-VL-8B-Instruct-AWQ-{quantization}"
+          }
+        }
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 4,
+        "model_src": {
+          "huggingface": {
+            "quantizations": [
+              "none"
+            ],
+            "model_id": "Qwen/Qwen3-VL-4B-Instruct"
+          },
+          "modelscope": {
+            "quantizations": [
+              "none"
+            ],
+            "model_id": "Qwen/Qwen3-VL-4B-Instruct"
+          }
+        }
+      },
+      {
+        "model_format": "fp8",
+        "model_size_in_billions": 4,
+        "model_src": {
+          "huggingface": {
+            "quantizations": [
+              "fp8"
+            ],
+            "model_id": "Qwen/Qwen3-VL-4B-Instruct-FP8"
+          },
+          "modelscope": {
+            "quantizations": [
+              "fp8"
+            ],
+            "model_id": "Qwen/Qwen3-VL-4B-Instruct-FP8"
+          }
+        }
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 4,
+        "model_src": {
+          "huggingface": {
+            "quantizations": [
+              "4bit",
+              "8bit"
+            ],
+            "model_id": "cpatonn/Qwen3-VL-4B-Instruct-AWQ-{quantization}"
+          },
+          "modelscope": {
+            "quantizations": [
+              "4bit",
+              "8bit"
+            ],
+            "model_id": "cpatonn-mirror/Qwen3-VL-4B-Instruct-AWQ-{quantization}"
+          }
+        }
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 2,
+        "model_src": {
+          "huggingface": {
+            "quantizations": [
+              "none"
+            ],
+            "model_id": "Qwen/Qwen3-VL-2B-Instruct"
+          },
+          "modelscope": {
+            "quantizations": [
+              "none"
+            ],
+            "model_id": "Qwen/Qwen3-VL-2B-Instruct"
+          }
+        }
+      },
+      {
+        "model_format": "fp8",
+        "model_size_in_billions": 2,
+        "model_src": {
+          "huggingface": {
+            "quantizations": [
+              "fp8"
+            ],
+            "model_id": "Qwen/Qwen3-VL-2B-Instruct-FP8"
+          },
+          "modelscope": {
+            "quantizations": [
+              "fp8"
+            ],
+            "model_id": "Qwen/Qwen3-VL-2B-Instruct-FP8"
+          }
+        }
       }
     ],
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {%- if messages[0].content is string %}\n {{- messages[0].content }}\n {%- else %}\n {%- for content in messages[0].content %}\n {%- if 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].content is string %}\n {{- messages[0].content }}\n {%- else %}\n {%- for content in messages[0].content %}\n {%- if 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set image_count = namespace(value=0) %}\n{%- set video_count = namespace(value=0) %}\n{%- for message in messages %}\n {%- if message.role == \"user\" %}\n {{- '<|im_start|>' + message.role + '\\n' }}\n {%- if message.content is string %}\n {{- message.content }}\n {%- else %}\n {%- for content in message.content %}\n {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}\n {%- set image_count.value = image_count.value + 1 %}\n {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}\n <|vision_start|><|image_pad|><|vision_end|>\n {%- elif content.type == 'video' or 'video' in content %}\n {%- set video_count.value = video_count.value + 1 %}\n {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}\n <|vision_start|><|video_pad|><|vision_end|>\n {%- elif 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role + '\\n' }}\n {%- if message.content is string %}\n {{- message.content }}\n {%- else %}\n {%- for content_item in message.content %}\n {%- if 'text' in content_item %}\n {{- content_item.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and message.content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {%- if message.content is string %}\n {{- message.content }}\n {%- else %}\n {%- for content in message.content %}\n {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}\n {%- set image_count.value = image_count.value + 1 %}\n {%- if 
add_vision_id %}Picture {{ image_count.value }}: {% endif -%}\n <|vision_start|><|image_pad|><|vision_end|>\n {%- elif content.type == 'video' or 'video' in content %}\n {%- set video_count.value = video_count.value + 1 %}\n {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}\n <|vision_start|><|video_pad|><|vision_end|>\n {%- elif 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
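The specs above extend the existing Qwen3-VL Instruct family with 32B, 8B, 4B and 2B sizes in pytorch, fp8 and awq formats. A minimal sketch of launching one of them against a local Xinference server; the registry name "Qwen3-VL-Instruct" and the engine choice are assumptions, so check the output of xinference registrations --model-type LLM for the exact entry on your installation:

from xinference.client import Client

client = Client("http://localhost:9997")
uid = client.launch_model(
    model_name="Qwen3-VL-Instruct",   # assumed registry name, verify locally
    model_engine="transformers",      # assumed engine; vLLM can also serve these specs
    model_format="awq",               # one of the formats added above
    model_size_in_billions=32,
    quantization="Int4",              # maps to QuantTrio/Qwen3-VL-32B-Instruct-AWQ
)
model = client.get_model(uid)
print(model.chat(messages=[{"role": "user", "content": "Hello"}]))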
@@ -22579,14 +22781,14 @@
           "huggingface": {
             "quantizations": [
               "4bit",
-
+              "8bit"
             ],
             "model_id": "cpatonn/Qwen3-Omni-30B-A3B-Thinking-AWQ-{quantization}"
           },
           "modelscope": {
             "quantizations": [
               "4bit",
-
+              "8bit"
             ],
             "model_id": "cpatonn-mirror/Qwen3-Omni-30B-A3B-Thinking-AWQ-{quantization}"
           }
@@ -22604,7 +22806,15 @@
     ],
     "reasoning_start_tag": "<think>",
     "reasoning_end_tag": "</think>",
-    "tool_parser":"qwen"
+    "tool_parser": "qwen",
+    "virtualenv": {
+      "packages": [
+        "transformers==4.57.1",
+        "#system_numpy#",
+        "qwen_omni_utils",
+        "soundfile"
+      ]
+    }
   },
   {
     "version": 2,
@@ -22650,14 +22860,14 @@
           "huggingface": {
             "quantizations": [
               "4bit",
-
+              "8bit"
             ],
             "model_id": "cpatonn/Qwen3-Omni-30B-A3B-Instruct-AWQ-{quantization}"
           },
           "modelscope": {
             "quantizations": [
               "4bit",
-
+              "8bit"
             ],
             "model_id": "cpatonn-mirror/Qwen3-Omni-30B-A3B-Instruct-AWQ-{quantization}"
           }
@@ -22673,6 +22883,109 @@
       "<|endoftext|>",
       "<|im_end|>"
     ],
-    "tool_parser":"qwen"
+    "tool_parser": "qwen",
+    "virtualenv": {
+      "packages": [
+        "transformers==4.57.1",
+        "#system_numpy#",
+        "qwen_omni_utils",
+        "soundfile"
+      ]
+    }
+  },
+  {
+    "model_name": "MiniMax-M2",
+    "model_description": "MiniMax-M2, a Mini model built for Max coding & agentic workflows.",
+    "context_length": 196608,
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "tools",
+      "reasoning"
+    ],
+    "model_specs": [
+      {
+        "model_size_in_billions": 230,
+        "activated_size_in_billions": 3,
+        "model_format": "pytorch",
+        "model_src": {
+          "huggingface": {
+            "model_id": "MiniMaxAI/MiniMax-M2",
+            "quantizations": [
+              "none"
+            ]
+          },
+          "modelscope": {
+            "model_id": "MiniMax/MiniMax-M2",
+            "quantizations": [
+              "none"
+            ]
+          }
+        }
+      },
+      {
+        "model_size_in_billions": 230,
+        "activated_size_in_billions": 3,
+        "model_format": "awq",
+        "model_src": {
+          "huggingface": {
+            "model_id": "QuantTrio/MiniMax-M2-AWQ",
+            "quantizations": [
+              "Int4"
+            ]
+          },
+          "modelscope": {
+            "model_id": "tclf90/MiniMax-M2-AWQ",
+            "quantizations": [
+              "Int4"
+            ]
+          }
+        }
+      },
+      {
+        "model_size_in_billions": 230,
+        "activated_size_in_billions": 3,
+        "model_format": "mlx",
+        "model_src": {
+          "huggingface": {
+            "model_id": "mlx-community/MiniMax-M2-{quantization}",
+            "quantizations": [
+              "3bit",
+              "4bit",
+              "5bit",
+              "6bit",
+              "8bit"
+            ]
+          },
+          "modelscope": {
+            "model_id": "mlx-community/MiniMax-M2-{quantization}",
+            "quantizations": [
+              "3bit",
+              "4bit",
+              "5bit",
+              "6bit",
+              "8bit"
+            ]
+          }
+        }
+      }
+    ],
"chat_template": "{# ----------‑‑‑ special token variables ‑‑‑---------- #}\n{%- set toolcall_begin_token = '<minimax:tool_call>' -%}\n{%- set toolcall_end_token = '</minimax:tool_call>' -%}\n{#- Tool Rendering Functions ============================================== -#}\n{%- macro render_tool_namespace(namespace_name, tool_list) -%}\n{%- for tool in tool_list -%}\n<tool>{{ tool.function | tojson(ensure_ascii=False) }}</tool>\n{% endfor -%}\n{%- endmacro -%}\n{%- macro visible_text(content) -%}\n {%- if content is string -%}\n {{ content }}\n {%- elif content is iterable and content is not mapping -%}\n {%- for item in content -%}\n {%- if item is mapping and item.type == 'text' -%}\n {{- item.text }}\n {%- elif item is string -%}\n {{- item }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{- content }}\n {%- endif -%}\n{%- endmacro -%}\n{#- System Message Construction ============================================ -#}\n{%- macro build_system_message(system_message) -%}\n {%- if system_message and system_message.content -%}\n {{- visible_text(system_message.content) }}\n {%- else -%}\n {%- if model_identity is not defined -%}\n {%- set model_identity = \"You are a helpful assistant.\" -%}\n {%- endif -%}\n {{- model_identity }}\n {%- endif -%}\n \n {#- Handle current_date -#}\n {%- if system_message and system_message.current_date -%}\n {{- '\\n' ~ 'Current date: ' + system_message.current_date }}\n {%- endif -%}\n {#- Handle current_location -#}\n {%- if system_message and system_message.current_location -%}\n {{- '\\n' ~ 'Current location: ' + system_message.current_location }}\n {%- endif -%}\n{%- endmacro -%}\n{#- Main Template Logic ================================================= -#}\n{#- Extract system message (only first message if it's system) -#}\n{%- set system_message = none -%}\n{%- set conversation_messages = messages -%}\n{%- if messages and messages[0].role == \"system\" -%}\n {%- set system_message = messages[0] -%}\n {%- set conversation_messages = messages[1:] -%}\n{%- endif -%}\n{#- Get the last user message turn, for interleved thinking -#}\n{%- set ns = namespace(last_user_index=-1) %}\n{% for m in conversation_messages %}\n {%- if m.role == 'user' %}\n {% set ns.last_user_index = loop.index0 -%}\n {%- endif %}\n{%- endfor %}\n{#- Render system message -#}\n{{- ']~!b[' ~ ']~b]system' ~ '\\n' }}\n{{- build_system_message(system_message) }}\n{#- Render tools if available -#}\n{%- if tools -%}\n {{- '\\n\\n' ~ '# Tools' ~ '\\n' ~ 'You may call one or more tools to assist with the user query.\\nHere are the tools available in JSONSchema format:' ~ '\\n' }}\n {{- '\\n' ~ '<tools>' ~ '\\n' }}\n {{- render_tool_namespace(\"functions\", tools) }}\n {{- '</tools>' ~ '\\n\\n' }}\n{{- 'When making tool calls, use XML format to invoke tools and pass parameters:' ~ '\\n' }}\n{{- '\\n' ~ toolcall_begin_token }}\n<invoke name=\"tool-name-1\">\n<parameter name=\"param-key-1\">param-value-1</parameter>\n<parameter name=\"param-key-2\">param-value-2</parameter>\n...\n</invoke>\n{{- '\\n' ~ toolcall_end_token }}\n{%- endif -%}\n{{- '[e~[\\n' }}\n\n{#- Render messages -#}\n{%- set last_tool_call = namespace(name=none) -%}\n{%- for message in conversation_messages -%}\n {%- if message.role == 'assistant' -%}\n {#- Only render reasoning_content if no user message follows -#}\n {{- ']~b]ai' ~ '\\n' }}\n\n {%- set reasoning_content = '' %}\n {%- set content = visible_text(message.content) %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = 
message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].strip('\\n').split('<think>')[-1].strip('\\n') %}\n {%- set content = content.split('</think>')[-1].strip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if reasoning_content and loop.index0 > ns.last_user_index -%}\n {{- '<think>' ~ '\\n' ~ reasoning_content ~ '\\n' ~ '</think>' ~ '\\n\\n' }}\n {%- endif -%}\n {%- if content -%}\n {{- content }}\n {%- endif -%}\n {%- if message.tool_calls -%}\n {{- '\\n' ~ toolcall_begin_token ~ '\\n' }}\n\n {%- for tool_call in message.tool_calls -%}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<invoke name=\"' + tool_call.name + '\">' }}\n {% set _args = tool_call.arguments %}\n {%- for k, v in _args.items() %}\n {{- '<parameter name=\"' + k + '\">' }}\n {{- v | tojson(ensure_ascii=False) if v is not string else v }}\n {{- '</parameter>' }}\n {% endfor %}\n {{- '</invoke>' ~ '\\n' }}\n {%- endfor -%}\n \n {{- toolcall_end_token}}\n {%- set last_tool_call.name = message.tool_calls[-1].name -%}\n {%- else -%}\n {%- set last_tool_call.name = none -%}\n {%- endif -%}\n {{- '[e~[' ~ '\\n' }}\n \n {%- elif message.role == 'tool' -%}\n {%- if last_tool_call.name is none -%}\n {{- raise_exception(\"Message has tool role, but there was no previous assistant message with a tool call!\") }}\n {%- endif -%}\n {%- if loop.first or (conversation_messages[loop.index0 - 1].role != 'tool') -%}\n {{- ']~b]tool' }}\n {%- endif -%}\n {%- if message.content is string -%}\n {{- '\\n<response>' }}\n {{- message.content }}\n {{- '</response>' }}\n {%- else -%}\n {%- for tr in message.content -%}\n {{- '\\n<response>' }}\n {{- tr.output if tr.output is defined else (tr.text if tr.type == 'text' and tr.text is defined else tr) }}\n {{- '\\n</response>' }}\n {%- endfor -%}\n {%- endif -%}\n {%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%}\n {{- '[e~[\\n' -}}\n {%- endif -%}\n \n {%- elif message.role == 'user' -%}\n {{- ']~b]user' ~ '\\n' }}\n {{- visible_text(message.content) }}\n {{- '[e~[' ~ '\\n' }}\n {%- endif -%}\n{%- endfor -%}\n\n{#- Generation prompt -#}\n{%- if add_generation_prompt -%}\n{{- ']~b]ai' ~ '\\n' ~ '<think>' ~ '\\n' }}\n{%- endif -%}",
+    "stop_token_ids": [
+      200020
+    ],
+    "stop": [
+      "[e~["
+    ],
+    "reasoning_start_tag": "<think>",
+    "reasoning_end_tag": "</think>",
+    "tool_parser": "minimax",
+    "version": 2,
+    "virtualenv": {
+      "packages": []
+    }
   }
 ]
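The new MiniMax-M2 entry registers a 230B-parameter (3B activated) MoE model in pytorch, awq and mlx formats with the minimax tool parser. A minimal sketch of launching it through the Python client; the endpoint and the engine choice are assumptions about your deployment:

from xinference.client import Client

client = Client("http://localhost:9997")
uid = client.launch_model(
    model_name="MiniMax-M2",          # registry name added in this release
    model_engine="vLLM",              # assumed engine; any engine supporting the format works
    model_format="awq",
    model_size_in_billions=230,
    quantization="Int4",              # QuantTrio/MiniMax-M2-AWQ per the spec above
)
model = client.get_model(uid)
print(model.chat(messages=[{"role": "user", "content": "Write a quicksort in Python."}]))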

xinference/model/llm/transformers/multimodal/qwen-omni.py
CHANGED
@@ -19,6 +19,8 @@ import uuid
 from threading import Thread
 from typing import Any, Dict, Iterator, List, Optional, Tuple
 
+import torch
+
 from .....types import (
     ChatCompletion,
     ChatCompletionAudio,
@@ -35,12 +37,20 @@ logger = logging.getLogger(__name__)
 
 @register_transformer
 @register_non_default_model("qwen2.5-omni")
-class Qwen2_5OmniChatModel(PytorchMultiModalModel):
+@register_non_default_model("Qwen3-Omni-Thinking")
+@register_non_default_model("Qwen3-Omni-Instruct")
+class QwenOmniChatModel(PytorchMultiModalModel):
     DEFAULT_SYSTEM_PROMPT = (
         "You are Qwen, a virtual human developed by the Qwen Team, Alibaba Group, "
         "capable of perceiving auditory and visual inputs, as well as generating text and speech."
     )
 
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # 2.5 or 3
+        model_family = self.model_family.model_family or self.model_family.model_name
+        self._omni_version = "2.5" if "2.5" in model_family else "3"
+
     @classmethod
     def match_json(
         cls, model_family: "LLMFamilyV2", model_spec: "LLMSpecV1", quantization: str
@@ -48,7 +58,10 @@ class Qwen2_5OmniChatModel(PytorchMultiModalModel):
         if model_spec.model_format not in ["pytorch", "gptq", "awq", "bnb"]:
             return False
         llm_family = model_family.model_family or model_family.model_name
-        if "qwen2.5-omni".lower() in llm_family.lower():
+        if (
+            "qwen2.5-omni".lower() in llm_family.lower()
+            or "qwen3-omni".lower() in llm_family.lower()
+        ):
             return True
         return False
 
@@ -58,15 +71,25 @@ class Qwen2_5OmniChatModel(PytorchMultiModalModel):
         self._device = device
 
     def load_processor(self):
-        from transformers import Qwen2_5OmniProcessor
+        if self._omni_version == "2.5":
+            from transformers import Qwen2_5OmniProcessor as QwenOminiProcessor
+        else:
+            from transformers import Qwen3OmniMoeProcessor as QwenOminiProcessor
 
-        self._processor = Qwen2_5OmniProcessor.from_pretrained(
+        self._processor = QwenOminiProcessor.from_pretrained(
             self.model_path, trust_remote_code=True
         )
         self._tokenizer = self._processor.tokenizer
 
     def load_multimodal_model(self):
-        from transformers import Qwen2_5OmniForConditionalGeneration
+        if self._omni_version == "2.5":
+            from transformers import (
+                Qwen2_5OmniForConditionalGeneration as QwenOmniForConditionalGeneration,
+            )
+        else:
+            from transformers import (
+                Qwen3OmniMoeForConditionalGeneration as QwenOmniForConditionalGeneration,
+            )
 
         # for multiple GPU, set back to auto to make multiple devices work
         device = "auto" if self._device == "cuda" else self._device
@@ -79,7 +102,7 @@ class Qwen2_5OmniChatModel(PytorchMultiModalModel):
         kwargs = self.apply_bnb_quantization(kwargs)
         logger.debug("Loading model with extra kwargs: %s", kwargs)
 
-        self._model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
+        self._model = QwenOmniForConditionalGeneration.from_pretrained(
             self.model_path,
             torch_dtype="auto",
             device_map=device,
@@ -181,11 +204,37 @@ class Qwen2_5OmniChatModel(PytorchMultiModalModel):
         inputs = self.build_inputs_from_messages(messages, generate_config)  # type: ignore
         use_audio_in_video = generate_config.get("use_audio_in_video", True)
         gen_kwargs = dict(**inputs, **config, use_audio_in_video=use_audio_in_video)
-
-
-
-
-
+        # === Run model.generate() (handle both (ids, audio) and ids-only cases) ===
+        result = self._model.generate(**gen_kwargs)
+        if isinstance(result, tuple) and len(result) == 2:
+            # Qwen2.5-Omni returns (generated_ids, audio)
+            generated_ids, audio = result
+        else:
+            # Qwen3-Omni returns only generated_ids
+            generated_ids, audio = result, None
+        if hasattr(generated_ids, "sequences"):
+            generated_ids = generated_ids.sequences
+
+        # === Handle text decoding ===
+        input_len = inputs.input_ids.shape[1]
+        # Ensure we have a consistent 2D structure
+        # Normalize to list[list[int]]
+        if isinstance(generated_ids, torch.Tensor):
+            generated_ids = generated_ids.tolist()
+        elif isinstance(generated_ids, list) and all(
+            isinstance(x, int) for x in generated_ids
+        ):
+            # Single sequence as flat list of ints
+            generated_ids = [generated_ids]
+        elif isinstance(generated_ids, list) and all(
+            isinstance(x, list) for x in generated_ids
+        ):
+            pass  # already correct
+        else:
+            raise TypeError(f"Unexpected generated_ids type: {type(generated_ids)}")
+
+        # Remove prompt tokens
+        generated_ids_trimmed = [out_ids[input_len:] for out_ids in generated_ids]
         output_text = self._processor.batch_decode(
             generated_ids_trimmed,
             skip_special_tokens=True,
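The block added above normalizes the different shapes model.generate() can return: a (generated_ids, audio) tuple for Qwen2.5-Omni, ids only for Qwen3-Omni, a GenerateOutput object exposing sequences, a tensor, or plain lists. A self-contained restatement of that normalization, handy for exercising the logic in isolation (the helper name is local to this sketch, not part of xinference):

from typing import Any, List, Optional, Tuple

import torch


def normalize_generate_result(result: Any) -> Tuple[List[List[int]], Optional[Any]]:
    # Qwen2.5-Omni returns (generated_ids, audio); Qwen3-Omni returns ids only.
    if isinstance(result, tuple) and len(result) == 2:
        ids, audio = result
    else:
        ids, audio = result, None
    if hasattr(ids, "sequences"):  # GenerateOutput-style object
        ids = ids.sequences
    if isinstance(ids, torch.Tensor):  # (batch, seq_len) tensor
        ids = ids.tolist()
    elif isinstance(ids, list) and all(isinstance(x, int) for x in ids):
        ids = [ids]  # single flat sequence
    elif not (isinstance(ids, list) and all(isinstance(x, list) for x in ids)):
        raise TypeError(f"Unexpected generated_ids type: {type(ids)}")
    return ids, audio


# normalize_generate_result((torch.tensor([[1, 2, 3]]), None)) -> ([[1, 2, 3]], None)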

xinference/model/llm/transformers/multimodal/qwen2_vl.py
CHANGED
@@ -102,9 +102,9 @@ class Qwen2VLChatModel(PytorchMultiModalModel):
 
         kwargs = self.apply_bnb_quantization()
         llm_family = self.model_family.model_family or self.model_family.model_name
-        if "qwen2.5" in llm_family:
+        if "qwen2.5" in llm_family.lower():
             model_cls = Qwen2_5_VLForConditionalGeneration
-        elif "qwen3" in llm_family:
+        elif "qwen3" in llm_family.lower():
             model_cls = AutoModelForImageTextToText
         else:
             model_cls = Qwen2VLForConditionalGeneration
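The qwen2_vl.py change makes the family-name check case-insensitive, so mixed-case family names still route to the Qwen3 code path (AutoModelForImageTextToText) instead of falling through to the Qwen2-VL default. A tiny standalone illustration; the family name used here is only an example:

llm_family = "Qwen3-VL-Instruct"
print("qwen3" in llm_family)          # False: the old check would fall through
print("qwen3" in llm_family.lower())  # True: the new check selects AutoModelForImageTextToText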