xinference 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +50 -1
- xinference/client/restful/restful_client.py +82 -2
- xinference/constants.py +3 -0
- xinference/core/chat_interface.py +297 -83
- xinference/core/model.py +1 -0
- xinference/core/progress_tracker.py +16 -8
- xinference/core/supervisor.py +45 -1
- xinference/core/worker.py +262 -37
- xinference/deploy/cmdline.py +33 -1
- xinference/model/audio/core.py +11 -1
- xinference/model/audio/megatts.py +105 -0
- xinference/model/audio/model_spec.json +24 -1
- xinference/model/audio/model_spec_modelscope.json +26 -1
- xinference/model/core.py +14 -0
- xinference/model/embedding/core.py +6 -1
- xinference/model/flexible/core.py +6 -1
- xinference/model/image/core.py +6 -1
- xinference/model/image/model_spec.json +17 -1
- xinference/model/image/model_spec_modelscope.json +17 -1
- xinference/model/llm/__init__.py +0 -4
- xinference/model/llm/core.py +4 -0
- xinference/model/llm/llama_cpp/core.py +40 -16
- xinference/model/llm/llm_family.json +413 -84
- xinference/model/llm/llm_family.py +24 -1
- xinference/model/llm/llm_family_modelscope.json +447 -0
- xinference/model/llm/mlx/core.py +16 -2
- xinference/model/llm/transformers/__init__.py +14 -0
- xinference/model/llm/transformers/core.py +30 -6
- xinference/model/llm/transformers/gemma3.py +17 -2
- xinference/model/llm/transformers/intern_vl.py +28 -18
- xinference/model/llm/transformers/minicpmv26.py +21 -2
- xinference/model/llm/transformers/qwen-omni.py +308 -0
- xinference/model/llm/transformers/qwen2_audio.py +1 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -4
- xinference/model/llm/utils.py +11 -1
- xinference/model/llm/vllm/core.py +35 -0
- xinference/model/llm/vllm/distributed_executor.py +8 -2
- xinference/model/rerank/core.py +6 -1
- xinference/model/utils.py +118 -1
- xinference/model/video/core.py +6 -1
- xinference/thirdparty/megatts3/__init__.py +0 -0
- xinference/thirdparty/megatts3/tts/frontend_function.py +175 -0
- xinference/thirdparty/megatts3/tts/gradio_api.py +93 -0
- xinference/thirdparty/megatts3/tts/infer_cli.py +277 -0
- xinference/thirdparty/megatts3/tts/modules/aligner/whisper_small.py +318 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/ar_dur_predictor.py +362 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/layers.py +64 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/nar_tts_modules.py +73 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rel_transformer.py +403 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rot_transformer.py +649 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/seq_utils.py +342 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/transformer.py +767 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/cfm.py +309 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/dit.py +180 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/time_embedding.py +44 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/transformer.py +230 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/diag_gaussian.py +67 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/hifigan_modules.py +283 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/seanet_encoder.py +38 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/wavvae_v3.py +60 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/conv.py +154 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/lstm.py +51 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/seanet.py +126 -0
- xinference/thirdparty/megatts3/tts/utils/audio_utils/align.py +36 -0
- xinference/thirdparty/megatts3/tts/utils/audio_utils/io.py +95 -0
- xinference/thirdparty/megatts3/tts/utils/audio_utils/plot.py +90 -0
- xinference/thirdparty/megatts3/tts/utils/commons/ckpt_utils.py +171 -0
- xinference/thirdparty/megatts3/tts/utils/commons/hparams.py +215 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/dict.json +1 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/ph_tone_convert.py +94 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/split_text.py +90 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/text_encoder.py +280 -0
- xinference/types.py +10 -0
- xinference/utils.py +54 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.0f6523be.css +2 -0
- xinference/web/ui/build/static/css/main.0f6523be.css.map +1 -0
- xinference/web/ui/build/static/js/main.58bd483c.js +3 -0
- xinference/web/ui/build/static/js/main.58bd483c.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3bff8cbe9141f937f4d98879a9771b0f48e0e4e0dbee8e647adbfe23859e7048.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4500b1a622a031011f0a291701e306b87e08cbc749c50e285103536b85b6a914.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/69081049f0c7447544b7cfd73dd13d8846c02fe5febe4d81587e95c89a412d5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bf2b211b0d1b6465eff512d64c869d748f803c5651a7c24e48de6ea3484a7bfe.json +1 -0
- xinference/web/ui/src/locales/en.json +2 -1
- xinference/web/ui/src/locales/zh.json +2 -1
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/METADATA +127 -114
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/RECORD +96 -60
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/WHEEL +1 -1
- xinference/web/ui/build/static/css/main.b494ae7e.css +0 -2
- xinference/web/ui/build/static/css/main.b494ae7e.css.map +0 -1
- xinference/web/ui/build/static/js/main.5ca4eea1.js +0 -3
- xinference/web/ui/build/static/js/main.5ca4eea1.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f0967acaec5df1d45b80010949c258d64297ebbb0f44b8bb3afcbd45c6f0ec4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/68249645124f37d01eef83b1d897e751f895bea919b6fb466f907c1f87cebc84.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e547bbb18abb4a474b675a8d5782d25617566bea0af8caa9b836ce5649e2250a.json +0 -1
- /xinference/web/ui/build/static/js/{main.5ca4eea1.js.LICENSE.txt → main.58bd483c.js.LICENSE.txt} +0 -0
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info/licenses}/LICENSE +0 -0
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family_modelscope.json
CHANGED
|
@@ -5038,6 +5038,169 @@
|
|
|
5038
5038
|
"stop_token_ids": [],
|
|
5039
5039
|
"stop": []
|
|
5040
5040
|
},
|
|
5041
|
+
{
|
|
5042
|
+
"version": 1,
|
|
5043
|
+
"context_length": 8192,
|
|
5044
|
+
"model_name": "InternVL3",
|
|
5045
|
+
"model_lang": [
|
|
5046
|
+
"en",
|
|
5047
|
+
"zh"
|
|
5048
|
+
],
|
|
5049
|
+
"model_ability": [
|
|
5050
|
+
"chat",
|
|
5051
|
+
"vision"
|
|
5052
|
+
],
|
|
5053
|
+
"model_description": "InternVL3, an advanced multimodal large language model (MLLM) series that demonstrates superior overall performance.",
|
|
5054
|
+
"model_specs": [
|
|
5055
|
+
{
|
|
5056
|
+
"model_format": "pytorch",
|
|
5057
|
+
"model_size_in_billions": 1,
|
|
5058
|
+
"quantizations": [
|
|
5059
|
+
"8-bit",
|
|
5060
|
+
"none"
|
|
5061
|
+
],
|
|
5062
|
+
"model_id": "OpenGVLab/InternVL3-1B",
|
|
5063
|
+
"model_hub": "modelscope",
|
|
5064
|
+
"model_revision": "master"
|
|
5065
|
+
},
|
|
5066
|
+
{
|
|
5067
|
+
"model_format": "awq",
|
|
5068
|
+
"model_size_in_billions": 1,
|
|
5069
|
+
"quantizations": [
|
|
5070
|
+
"Int4"
|
|
5071
|
+
],
|
|
5072
|
+
"model_id": "OpenGVLab/InternVL3-1B-AWQ",
|
|
5073
|
+
"model_hub": "modelscope"
|
|
5074
|
+
},
|
|
5075
|
+
{
|
|
5076
|
+
"model_format": "pytorch",
|
|
5077
|
+
"model_size_in_billions": 2,
|
|
5078
|
+
"quantizations": [
|
|
5079
|
+
"8-bit",
|
|
5080
|
+
"none"
|
|
5081
|
+
],
|
|
5082
|
+
"model_id": "OpenGVLab/InternVL3-2B",
|
|
5083
|
+
"model_hub": "modelscope",
|
|
5084
|
+
"model_revision": "master"
|
|
5085
|
+
},
|
|
5086
|
+
{
|
|
5087
|
+
"model_format": "awq",
|
|
5088
|
+
"model_size_in_billions": 2,
|
|
5089
|
+
"quantizations": [
|
|
5090
|
+
"Int4"
|
|
5091
|
+
],
|
|
5092
|
+
"model_id": "OpenGVLab/InternVL3-2B-AWQ",
|
|
5093
|
+
"model_hub": "modelscope"
|
|
5094
|
+
},
|
|
5095
|
+
{
|
|
5096
|
+
"model_format": "pytorch",
|
|
5097
|
+
"model_size_in_billions": 8,
|
|
5098
|
+
"quantizations": [
|
|
5099
|
+
"8-bit",
|
|
5100
|
+
"none"
|
|
5101
|
+
],
|
|
5102
|
+
"model_id": "OpenGVLab/InternVL3-8B",
|
|
5103
|
+
"model_hub": "modelscope",
|
|
5104
|
+
"model_revision": "master"
|
|
5105
|
+
},
|
|
5106
|
+
{
|
|
5107
|
+
"model_format": "awq",
|
|
5108
|
+
"model_size_in_billions": 8,
|
|
5109
|
+
"quantizations": [
|
|
5110
|
+
"Int4"
|
|
5111
|
+
],
|
|
5112
|
+
"model_id": "OpenGVLab/InternVL3-8B-AWQ",
|
|
5113
|
+
"model_hub": "modelscope"
|
|
5114
|
+
},
|
|
5115
|
+
{
|
|
5116
|
+
"model_format": "pytorch",
|
|
5117
|
+
"model_size_in_billions": 9,
|
|
5118
|
+
"quantizations": [
|
|
5119
|
+
"8-bit",
|
|
5120
|
+
"none"
|
|
5121
|
+
],
|
|
5122
|
+
"model_id": "OpenGVLab/InternVL3-9B",
|
|
5123
|
+
"model_hub": "modelscope",
|
|
5124
|
+
"model_revision": "master"
|
|
5125
|
+
},
|
|
5126
|
+
{
|
|
5127
|
+
"model_format": "awq",
|
|
5128
|
+
"model_size_in_billions": 9,
|
|
5129
|
+
"quantizations": [
|
|
5130
|
+
"Int4"
|
|
5131
|
+
],
|
|
5132
|
+
"model_id": "OpenGVLab/InternVL3-9B-AWQ",
|
|
5133
|
+
"model_hub": "modelscope"
|
|
5134
|
+
},
|
|
5135
|
+
{
|
|
5136
|
+
"model_format": "pytorch",
|
|
5137
|
+
"model_size_in_billions": 14,
|
|
5138
|
+
"quantizations": [
|
|
5139
|
+
"8-bit",
|
|
5140
|
+
"none"
|
|
5141
|
+
],
|
|
5142
|
+
"model_id": "OpenGVLab/InternVL3-14B",
|
|
5143
|
+
"model_hub": "modelscope",
|
|
5144
|
+
"model_revision": "master"
|
|
5145
|
+
},
|
|
5146
|
+
{
|
|
5147
|
+
"model_format": "awq",
|
|
5148
|
+
"model_size_in_billions": 14,
|
|
5149
|
+
"quantizations": [
|
|
5150
|
+
"Int4"
|
|
5151
|
+
],
|
|
5152
|
+
"model_id": "OpenGVLab/InternVL3-14B-AWQ",
|
|
5153
|
+
"model_hub": "modelscope"
|
|
5154
|
+
},
|
|
5155
|
+
{
|
|
5156
|
+
"model_format": "pytorch",
|
|
5157
|
+
"model_size_in_billions": 38,
|
|
5158
|
+
"quantizations": [
|
|
5159
|
+
"8-bit",
|
|
5160
|
+
"none"
|
|
5161
|
+
],
|
|
5162
|
+
"model_id": "OpenGVLab/InternVL3-38B",
|
|
5163
|
+
"model_hub": "modelscope",
|
|
5164
|
+
"model_revision": "master"
|
|
5165
|
+
},
|
|
5166
|
+
{
|
|
5167
|
+
"model_format": "awq",
|
|
5168
|
+
"model_size_in_billions": 38,
|
|
5169
|
+
"quantizations": [
|
|
5170
|
+
"Int4"
|
|
5171
|
+
],
|
|
5172
|
+
"model_id": "OpenGVLab/InternVL3-38B-AWQ",
|
|
5173
|
+
"model_hub": "modelscope"
|
|
5174
|
+
},
|
|
5175
|
+
{
|
|
5176
|
+
"model_format": "pytorch",
|
|
5177
|
+
"model_size_in_billions": 78,
|
|
5178
|
+
"quantizations": [
|
|
5179
|
+
"8-bit",
|
|
5180
|
+
"none"
|
|
5181
|
+
],
|
|
5182
|
+
"model_id": "OpenGVLab/InternVL3-78B",
|
|
5183
|
+
"model_hub": "modelscope",
|
|
5184
|
+
"model_revision": "master"
|
|
5185
|
+
},
|
|
5186
|
+
{
|
|
5187
|
+
"model_format": "awq",
|
|
5188
|
+
"model_size_in_billions": 78,
|
|
5189
|
+
"quantizations": [
|
|
5190
|
+
"Int4"
|
|
5191
|
+
],
|
|
5192
|
+
"model_id": "OpenGVLab/InternVL3-78B-AWQ",
|
|
5193
|
+
"model_hub": "modelscope"
|
|
5194
|
+
}
|
|
5195
|
+
],
|
|
5196
|
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
|
|
5197
|
+
"stop_token_ids": [
|
|
5198
|
+
151645
|
|
5199
|
+
],
|
|
5200
|
+
"stop": [
|
|
5201
|
+
"<|im_end|>"
|
|
5202
|
+
]
|
|
5203
|
+
},
|
|
5041
5204
|
{
|
|
5042
5205
|
"version": 1,
|
|
5043
5206
|
"context_length": 8192,
|
|
@@ -5503,6 +5666,48 @@
|
|
|
5503
5666
|
"<|endoftext|>"
|
|
5504
5667
|
]
|
|
5505
5668
|
},
|
|
5669
|
+
{
|
|
5670
|
+
"version":1,
|
|
5671
|
+
"context_length":32768,
|
|
5672
|
+
"model_name":"qwen2.5-omni",
|
|
5673
|
+
"model_lang":[
|
|
5674
|
+
"en",
|
|
5675
|
+
"zh"
|
|
5676
|
+
],
|
|
5677
|
+
"model_ability":[
|
|
5678
|
+
"chat",
|
|
5679
|
+
"vision",
|
|
5680
|
+
"audio",
|
|
5681
|
+
"omni"
|
|
5682
|
+
],
|
|
5683
|
+
"model_description":"Qwen2.5-Omni: the new flagship end-to-end multimodal model in the Qwen series.",
|
|
5684
|
+
"model_specs":[
|
|
5685
|
+
{
|
|
5686
|
+
"model_format":"pytorch",
|
|
5687
|
+
"model_size_in_billions":7,
|
|
5688
|
+
"quantizations":[
|
|
5689
|
+
"none"
|
|
5690
|
+
],
|
|
5691
|
+
"model_hub": "modelscope",
|
|
5692
|
+
"model_id":"Qwen/Qwen2.5-Omni-7B"
|
|
5693
|
+
}
|
|
5694
|
+
],
|
|
5695
|
+
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
|
|
5696
|
+
"stop_token_ids": [
|
|
5697
|
+
151645,
|
|
5698
|
+
151643
|
|
5699
|
+
],
|
|
5700
|
+
"stop": [
|
|
5701
|
+
"<|im_end|>",
|
|
5702
|
+
"<|endoftext|>"
|
|
5703
|
+
],
|
|
5704
|
+
"virtualenv": {
|
|
5705
|
+
"packages": [
|
|
5706
|
+
"git+https://github.com/huggingface/transformers@v4.51.3-Qwen2.5-Omni-preview",
|
|
5707
|
+
"numpy==1.26.4"
|
|
5708
|
+
]
|
|
5709
|
+
}
|
|
5710
|
+
},
|
|
5506
5711
|
{
|
|
5507
5712
|
"version": 1,
|
|
5508
5713
|
"context_length": 32768,
|
|
@@ -7546,8 +7751,30 @@
|
|
|
7546
7751
|
"q6_k",
|
|
7547
7752
|
"q8_0"
|
|
7548
7753
|
],
|
|
7754
|
+
"quantization_parts": {
|
|
7755
|
+
"fp16": [
|
|
7756
|
+
"00001-of-00017",
|
|
7757
|
+
"00002-of-00017",
|
|
7758
|
+
"00003-of-00017",
|
|
7759
|
+
"00004-of-00017",
|
|
7760
|
+
"00005-of-00017",
|
|
7761
|
+
"00006-of-00017",
|
|
7762
|
+
"00007-of-00017",
|
|
7763
|
+
"00008-of-00017",
|
|
7764
|
+
"00009-of-00017",
|
|
7765
|
+
"00010-of-00017",
|
|
7766
|
+
"00011-of-00017",
|
|
7767
|
+
"00012-of-00017",
|
|
7768
|
+
"00013-of-00017",
|
|
7769
|
+
"00014-of-00017",
|
|
7770
|
+
"00015-of-00017",
|
|
7771
|
+
"00016-of-00017",
|
|
7772
|
+
"00017-of-00017"
|
|
7773
|
+
]
|
|
7774
|
+
},
|
|
7549
7775
|
"model_id": "Qwen/QwQ-32B-GGUF",
|
|
7550
7776
|
"model_file_name_template": "qwq-32b-{quantization}.gguf",
|
|
7777
|
+
"model_file_name_split_template": "qwq-32b-{quantization}-{part}.gguf",
|
|
7551
7778
|
"model_hub": "modelscope"
|
|
7552
7779
|
}
|
|
7553
7780
|
],
|
|
@@ -8544,5 +8771,225 @@
|
|
|
8544
8771
|
"stop": [
|
|
8545
8772
|
"<|end▁of▁sentence|>"
|
|
8546
8773
|
]
|
|
8774
|
+
},
|
|
8775
|
+
{
|
|
8776
|
+
"version": 1,
|
|
8777
|
+
"context_length": 32768,
|
|
8778
|
+
"model_name": "seallms-v3",
|
|
8779
|
+
"model_lang": [
|
|
8780
|
+
"en",
|
|
8781
|
+
"zh",
|
|
8782
|
+
"id",
|
|
8783
|
+
"vi",
|
|
8784
|
+
"th",
|
|
8785
|
+
"ph",
|
|
8786
|
+
"ms",
|
|
8787
|
+
"mm",
|
|
8788
|
+
"kh",
|
|
8789
|
+
"la",
|
|
8790
|
+
"in"
|
|
8791
|
+
],
|
|
8792
|
+
"model_ability": [
|
|
8793
|
+
"chat"
|
|
8794
|
+
],
|
|
8795
|
+
"model_description": "SeaLLMs - Large Language Models for Southeast Asia",
|
|
8796
|
+
"model_specs": [
|
|
8797
|
+
{
|
|
8798
|
+
"model_format": "pytorch",
|
|
8799
|
+
"model_size_in_billions": "1_5",
|
|
8800
|
+
"quantizations": [
|
|
8801
|
+
"none"
|
|
8802
|
+
],
|
|
8803
|
+
"model_id": "SeaLLMs/SeaLLMs-v3-1.5B-Chat",
|
|
8804
|
+
"model_hub": "modelscope"
|
|
8805
|
+
},
|
|
8806
|
+
{
|
|
8807
|
+
"model_format": "pytorch",
|
|
8808
|
+
"model_size_in_billions": 7,
|
|
8809
|
+
"quantizations": [
|
|
8810
|
+
"none"
|
|
8811
|
+
],
|
|
8812
|
+
"model_id": "SeaLLMs/SeaLLMs-v3-7B-Chat",
|
|
8813
|
+
"model_hub": "modelscope"
|
|
8814
|
+
}
|
|
8815
|
+
],
|
|
8816
|
+
"chat_template": "{% set system_message = 'You are a helpful assistant.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}",
|
|
8817
|
+
"stop_token_ids": [
|
|
8818
|
+
151643,
|
|
8819
|
+
151644,
|
|
8820
|
+
151645
|
|
8821
|
+
],
|
|
8822
|
+
"stop": [
|
|
8823
|
+
"<|endoftext|>",
|
|
8824
|
+
"<|im_start|>",
|
|
8825
|
+
"<|im_end|>"
|
|
8826
|
+
]
|
|
8827
|
+
},
|
|
8828
|
+
{
|
|
8829
|
+
"version": 1,
|
|
8830
|
+
"context_length": 32768,
|
|
8831
|
+
"model_name": "glm4-0414",
|
|
8832
|
+
"model_lang": [
|
|
8833
|
+
"en",
|
|
8834
|
+
"zh"
|
|
8835
|
+
],
|
|
8836
|
+
"model_ability": [
|
|
8837
|
+
"chat",
|
|
8838
|
+
"tools"
|
|
8839
|
+
],
|
|
8840
|
+
"model_description": "The GLM family welcomes new members, the GLM-4-32B-0414 series models, featuring 32 billion parameters. Its performance is comparable to OpenAI’s GPT series and DeepSeek’s V3/R1 series",
|
|
8841
|
+
"model_specs": [
|
|
8842
|
+
{
|
|
8843
|
+
"model_format": "pytorch",
|
|
8844
|
+
"model_size_in_billions": 9,
|
|
8845
|
+
"quantizations": [
|
|
8846
|
+
"none"
|
|
8847
|
+
],
|
|
8848
|
+
"model_id": "ZhipuAI/GLM-4-9B-0414",
|
|
8849
|
+
"model_hub": "modelscope"
|
|
8850
|
+
},
|
|
8851
|
+
{
|
|
8852
|
+
"model_format": "pytorch",
|
|
8853
|
+
"model_size_in_billions": 32,
|
|
8854
|
+
"quantizations": [
|
|
8855
|
+
"none"
|
|
8856
|
+
],
|
|
8857
|
+
"model_id": "ZhipuAI/GLM-4-32B-0414",
|
|
8858
|
+
"model_hub": "modelscope"
|
|
8859
|
+
}
|
|
8860
|
+
],
|
|
8861
|
+
"chat_template": "[gMASK]<sop>{%- if tools -%}<|system|>\n# 可用工具\n{% for tool in tools %}{%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4, ensure_ascii=False) }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。{%- endfor %}{%- endif -%}{%- for msg in messages %}{%- if msg.role == 'system' %}<|system|>\n{{ msg.content }}{%- endif %}{%- endfor %}{%- for message in messages if message.role != 'system' %}{%- set role = message['role'] %}{%- set content = message['content'] %}{%- set meta = message.get(\"metadata\", \"\") %}{%- if role == 'user' %}<|user|>\n{{ content }}{%- elif role == 'assistant' and not meta %}<|assistant|>\n{{ content }}{%- elif role == 'assistant' and meta %}<|assistant|>{{ meta }} \n{{ content }}{%- elif role == 'observation' %}<|observation|>\n{{ content }}{%- endif %}{%- endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
|
|
8862
|
+
"stop_token_ids": [
|
|
8863
|
+
151329,
|
|
8864
|
+
151336,
|
|
8865
|
+
151338
|
|
8866
|
+
],
|
|
8867
|
+
"stop": [
|
|
8868
|
+
"<|endoftext|>",
|
|
8869
|
+
"<|user|>",
|
|
8870
|
+
"<|observation|>"
|
|
8871
|
+
]
|
|
8872
|
+
},
|
|
8873
|
+
{
|
|
8874
|
+
"version": 1,
|
|
8875
|
+
"context_length": 32768,
|
|
8876
|
+
"model_name": "skywork-or1-preview",
|
|
8877
|
+
"model_lang": [
|
|
8878
|
+
"en",
|
|
8879
|
+
"zh"
|
|
8880
|
+
],
|
|
8881
|
+
"model_ability": [
|
|
8882
|
+
"chat"
|
|
8883
|
+
],
|
|
8884
|
+
"model_description": "The Skywork-OR1 (Open Reasoner 1) model series consists of powerful math and code reasoning models trained using large-scale rule-based reinforcement learning with carefully designed datasets and training recipes.",
|
|
8885
|
+
"model_specs": [
|
|
8886
|
+
{
|
|
8887
|
+
"model_format": "pytorch",
|
|
8888
|
+
"model_size_in_billions": 32,
|
|
8889
|
+
"quantizations": [
|
|
8890
|
+
"none"
|
|
8891
|
+
],
|
|
8892
|
+
"model_id": "Skywork/Skywork-OR1-32B-Preview",
|
|
8893
|
+
"model_hub": "modelscope"
|
|
8894
|
+
},
|
|
8895
|
+
{
|
|
8896
|
+
"model_format": "gptq",
|
|
8897
|
+
"model_size_in_billions": 32,
|
|
8898
|
+
"quantizations": [
|
|
8899
|
+
"Int4",
|
|
8900
|
+
"int8"
|
|
8901
|
+
],
|
|
8902
|
+
"model_id": "JunHowie/Skywork-OR1-32B-Preview-GPTQ-{quantization}",
|
|
8903
|
+
"model_hub": "modelscope"
|
|
8904
|
+
},
|
|
8905
|
+
{
|
|
8906
|
+
"model_format": "pytorch",
|
|
8907
|
+
"model_size_in_billions": 7,
|
|
8908
|
+
"quantizations": [
|
|
8909
|
+
"none"
|
|
8910
|
+
],
|
|
8911
|
+
"model_id": "Skywork/Skywork-OR1-7B-Preview",
|
|
8912
|
+
"model_hub": "modelscope"
|
|
8913
|
+
},
|
|
8914
|
+
{
|
|
8915
|
+
"model_format": "ggufv2",
|
|
8916
|
+
"model_size_in_billions": 32,
|
|
8917
|
+
"quantizations": [
|
|
8918
|
+
"IQ2_M",
|
|
8919
|
+
"IQ2_S",
|
|
8920
|
+
"IQ2_XS",
|
|
8921
|
+
"IQ3_M",
|
|
8922
|
+
"IQ3_XS",
|
|
8923
|
+
"IQ3_XXS",
|
|
8924
|
+
"IQ4_NL",
|
|
8925
|
+
"IQ4_XS",
|
|
8926
|
+
"Q2_K",
|
|
8927
|
+
"Q2_K_L",
|
|
8928
|
+
"Q3_K_L",
|
|
8929
|
+
"Q3_K_M",
|
|
8930
|
+
"Q3_K_S",
|
|
8931
|
+
"Q3_K_XL",
|
|
8932
|
+
"Q4_0",
|
|
8933
|
+
"Q4_1",
|
|
8934
|
+
"Q4_K_L",
|
|
8935
|
+
"Q4_K_M",
|
|
8936
|
+
"Q4_K_S",
|
|
8937
|
+
"Q5_K_L",
|
|
8938
|
+
"Q5_K_M",
|
|
8939
|
+
"Q5_K_S",
|
|
8940
|
+
"Q6_K",
|
|
8941
|
+
"Q6_K_L",
|
|
8942
|
+
"Q8_0"
|
|
8943
|
+
],
|
|
8944
|
+
"model_id": "bartowski/Skywork_Skywork-OR1-32B-Preview-GGUF",
|
|
8945
|
+
"model_file_name_template": "Skywork_Skywork-OR1-32B-Preview-{quantization}.gguf",
|
|
8946
|
+
"model_hub": "modelscope"
|
|
8947
|
+
},
|
|
8948
|
+
{
|
|
8949
|
+
"model_format": "ggufv2",
|
|
8950
|
+
"model_size_in_billions": 7,
|
|
8951
|
+
"quantizations": [
|
|
8952
|
+
"IQ2_M",
|
|
8953
|
+
"IQ2_S",
|
|
8954
|
+
"IQ2_XS",
|
|
8955
|
+
"IQ3_M",
|
|
8956
|
+
"IQ3_XS",
|
|
8957
|
+
"IQ3_XXS",
|
|
8958
|
+
"IQ4_NL",
|
|
8959
|
+
"IQ4_XS",
|
|
8960
|
+
"Q2_K",
|
|
8961
|
+
"Q2_K_L",
|
|
8962
|
+
"Q3_K_L",
|
|
8963
|
+
"Q3_K_M",
|
|
8964
|
+
"Q3_K_S",
|
|
8965
|
+
"Q3_K_XL",
|
|
8966
|
+
"Q4_0",
|
|
8967
|
+
"Q4_1",
|
|
8968
|
+
"Q4_K_L",
|
|
8969
|
+
"Q4_K_M",
|
|
8970
|
+
"Q4_K_S",
|
|
8971
|
+
"Q5_K_L",
|
|
8972
|
+
"Q5_K_M",
|
|
8973
|
+
"Q5_K_S",
|
|
8974
|
+
"Q6_K",
|
|
8975
|
+
"Q6_K_L",
|
|
8976
|
+
"Q8_0"
|
|
8977
|
+
],
|
|
8978
|
+
"model_id": "bartowski/Skywork_Skywork-OR1-7B-Preview-GGUF",
|
|
8979
|
+
"model_file_name_template": "Skywork_Skywork-OR1-7B-Preview-{quantization}.gguf",
|
|
8980
|
+
"model_hub": "modelscope"
|
|
8981
|
+
}
|
|
8982
|
+
],
|
|
8983
|
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
|
|
8984
|
+
"stop_token_ids": [
|
|
8985
|
+
151643,
|
|
8986
|
+
151644,
|
|
8987
|
+
151645
|
|
8988
|
+
],
|
|
8989
|
+
"stop": [
|
|
8990
|
+
"<|endoftext|>",
|
|
8991
|
+
"<|im_start|>",
|
|
8992
|
+
"<|im_end|>"
|
|
8993
|
+
]
|
|
8547
8994
|
}
|
|
8548
8995
|
]
|
xinference/model/llm/mlx/core.py
CHANGED
|
@@ -213,7 +213,16 @@ class MLXModel(LLM):
|
|
|
213
213
|
return prompt
|
|
214
214
|
|
|
215
215
|
def _generate_stream_inner(self, **kwargs):
|
|
216
|
-
|
|
216
|
+
try:
|
|
217
|
+
from mlx_lm.utils import (
|
|
218
|
+
make_logits_processors,
|
|
219
|
+
make_sampler,
|
|
220
|
+
stream_generate,
|
|
221
|
+
)
|
|
222
|
+
except ImportError:
|
|
223
|
+
# for mlx-lm >= 0.22.3
|
|
224
|
+
from mlx_lm.generate import stream_generate
|
|
225
|
+
from mlx_lm.sample_utils import make_logits_processors, make_sampler
|
|
217
226
|
|
|
218
227
|
sampler = make_sampler(
|
|
219
228
|
temp=kwargs.pop("temperature"), top_p=kwargs.pop("top_p")
|
|
@@ -508,7 +517,12 @@ class MLXVisionModel(MLXModel, ChatModelMixin):
|
|
|
508
517
|
|
|
509
518
|
def _generate_stream_inner(self, **kwargs):
|
|
510
519
|
import mlx.core as mx
|
|
511
|
-
|
|
520
|
+
|
|
521
|
+
try:
|
|
522
|
+
from mlx_lm.utils import GenerationResponse
|
|
523
|
+
except ImportError:
|
|
524
|
+
# for mlx-lm >= 0.22.3
|
|
525
|
+
from mlx_lm.generate import GenerationResponse
|
|
512
526
|
from mlx_vlm.utils import generate_step
|
|
513
527
|
|
|
514
528
|
inputs = kwargs.pop("prompt_token_ids")
|
xinference/model/llm/transformers/__init__.py
CHANGED
|
@@ -11,3 +11,17 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
# Automatically scan and import all python scripts at the same level
|
|
16
|
+
import importlib
|
|
17
|
+
import os
|
|
18
|
+
import pkgutil
|
|
19
|
+
|
|
20
|
+
# Pin the package search path to the directory that contains this file.
__path__ = [os.path.dirname(os.path.abspath(__file__))]

# Walk the modules found on that path and import each public one (names with
# a leading underscore are skipped), binding it under its own name in this
# package's namespace.
for _, module_name, is_pkg in pkgutil.iter_modules(__path__):
    if module_name.startswith("_"):
        continue
    module = importlib.import_module(f"{__name__}.{module_name}")
    globals()[module_name] = module
|
xinference/model/llm/transformers/core.py
CHANGED
|
@@ -59,17 +59,11 @@ NON_DEFAULT_MODEL_LIST: List[str] = [
|
|
|
59
59
|
"OmniLMM",
|
|
60
60
|
"yi-vl-chat",
|
|
61
61
|
"deepseek-vl-chat",
|
|
62
|
-
"internvl-chat",
|
|
63
|
-
"internvl2",
|
|
64
|
-
"Internvl2.5",
|
|
65
|
-
"Internvl2.5-MPO",
|
|
66
62
|
"cogvlm2",
|
|
67
63
|
"cogvlm2-video-llama3-chat",
|
|
68
64
|
"MiniCPM-Llama3-V-2_5",
|
|
69
65
|
"MiniCPM-V-2.6",
|
|
70
66
|
"glm-4v",
|
|
71
|
-
"qwen2-vl-instruct",
|
|
72
|
-
"qwen2.5-vl-instruct",
|
|
73
67
|
"qwen2-audio",
|
|
74
68
|
"qwen2-audio-instruct",
|
|
75
69
|
"deepseek-v2",
|
|
@@ -85,6 +79,36 @@ NON_DEFAULT_MODEL_LIST: List[str] = [
|
|
|
85
79
|
]
|
|
86
80
|
|
|
87
81
|
|
|
82
|
+
# Define the decorator to support multiple names registration
|
|
83
|
+
def register_non_default_model(*model_names: str):
    """
    Build a class decorator that registers non-default model names.

    Args:
        *model_names (str): One or more model names to record in
            NON_DEFAULT_MODEL_LIST.

    Returns:
        A decorator that appends every name not already present in
        NON_DEFAULT_MODEL_LIST and returns the decorated class unchanged.
    """

    def decorator(cls):
        """
        Record the captured model names, then hand ``cls`` back untouched.

        Args:
            cls: The class being decorated.

        Returns:
            The same class object that was passed in.
        """
        for candidate in model_names:
            # Membership is re-checked for every name, so a name repeated
            # within model_names is still appended only once.
            if candidate in NON_DEFAULT_MODEL_LIST:
                continue
            NON_DEFAULT_MODEL_LIST.append(candidate)
        return cls

    return decorator
|
|
110
|
+
|
|
111
|
+
|
|
88
112
|
class PytorchModel(LLM):
|
|
89
113
|
def __init__(
|
|
90
114
|
self,
|
xinference/model/llm/transformers/gemma3.py
CHANGED
|
@@ -24,6 +24,7 @@ from ....types import (
|
|
|
24
24
|
ChatCompletionChunk,
|
|
25
25
|
ChatCompletionMessage,
|
|
26
26
|
CompletionChunk,
|
|
27
|
+
PytorchModelConfig,
|
|
27
28
|
)
|
|
28
29
|
from ..llm_family import LLMFamilyV1, LLMSpecV1
|
|
29
30
|
from ..utils import generate_chat_completion, generate_completion_chunk
|
|
@@ -65,6 +66,15 @@ class Gemma3ChatModel(PytorchChatModel):
|
|
|
65
66
|
return True
|
|
66
67
|
return False
|
|
67
68
|
|
|
69
|
+
def _sanitize_model_config(
    self, pytorch_model_config: Optional[PytorchModelConfig]
) -> PytorchModelConfig:
    """
    Sanitize the config via the parent class, then fill in pixel defaults.

    Args:
        pytorch_model_config: The user-supplied config, possibly None; it is
            passed straight to the parent implementation.

    Returns:
        The config returned by the parent implementation, with "min_pixels"
        and "max_pixels" set to defaults when they were not already present.
    """
    sanitized = super()._sanitize_model_config(pytorch_model_config)
    assert sanitized is not None
    # setdefault keeps any value that is already present in the config.
    pixel_defaults = (
        ("min_pixels", 256 * 28 * 28),
        ("max_pixels", 1280 * 28 * 28),
    )
    for option, fallback in pixel_defaults:
        sanitized.setdefault(option, fallback)
    return sanitized
|
|
77
|
+
|
|
68
78
|
def load(self):
|
|
69
79
|
from transformers import AutoProcessor, Gemma3ForConditionalGeneration
|
|
70
80
|
|
|
@@ -73,8 +83,13 @@ class Gemma3ChatModel(PytorchChatModel):
|
|
|
73
83
|
self._device = device
|
|
74
84
|
# for multiple GPU, set back to auto to make multiple devices work
|
|
75
85
|
device = "auto" if device == "cuda" else device
|
|
76
|
-
|
|
77
|
-
|
|
86
|
+
min_pixels = self._pytorch_model_config.get("min_pixels")
|
|
87
|
+
max_pixels = self._pytorch_model_config.get("max_pixels")
|
|
88
|
+
self._processor = AutoProcessor.from_pretrained(
|
|
89
|
+
self.model_path,
|
|
90
|
+
min_pixels=min_pixels,
|
|
91
|
+
max_pixels=max_pixels,
|
|
92
|
+
)
|
|
78
93
|
self._tokenizer = self._processor.tokenizer
|
|
79
94
|
self._model = Gemma3ForConditionalGeneration.from_pretrained(
|
|
80
95
|
self.model_path,
|