xinference 1.5.0.post2__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference has been flagged as potentially problematic by the registry scanner.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +107 -11
- xinference/client/restful/restful_client.py +51 -11
- xinference/constants.py +5 -1
- xinference/core/media_interface.py +758 -0
- xinference/core/model.py +49 -9
- xinference/core/supervisor.py +1 -1
- xinference/core/utils.py +1 -1
- xinference/core/worker.py +33 -39
- xinference/deploy/cmdline.py +17 -0
- xinference/deploy/utils.py +0 -3
- xinference/model/audio/__init__.py +16 -27
- xinference/model/audio/core.py +2 -1
- xinference/model/audio/cosyvoice.py +4 -2
- xinference/model/audio/model_spec.json +63 -46
- xinference/model/audio/model_spec_modelscope.json +31 -14
- xinference/model/embedding/__init__.py +16 -24
- xinference/model/image/__init__.py +15 -25
- xinference/model/llm/__init__.py +40 -115
- xinference/model/llm/core.py +29 -6
- xinference/model/llm/llama_cpp/core.py +30 -347
- xinference/model/llm/llm_family.json +1674 -2203
- xinference/model/llm/llm_family.py +71 -7
- xinference/model/llm/llm_family_csghub.json +0 -32
- xinference/model/llm/llm_family_modelscope.json +1838 -2016
- xinference/model/llm/llm_family_openmind_hub.json +19 -325
- xinference/model/llm/lmdeploy/core.py +7 -2
- xinference/model/llm/mlx/core.py +23 -7
- xinference/model/llm/reasoning_parser.py +281 -5
- xinference/model/llm/sglang/core.py +39 -11
- xinference/model/llm/transformers/chatglm.py +9 -2
- xinference/model/llm/transformers/cogagent.py +10 -12
- xinference/model/llm/transformers/cogvlm2.py +6 -3
- xinference/model/llm/transformers/cogvlm2_video.py +3 -6
- xinference/model/llm/transformers/core.py +58 -60
- xinference/model/llm/transformers/deepseek_v2.py +4 -2
- xinference/model/llm/transformers/deepseek_vl.py +10 -4
- xinference/model/llm/transformers/deepseek_vl2.py +9 -4
- xinference/model/llm/transformers/gemma3.py +4 -5
- xinference/model/llm/transformers/glm4v.py +3 -21
- xinference/model/llm/transformers/glm_edge_v.py +3 -20
- xinference/model/llm/transformers/intern_vl.py +3 -6
- xinference/model/llm/transformers/internlm2.py +1 -1
- xinference/model/llm/transformers/minicpmv25.py +4 -2
- xinference/model/llm/transformers/minicpmv26.py +5 -3
- xinference/model/llm/transformers/omnilmm.py +1 -1
- xinference/model/llm/transformers/opt.py +1 -1
- xinference/model/llm/transformers/ovis2.py +302 -0
- xinference/model/llm/transformers/qwen-omni.py +8 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +5 -1
- xinference/model/llm/transformers/qwen_vl.py +5 -2
- xinference/model/llm/utils.py +96 -45
- xinference/model/llm/vllm/core.py +108 -24
- xinference/model/llm/vllm/distributed_executor.py +8 -7
- xinference/model/llm/vllm/xavier/allocator.py +1 -1
- xinference/model/llm/vllm/xavier/block_manager.py +1 -1
- xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
- xinference/model/llm/vllm/xavier/executor.py +1 -1
- xinference/model/llm/vllm/xavier/test/test_xavier.py +2 -11
- xinference/model/rerank/__init__.py +13 -24
- xinference/model/video/__init__.py +15 -25
- xinference/model/video/core.py +3 -3
- xinference/model/video/diffusers.py +157 -13
- xinference/model/video/model_spec.json +100 -0
- xinference/model/video/model_spec_modelscope.json +104 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
- xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
- xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
- xinference/thirdparty/cosyvoice/bin/train.py +7 -2
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
- xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
- xinference/thirdparty/cosyvoice/cli/model.py +140 -155
- xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
- xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
- xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
- xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
- xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
- xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
- xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
- xinference/thirdparty/cosyvoice/utils/common.py +1 -1
- xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
- xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
- xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
- xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
- xinference/types.py +2 -71
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
- xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
- xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
- xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
- xinference/web/ui/src/locales/en.json +7 -4
- xinference/web/ui/src/locales/zh.json +7 -4
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/METADATA +56 -36
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/RECORD +120 -121
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/WHEEL +1 -1
- xinference/core/image_interface.py +0 -377
- xinference/model/llm/transformers/compression.py +0 -258
- xinference/model/llm/transformers/yi_vl.py +0 -239
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
- xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
- xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
- xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
- /xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family_openmind_hub.json

@@ -1,46 +1,4 @@
 [
-    {
-        "version": 1,
-        "context_length": 32768,
-        "model_name": "internlm2-chat",
-        "model_lang": [
-            "en",
-            "zh"
-        ],
-        "model_ability": [
-            "chat"
-        ],
-        "model_description": "The second generation of the InternLM model, InternLM2.",
-        "model_specs": [
-            {
-                "model_format": "pytorch",
-                "model_size_in_billions": 7,
-                "quantizations": [
-                    "none"
-                ],
-                "model_id": "PyTorch-NPU/internlm2_chat_7b",
-                "model_hub": "openmind_hub"
-            },
-            {
-                "model_format": "pytorch",
-                "model_size_in_billions": 20,
-                "quantizations": [
-                    "none"
-                ],
-                "model_id": "AI-Research/internlm2-chat-20b",
-                "model_hub": "openmind_hub"
-            }
-        ],
-        "chat_template": "{{ '<s>' }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-        "stop_token_ids": [
-            2,
-            92542
-        ],
-        "stop": [
-            "</s>",
-            "<|im_end|>"
-        ]
-    },
     {
         "version": 1,
         "context_length": 4096,
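The internlm2-chat family removed above (like the internlm2.5 families removed further down) carries a plain Jinja2 chat_template. For reference, a minimal sketch of how such a template expands into a prompt string, using the jinja2 package directly; the template string is copied from the hunk above, while the two example messages are invented:

    # Sketch: render the chat_template removed above with plain jinja2.
    from jinja2 import Template

    chat_template = (
        "{{ '<s>' }}{% for message in messages %}"
        "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
        "{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
    )
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]
    print(Template(chat_template).render(messages=messages, add_generation_prompt=True))
    # <s><|im_start|>system
    # You are a helpful assistant.<|im_end|>
    # <|im_start|>user
    # Hello!<|im_end|>
    # <|im_start|>assistant

The entry's stop strings ("</s>", "<|im_end|>") and stop_token_ids (2, 92542) mark where the model's turn ends during generation.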
@@ -58,8 +16,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 7,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "Baichuan/Baichuan2_7b_chat_pt",
@@ -69,8 +25,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 13,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "Baichuan/Baichuan2_13b_chat_pt",
@@ -101,8 +55,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 7,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "PyTorch-NPU/baichuan2_7b_base",
@@ -112,8 +64,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 13,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "Baichuan/Baichuan2_13b_base_pt",
@@ -139,8 +89,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 7,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "PyTorch-NPU/qwen1.5_7b_chat",
@@ -176,8 +124,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 7,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "PyTorch-NPU/qwen1.5_7b",
@@ -203,8 +149,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 9,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "AI-Research/glm-4-9b-chat",
@@ -241,8 +185,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 9,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "AI-Research/glm-4-9b-chat-1m",
@@ -279,8 +221,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 9,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "AI-Research/glm-4v-9b",
@@ -315,8 +255,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 8,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "wuhaicc/Meta-Llama-3-8B-Instruct",
@@ -326,8 +264,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 70,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "wuhaicc/Meta-Llama-3-70B-Instruct",
@@ -367,8 +303,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 8,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "AI-Research/Meta-Llama-3.1-8B",
@@ -435,8 +369,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": "1_8",
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "HangZhou_Ascend/Qwen-1_8B-Chat",
@@ -446,8 +378,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 7,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "wuhaicc/Qwen-7B-Chat",
@@ -457,8 +387,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 14,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "wuhaicc/Qwen-14B-Chat",
@@ -495,8 +423,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": "0_5",
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "HangZhou_Ascend/Qwen1.5-0.5B-Chat",
@@ -506,8 +432,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 4,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "HangZhou_Ascend/Qwen1.5-4B-Chat",
@@ -517,8 +441,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 7,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "PyTorch-NPU/qwen1.5_7b_chat",
@@ -528,8 +450,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 14,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "State_Cloud/Qwen1.5-14B-Chat",
@@ -539,8 +459,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 32,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "State_Cloud/Qwen1.5-32b-chat",
@@ -550,8 +468,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 72,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "State_Cloud/Qwen1.5-72b-chat",
@@ -587,8 +503,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 7,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "HangZhou_Ascend/CodeQwen1.5-7B",
@@ -613,8 +527,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 7,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "HangZhou_Ascend/CodeQwen1.5-7B-Chat",
@@ -651,8 +563,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": "0_5",
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "wuhaicc/Qwen2-0.5B-Instruct",
@@ -662,8 +572,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": "1_5",
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "HangZhou_Ascend/Qwen2-1.5B-Instruct",
@@ -673,8 +581,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 7,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "wuhaicc/Qwen2-7B-Instruct",
@@ -684,8 +590,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 72,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "State_Cloud/Qwen2-72B-Instruct",
@@ -720,8 +624,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 7,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "PyTorch-NPU/mistral_7b_v0.1",
@@ -746,8 +648,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 6,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "wuhaicc/Yi-6B",
@@ -757,8 +657,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 9,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "wuhaicc/Yi-9B",
@@ -783,8 +681,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 6,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "wuhaicc/Yi-6B-200K",
@@ -809,8 +705,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 6,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "HangZhou_Ascend/Yi-1.5-6B",
@@ -820,8 +714,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 9,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "HangZhou_Ascend/Yi-1.5-9B",
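Every hunk in the run above makes the same edit: the on-the-fly "4-bit" and "8-bit" options are dropped from a pytorch spec's quantizations list, leaving only "none"; the TeleChat and Qwen2.5 hunks below repeat the pattern. A minimal sketch of that transformation as a standalone script (hypothetical tooling to illustrate the edit, not code shipped in the wheel):

    # Sketch: remove the "4-bit"/"8-bit" quantization options from every
    # model spec in the family file (hypothetical one-off script).
    import json

    PATH = "xinference/model/llm/llm_family_openmind_hub.json"

    with open(PATH) as f:
        families = json.load(f)

    for family in families:
        for spec in family.get("model_specs", []):
            spec["quantizations"] = [
                q for q in spec.get("quantizations", [])
                if q not in ("4-bit", "8-bit")
            ]

    with open(PATH, "w") as f:
        json.dump(families, f, indent=4, ensure_ascii=False)
        f.write("\n")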
@@ -829,184 +721,6 @@
             }
         ]
     },
-    {
-        "version": 1,
-        "context_length": 32768,
-        "model_name": "internlm2.5-chat",
-        "model_lang": [
-            "en",
-            "zh"
-        ],
-        "model_ability": [
-            "chat"
-        ],
-        "model_description": "InternLM2.5 series of the InternLM model.",
-        "model_specs": [
-            {
-                "model_format": "pytorch",
-                "model_size_in_billions": "1_8",
-                "quantizations": [
-                    "none"
-                ],
-                "model_id": "Intern/internlm2_5-1_8b-chat",
-                "model_hub": "openmind_hub"
-            },
-            {
-                "model_format": "pytorch",
-                "model_size_in_billions": 7,
-                "quantizations": [
-                    "none"
-                ],
-                "model_id": "Intern/internlm2_5-7b-chat",
-                "model_hub": "openmind_hub"
-            },
-            {
-                "model_format": "pytorch",
-                "model_size_in_billions": 20,
-                "quantizations": [
-                    "none"
-                ],
-                "model_id": "Intern/internlm2_5-20b-chat",
-                "model_hub": "openmind_hub"
-            }
-        ],
-        "chat_template": "{{ '<s>' }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-        "stop_token_ids": [
-            2,
-            92542
-        ],
-        "stop": [
-            "</s>",
-            "<|im_end|>"
-        ]
-    },
-    {
-        "version": 1,
-        "context_length": 262144,
-        "model_name": "internlm2.5-chat-1m",
-        "model_lang": [
-            "en",
-            "zh"
-        ],
-        "model_ability": [
-            "chat"
-        ],
-        "model_description": "InternLM2.5 series of the InternLM model supports 1M long-context",
-        "model_specs": [
-            {
-                "model_format": "pytorch",
-                "model_size_in_billions": 7,
-                "quantizations": [
-                    "none"
-                ],
-                "model_id": "Intern/internlm2_5-7b-chat-1m",
-                "model_hub": "openmind_hub"
-            }
-        ],
-        "chat_template": "{{ '<s>' }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-        "stop_token_ids": [
-            2,
-            92542
-        ],
-        "stop": [
-            "</s>",
-            "<|im_end|>"
-        ]
-    },
-    {
-        "version": 1,
-        "context_length": 8192,
-        "model_name": "gemma-it",
-        "model_lang": [
-            "en"
-        ],
-        "model_ability": [
-            "chat"
-        ],
-        "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
-        "model_specs": [
-            {
-                "model_format": "pytorch",
-                "model_size_in_billions": 2,
-                "quantizations": [
-                    "none",
-                    "4-bit",
-                    "8-bit"
-                ],
-                "model_id": "SY_AICC/gemma-2b-it",
-                "model_hub": "openmind_hub"
-            },
-            {
-                "model_format": "pytorch",
-                "model_size_in_billions": 7,
-                "quantizations": [
-                    "none",
-                    "4-bit",
-                    "8-bit"
-                ],
-                "model_id": "SY_AICC/gemma-7b-it",
-                "model_hub": "openmind_hub"
-            }
-        ],
-        "chat_template": "{{ '<bos>' }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
-        "stop_token_ids": [
-            1,
-            106,
-            107
-        ],
-        "stop": [
-            "<eos>",
-            "<end_of_turn>",
-            "<start_of_turn>"
-        ]
-    },
-    {
-        "version": 1,
-        "context_length": 8192,
-        "model_name": "gemma-2-it",
-        "model_lang": [
-            "en"
-        ],
-        "model_ability": [
-            "chat"
-        ],
-        "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
-        "model_specs": [
-            {
-                "model_format": "pytorch",
-                "model_size_in_billions": 2,
-                "quantizations": [
-                    "none",
-                    "4-bit",
-                    "8-bit"
-                ],
-                "model_id": "LlamaFactory/gemma-2-2b-it",
-                "model_hub": "openmind_hub"
-            },
-            {
-                "model_format": "pytorch",
-                "model_size_in_billions": 9,
-                "quantizations": [
-                    "none",
-                    "4-bit",
-                    "8-bit"
-                ],
-                "model_id": "LlamaFactory/gemma-2-9b-it",
-                "model_hub": "openmind_hub"
-            }
-        ],
-        "chat_template": "{{ '<bos>' }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
-        "stop_token_ids": [
-            1,
-            106,
-            107
-        ],
-        "stop": [
-            "<eos>",
-            "<end_of_turn>",
-            "<start_of_turn>"
-        ]
-    },
     {
         "version": 1,
         "context_length": 4096,
@@ -1076,12 +790,12 @@
         "context_length": 8192,
         "model_name": "cogvlm2",
         "model_lang": [
-
-
+            "en",
+            "zh"
         ],
         "model_ability": [
-
-
+            "chat",
+            "vision"
         ],
         "model_description": "CogVLM2 have achieved good results in many lists compared to the previous generation of CogVLM open source models. Its excellent performance can compete with some non-open source models.",
         "model_specs": [
@@ -1122,8 +836,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 7,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "TeleAI/TeleChat-7B-pt",
@@ -1133,8 +845,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 12,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "TeleAI/TeleChat-12B-pt",
@@ -1144,8 +854,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 52,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "TeleAI/TeleChat-52B-pt",
@@ -1163,35 +871,35 @@
         ]
     },
     {
-        "version":1,
-        "context_length":32768,
-        "model_name":"qwen2-vl-instruct",
-        "model_lang":[
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "qwen2-vl-instruct",
+        "model_lang": [
             "en",
             "zh"
         ],
-        "model_ability":[
+        "model_ability": [
             "chat",
             "vision"
         ],
-        "model_description":"Qwen2-VL: To See the World More Clearly.Qwen2-VL is the latest version of the vision language models in the Qwen model familities.",
-        "model_specs":[
+        "model_description": "Qwen2-VL: To See the World More Clearly.Qwen2-VL is the latest version of the vision language models in the Qwen model familities.",
+        "model_specs": [
             {
-                "model_format":"pytorch",
-                "model_size_in_billions":2,
-                "quantizations":[
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
                     "none"
                 ],
-                "model_id":"LlamaFactory/Qwen2-VL-2B-Instruct",
+                "model_id": "LlamaFactory/Qwen2-VL-2B-Instruct",
                 "model_hub": "openmind_hub"
             },
             {
-                "model_format":"pytorch",
-                "model_size_in_billions":7,
-                "quantizations":[
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
                     "none"
                 ],
-                "model_id":"LlamaFactory/Qwen2-VL-7B-Instruct",
+                "model_id": "LlamaFactory/Qwen2-VL-7B-Instruct",
                 "model_hub": "openmind_hub"
             }
         ],
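The qwen2-vl-instruct hunk above changes no values: the entry was reserialized so that every key has a space after the colon, matching the rest of the file. Python's json module emits exactly that style when an indent is given, e.g.:

    import json

    # json.dumps with an indent uses ": " as the key separator, the style
    # the qwen2-vl-instruct entry was normalized to.
    print(json.dumps({"version": 1, "model_name": "qwen2-vl-instruct"}, indent=4))
    # {
    #     "version": 1,
    #     "model_name": "qwen2-vl-instruct"
    # }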
@@ -1254,8 +962,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": "0_5",
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "Tianjin_Ascend/qwen2.5-0.5b",
@@ -1265,8 +971,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": "1_5",
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "Tianjin_Ascend/Qwen2.5-1.5B",
@@ -1276,8 +980,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 3,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "Tianjin_Ascend/Qwen2.5-3B",
@@ -1287,8 +989,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 7,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "AI-Research/Qwen2.5-7B",
@@ -1298,8 +998,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 32,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "AI-Research/Qwen2.5-32B",
@@ -1325,8 +1023,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 7,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "AI-Research/Qwen2.5-7B-Instruct",
@@ -1336,8 +1032,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": 32,
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "AI-Research/Qwen2.5-32B-Instruct",
xinference/model/llm/lmdeploy/core.py

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import importlib.util
 import logging
 import uuid
 from typing import AsyncGenerator, Dict, Iterator, List, Optional, TypedDict, Union
@@ -113,7 +114,11 @@ class LMDeployModel(LLM):
         raise ValueError("LMDEPLOY engine has not supported generate yet.")

     @classmethod
-    def match(
+    def check_lib(cls) -> bool:
+        return importlib.util.find_spec("lmdeploy") is not None
+
+    @classmethod
+    def match_json(
         cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
     ) -> bool:
         return False
@@ -166,7 +171,7 @@ class LMDeployChatModel(LMDeployModel, ChatModelMixin):
         )

     @classmethod
-    def match(
+    def match_json(
         cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
     ) -> bool:
         if llm_spec.model_format == "awq":