xinference 0.15.1__py3-none-any.whl → 0.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Consult the package registry's advisory page for more details.

Files changed (34):
  1. xinference/_version.py +3 -3
  2. xinference/core/model.py +2 -2
  3. xinference/model/audio/cosyvoice.py +3 -3
  4. xinference/model/embedding/core.py +14 -5
  5. xinference/model/embedding/model_spec.json +7 -0
  6. xinference/model/embedding/model_spec_modelscope.json +9 -1
  7. xinference/model/image/stable_diffusion/core.py +42 -19
  8. xinference/model/llm/__init__.py +1 -1
  9. xinference/model/llm/llm_family.json +862 -26
  10. xinference/model/llm/llm_family_modelscope.json +895 -10
  11. xinference/model/llm/sglang/core.py +4 -0
  12. xinference/model/llm/utils.py +14 -3
  13. xinference/model/llm/vllm/core.py +27 -6
  14. xinference/model/llm/vllm/utils.py +42 -0
  15. xinference/model/rerank/core.py +19 -0
  16. xinference/model/rerank/model_spec.json +8 -0
  17. xinference/model/rerank/model_spec_modelscope.json +8 -0
  18. xinference/model/utils.py +0 -25
  19. xinference/web/ui/build/asset-manifest.json +3 -3
  20. xinference/web/ui/build/index.html +1 -1
  21. xinference/web/ui/build/static/js/{main.754740c0.js → main.e51a356d.js} +3 -3
  22. xinference/web/ui/build/static/js/main.e51a356d.js.map +1 -0
  23. xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +1 -0
  24. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +1 -0
  25. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/METADATA +8 -7
  26. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/RECORD +31 -30
  27. xinference/web/ui/build/static/js/main.754740c0.js.map +0 -1
  28. xinference/web/ui/node_modules/.cache/babel-loader/68bede6d95bb5ef0b35bbb3ec5b8c937eaf6862c6cdbddb5ef222a7776aaf336.json +0 -1
  29. xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +0 -1
  30. /xinference/web/ui/build/static/js/{main.754740c0.js.LICENSE.txt → main.e51a356d.js.LICENSE.txt} +0 -0
  31. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/LICENSE +0 -0
  32. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/WHEEL +0 -0
  33. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/entry_points.txt +0 -0
  34. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/top_level.txt +0 -0
@@ -4334,16 +4334,8 @@
4334
4334
  }
4335
4335
  ],
4336
4336
  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
4337
- "stop_token_ids": [
4338
- 151643,
4339
- 151644,
4340
- 151645
4341
- ],
4342
- "stop": [
4343
- "<|endoftext|>",
4344
- "<|im_start|>",
4345
- "<|im_end|>"
4346
- ]
4337
+ "stop_token_ids": [],
4338
+ "stop": []
4347
4339
  },
4348
4340
  {
4349
4341
  "version": 1,
@@ -4602,6 +4594,34 @@
4602
4594
  "model_hub": "modelscope",
4603
4595
  "model_id":"qwen/Qwen2-VL-2B-Instruct-AWQ",
4604
4596
  "model_revision":"master"
4597
+ },
4598
+ {
4599
+ "model_format":"pytorch",
4600
+ "model_size_in_billions":72,
4601
+ "quantizations":[
4602
+ "none"
4603
+ ],
4604
+ "model_id":"qwen/Qwen2-VL-72B-Instruct",
4605
+ "model_hub": "modelscope"
4606
+ },
4607
+ {
4608
+ "model_format":"awq",
4609
+ "model_size_in_billions":72,
4610
+ "quantizations":[
4611
+ "Int4"
4612
+ ],
4613
+ "model_id":"qwen/Qwen2-VL-72B-Instruct-AWQ",
4614
+ "model_hub": "modelscope"
4615
+ },
4616
+ {
4617
+ "model_format":"gptq",
4618
+ "model_size_in_billions":72,
4619
+ "quantizations":[
4620
+ "Int4",
4621
+ "Int8"
4622
+ ],
4623
+ "model_id":"qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}",
4624
+ "model_hub": "modelscope"
4605
4625
  }
4606
4626
  ],
4607
4627
  "prompt_style": {
@@ -4957,5 +4977,870 @@
4957
4977
  "model_revision": "master"
4958
4978
  }
4959
4979
  ]
4980
+ },
4981
+ {
4982
+ "version": 1,
4983
+ "context_length": 32768,
4984
+ "model_name": "qwen2.5",
4985
+ "model_lang": [
4986
+ "en",
4987
+ "zh"
4988
+ ],
4989
+ "model_ability": [
4990
+ "generate"
4991
+ ],
4992
+ "model_description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.",
4993
+ "model_specs": [
4994
+ {
4995
+ "model_format": "pytorch",
4996
+ "model_size_in_billions": "0_5",
4997
+ "quantizations": [
4998
+ "4-bit",
4999
+ "8-bit",
5000
+ "none"
5001
+ ],
5002
+ "model_id": "qwen/Qwen2.5-0.5B",
5003
+ "model_revision": "master",
5004
+ "model_hub": "modelscope"
5005
+ },
5006
+ {
5007
+ "model_format": "pytorch",
5008
+ "model_size_in_billions": "1_5",
5009
+ "quantizations": [
5010
+ "4-bit",
5011
+ "8-bit",
5012
+ "none"
5013
+ ],
5014
+ "model_id": "qwen/Qwen2.5-1.5B",
5015
+ "model_revision": "master",
5016
+ "model_hub": "modelscope"
5017
+ },
5018
+ {
5019
+ "model_format": "pytorch",
5020
+ "model_size_in_billions": 3,
5021
+ "quantizations": [
5022
+ "4-bit",
5023
+ "8-bit",
5024
+ "none"
5025
+ ],
5026
+ "model_id": "qwen/Qwen2.5-3B",
5027
+ "model_revision": "master",
5028
+ "model_hub": "modelscope"
5029
+ },
5030
+ {
5031
+ "model_format": "pytorch",
5032
+ "model_size_in_billions": 7,
5033
+ "quantizations": [
5034
+ "4-bit",
5035
+ "8-bit",
5036
+ "none"
5037
+ ],
5038
+ "model_id": "qwen/Qwen2.5-7B",
5039
+ "model_revision": "master",
5040
+ "model_hub": "modelscope"
5041
+ },
5042
+ {
5043
+ "model_format": "pytorch",
5044
+ "model_size_in_billions": 14,
5045
+ "quantizations": [
5046
+ "4-bit",
5047
+ "8-bit",
5048
+ "none"
5049
+ ],
5050
+ "model_id": "qwen/Qwen2.5-14B",
5051
+ "model_revision": "master",
5052
+ "model_hub": "modelscope"
5053
+ },
5054
+ {
5055
+ "model_format": "pytorch",
5056
+ "model_size_in_billions": 32,
5057
+ "quantizations": [
5058
+ "4-bit",
5059
+ "8-bit",
5060
+ "none"
5061
+ ],
5062
+ "model_id": "qwen/Qwen2.5-32B",
5063
+ "model_revision": "master",
5064
+ "model_hub": "modelscope"
5065
+ },
5066
+ {
5067
+ "model_format": "pytorch",
5068
+ "model_size_in_billions": 72,
5069
+ "quantizations": [
5070
+ "4-bit",
5071
+ "8-bit",
5072
+ "none"
5073
+ ],
5074
+ "model_id": "qwen/Qwen2.5-72B",
5075
+ "model_revision": "master",
5076
+ "model_hub": "modelscope"
5077
+ }
5078
+ ]
5079
+ },
5080
+ {
5081
+ "version": 1,
5082
+ "context_length": 32768,
5083
+ "model_name": "qwen2.5-instruct",
5084
+ "model_lang": [
5085
+ "en",
5086
+ "zh"
5087
+ ],
5088
+ "model_ability": [
5089
+ "chat",
5090
+ "tools"
5091
+ ],
5092
+ "model_description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.",
5093
+ "model_specs": [
5094
+ {
5095
+ "model_format": "pytorch",
5096
+ "model_size_in_billions": "0_5",
5097
+ "quantizations": [
5098
+ "4-bit",
5099
+ "8-bit",
5100
+ "none"
5101
+ ],
5102
+ "model_id": "qwen/Qwen2.5-0.5B-Instruct",
5103
+ "model_hub": "modelscope"
5104
+ },
5105
+ {
5106
+ "model_format": "pytorch",
5107
+ "model_size_in_billions": "1_5",
5108
+ "quantizations": [
5109
+ "4-bit",
5110
+ "8-bit",
5111
+ "none"
5112
+ ],
5113
+ "model_id": "qwen/Qwen2.5-1.5B-Instruct",
5114
+ "model_hub": "modelscope"
5115
+ },
5116
+ {
5117
+ "model_format": "pytorch",
5118
+ "model_size_in_billions": 3,
5119
+ "quantizations": [
5120
+ "4-bit",
5121
+ "8-bit",
5122
+ "none"
5123
+ ],
5124
+ "model_id": "qwen/Qwen2.5-3B-Instruct",
5125
+ "model_hub": "modelscope"
5126
+ },
5127
+ {
5128
+ "model_format": "pytorch",
5129
+ "model_size_in_billions": 7,
5130
+ "quantizations": [
5131
+ "4-bit",
5132
+ "8-bit",
5133
+ "none"
5134
+ ],
5135
+ "model_id": "qwen/Qwen2.5-7B-Instruct",
5136
+ "model_hub": "modelscope"
5137
+ },
5138
+ {
5139
+ "model_format": "pytorch",
5140
+ "model_size_in_billions": 14,
5141
+ "quantizations": [
5142
+ "4-bit",
5143
+ "8-bit",
5144
+ "none"
5145
+ ],
5146
+ "model_id": "qwen/Qwen2.5-14B-Instruct",
5147
+ "model_hub": "modelscope"
5148
+ },
5149
+ {
5150
+ "model_format": "pytorch",
5151
+ "model_size_in_billions": 32,
5152
+ "quantizations": [
5153
+ "4-bit",
5154
+ "8-bit",
5155
+ "none"
5156
+ ],
5157
+ "model_id": "qwen/Qwen2.5-32B-Instruct",
5158
+ "model_hub": "modelscope"
5159
+ },
5160
+ {
5161
+ "model_format": "pytorch",
5162
+ "model_size_in_billions": 72,
5163
+ "quantizations": [
5164
+ "4-bit",
5165
+ "8-bit",
5166
+ "none"
5167
+ ],
5168
+ "model_id": "qwen/Qwen2.5-72B-Instruct",
5169
+ "model_hub": "modelscope"
5170
+ },
5171
+ {
5172
+ "model_format": "gptq",
5173
+ "model_size_in_billions": "0_5",
5174
+ "quantizations": [
5175
+ "Int4",
5176
+ "Int8"
5177
+ ],
5178
+ "model_id": "qwen/Qwen2.5-0.5B-Instruct-GPTQ-{quantization}",
5179
+ "model_hub": "modelscope"
5180
+ },
5181
+ {
5182
+ "model_format": "gptq",
5183
+ "model_size_in_billions": "1_5",
5184
+ "quantizations": [
5185
+ "Int4",
5186
+ "Int8"
5187
+ ],
5188
+ "model_id": "qwen/Qwen2.5-1.5B-Instruct-GPTQ-{quantization}",
5189
+ "model_hub": "modelscope"
5190
+ },
5191
+ {
5192
+ "model_format": "gptq",
5193
+ "model_size_in_billions": 3,
5194
+ "quantizations": [
5195
+ "Int4",
5196
+ "Int8"
5197
+ ],
5198
+ "model_id": "qwen/Qwen2.5-3B-Instruct-GPTQ-{quantization}",
5199
+ "model_hub": "modelscope"
5200
+ },
5201
+ {
5202
+ "model_format": "gptq",
5203
+ "model_size_in_billions": 7,
5204
+ "quantizations": [
5205
+ "Int4",
5206
+ "Int8"
5207
+ ],
5208
+ "model_id": "qwen/Qwen2.5-7B-Instruct-GPTQ-{quantization}",
5209
+ "model_hub": "modelscope"
5210
+ },
5211
+ {
5212
+ "model_format": "gptq",
5213
+ "model_size_in_billions": 14,
5214
+ "quantizations": [
5215
+ "Int4",
5216
+ "Int8"
5217
+ ],
5218
+ "model_id": "qwen/Qwen2.5-14B-Instruct-GPTQ-{quantization}",
5219
+ "model_hub": "modelscope"
5220
+ },
5221
+ {
5222
+ "model_format": "gptq",
5223
+ "model_size_in_billions": 32,
5224
+ "quantizations": [
5225
+ "Int4",
5226
+ "Int8"
5227
+ ],
5228
+ "model_id": "qwen/Qwen2.5-32B-Instruct-GPTQ-{quantization}",
5229
+ "model_hub": "modelscope"
5230
+ },
5231
+ {
5232
+ "model_format": "gptq",
5233
+ "model_size_in_billions": 72,
5234
+ "quantizations": [
5235
+ "Int4",
5236
+ "Int8"
5237
+ ],
5238
+ "model_id": "qwen/Qwen2.5-72B-Instruct-GPTQ-{quantization}",
5239
+ "model_hub": "modelscope"
5240
+ },
5241
+ {
5242
+ "model_format": "awq",
5243
+ "model_size_in_billions": "0_5",
5244
+ "quantizations": [
5245
+ "Int4"
5246
+ ],
5247
+ "model_id": "qwen/Qwen2-0.5B-Instruct-AWQ",
5248
+ "model_hub": "modelscope"
5249
+ },
5250
+ {
5251
+ "model_format": "awq",
5252
+ "model_size_in_billions": "1_5",
5253
+ "quantizations": [
5254
+ "Int4"
5255
+ ],
5256
+ "model_id": "qwen/Qwen2-1.5B-Instruct-AWQ",
5257
+ "model_hub": "modelscope"
5258
+ },
5259
+ {
5260
+ "model_format": "awq",
5261
+ "model_size_in_billions": 3,
5262
+ "quantizations": [
5263
+ "Int4"
5264
+ ],
5265
+ "model_id": "qwen/Qwen2.5-3B-Instruct-AWQ",
5266
+ "model_hub": "modelscope"
5267
+ },
5268
+ {
5269
+ "model_format": "awq",
5270
+ "model_size_in_billions": 7,
5271
+ "quantizations": [
5272
+ "Int4"
5273
+ ],
5274
+ "model_id": "qwen/Qwen2.5-7B-Instruct-AWQ",
5275
+ "model_hub": "modelscope"
5276
+ },
5277
+ {
5278
+ "model_format": "awq",
5279
+ "model_size_in_billions":14,
5280
+ "quantizations": [
5281
+ "Int4"
5282
+ ],
5283
+ "model_id": "qwen/Qwen2.5-14B-Instruct-AWQ",
5284
+ "model_hub": "modelscope"
5285
+ },
5286
+ {
5287
+ "model_format": "awq",
5288
+ "model_size_in_billions": 32,
5289
+ "quantizations": [
5290
+ "Int4"
5291
+ ],
5292
+ "model_id": "qwen/Qwen2.5-32B-Instruct-AWQ",
5293
+ "model_hub": "modelscope"
5294
+ },
5295
+ {
5296
+ "model_format": "awq",
5297
+ "model_size_in_billions": 72,
5298
+ "quantizations": [
5299
+ "Int4"
5300
+ ],
5301
+ "model_id": "qwen/Qwen2.5-72B-Instruct-AWQ",
5302
+ "model_hub": "modelscope"
5303
+ },
5304
+ {
5305
+ "model_format": "ggufv2",
5306
+ "model_size_in_billions": "0_5",
5307
+ "quantizations": [
5308
+ "q2_k",
5309
+ "q3_k_m",
5310
+ "q4_0",
5311
+ "q4_k_m",
5312
+ "q5_0",
5313
+ "q5_k_m",
5314
+ "q6_k",
5315
+ "q8_0"
5316
+ ],
5317
+ "model_id": "qwen/Qwen2.5-0.5B-Instruct-GGUF",
5318
+ "model_file_name_template": "qwen2.5-0.5b-instruct-{quantization}.gguf",
5319
+ "model_hub": "modelscope"
5320
+ },
5321
+ {
5322
+ "model_format": "ggufv2",
5323
+ "model_size_in_billions": "1_5",
5324
+ "quantizations": [
5325
+ "q2_k",
5326
+ "q3_k_m",
5327
+ "q4_0",
5328
+ "q4_k_m",
5329
+ "q5_0",
5330
+ "q5_k_m",
5331
+ "q6_k",
5332
+ "q8_0"
5333
+ ],
5334
+ "model_id": "qwen/Qwen2.5-1.5B-Instruct-GGUF",
5335
+ "model_file_name_template": "qwen2.5-1.5b-instruct-{quantization}.gguf",
5336
+ "model_hub": "modelscope"
5337
+ },
5338
+ {
5339
+ "model_format": "ggufv2",
5340
+ "model_size_in_billions": 3,
5341
+ "quantizations": [
5342
+ "q2_k",
5343
+ "q3_k_m",
5344
+ "q4_0",
5345
+ "q4_k_m",
5346
+ "q5_0",
5347
+ "q5_k_m",
5348
+ "q6_k",
5349
+ "q8_0"
5350
+ ],
5351
+ "model_id": "qwen/Qwen2.5-3B-Instruct-GGUF",
5352
+ "model_file_name_template": "qwen2.5-3b-instruct-{quantization}.gguf",
5353
+ "model_hub": "modelscope"
5354
+ },
5355
+ {
5356
+ "model_format": "ggufv2",
5357
+ "model_size_in_billions": 7,
5358
+ "quantizations": [
5359
+ "q2_k",
5360
+ "q3_k_m",
5361
+ "q4_0",
5362
+ "q4_k_m",
5363
+ "q5_0",
5364
+ "q5_k_m",
5365
+ "q6_k",
5366
+ "q8_0"
5367
+ ],
5368
+ "model_id": "qwen/Qwen2.5-7B-Instruct-GGUF",
5369
+ "model_file_name_template": "qwen2_5-7b-instruct-{quantization}.gguf",
5370
+ "model_hub": "modelscope",
5371
+ "model_file_name_split_template": "qwen2.5-7b-instruct-{quantization}-{part}.gguf",
5372
+ "quantization_parts": {
5373
+ "q4_0": [
5374
+ "00001-of-00002",
5375
+ "00002-of-00002"
5376
+ ],
5377
+ "q4_k_m": [
5378
+ "00001-of-00002",
5379
+ "00002-of-00002"
5380
+ ],
5381
+ "q5_0": [
5382
+ "00001-of-00002",
5383
+ "00002-of-00002"
5384
+ ],
5385
+ "q5_k_m": [
5386
+ "00001-of-00002",
5387
+ "00002-of-00002"
5388
+ ],
5389
+ "q6_k": [
5390
+ "00001-of-00002",
5391
+ "00002-of-00002"
5392
+ ],
5393
+ "q8_0": [
5394
+ "00001-of-00002",
5395
+ "00002-of-00002"
5396
+ ]
5397
+ }
5398
+ },
5399
+ {
5400
+ "model_format": "ggufv2",
5401
+ "model_size_in_billions": 14,
5402
+ "quantizations": [
5403
+ "q2_k",
5404
+ "q3_k_m",
5405
+ "q4_0",
5406
+ "q4_k_m",
5407
+ "q5_0",
5408
+ "q5_k_m",
5409
+ "q6_k",
5410
+ "q8_0"
5411
+ ],
5412
+ "model_id": "qwen/Qwen2.5-14B-Instruct-GGUF",
5413
+ "model_file_name_template": "qwen2.5-14b-instruct-{quantization}.gguf",
5414
+ "model_file_name_split_template": "qwen2.5-14b-instruct-{quantization}-{part}.gguf",
5415
+ "quantization_parts": {
5416
+ "q2_k": [
5417
+ "00001-of-00002",
5418
+ "00002-of-00002"
5419
+ ],
5420
+ "q3_k_m": [
5421
+ "00001-of-00002",
5422
+ "00002-of-00002"
5423
+ ],
5424
+ "q4_0": [
5425
+ "00001-of-00003",
5426
+ "00002-of-00003",
5427
+ "00003-of-00003"
5428
+ ],
5429
+ "q4_k_m": [
5430
+ "00001-of-00003",
5431
+ "00002-of-00003",
5432
+ "00003-of-00003"
5433
+ ],
5434
+ "q5_0": [
5435
+ "00001-of-00003",
5436
+ "00002-of-00003",
5437
+ "00003-of-00003"
5438
+ ],
5439
+ "q5_k_m": [
5440
+ "00001-of-00003",
5441
+ "00002-of-00003",
5442
+ "00003-of-00003"
5443
+ ],
5444
+ "q6_k": [
5445
+ "00001-of-00004",
5446
+ "00002-of-00004",
5447
+ "00003-of-00004",
5448
+ "00004-of-00004"
5449
+ ],
5450
+ "q8_0": [
5451
+ "00001-of-00004",
5452
+ "00002-of-00004",
5453
+ "00003-of-00004",
5454
+ "00004-of-00004"
5455
+ ]
5456
+ },
5457
+ "model_hub": "modelscope"
5458
+ },
5459
+ {
5460
+ "model_format": "ggufv2",
5461
+ "model_size_in_billions": 32,
5462
+ "quantizations": [
5463
+ "q2_k",
5464
+ "q3_k_m",
5465
+ "q4_0",
5466
+ "q4_k_m",
5467
+ "q5_0",
5468
+ "q5_k_m",
5469
+ "q6_k",
5470
+ "q8_0"
5471
+ ],
5472
+ "model_id": "qwen/Qwen2.5-32B-Instruct-GGUF",
5473
+ "model_file_name_template": "qwen2_5-32b-instruct-{quantization}.gguf",
5474
+ "model_file_name_split_template": "qwen2.5-32b-instruct-{quantization}-{part}.gguf",
5475
+ "quantization_parts": {
5476
+ "q2_k": [
5477
+ "00001-of-00004",
5478
+ "00002-of-00004",
5479
+ "00003-of-00004",
5480
+ "00004-of-00004"
5481
+ ],
5482
+ "q3_k_m": [
5483
+ "00001-of-00005",
5484
+ "00002-of-00005",
5485
+ "00003-of-00005",
5486
+ "00004-of-00005",
5487
+ "00005-of-00005"
5488
+ ],
5489
+ "q4_0": [
5490
+ "00001-of-00005",
5491
+ "00002-of-00005",
5492
+ "00003-of-00005",
5493
+ "00004-of-00005",
5494
+ "00005-of-00005"
5495
+ ],
5496
+ "q4_k_m": [
5497
+ "00001-of-00005",
5498
+ "00002-of-00005",
5499
+ "00003-of-00005",
5500
+ "00004-of-00005",
5501
+ "00005-of-00005"
5502
+ ],
5503
+ "q5_0": [
5504
+ "00001-of-00006",
5505
+ "00002-of-00006",
5506
+ "00003-of-00006",
5507
+ "00004-of-00006",
5508
+ "00005-of-00006",
5509
+ "00006-of-00006"
5510
+ ],
5511
+ "q5_k_m": [
5512
+ "00001-of-00006",
5513
+ "00002-of-00006",
5514
+ "00003-of-00006",
5515
+ "00004-of-00006",
5516
+ "00005-of-00006",
5517
+ "00006-of-00006"
5518
+ ],
5519
+ "q6_k": [
5520
+ "00001-of-00007",
5521
+ "00002-of-00007",
5522
+ "00003-of-00007",
5523
+ "00004-of-00007",
5524
+ "00005-of-00007",
5525
+ "00006-of-00007",
5526
+ "00007-of-00007"
5527
+ ],
5528
+ "q8_0": [
5529
+ "00001-of-00009",
5530
+ "00002-of-00009",
5531
+ "00003-of-00009",
5532
+ "00004-of-00009",
5533
+ "00005-of-00009",
5534
+ "00006-of-00009",
5535
+ "00007-of-00009",
5536
+ "00008-of-00009",
5537
+ "00009-of-00009"
5538
+ ]
5539
+ },
5540
+ "model_hub": "modelscope"
5541
+ },
5542
+ {
5543
+ "model_format": "ggufv2",
5544
+ "model_size_in_billions": 72,
5545
+ "quantizations": [
5546
+ "q2_k",
5547
+ "q3_k_m",
5548
+ "q4_0",
5549
+ "q4_k_m",
5550
+ "q5_0",
5551
+ "q5_k_m",
5552
+ "q6_k",
5553
+ "q8_0"
5554
+ ],
5555
+ "model_id": "qwen/Qwen2.5-72B-Instruct-GGUF",
5556
+ "model_hub": "modelscope",
5557
+ "model_file_name_template": "qwen2_5-72b-instruct-{quantization}.gguf",
5558
+ "model_file_name_split_template": "qwen2.5-72b-instruct-{quantization}-{part}.gguf",
5559
+ "quantization_parts": {
5560
+ "q2_k": [
5561
+ "00001-of-00007",
5562
+ "00002-of-00007",
5563
+ "00003-of-00007",
5564
+ "00004-of-00007",
5565
+ "00005-of-00007",
5566
+ "00006-of-00007",
5567
+ "00007-of-00007"
5568
+ ],
5569
+ "q3_k_m": [
5570
+ "00001-of-00009",
5571
+ "00002-of-00009",
5572
+ "00003-of-00009",
5573
+ "00004-of-00009",
5574
+ "00005-of-00009",
5575
+ "00006-of-00009",
5576
+ "00007-of-00009",
5577
+ "00008-of-00009",
5578
+ "00009-of-00009"
5579
+ ],
5580
+ "q4_0": [
5581
+ "00001-of-00011",
5582
+ "00002-of-00011",
5583
+ "00003-of-00011",
5584
+ "00004-of-00011",
5585
+ "00005-of-00011",
5586
+ "00006-of-00011",
5587
+ "00007-of-00011",
5588
+ "00008-of-00011",
5589
+ "00009-of-00011",
5590
+ "00010-of-00011",
5591
+ "00011-of-00011"
5592
+ ],
5593
+ "q4_k_m": [
5594
+ "00001-of-00012",
5595
+ "00002-of-00012",
5596
+ "00003-of-00012",
5597
+ "00004-of-00012",
5598
+ "00005-of-00012",
5599
+ "00006-of-00012",
5600
+ "00007-of-00012",
5601
+ "00008-of-00012",
5602
+ "00009-of-00012",
5603
+ "00010-of-00012",
5604
+ "00011-of-00012",
5605
+ "00012-of-00012"
5606
+ ],
5607
+ "q5_0": [
5608
+ "00001-of-00013",
5609
+ "00002-of-00013",
5610
+ "00003-of-00013",
5611
+ "00004-of-00013",
5612
+ "00005-of-00013",
5613
+ "00006-of-00013",
5614
+ "00007-of-00013",
5615
+ "00008-of-00013",
5616
+ "00009-of-00013",
5617
+ "00010-of-00013",
5618
+ "00011-of-00013",
5619
+ "00012-of-00013",
5620
+ "00013-of-00013"
5621
+ ],
5622
+ "q5_k_m": [
5623
+ "00001-of-00014",
5624
+ "00002-of-00014",
5625
+ "00003-of-00014",
5626
+ "00004-of-00014",
5627
+ "00005-of-00014",
5628
+ "00006-of-00014",
5629
+ "00007-of-00014",
5630
+ "00008-of-00014",
5631
+ "00009-of-00014",
5632
+ "00010-of-00014",
5633
+ "00011-of-00014",
5634
+ "00012-of-00014",
5635
+ "00013-of-00014",
5636
+ "00014-of-00014"
5637
+ ],
5638
+ "q6_k": [
5639
+ "00001-of-00016",
5640
+ "00002-of-00016",
5641
+ "00003-of-00016",
5642
+ "00004-of-00016",
5643
+ "00005-of-00016",
5644
+ "00006-of-00016",
5645
+ "00007-of-00016",
5646
+ "00008-of-00016",
5647
+ "00009-of-00016",
5648
+ "00010-of-00016",
5649
+ "00011-of-00016",
5650
+ "00012-of-00016",
5651
+ "00013-of-00016",
5652
+ "00014-of-00016",
5653
+ "00015-of-00016",
5654
+ "00016-of-00016"
5655
+ ],
5656
+ "q8_0": [
5657
+ "00001-of-00021",
5658
+ "00002-of-00021",
5659
+ "00003-of-00021",
5660
+ "00004-of-00021",
5661
+ "00005-of-00021",
5662
+ "00006-of-00021",
5663
+ "00007-of-00021",
5664
+ "00008-of-00021",
5665
+ "00009-of-00021",
5666
+ "00010-of-00021",
5667
+ "00011-of-00021",
5668
+ "00012-of-00021",
5669
+ "00013-of-00021",
5670
+ "00014-of-00021",
5671
+ "00015-of-00021",
5672
+ "00016-of-00021",
5673
+ "00017-of-00021",
5674
+ "00018-of-00021",
5675
+ "00019-of-00021",
5676
+ "00020-of-00021",
5677
+ "00021-of-00021"
5678
+ ]
5679
+ }
5680
+ }
5681
+ ],
5682
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
5683
+ "stop_token_ids": [
5684
+ 151643,
5685
+ 151644,
5686
+ 151645
5687
+ ],
5688
+ "stop": [
5689
+ "<|endoftext|>",
5690
+ "<|im_start|>",
5691
+ "<|im_end|>"
5692
+ ]
5693
+ },
5694
+ {
5695
+ "version": 1,
5696
+ "context_length": 32768,
5697
+ "model_name": "qwen2.5-coder",
5698
+ "model_lang": [
5699
+ "en",
5700
+ "zh"
5701
+ ],
5702
+ "model_ability": [
5703
+ "generate"
5704
+ ],
5705
+ "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
5706
+ "model_specs": [
5707
+ {
5708
+ "model_format": "pytorch",
5709
+ "model_size_in_billions": "1_5",
5710
+ "quantizations": [
5711
+ "4-bit",
5712
+ "8-bit",
5713
+ "none"
5714
+ ],
5715
+ "model_id": "qwen/Qwen2.5-Coder-1.5B",
5716
+ "model_revision": "master",
5717
+ "model_hub": "modelscope"
5718
+ },
5719
+ {
5720
+ "model_format": "pytorch",
5721
+ "model_size_in_billions": 7,
5722
+ "quantizations": [
5723
+ "4-bit",
5724
+ "8-bit",
5725
+ "none"
5726
+ ],
5727
+ "model_id": "qwen/Qwen2.5-Coder-7B",
5728
+ "model_revision": "master",
5729
+ "model_hub": "modelscope"
5730
+ }
5731
+ ]
5732
+ },
5733
+ {
5734
+ "version": 1,
5735
+ "context_length": 32768,
5736
+ "model_name": "qwen2.5-coder-instruct",
5737
+ "model_lang": [
5738
+ "en",
5739
+ "zh"
5740
+ ],
5741
+ "model_ability": [
5742
+ "chat",
5743
+ "tools"
5744
+ ],
5745
+ "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
5746
+ "model_specs": [
5747
+ {
5748
+ "model_format": "pytorch",
5749
+ "model_size_in_billions": "1_5",
5750
+ "quantizations": [
5751
+ "4-bit",
5752
+ "8-bit",
5753
+ "none"
5754
+ ],
5755
+ "model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct",
5756
+ "model_revision": "master",
5757
+ "model_hub": "modelscope"
5758
+ },
5759
+ {
5760
+ "model_format": "pytorch",
5761
+ "model_size_in_billions": 7,
5762
+ "quantizations": [
5763
+ "4-bit",
5764
+ "8-bit",
5765
+ "none"
5766
+ ],
5767
+ "model_id": "qwen/Qwen2.5-Coder-7B-Instruct",
5768
+ "model_revision": "master",
5769
+ "model_hub": "modelscope"
5770
+ },
5771
+ {
5772
+ "model_format": "ggufv2",
5773
+ "model_size_in_billions": "1_5",
5774
+ "quantizations": [
5775
+ "q2_k",
5776
+ "q3_k_m",
5777
+ "q4_0",
5778
+ "q4_k_m",
5779
+ "q5_0",
5780
+ "q5_k_m",
5781
+ "q6_k",
5782
+ "q8_0"
5783
+ ],
5784
+ "model_hub": "modelscope",
5785
+ "model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
5786
+ "model_file_name_template": "qwen2.5-coder-1.5b-instruct-{quantization}.gguf"
5787
+ },
5788
+ {
5789
+ "model_format": "ggufv2",
5790
+ "model_size_in_billions": 7,
5791
+ "quantizations": [
5792
+ "q2_k",
5793
+ "q3_k_m",
5794
+ "q4_0",
5795
+ "q4_k_m",
5796
+ "q5_0",
5797
+ "q5_k_m",
5798
+ "q6_k",
5799
+ "q8_0"
5800
+ ],
5801
+ "model_hub": "modelscope",
5802
+ "model_id": "qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
5803
+ "model_file_name_template": "qwen2.5-coder-7b-instruct-{quantization}.gguf",
5804
+ "model_file_name_split_template": "qwen2.5-coder-7b-instruct-{quantization}-{part}.gguf",
5805
+ "quantization_parts": {
5806
+ "q4_0": [
5807
+ "00001-of-00002",
5808
+ "00002-of-00002"
5809
+ ],
5810
+ "q4_k_m": [
5811
+ "00001-of-00002",
5812
+ "00002-of-00002"
5813
+ ],
5814
+ "q5_0": [
5815
+ "00001-of-00002",
5816
+ "00002-of-00002"
5817
+ ],
5818
+ "q5_k_m": [
5819
+ "00001-of-00002",
5820
+ "00002-of-00002"
5821
+ ],
5822
+ "q6_k": [
5823
+ "00001-of-00002",
5824
+ "00002-of-00002"
5825
+ ],
5826
+ "q8_0": [
5827
+ "00001-of-00003",
5828
+ "00002-of-00003",
5829
+ "00003-of-00003"
5830
+ ]
5831
+ }
5832
+ }
5833
+ ],
5834
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content 
}}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
5835
+ "stop_token_ids": [
5836
+ 151643,
5837
+ 151644,
5838
+ 151645
5839
+ ],
5840
+ "stop": [
5841
+ "<|endoftext|>",
5842
+ "<|im_start|>",
5843
+ "<|im_end|>"
5844
+ ]
4960
5845
  }
4961
5846
  ]