xinference 0.15.1__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (23)
  1. xinference/_version.py +3 -3
  2. xinference/core/model.py +2 -2
  3. xinference/model/audio/cosyvoice.py +3 -3
  4. xinference/model/image/stable_diffusion/core.py +30 -19
  5. xinference/model/llm/__init__.py +1 -1
  6. xinference/model/llm/llm_family.json +850 -2
  7. xinference/model/llm/llm_family_modelscope.json +893 -0
  8. xinference/model/llm/sglang/core.py +4 -0
  9. xinference/model/llm/vllm/core.py +5 -0
  10. xinference/web/ui/build/asset-manifest.json +3 -3
  11. xinference/web/ui/build/index.html +1 -1
  12. xinference/web/ui/build/static/js/{main.754740c0.js → main.29578905.js} +3 -3
  13. xinference/web/ui/build/static/js/main.29578905.js.map +1 -0
  14. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +1 -0
  15. {xinference-0.15.1.dist-info → xinference-0.15.2.dist-info}/METADATA +5 -5
  16. {xinference-0.15.1.dist-info → xinference-0.15.2.dist-info}/RECORD +21 -21
  17. xinference/web/ui/build/static/js/main.754740c0.js.map +0 -1
  18. xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +0 -1
  19. /xinference/web/ui/build/static/js/{main.754740c0.js.LICENSE.txt → main.29578905.js.LICENSE.txt} +0 -0
  20. {xinference-0.15.1.dist-info → xinference-0.15.2.dist-info}/LICENSE +0 -0
  21. {xinference-0.15.1.dist-info → xinference-0.15.2.dist-info}/WHEEL +0 -0
  22. {xinference-0.15.1.dist-info → xinference-0.15.2.dist-info}/entry_points.txt +0 -0
  23. {xinference-0.15.1.dist-info → xinference-0.15.2.dist-info}/top_level.txt +0 -0
@@ -4602,6 +4602,34 @@
4602
4602
  "model_hub": "modelscope",
4603
4603
  "model_id":"qwen/Qwen2-VL-2B-Instruct-AWQ",
4604
4604
  "model_revision":"master"
4605
+ },
4606
+ {
4607
+ "model_format":"pytorch",
4608
+ "model_size_in_billions":72,
4609
+ "quantizations":[
4610
+ "none"
4611
+ ],
4612
+ "model_id":"qwen/Qwen2-VL-72B-Instruct",
4613
+ "model_hub": "modelscope"
4614
+ },
4615
+ {
4616
+ "model_format":"awq",
4617
+ "model_size_in_billions":72,
4618
+ "quantizations":[
4619
+ "Int4"
4620
+ ],
4621
+ "model_id":"qwen/Qwen2-VL-72B-Instruct-AWQ",
4622
+ "model_hub": "modelscope"
4623
+ },
4624
+ {
4625
+ "model_format":"gptq",
4626
+ "model_size_in_billions":72,
4627
+ "quantizations":[
4628
+ "Int4",
4629
+ "Int8"
4630
+ ],
4631
+ "model_id":"qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}",
4632
+ "model_hub": "modelscope"
4605
4633
  }
4606
4634
  ],
4607
4635
  "prompt_style": {
@@ -4957,5 +4985,870 @@
4957
4985
  "model_revision": "master"
4958
4986
  }
4959
4987
  ]
4988
+ },
4989
+ {
4990
+ "version": 1,
4991
+ "context_length": 32768,
4992
+ "model_name": "qwen2.5",
4993
+ "model_lang": [
4994
+ "en",
4995
+ "zh"
4996
+ ],
4997
+ "model_ability": [
4998
+ "generate"
4999
+ ],
5000
+ "model_description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.",
5001
+ "model_specs": [
5002
+ {
5003
+ "model_format": "pytorch",
5004
+ "model_size_in_billions": "0_5",
5005
+ "quantizations": [
5006
+ "4-bit",
5007
+ "8-bit",
5008
+ "none"
5009
+ ],
5010
+ "model_id": "qwen/Qwen2.5-0.5B",
5011
+ "model_revision": "master",
5012
+ "model_hub": "modelscope"
5013
+ },
5014
+ {
5015
+ "model_format": "pytorch",
5016
+ "model_size_in_billions": "1_5",
5017
+ "quantizations": [
5018
+ "4-bit",
5019
+ "8-bit",
5020
+ "none"
5021
+ ],
5022
+ "model_id": "qwen/Qwen2.5-1.5B",
5023
+ "model_revision": "master",
5024
+ "model_hub": "modelscope"
5025
+ },
5026
+ {
5027
+ "model_format": "pytorch",
5028
+ "model_size_in_billions": 3,
5029
+ "quantizations": [
5030
+ "4-bit",
5031
+ "8-bit",
5032
+ "none"
5033
+ ],
5034
+ "model_id": "qwen/Qwen2.5-3B",
5035
+ "model_revision": "master",
5036
+ "model_hub": "modelscope"
5037
+ },
5038
+ {
5039
+ "model_format": "pytorch",
5040
+ "model_size_in_billions": 7,
5041
+ "quantizations": [
5042
+ "4-bit",
5043
+ "8-bit",
5044
+ "none"
5045
+ ],
5046
+ "model_id": "qwen/Qwen2.5-7B",
5047
+ "model_revision": "master",
5048
+ "model_hub": "modelscope"
5049
+ },
5050
+ {
5051
+ "model_format": "pytorch",
5052
+ "model_size_in_billions": 14,
5053
+ "quantizations": [
5054
+ "4-bit",
5055
+ "8-bit",
5056
+ "none"
5057
+ ],
5058
+ "model_id": "qwen/Qwen2.5-14B",
5059
+ "model_revision": "master",
5060
+ "model_hub": "modelscope"
5061
+ },
5062
+ {
5063
+ "model_format": "pytorch",
5064
+ "model_size_in_billions": 32,
5065
+ "quantizations": [
5066
+ "4-bit",
5067
+ "8-bit",
5068
+ "none"
5069
+ ],
5070
+ "model_id": "qwen/Qwen2.5-32B",
5071
+ "model_revision": "master",
5072
+ "model_hub": "modelscope"
5073
+ },
5074
+ {
5075
+ "model_format": "pytorch",
5076
+ "model_size_in_billions": 72,
5077
+ "quantizations": [
5078
+ "4-bit",
5079
+ "8-bit",
5080
+ "none"
5081
+ ],
5082
+ "model_id": "qwen/Qwen2.5-72B",
5083
+ "model_revision": "master",
5084
+ "model_hub": "modelscope"
5085
+ }
5086
+ ]
5087
+ },
5088
+ {
5089
+ "version": 1,
5090
+ "context_length": 32768,
5091
+ "model_name": "qwen2.5-instruct",
5092
+ "model_lang": [
5093
+ "en",
5094
+ "zh"
5095
+ ],
5096
+ "model_ability": [
5097
+ "chat",
5098
+ "tools"
5099
+ ],
5100
+ "model_description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.",
5101
+ "model_specs": [
5102
+ {
5103
+ "model_format": "pytorch",
5104
+ "model_size_in_billions": "0_5",
5105
+ "quantizations": [
5106
+ "4-bit",
5107
+ "8-bit",
5108
+ "none"
5109
+ ],
5110
+ "model_id": "qwen/Qwen2.5-0.5B-Instruct",
5111
+ "model_hub": "modelscope"
5112
+ },
5113
+ {
5114
+ "model_format": "pytorch",
5115
+ "model_size_in_billions": "1_5",
5116
+ "quantizations": [
5117
+ "4-bit",
5118
+ "8-bit",
5119
+ "none"
5120
+ ],
5121
+ "model_id": "qwen/Qwen2.5-1.5B-Instruct",
5122
+ "model_hub": "modelscope"
5123
+ },
5124
+ {
5125
+ "model_format": "pytorch",
5126
+ "model_size_in_billions": 3,
5127
+ "quantizations": [
5128
+ "4-bit",
5129
+ "8-bit",
5130
+ "none"
5131
+ ],
5132
+ "model_id": "qwen/Qwen2.5-3B-Instruct",
5133
+ "model_hub": "modelscope"
5134
+ },
5135
+ {
5136
+ "model_format": "pytorch",
5137
+ "model_size_in_billions": 7,
5138
+ "quantizations": [
5139
+ "4-bit",
5140
+ "8-bit",
5141
+ "none"
5142
+ ],
5143
+ "model_id": "qwen/Qwen2.5-7B-Instruct",
5144
+ "model_hub": "modelscope"
5145
+ },
5146
+ {
5147
+ "model_format": "pytorch",
5148
+ "model_size_in_billions": 14,
5149
+ "quantizations": [
5150
+ "4-bit",
5151
+ "8-bit",
5152
+ "none"
5153
+ ],
5154
+ "model_id": "qwen/Qwen2.5-14B-Instruct",
5155
+ "model_hub": "modelscope"
5156
+ },
5157
+ {
5158
+ "model_format": "pytorch",
5159
+ "model_size_in_billions": 32,
5160
+ "quantizations": [
5161
+ "4-bit",
5162
+ "8-bit",
5163
+ "none"
5164
+ ],
5165
+ "model_id": "qwen/Qwen2.5-32B-Instruct",
5166
+ "model_hub": "modelscope"
5167
+ },
5168
+ {
5169
+ "model_format": "pytorch",
5170
+ "model_size_in_billions": 72,
5171
+ "quantizations": [
5172
+ "4-bit",
5173
+ "8-bit",
5174
+ "none"
5175
+ ],
5176
+ "model_id": "qwen/Qwen2.5-72B-Instruct",
5177
+ "model_hub": "modelscope"
5178
+ },
5179
+ {
5180
+ "model_format": "gptq",
5181
+ "model_size_in_billions": "0_5",
5182
+ "quantizations": [
5183
+ "Int4",
5184
+ "Int8"
5185
+ ],
5186
+ "model_id": "qwen/Qwen2.5-0.5B-Instruct-GPTQ-{quantization}",
5187
+ "model_hub": "modelscope"
5188
+ },
5189
+ {
5190
+ "model_format": "gptq",
5191
+ "model_size_in_billions": "1_5",
5192
+ "quantizations": [
5193
+ "Int4",
5194
+ "Int8"
5195
+ ],
5196
+ "model_id": "qwen/Qwen2.5-1.5B-Instruct-GPTQ-{quantization}",
5197
+ "model_hub": "modelscope"
5198
+ },
5199
+ {
5200
+ "model_format": "gptq",
5201
+ "model_size_in_billions": 3,
5202
+ "quantizations": [
5203
+ "Int4",
5204
+ "Int8"
5205
+ ],
5206
+ "model_id": "qwen/Qwen2.5-3B-Instruct-GPTQ-{quantization}",
5207
+ "model_hub": "modelscope"
5208
+ },
5209
+ {
5210
+ "model_format": "gptq",
5211
+ "model_size_in_billions": 7,
5212
+ "quantizations": [
5213
+ "Int4",
5214
+ "Int8"
5215
+ ],
5216
+ "model_id": "qwen/Qwen2.5-7B-Instruct-GPTQ-{quantization}",
5217
+ "model_hub": "modelscope"
5218
+ },
5219
+ {
5220
+ "model_format": "gptq",
5221
+ "model_size_in_billions": 14,
5222
+ "quantizations": [
5223
+ "Int4",
5224
+ "Int8"
5225
+ ],
5226
+ "model_id": "qwen/Qwen2.5-14B-Instruct-GPTQ-{quantization}",
5227
+ "model_hub": "modelscope"
5228
+ },
5229
+ {
5230
+ "model_format": "gptq",
5231
+ "model_size_in_billions": 32,
5232
+ "quantizations": [
5233
+ "Int4",
5234
+ "Int8"
5235
+ ],
5236
+ "model_id": "qwen/Qwen2.5-32B-Instruct-GPTQ-{quantization}",
5237
+ "model_hub": "modelscope"
5238
+ },
5239
+ {
5240
+ "model_format": "gptq",
5241
+ "model_size_in_billions": 72,
5242
+ "quantizations": [
5243
+ "Int4",
5244
+ "Int8"
5245
+ ],
5246
+ "model_id": "qwen/Qwen2.5-72B-Instruct-GPTQ-{quantization}",
5247
+ "model_hub": "modelscope"
5248
+ },
5249
+ {
5250
+ "model_format": "awq",
5251
+ "model_size_in_billions": "0_5",
5252
+ "quantizations": [
5253
+ "Int4"
5254
+ ],
5255
+ "model_id": "qwen/Qwen2.5-0.5B-Instruct-AWQ",
5256
+ "model_hub": "modelscope"
5257
+ },
5258
+ {
5259
+ "model_format": "awq",
5260
+ "model_size_in_billions": "1_5",
5261
+ "quantizations": [
5262
+ "Int4"
5263
+ ],
5264
+ "model_id": "qwen/Qwen2.5-1.5B-Instruct-AWQ",
5265
+ "model_hub": "modelscope"
5266
+ },
5267
+ {
5268
+ "model_format": "awq",
5269
+ "model_size_in_billions": 3,
5270
+ "quantizations": [
5271
+ "Int4"
5272
+ ],
5273
+ "model_id": "qwen/Qwen2.5-3B-Instruct-AWQ",
5274
+ "model_hub": "modelscope"
5275
+ },
5276
+ {
5277
+ "model_format": "awq",
5278
+ "model_size_in_billions": 7,
5279
+ "quantizations": [
5280
+ "Int4"
5281
+ ],
5282
+ "model_id": "qwen/Qwen2.5-7B-Instruct-AWQ",
5283
+ "model_hub": "modelscope"
5284
+ },
5285
+ {
5286
+ "model_format": "awq",
5287
+ "model_size_in_billions":14,
5288
+ "quantizations": [
5289
+ "Int4"
5290
+ ],
5291
+ "model_id": "qwen/Qwen2.5-14B-Instruct-AWQ",
5292
+ "model_hub": "modelscope"
5293
+ },
5294
+ {
5295
+ "model_format": "awq",
5296
+ "model_size_in_billions": 32,
5297
+ "quantizations": [
5298
+ "Int4"
5299
+ ],
5300
+ "model_id": "qwen/Qwen2.5-32B-Instruct-AWQ",
5301
+ "model_hub": "modelscope"
5302
+ },
5303
+ {
5304
+ "model_format": "awq",
5305
+ "model_size_in_billions": 72,
5306
+ "quantizations": [
5307
+ "Int4"
5308
+ ],
5309
+ "model_id": "qwen/Qwen2.5-72B-Instruct-AWQ",
5310
+ "model_hub": "modelscope"
5311
+ },
5312
+ {
5313
+ "model_format": "ggufv2",
5314
+ "model_size_in_billions": "0_5",
5315
+ "quantizations": [
5316
+ "q2_k",
5317
+ "q3_k_m",
5318
+ "q4_0",
5319
+ "q4_k_m",
5320
+ "q5_0",
5321
+ "q5_k_m",
5322
+ "q6_k",
5323
+ "q8_0"
5324
+ ],
5325
+ "model_id": "qwen/Qwen2.5-0.5B-Instruct-GGUF",
5326
+ "model_file_name_template": "qwen2.5-0.5b-instruct-{quantization}.gguf",
5327
+ "model_hub": "modelscope"
5328
+ },
5329
+ {
5330
+ "model_format": "ggufv2",
5331
+ "model_size_in_billions": "1_5",
5332
+ "quantizations": [
5333
+ "q2_k",
5334
+ "q3_k_m",
5335
+ "q4_0",
5336
+ "q4_k_m",
5337
+ "q5_0",
5338
+ "q5_k_m",
5339
+ "q6_k",
5340
+ "q8_0"
5341
+ ],
5342
+ "model_id": "qwen/Qwen2.5-1.5B-Instruct-GGUF",
5343
+ "model_file_name_template": "qwen2.5-1.5b-instruct-{quantization}.gguf",
5344
+ "model_hub": "modelscope"
5345
+ },
5346
+ {
5347
+ "model_format": "ggufv2",
5348
+ "model_size_in_billions": 3,
5349
+ "quantizations": [
5350
+ "q2_k",
5351
+ "q3_k_m",
5352
+ "q4_0",
5353
+ "q4_k_m",
5354
+ "q5_0",
5355
+ "q5_k_m",
5356
+ "q6_k",
5357
+ "q8_0"
5358
+ ],
5359
+ "model_id": "qwen/Qwen2.5-3B-Instruct-GGUF",
5360
+ "model_file_name_template": "qwen2.5-3b-instruct-{quantization}.gguf",
5361
+ "model_hub": "modelscope"
5362
+ },
5363
+ {
5364
+ "model_format": "ggufv2",
5365
+ "model_size_in_billions": 7,
5366
+ "quantizations": [
5367
+ "q2_k",
5368
+ "q3_k_m",
5369
+ "q4_0",
5370
+ "q4_k_m",
5371
+ "q5_0",
5372
+ "q5_k_m",
5373
+ "q6_k",
5374
+ "q8_0"
5375
+ ],
5376
+ "model_id": "qwen/Qwen2.5-7B-Instruct-GGUF",
5377
+ "model_file_name_template": "qwen2.5-7b-instruct-{quantization}.gguf",
5378
+ "model_hub": "modelscope",
5379
+ "model_file_name_split_template": "qwen2.5-7b-instruct-{quantization}-{part}.gguf",
5380
+ "quantization_parts": {
5381
+ "q4_0": [
5382
+ "00001-of-00002",
5383
+ "00002-of-00002"
5384
+ ],
5385
+ "q4_k_m": [
5386
+ "00001-of-00002",
5387
+ "00002-of-00002"
5388
+ ],
5389
+ "q5_0": [
5390
+ "00001-of-00002",
5391
+ "00002-of-00002"
5392
+ ],
5393
+ "q5_k_m": [
5394
+ "00001-of-00002",
5395
+ "00002-of-00002"
5396
+ ],
5397
+ "q6_k": [
5398
+ "00001-of-00002",
5399
+ "00002-of-00002"
5400
+ ],
5401
+ "q8_0": [
5402
+ "00001-of-00002",
5403
+ "00002-of-00002"
5404
+ ]
5405
+ }
5406
+ },
5407
+ {
5408
+ "model_format": "ggufv2",
5409
+ "model_size_in_billions": 14,
5410
+ "quantizations": [
5411
+ "q2_k",
5412
+ "q3_k_m",
5413
+ "q4_0",
5414
+ "q4_k_m",
5415
+ "q5_0",
5416
+ "q5_k_m",
5417
+ "q6_k",
5418
+ "q8_0"
5419
+ ],
5420
+ "model_id": "qwen/Qwen2.5-14B-Instruct-GGUF",
5421
+ "model_file_name_template": "qwen2.5-14b-instruct-{quantization}.gguf",
5422
+ "model_file_name_split_template": "qwen2.5-14b-instruct-{quantization}-{part}.gguf",
5423
+ "quantization_parts": {
5424
+ "q2_k": [
5425
+ "00001-of-00002",
5426
+ "00002-of-00002"
5427
+ ],
5428
+ "q3_k_m": [
5429
+ "00001-of-00002",
5430
+ "00002-of-00002"
5431
+ ],
5432
+ "q4_0": [
5433
+ "00001-of-00003",
5434
+ "00002-of-00003",
5435
+ "00003-of-00003"
5436
+ ],
5437
+ "q4_k_m": [
5438
+ "00001-of-00003",
5439
+ "00002-of-00003",
5440
+ "00003-of-00003"
5441
+ ],
5442
+ "q5_0": [
5443
+ "00001-of-00003",
5444
+ "00002-of-00003",
5445
+ "00003-of-00003"
5446
+ ],
5447
+ "q5_k_m": [
5448
+ "00001-of-00003",
5449
+ "00002-of-00003",
5450
+ "00003-of-00003"
5451
+ ],
5452
+ "q6_k": [
5453
+ "00001-of-00004",
5454
+ "00002-of-00004",
5455
+ "00003-of-00004",
5456
+ "00004-of-00004"
5457
+ ],
5458
+ "q8_0": [
5459
+ "00001-of-00004",
5460
+ "00002-of-00004",
5461
+ "00003-of-00004",
5462
+ "00004-of-00004"
5463
+ ]
5464
+ },
5465
+ "model_hub": "modelscope"
5466
+ },
5467
+ {
5468
+ "model_format": "ggufv2",
5469
+ "model_size_in_billions": 32,
5470
+ "quantizations": [
5471
+ "q2_k",
5472
+ "q3_k_m",
5473
+ "q4_0",
5474
+ "q4_k_m",
5475
+ "q5_0",
5476
+ "q5_k_m",
5477
+ "q6_k",
5478
+ "q8_0"
5479
+ ],
5480
+ "model_id": "qwen/Qwen2.5-32B-Instruct-GGUF",
5481
+ "model_file_name_template": "qwen2.5-32b-instruct-{quantization}.gguf",
5482
+ "model_file_name_split_template": "qwen2.5-32b-instruct-{quantization}-{part}.gguf",
5483
+ "quantization_parts": {
5484
+ "q2_k": [
5485
+ "00001-of-00004",
5486
+ "00002-of-00004",
5487
+ "00003-of-00004",
5488
+ "00004-of-00004"
5489
+ ],
5490
+ "q3_k_m": [
5491
+ "00001-of-00005",
5492
+ "00002-of-00005",
5493
+ "00003-of-00005",
5494
+ "00004-of-00005",
5495
+ "00005-of-00005"
5496
+ ],
5497
+ "q4_0": [
5498
+ "00001-of-00005",
5499
+ "00002-of-00005",
5500
+ "00003-of-00005",
5501
+ "00004-of-00005",
5502
+ "00005-of-00005"
5503
+ ],
5504
+ "q4_k_m": [
5505
+ "00001-of-00005",
5506
+ "00002-of-00005",
5507
+ "00003-of-00005",
5508
+ "00004-of-00005",
5509
+ "00005-of-00005"
5510
+ ],
5511
+ "q5_0": [
5512
+ "00001-of-00006",
5513
+ "00002-of-00006",
5514
+ "00003-of-00006",
5515
+ "00004-of-00006",
5516
+ "00005-of-00006",
5517
+ "00006-of-00006"
5518
+ ],
5519
+ "q5_k_m": [
5520
+ "00001-of-00006",
5521
+ "00002-of-00006",
5522
+ "00003-of-00006",
5523
+ "00004-of-00006",
5524
+ "00005-of-00006",
5525
+ "00006-of-00006"
5526
+ ],
5527
+ "q6_k": [
5528
+ "00001-of-00007",
5529
+ "00002-of-00007",
5530
+ "00003-of-00007",
5531
+ "00004-of-00007",
5532
+ "00005-of-00007",
5533
+ "00006-of-00007",
5534
+ "00007-of-00007"
5535
+ ],
5536
+ "q8_0": [
5537
+ "00001-of-00009",
5538
+ "00002-of-00009",
5539
+ "00003-of-00009",
5540
+ "00004-of-00009",
5541
+ "00005-of-00009",
5542
+ "00006-of-00009",
5543
+ "00007-of-00009",
5544
+ "00008-of-00009",
5545
+ "00009-of-00009"
5546
+ ]
5547
+ },
5548
+ "model_hub": "modelscope"
5549
+ },
5550
+ {
5551
+ "model_format": "ggufv2",
5552
+ "model_size_in_billions": 72,
5553
+ "quantizations": [
5554
+ "q2_k",
5555
+ "q3_k_m",
5556
+ "q4_0",
5557
+ "q4_k_m",
5558
+ "q5_0",
5559
+ "q5_k_m",
5560
+ "q6_k",
5561
+ "q8_0"
5562
+ ],
5563
+ "model_id": "qwen/Qwen2.5-72B-Instruct-GGUF",
5564
+ "model_hub": "modelscope",
5565
+ "model_file_name_template": "qwen2.5-72b-instruct-{quantization}.gguf",
5566
+ "model_file_name_split_template": "qwen2.5-72b-instruct-{quantization}-{part}.gguf",
5567
+ "quantization_parts": {
5568
+ "q2_k": [
5569
+ "00001-of-00007",
5570
+ "00002-of-00007",
5571
+ "00003-of-00007",
5572
+ "00004-of-00007",
5573
+ "00005-of-00007",
5574
+ "00006-of-00007",
5575
+ "00007-of-00007"
5576
+ ],
5577
+ "q3_k_m": [
5578
+ "00001-of-00009",
5579
+ "00002-of-00009",
5580
+ "00003-of-00009",
5581
+ "00004-of-00009",
5582
+ "00005-of-00009",
5583
+ "00006-of-00009",
5584
+ "00007-of-00009",
5585
+ "00008-of-00009",
5586
+ "00009-of-00009"
5587
+ ],
5588
+ "q4_0": [
5589
+ "00001-of-00011",
5590
+ "00002-of-00011",
5591
+ "00003-of-00011",
5592
+ "00004-of-00011",
5593
+ "00005-of-00011",
5594
+ "00006-of-00011",
5595
+ "00007-of-00011",
5596
+ "00008-of-00011",
5597
+ "00009-of-00011",
5598
+ "00010-of-00011",
5599
+ "00011-of-00011"
5600
+ ],
5601
+ "q4_k_m": [
5602
+ "00001-of-00012",
5603
+ "00002-of-00012",
5604
+ "00003-of-00012",
5605
+ "00004-of-00012",
5606
+ "00005-of-00012",
5607
+ "00006-of-00012",
5608
+ "00007-of-00012",
5609
+ "00008-of-00012",
5610
+ "00009-of-00012",
5611
+ "00010-of-00012",
5612
+ "00011-of-00012",
5613
+ "00012-of-00012"
5614
+ ],
5615
+ "q5_0": [
5616
+ "00001-of-00013",
5617
+ "00002-of-00013",
5618
+ "00003-of-00013",
5619
+ "00004-of-00013",
5620
+ "00005-of-00013",
5621
+ "00006-of-00013",
5622
+ "00007-of-00013",
5623
+ "00008-of-00013",
5624
+ "00009-of-00013",
5625
+ "00010-of-00013",
5626
+ "00011-of-00013",
5627
+ "00012-of-00013",
5628
+ "00013-of-00013"
5629
+ ],
5630
+ "q5_k_m": [
5631
+ "00001-of-00014",
5632
+ "00002-of-00014",
5633
+ "00003-of-00014",
5634
+ "00004-of-00014",
5635
+ "00005-of-00014",
5636
+ "00006-of-00014",
5637
+ "00007-of-00014",
5638
+ "00008-of-00014",
5639
+ "00009-of-00014",
5640
+ "00010-of-00014",
5641
+ "00011-of-00014",
5642
+ "00012-of-00014",
5643
+ "00013-of-00014",
5644
+ "00014-of-00014"
5645
+ ],
5646
+ "q6_k": [
5647
+ "00001-of-00016",
5648
+ "00002-of-00016",
5649
+ "00003-of-00016",
5650
+ "00004-of-00016",
5651
+ "00005-of-00016",
5652
+ "00006-of-00016",
5653
+ "00007-of-00016",
5654
+ "00008-of-00016",
5655
+ "00009-of-00016",
5656
+ "00010-of-00016",
5657
+ "00011-of-00016",
5658
+ "00012-of-00016",
5659
+ "00013-of-00016",
5660
+ "00014-of-00016",
5661
+ "00015-of-00016",
5662
+ "00016-of-00016"
5663
+ ],
5664
+ "q8_0": [
5665
+ "00001-of-00021",
5666
+ "00002-of-00021",
5667
+ "00003-of-00021",
5668
+ "00004-of-00021",
5669
+ "00005-of-00021",
5670
+ "00006-of-00021",
5671
+ "00007-of-00021",
5672
+ "00008-of-00021",
5673
+ "00009-of-00021",
5674
+ "00010-of-00021",
5675
+ "00011-of-00021",
5676
+ "00012-of-00021",
5677
+ "00013-of-00021",
5678
+ "00014-of-00021",
5679
+ "00015-of-00021",
5680
+ "00016-of-00021",
5681
+ "00017-of-00021",
5682
+ "00018-of-00021",
5683
+ "00019-of-00021",
5684
+ "00020-of-00021",
5685
+ "00021-of-00021"
5686
+ ]
5687
+ }
5688
+ }
5689
+ ],
5690
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
5691
+ "stop_token_ids": [
5692
+ 151643,
5693
+ 151644,
5694
+ 151645
5695
+ ],
5696
+ "stop": [
5697
+ "<|endoftext|>",
5698
+ "<|im_start|>",
5699
+ "<|im_end|>"
5700
+ ]
5701
+ },
5702
+ {
5703
+ "version": 1,
5704
+ "context_length": 32768,
5705
+ "model_name": "qwen2.5-coder",
5706
+ "model_lang": [
5707
+ "en",
5708
+ "zh"
5709
+ ],
5710
+ "model_ability": [
5711
+ "generate"
5712
+ ],
5713
+ "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
5714
+ "model_specs": [
5715
+ {
5716
+ "model_format": "pytorch",
5717
+ "model_size_in_billions": "1_5",
5718
+ "quantizations": [
5719
+ "4-bit",
5720
+ "8-bit",
5721
+ "none"
5722
+ ],
5723
+ "model_id": "qwen/Qwen2.5-Coder-1.5B",
5724
+ "model_revision": "master",
5725
+ "model_hub": "modelscope"
5726
+ },
5727
+ {
5728
+ "model_format": "pytorch",
5729
+ "model_size_in_billions": 7,
5730
+ "quantizations": [
5731
+ "4-bit",
5732
+ "8-bit",
5733
+ "none"
5734
+ ],
5735
+ "model_id": "qwen/Qwen2.5-Coder-7B",
5736
+ "model_revision": "master",
5737
+ "model_hub": "modelscope"
5738
+ }
5739
+ ]
5740
+ },
5741
+ {
5742
+ "version": 1,
5743
+ "context_length": 32768,
5744
+ "model_name": "qwen2.5-coder-instruct",
5745
+ "model_lang": [
5746
+ "en",
5747
+ "zh"
5748
+ ],
5749
+ "model_ability": [
5750
+ "chat",
5751
+ "tools"
5752
+ ],
5753
+ "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
5754
+ "model_specs": [
5755
+ {
5756
+ "model_format": "pytorch",
5757
+ "model_size_in_billions": "1_5",
5758
+ "quantizations": [
5759
+ "4-bit",
5760
+ "8-bit",
5761
+ "none"
5762
+ ],
5763
+ "model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct",
5764
+ "model_revision": "master",
5765
+ "model_hub": "modelscope"
5766
+ },
5767
+ {
5768
+ "model_format": "pytorch",
5769
+ "model_size_in_billions": 7,
5770
+ "quantizations": [
5771
+ "4-bit",
5772
+ "8-bit",
5773
+ "none"
5774
+ ],
5775
+ "model_id": "qwen/Qwen2.5-Coder-7B-Instruct",
5776
+ "model_revision": "master",
5777
+ "model_hub": "modelscope"
5778
+ },
5779
+ {
5780
+ "model_format": "ggufv2",
5781
+ "model_size_in_billions": "1_5",
5782
+ "quantizations": [
5783
+ "q2_k",
5784
+ "q3_k_m",
5785
+ "q4_0",
5786
+ "q4_k_m",
5787
+ "q5_0",
5788
+ "q5_k_m",
5789
+ "q6_k",
5790
+ "q8_0"
5791
+ ],
5792
+ "model_hub": "modelscope",
5793
+ "model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
5794
+ "model_file_name_template": "qwen2.5-coder-1.5b-instruct-{quantization}.gguf"
5795
+ },
5796
+ {
5797
+ "model_format": "ggufv2",
5798
+ "model_size_in_billions": 7,
5799
+ "quantizations": [
5800
+ "q2_k",
5801
+ "q3_k_m",
5802
+ "q4_0",
5803
+ "q4_k_m",
5804
+ "q5_0",
5805
+ "q5_k_m",
5806
+ "q6_k",
5807
+ "q8_0"
5808
+ ],
5809
+ "model_hub": "modelscope",
5810
+ "model_id": "qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
5811
+ "model_file_name_template": "qwen2.5-coder-7b-instruct-{quantization}.gguf",
5812
+ "model_file_name_split_template": "qwen2.5-coder-7b-instruct-{quantization}-{part}.gguf",
5813
+ "quantization_parts": {
5814
+ "q4_0": [
5815
+ "00001-of-00002",
5816
+ "00002-of-00002"
5817
+ ],
5818
+ "q4_k_m": [
5819
+ "00001-of-00002",
5820
+ "00002-of-00002"
5821
+ ],
5822
+ "q5_0": [
5823
+ "00001-of-00002",
5824
+ "00002-of-00002"
5825
+ ],
5826
+ "q5_k_m": [
5827
+ "00001-of-00002",
5828
+ "00002-of-00002"
5829
+ ],
5830
+ "q6_k": [
5831
+ "00001-of-00002",
5832
+ "00002-of-00002"
5833
+ ],
5834
+ "q8_0": [
5835
+ "00001-of-00003",
5836
+ "00002-of-00003",
5837
+ "00003-of-00003"
5838
+ ]
5839
+ }
5840
+ }
5841
+ ],
5842
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
5843
+ "stop_token_ids": [
5844
+ 151643,
5845
+ 151644,
5846
+ 151645
5847
+ ],
5848
+ "stop": [
5849
+ "<|endoftext|>",
5850
+ "<|im_start|>",
5851
+ "<|im_end|>"
5852
+ ]
4960
5853
  }
4961
5854
  ]