xinference 0.15.1__py3-none-any.whl → 0.15.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (34) hide show
  1. xinference/_version.py +3 -3
  2. xinference/core/model.py +2 -2
  3. xinference/model/audio/cosyvoice.py +3 -3
  4. xinference/model/embedding/core.py +14 -5
  5. xinference/model/embedding/model_spec.json +7 -0
  6. xinference/model/embedding/model_spec_modelscope.json +9 -1
  7. xinference/model/image/stable_diffusion/core.py +42 -19
  8. xinference/model/llm/__init__.py +1 -1
  9. xinference/model/llm/llm_family.json +862 -26
  10. xinference/model/llm/llm_family_modelscope.json +895 -10
  11. xinference/model/llm/sglang/core.py +4 -0
  12. xinference/model/llm/utils.py +14 -3
  13. xinference/model/llm/vllm/core.py +27 -6
  14. xinference/model/llm/vllm/utils.py +42 -0
  15. xinference/model/rerank/core.py +19 -0
  16. xinference/model/rerank/model_spec.json +8 -0
  17. xinference/model/rerank/model_spec_modelscope.json +8 -0
  18. xinference/model/utils.py +0 -25
  19. xinference/web/ui/build/asset-manifest.json +3 -3
  20. xinference/web/ui/build/index.html +1 -1
  21. xinference/web/ui/build/static/js/{main.754740c0.js → main.e51a356d.js} +3 -3
  22. xinference/web/ui/build/static/js/main.e51a356d.js.map +1 -0
  23. xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +1 -0
  24. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +1 -0
  25. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/METADATA +8 -7
  26. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/RECORD +31 -30
  27. xinference/web/ui/build/static/js/main.754740c0.js.map +0 -1
  28. xinference/web/ui/node_modules/.cache/babel-loader/68bede6d95bb5ef0b35bbb3ec5b8c937eaf6862c6cdbddb5ef222a7776aaf336.json +0 -1
  29. xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +0 -1
  30. /xinference/web/ui/build/static/js/{main.754740c0.js.LICENSE.txt → main.e51a356d.js.LICENSE.txt} +0 -0
  31. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/LICENSE +0 -0
  32. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/WHEEL +0 -0
  33. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/entry_points.txt +0 -0
  34. {xinference-0.15.1.dist-info → xinference-0.15.3.dist-info}/top_level.txt +0 -0
@@ -6483,8 +6483,7 @@
6483
6483
  "8-bit",
6484
6484
  "none"
6485
6485
  ],
6486
- "model_id": "OpenGVLab/InternVL2-1B",
6487
- "model_revision": "a9fc14aea824b6ea1d44f8778cad6b35512c4ce1"
6486
+ "model_id": "OpenGVLab/InternVL2-1B"
6488
6487
  },
6489
6488
  {
6490
6489
  "model_format": "pytorch",
@@ -6494,8 +6493,7 @@
6494
6493
  "8-bit",
6495
6494
  "none"
6496
6495
  ],
6497
- "model_id": "OpenGVLab/InternVL2-2B",
6498
- "model_revision": "422ad7c6335917bfb514958233955512338485a6"
6496
+ "model_id": "OpenGVLab/InternVL2-2B"
6499
6497
  },
6500
6498
  {
6501
6499
  "model_format": "awq",
@@ -6503,8 +6501,7 @@
6503
6501
  "quantizations": [
6504
6502
  "Int4"
6505
6503
  ],
6506
- "model_id": "OpenGVLab/InternVL2-2B-AWQ",
6507
- "model_revision": "701bc3fc098a8a3b686b3b4135cfb77202be89e0"
6504
+ "model_id": "OpenGVLab/InternVL2-2B-AWQ"
6508
6505
  },
6509
6506
  {
6510
6507
  "model_format": "pytorch",
@@ -6514,8 +6511,7 @@
6514
6511
  "8-bit",
6515
6512
  "none"
6516
6513
  ],
6517
- "model_id": "OpenGVLab/InternVL2-4B",
6518
- "model_revision": "b50544dafada6c41e80bfde2f57cc9b0140fc21c"
6514
+ "model_id": "OpenGVLab/InternVL2-4B"
6519
6515
  },
6520
6516
  {
6521
6517
  "model_format": "pytorch",
@@ -6525,8 +6521,7 @@
6525
6521
  "8-bit",
6526
6522
  "none"
6527
6523
  ],
6528
- "model_id": "OpenGVLab/InternVL2-8B",
6529
- "model_revision": "3bfd3664dea4f3da628785f5125d30f889701253"
6524
+ "model_id": "OpenGVLab/InternVL2-8B"
6530
6525
  },
6531
6526
  {
6532
6527
  "model_format": "awq",
@@ -6534,8 +6529,7 @@
6534
6529
  "quantizations": [
6535
6530
  "Int4"
6536
6531
  ],
6537
- "model_id": "OpenGVLab/InternVL2-8B-AWQ",
6538
- "model_revision": "9f1a4756b7ae18eb26d8a22b618dfc283e8193b3"
6532
+ "model_id": "OpenGVLab/InternVL2-8B-AWQ"
6539
6533
  },
6540
6534
  {
6541
6535
  "model_format": "pytorch",
@@ -6545,8 +6539,7 @@
6545
6539
  "8-bit",
6546
6540
  "none"
6547
6541
  ],
6548
- "model_id": "OpenGVLab/InternVL2-26B",
6549
- "model_revision": "b9f3c7e6d575b0115e076a3ffc46fd20b7586899"
6542
+ "model_id": "OpenGVLab/InternVL2-26B"
6550
6543
  },
6551
6544
  {
6552
6545
  "model_format": "awq",
@@ -6554,8 +6547,7 @@
6554
6547
  "quantizations": [
6555
6548
  "Int4"
6556
6549
  ],
6557
- "model_id": "OpenGVLab/InternVL2-26B-AWQ",
6558
- "model_revision": "469e0019ffd251e22ff6501a5c2321964e86ef0d"
6550
+ "model_id": "OpenGVLab/InternVL2-26B-AWQ"
6559
6551
  },
6560
6552
  {
6561
6553
  "model_format": "pytorch",
@@ -6565,8 +6557,7 @@
6565
6557
  "8-bit",
6566
6558
  "none"
6567
6559
  ],
6568
- "model_id": "OpenGVLab/InternVL2-40B",
6569
- "model_revision": "725a12063bb855c966e30a0617d0ccd9e870d772"
6560
+ "model_id": "OpenGVLab/InternVL2-40B"
6570
6561
  },
6571
6562
  {
6572
6563
  "model_format": "awq",
@@ -6574,8 +6565,7 @@
6574
6565
  "quantizations": [
6575
6566
  "Int4"
6576
6567
  ],
6577
- "model_id": "OpenGVLab/InternVL2-40B-AWQ",
6578
- "model_revision": "d92e140f6dfe8ea9679924c6a31898f42c4e1846"
6568
+ "model_id": "OpenGVLab/InternVL2-40B-AWQ"
6579
6569
  },
6580
6570
  {
6581
6571
  "model_format": "pytorch",
@@ -6585,8 +6575,7 @@
6585
6575
  "8-bit",
6586
6576
  "none"
6587
6577
  ],
6588
- "model_id": "OpenGVLab/InternVL2-Llama3-76B",
6589
- "model_revision": "cf7914905f78e9e3560ddbd6f5dfc39becac494f"
6578
+ "model_id": "OpenGVLab/InternVL2-Llama3-76B"
6590
6579
  },
6591
6580
  {
6592
6581
  "model_format": "awq",
@@ -6594,8 +6583,7 @@
6594
6583
  "quantizations": [
6595
6584
  "Int4"
6596
6585
  ],
6597
- "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ",
6598
- "model_revision": "1bc796bf80f2ebc7d6a14c15f55217a4600d50a4"
6586
+ "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ"
6599
6587
  }
6600
6588
  ],
6601
6589
  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
@@ -6874,7 +6862,7 @@
6874
6862
  "model_id":"Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8",
6875
6863
  "model_revision":"3d152a77eaccfd72d59baedb0b183a1b8fd56e48"
6876
6864
  },
6877
- {
6865
+ {
6878
6866
  "model_format":"gptq",
6879
6867
  "model_size_in_billions":7,
6880
6868
  "quantizations":[
@@ -6883,7 +6871,7 @@
6883
6871
  "model_id":"Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4",
6884
6872
  "model_revision":"5ab897112fa83b9699826be8753ef9184585c77d"
6885
6873
  },
6886
- {
6874
+ {
6887
6875
  "model_format":"awq",
6888
6876
  "model_size_in_billions":7,
6889
6877
  "quantizations":[
@@ -6891,6 +6879,31 @@
6891
6879
  ],
6892
6880
  "model_id":"Qwen/Qwen2-VL-7B-Instruct-AWQ",
6893
6881
  "model_revision":"f94216e8b513933bccd567bcd9b7350199f32538"
6882
+ },
6883
+ {
6884
+ "model_format":"pytorch",
6885
+ "model_size_in_billions":72,
6886
+ "quantizations":[
6887
+ "none"
6888
+ ],
6889
+ "model_id":"Qwen/Qwen2-VL-72B-Instruct"
6890
+ },
6891
+ {
6892
+ "model_format":"awq",
6893
+ "model_size_in_billions":72,
6894
+ "quantizations":[
6895
+ "Int4"
6896
+ ],
6897
+ "model_id":"Qwen/Qwen2-VL-72B-Instruct-AWQ"
6898
+ },
6899
+ {
6900
+ "model_format":"gptq",
6901
+ "model_size_in_billions":72,
6902
+ "quantizations":[
6903
+ "Int4",
6904
+ "Int8"
6905
+ ],
6906
+ "model_id":"Qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}"
6894
6907
  }
6895
6908
  ],
6896
6909
  "prompt_style":{
@@ -7244,5 +7257,828 @@
7244
7257
  "model_revision": "00e59e64f47d3c78e4cfbdd345888479797e8109"
7245
7258
  }
7246
7259
  ]
7260
+ },
7261
+ {
7262
+ "version": 1,
7263
+ "context_length": 32768,
7264
+ "model_name": "qwen2.5",
7265
+ "model_lang": [
7266
+ "en",
7267
+ "zh"
7268
+ ],
7269
+ "model_ability": [
7270
+ "generate"
7271
+ ],
7272
+ "model_description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.",
7273
+ "model_specs": [
7274
+ {
7275
+ "model_format": "pytorch",
7276
+ "model_size_in_billions": "0_5",
7277
+ "quantizations": [
7278
+ "4-bit",
7279
+ "8-bit",
7280
+ "none"
7281
+ ],
7282
+ "model_id": "Qwen/Qwen2.5-0.5B",
7283
+ "model_revision": "2630d3d2321bc1f1878f702166d1b2af019a7310"
7284
+ },
7285
+ {
7286
+ "model_format": "pytorch",
7287
+ "model_size_in_billions": "1_5",
7288
+ "quantizations": [
7289
+ "4-bit",
7290
+ "8-bit",
7291
+ "none"
7292
+ ],
7293
+ "model_id": "Qwen/Qwen2.5-1.5B",
7294
+ "model_revision": "e5dfabbcffd9b0c7b31d89b82c5a6b72e663f32c"
7295
+ },
7296
+ {
7297
+ "model_format": "pytorch",
7298
+ "model_size_in_billions": 3,
7299
+ "quantizations": [
7300
+ "4-bit",
7301
+ "8-bit",
7302
+ "none"
7303
+ ],
7304
+ "model_id": "Qwen/Qwen2.5-3B",
7305
+ "model_revision": "e4aa5ac50aa507415cda96cc99eb77ad0a3d2d34"
7306
+ },
7307
+ {
7308
+ "model_format": "pytorch",
7309
+ "model_size_in_billions": 7,
7310
+ "quantizations": [
7311
+ "4-bit",
7312
+ "8-bit",
7313
+ "none"
7314
+ ],
7315
+ "model_id": "Qwen/Qwen2.5-7B",
7316
+ "model_revision": "09a0bac5707b43ec44508eab308b0846320c1ed4"
7317
+ },
7318
+ {
7319
+ "model_format": "pytorch",
7320
+ "model_size_in_billions": 14,
7321
+ "quantizations": [
7322
+ "4-bit",
7323
+ "8-bit",
7324
+ "none"
7325
+ ],
7326
+ "model_id": "Qwen/Qwen2.5-14B",
7327
+ "model_revision": "d02b64ba1ce86bf9948668a13f82709600431ccc"
7328
+ },
7329
+ {
7330
+ "model_format": "pytorch",
7331
+ "model_size_in_billions": 32,
7332
+ "quantizations": [
7333
+ "4-bit",
7334
+ "8-bit",
7335
+ "none"
7336
+ ],
7337
+ "model_id": "Qwen/Qwen2.5-32B",
7338
+ "model_revision": "ff23665d01c3665be5fdb271d18a62090b65c06d"
7339
+ },
7340
+ {
7341
+ "model_format": "pytorch",
7342
+ "model_size_in_billions": 72,
7343
+ "quantizations": [
7344
+ "4-bit",
7345
+ "8-bit",
7346
+ "none"
7347
+ ],
7348
+ "model_id": "Qwen/Qwen2.5-72B",
7349
+ "model_revision": "587cc4061cf6a7cc0d429d05c109447e5cf063af"
7350
+ }
7351
+ ]
7352
+ },
7353
+ {
7354
+ "version": 1,
7355
+ "context_length": 32768,
7356
+ "model_name": "qwen2.5-instruct",
7357
+ "model_lang": [
7358
+ "en",
7359
+ "zh"
7360
+ ],
7361
+ "model_ability": [
7362
+ "chat",
7363
+ "tools"
7364
+ ],
7365
+ "model_description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.",
7366
+ "model_specs": [
7367
+ {
7368
+ "model_format": "pytorch",
7369
+ "model_size_in_billions": "0_5",
7370
+ "quantizations": [
7371
+ "4-bit",
7372
+ "8-bit",
7373
+ "none"
7374
+ ],
7375
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct"
7376
+ },
7377
+ {
7378
+ "model_format": "pytorch",
7379
+ "model_size_in_billions": "1_5",
7380
+ "quantizations": [
7381
+ "4-bit",
7382
+ "8-bit",
7383
+ "none"
7384
+ ],
7385
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct"
7386
+ },
7387
+ {
7388
+ "model_format": "pytorch",
7389
+ "model_size_in_billions": 3,
7390
+ "quantizations": [
7391
+ "4-bit",
7392
+ "8-bit",
7393
+ "none"
7394
+ ],
7395
+ "model_id": "Qwen/Qwen2.5-3B-Instruct"
7396
+ },
7397
+ {
7398
+ "model_format": "pytorch",
7399
+ "model_size_in_billions": 7,
7400
+ "quantizations": [
7401
+ "4-bit",
7402
+ "8-bit",
7403
+ "none"
7404
+ ],
7405
+ "model_id": "Qwen/Qwen2.5-7B-Instruct"
7406
+ },
7407
+ {
7408
+ "model_format": "pytorch",
7409
+ "model_size_in_billions": 14,
7410
+ "quantizations": [
7411
+ "4-bit",
7412
+ "8-bit",
7413
+ "none"
7414
+ ],
7415
+ "model_id": "Qwen/Qwen2.5-14B-Instruct"
7416
+ },
7417
+ {
7418
+ "model_format": "pytorch",
7419
+ "model_size_in_billions": 32,
7420
+ "quantizations": [
7421
+ "4-bit",
7422
+ "8-bit",
7423
+ "none"
7424
+ ],
7425
+ "model_id": "Qwen/Qwen2.5-32B-Instruct"
7426
+ },
7427
+ {
7428
+ "model_format": "pytorch",
7429
+ "model_size_in_billions": 72,
7430
+ "quantizations": [
7431
+ "4-bit",
7432
+ "8-bit",
7433
+ "none"
7434
+ ],
7435
+ "model_id": "Qwen/Qwen2.5-72B-Instruct"
7436
+ },
7437
+ {
7438
+ "model_format": "gptq",
7439
+ "model_size_in_billions": "0_5",
7440
+ "quantizations": [
7441
+ "Int4",
7442
+ "Int8"
7443
+ ],
7444
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct-GPTQ-{quantization}"
7445
+ },
7446
+ {
7447
+ "model_format": "gptq",
7448
+ "model_size_in_billions": "1_5",
7449
+ "quantizations": [
7450
+ "Int4",
7451
+ "Int8"
7452
+ ],
7453
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct-GPTQ-{quantization}"
7454
+ },
7455
+ {
7456
+ "model_format": "gptq",
7457
+ "model_size_in_billions": 3,
7458
+ "quantizations": [
7459
+ "Int4",
7460
+ "Int8"
7461
+ ],
7462
+ "model_id": "Qwen/Qwen2.5-3B-Instruct-GPTQ-{quantization}"
7463
+ },
7464
+ {
7465
+ "model_format": "gptq",
7466
+ "model_size_in_billions": 7,
7467
+ "quantizations": [
7468
+ "Int4",
7469
+ "Int8"
7470
+ ],
7471
+ "model_id": "Qwen/Qwen2.5-7B-Instruct-GPTQ-{quantization}"
7472
+ },
7473
+ {
7474
+ "model_format": "gptq",
7475
+ "model_size_in_billions": 14,
7476
+ "quantizations": [
7477
+ "Int4",
7478
+ "Int8"
7479
+ ],
7480
+ "model_id": "Qwen/Qwen2.5-14B-Instruct-GPTQ-{quantization}"
7481
+ },
7482
+ {
7483
+ "model_format": "gptq",
7484
+ "model_size_in_billions": 32,
7485
+ "quantizations": [
7486
+ "Int4",
7487
+ "Int8"
7488
+ ],
7489
+ "model_id": "Qwen/Qwen2.5-32B-Instruct-GPTQ-{quantization}"
7490
+ },
7491
+ {
7492
+ "model_format": "gptq",
7493
+ "model_size_in_billions": 72,
7494
+ "quantizations": [
7495
+ "Int4",
7496
+ "Int8"
7497
+ ],
7498
+ "model_id": "Qwen/Qwen2.5-72B-Instruct-GPTQ-{quantization}"
7499
+ },
7500
+ {
7501
+ "model_format": "awq",
7502
+ "model_size_in_billions": "0_5",
7503
+ "quantizations": [
7504
+ "Int4"
7505
+ ],
7506
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct-AWQ"
7507
+ },
7508
+ {
7509
+ "model_format": "awq",
7510
+ "model_size_in_billions": "1_5",
7511
+ "quantizations": [
7512
+ "Int4"
7513
+ ],
7514
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct-AWQ"
7515
+ },
7516
+ {
7517
+ "model_format": "awq",
7518
+ "model_size_in_billions": 3,
7519
+ "quantizations": [
7520
+ "Int4"
7521
+ ],
7522
+ "model_id": "Qwen/Qwen2.5-3B-Instruct-AWQ"
7523
+ },
7524
+ {
7525
+ "model_format": "awq",
7526
+ "model_size_in_billions": 7,
7527
+ "quantizations": [
7528
+ "Int4"
7529
+ ],
7530
+ "model_id": "Qwen/Qwen2.5-7B-Instruct-AWQ"
7531
+ },
7532
+ {
7533
+ "model_format": "awq",
7534
+ "model_size_in_billions": 14,
7535
+ "quantizations": [
7536
+ "Int4"
7537
+ ],
7538
+ "model_id": "Qwen/Qwen2.5-14B-Instruct-AWQ"
7539
+ },
7540
+ {
7541
+ "model_format": "awq",
7542
+ "model_size_in_billions": 32,
7543
+ "quantizations": [
7544
+ "Int4"
7545
+ ],
7546
+ "model_id": "Qwen/Qwen2.5-32B-Instruct-AWQ"
7547
+ },
7548
+ {
7549
+ "model_format": "awq",
7550
+ "model_size_in_billions": 72,
7551
+ "quantizations": [
7552
+ "Int4"
7553
+ ],
7554
+ "model_id": "Qwen/Qwen2.5-72B-Instruct-AWQ"
7555
+ },
7556
+ {
7557
+ "model_format": "ggufv2",
7558
+ "model_size_in_billions": "0_5",
7559
+ "quantizations": [
7560
+ "q2_k",
7561
+ "q3_k_m",
7562
+ "q4_0",
7563
+ "q4_k_m",
7564
+ "q5_0",
7565
+ "q5_k_m",
7566
+ "q6_k",
7567
+ "q8_0"
7568
+ ],
7569
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
7570
+ "model_file_name_template": "qwen2.5-0.5b-instruct-{quantization}.gguf"
7571
+ },
7572
+ {
7573
+ "model_format": "ggufv2",
7574
+ "model_size_in_billions": "1_5",
7575
+ "quantizations": [
7576
+ "q2_k",
7577
+ "q3_k_m",
7578
+ "q4_0",
7579
+ "q4_k_m",
7580
+ "q5_0",
7581
+ "q5_k_m",
7582
+ "q6_k",
7583
+ "q8_0"
7584
+ ],
7585
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct-GGUF",
7586
+ "model_file_name_template": "qwen2.5-1.5b-instruct-{quantization}.gguf"
7587
+ },
7588
+ {
7589
+ "model_format": "ggufv2",
7590
+ "model_size_in_billions": 3,
7591
+ "quantizations": [
7592
+ "q2_k",
7593
+ "q3_k_m",
7594
+ "q4_0",
7595
+ "q4_k_m",
7596
+ "q5_0",
7597
+ "q5_k_m",
7598
+ "q6_k",
7599
+ "q8_0"
7600
+ ],
7601
+ "model_id": "Qwen/Qwen2.5-3B-Instruct-GGUF",
7602
+ "model_file_name_template": "qwen2.5-3b-instruct-{quantization}.gguf"
7603
+ },
7604
+ {
7605
+ "model_format": "ggufv2",
7606
+ "model_size_in_billions": 7,
7607
+ "quantizations": [
7608
+ "q2_k",
7609
+ "q3_k_m",
7610
+ "q4_0",
7611
+ "q4_k_m",
7612
+ "q5_0",
7613
+ "q5_k_m",
7614
+ "q6_k",
7615
+ "q8_0"
7616
+ ],
7617
+ "model_id": "Qwen/Qwen2.5-7B-Instruct-GGUF",
7618
+ "model_file_name_template": "qwen2.5-7b-instruct-{quantization}.gguf",
7619
+ "model_file_name_split_template": "qwen2.5-7b-instruct-{quantization}-{part}.gguf",
7620
+ "quantization_parts": {
7621
+ "q4_0": [
7622
+ "00001-of-00002",
7623
+ "00002-of-00002"
7624
+ ],
7625
+ "q4_k_m": [
7626
+ "00001-of-00002",
7627
+ "00002-of-00002"
7628
+ ],
7629
+ "q5_0": [
7630
+ "00001-of-00002",
7631
+ "00002-of-00002"
7632
+ ],
7633
+ "q5_k_m": [
7634
+ "00001-of-00002",
7635
+ "00002-of-00002"
7636
+ ],
7637
+ "q6_k": [
7638
+ "00001-of-00002",
7639
+ "00002-of-00002"
7640
+ ],
7641
+ "q8_0": [
7642
+ "00001-of-00002",
7643
+ "00002-of-00002"
7644
+ ]
7645
+ }
7646
+ },
7647
+ {
7648
+ "model_format": "ggufv2",
7649
+ "model_size_in_billions": 14,
7650
+ "quantizations": [
7651
+ "q2_k",
7652
+ "q3_k_m",
7653
+ "q4_0",
7654
+ "q4_k_m",
7655
+ "q5_0",
7656
+ "q5_k_m",
7657
+ "q6_k",
7658
+ "q8_0"
7659
+ ],
7660
+ "model_id": "Qwen/Qwen2.5-14B-Instruct-GGUF",
7661
+ "model_file_name_template": "qwen2.5-14b-instruct-{quantization}.gguf",
7662
+ "model_file_name_split_template": "qwen2.5-14b-instruct-{quantization}-{part}.gguf",
7663
+ "quantization_parts": {
7664
+ "q2_k": [
7665
+ "00001-of-00002",
7666
+ "00002-of-00002"
7667
+ ],
7668
+ "q3_k_m": [
7669
+ "00001-of-00002",
7670
+ "00002-of-00002"
7671
+ ],
7672
+ "q4_0": [
7673
+ "00001-of-00003",
7674
+ "00002-of-00003",
7675
+ "00003-of-00003"
7676
+ ],
7677
+ "q4_k_m": [
7678
+ "00001-of-00003",
7679
+ "00002-of-00003",
7680
+ "00003-of-00003"
7681
+ ],
7682
+ "q5_0": [
7683
+ "00001-of-00003",
7684
+ "00002-of-00003",
7685
+ "00003-of-00003"
7686
+ ],
7687
+ "q5_k_m": [
7688
+ "00001-of-00003",
7689
+ "00002-of-00003",
7690
+ "00003-of-00003"
7691
+ ],
7692
+ "q6_k": [
7693
+ "00001-of-00004",
7694
+ "00002-of-00004",
7695
+ "00003-of-00004",
7696
+ "00004-of-00004"
7697
+ ],
7698
+ "q8_0": [
7699
+ "00001-of-00004",
7700
+ "00002-of-00004",
7701
+ "00003-of-00004",
7702
+ "00004-of-00004"
7703
+ ]
7704
+ }
7705
+ },
7706
+ {
7707
+ "model_format": "ggufv2",
7708
+ "model_size_in_billions": 32,
7709
+ "quantizations": [
7710
+ "q2_k",
7711
+ "q3_k_m",
7712
+ "q4_0",
7713
+ "q4_k_m",
7714
+ "q5_0",
7715
+ "q5_k_m",
7716
+ "q6_k",
7717
+ "q8_0"
7718
+ ],
7719
+ "model_id": "Qwen/Qwen2.5-32B-Instruct-GGUF",
7720
+ "model_file_name_template": "qwen2_5-32b-instruct-{quantization}.gguf",
7721
+ "model_file_name_split_template": "qwen2.5-32b-instruct-{quantization}-{part}.gguf",
7722
+ "quantization_parts": {
7723
+ "q2_k": [
7724
+ "00001-of-00004",
7725
+ "00002-of-00004",
7726
+ "00003-of-00004",
7727
+ "00004-of-00004"
7728
+ ],
7729
+ "q3_k_m": [
7730
+ "00001-of-00005",
7731
+ "00002-of-00005",
7732
+ "00003-of-00005",
7733
+ "00004-of-00005",
7734
+ "00005-of-00005"
7735
+ ],
7736
+ "q4_0": [
7737
+ "00001-of-00005",
7738
+ "00002-of-00005",
7739
+ "00003-of-00005",
7740
+ "00004-of-00005",
7741
+ "00005-of-00005"
7742
+ ],
7743
+ "q4_k_m": [
7744
+ "00001-of-00005",
7745
+ "00002-of-00005",
7746
+ "00003-of-00005",
7747
+ "00004-of-00005",
7748
+ "00005-of-00005"
7749
+ ],
7750
+ "q5_0": [
7751
+ "00001-of-00006",
7752
+ "00002-of-00006",
7753
+ "00003-of-00006",
7754
+ "00004-of-00006",
7755
+ "00005-of-00006",
7756
+ "00006-of-00006"
7757
+ ],
7758
+ "q5_k_m": [
7759
+ "00001-of-00006",
7760
+ "00002-of-00006",
7761
+ "00003-of-00006",
7762
+ "00004-of-00006",
7763
+ "00005-of-00006",
7764
+ "00006-of-00006"
7765
+ ],
7766
+ "q6_k": [
7767
+ "00001-of-00007",
7768
+ "00002-of-00007",
7769
+ "00003-of-00007",
7770
+ "00004-of-00007",
7771
+ "00005-of-00007",
7772
+ "00006-of-00007",
7773
+ "00007-of-00007"
7774
+ ],
7775
+ "q8_0": [
7776
+ "00001-of-00009",
7777
+ "00002-of-00009",
7778
+ "00003-of-00009",
7779
+ "00004-of-00009",
7780
+ "00005-of-00009",
7781
+ "00006-of-00009",
7782
+ "00007-of-00009",
7783
+ "00008-of-00009",
7784
+ "00009-of-00009"
7785
+ ]
7786
+ }
7787
+ },
7788
+ {
7789
+ "model_format": "ggufv2",
7790
+ "model_size_in_billions": 72,
7791
+ "quantizations": [
7792
+ "q2_k",
7793
+ "q3_k_m",
7794
+ "q4_0",
7795
+ "q4_k_m",
7796
+ "q5_0",
7797
+ "q5_k_m",
7798
+ "q6_k",
7799
+ "q8_0",
7800
+ "fp16"
7801
+ ],
7802
+ "model_id": "Qwen/Qwen2.5-72B-Instruct-GGUF",
7803
+ "model_file_name_template": "qwen2_5-72b-instruct-{quantization}.gguf",
7804
+ "model_file_name_split_template": "qwen2.5-72b-instruct-{quantization}-{part}.gguf",
7805
+ "quantization_parts": {
7806
+ "q2_k": [
7807
+ "00001-of-00007",
7808
+ "00002-of-00007",
7809
+ "00003-of-00007",
7810
+ "00004-of-00007",
7811
+ "00005-of-00007",
7812
+ "00006-of-00007",
7813
+ "00007-of-00007"
7814
+ ],
7815
+ "q3_k_m": [
7816
+ "00001-of-00009",
7817
+ "00002-of-00009",
7818
+ "00003-of-00009",
7819
+ "00004-of-00009",
7820
+ "00005-of-00009",
7821
+ "00006-of-00009",
7822
+ "00007-of-00009",
7823
+ "00008-of-00009",
7824
+ "00009-of-00009"
7825
+ ],
7826
+ "q4_0": [
7827
+ "00001-of-00011",
7828
+ "00002-of-00011",
7829
+ "00003-of-00011",
7830
+ "00004-of-00011",
7831
+ "00005-of-00011",
7832
+ "00006-of-00011",
7833
+ "00007-of-00011",
7834
+ "00008-of-00011",
7835
+ "00009-of-00011",
7836
+ "00010-of-00011",
7837
+ "00011-of-00011"
7838
+ ],
7839
+ "q4_k_m": [
7840
+ "00001-of-00012",
7841
+ "00002-of-00012",
7842
+ "00003-of-00012",
7843
+ "00004-of-00012",
7844
+ "00005-of-00012",
7845
+ "00006-of-00012",
7846
+ "00007-of-00012",
7847
+ "00008-of-00012",
7848
+ "00009-of-00012",
7849
+ "00010-of-00012",
7850
+ "00011-of-00012",
7851
+ "00012-of-00012"
7852
+ ],
7853
+ "q5_0": [
7854
+ "00001-of-00013",
7855
+ "00002-of-00013",
7856
+ "00003-of-00013",
7857
+ "00004-of-00013",
7858
+ "00005-of-00013",
7859
+ "00006-of-00013",
7860
+ "00007-of-00013",
7861
+ "00008-of-00013",
7862
+ "00009-of-00013",
7863
+ "00010-of-00013",
7864
+ "00011-of-00013",
7865
+ "00012-of-00013",
7866
+ "00013-of-00013"
7867
+ ],
7868
+ "q5_k_m": [
7869
+ "00001-of-00014",
7870
+ "00002-of-00014",
7871
+ "00003-of-00014",
7872
+ "00004-of-00014",
7873
+ "00005-of-00014",
7874
+ "00006-of-00014",
7875
+ "00007-of-00014",
7876
+ "00008-of-00014",
7877
+ "00009-of-00014",
7878
+ "00010-of-00014",
7879
+ "00011-of-00014",
7880
+ "00012-of-00014",
7881
+ "00013-of-00014",
7882
+ "00014-of-00014"
7883
+ ],
7884
+ "q6_k": [
7885
+ "00001-of-00016",
7886
+ "00002-of-00016",
7887
+ "00003-of-00016",
7888
+ "00004-of-00016",
7889
+ "00005-of-00016",
7890
+ "00006-of-00016",
7891
+ "00007-of-00016",
7892
+ "00008-of-00016",
7893
+ "00009-of-00016",
7894
+ "00010-of-00016",
7895
+ "00011-of-00016",
7896
+ "00012-of-00016",
7897
+ "00013-of-00016",
7898
+ "00014-of-00016",
7899
+ "00015-of-00016",
7900
+ "00016-of-00016"
7901
+ ],
7902
+ "q8_0": [
7903
+ "00001-of-00021",
7904
+ "00002-of-00021",
7905
+ "00003-of-00021",
7906
+ "00004-of-00021",
7907
+ "00005-of-00021",
7908
+ "00006-of-00021",
7909
+ "00007-of-00021",
7910
+ "00008-of-00021",
7911
+ "00009-of-00021",
7912
+ "00010-of-00021",
7913
+ "00011-of-00021",
7914
+ "00012-of-00021",
7915
+ "00013-of-00021",
7916
+ "00014-of-00021",
7917
+ "00015-of-00021",
7918
+ "00016-of-00021",
7919
+ "00017-of-00021",
7920
+ "00018-of-00021",
7921
+ "00019-of-00021",
7922
+ "00020-of-00021",
7923
+ "00021-of-00021"
7924
+ ]
7925
+ }
7926
+ }
7927
+ ],
7928
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
7929
+ "stop_token_ids": [
7930
+ 151643,
7931
+ 151644,
7932
+ 151645
7933
+ ],
7934
+ "stop": [
7935
+ "<|endoftext|>",
7936
+ "<|im_start|>",
7937
+ "<|im_end|>"
7938
+ ]
7939
+ },
7940
+ {
7941
+ "version": 1,
7942
+ "context_length": 32768,
7943
+ "model_name": "qwen2.5-coder",
7944
+ "model_lang": [
7945
+ "en",
7946
+ "zh"
7947
+ ],
7948
+ "model_ability": [
7949
+ "generate"
7950
+ ],
7951
+ "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
7952
+ "model_specs": [
7953
+ {
7954
+ "model_format": "pytorch",
7955
+ "model_size_in_billions": "1_5",
7956
+ "quantizations": [
7957
+ "4-bit",
7958
+ "8-bit",
7959
+ "none"
7960
+ ],
7961
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B",
7962
+ "model_revision": "d3586cfe793730945f8e4d7ef31032a3ee50247d"
7963
+ },
7964
+ {
7965
+ "model_format": "pytorch",
7966
+ "model_size_in_billions": 7,
7967
+ "quantizations": [
7968
+ "4-bit",
7969
+ "8-bit",
7970
+ "none"
7971
+ ],
7972
+ "model_id": "Qwen/Qwen2.5-Coder-7B",
7973
+ "model_revision": "30b6a7e874a78d46b80fa1db3194ea427dd41b08"
7974
+ }
7975
+ ]
7976
+ },
7977
+ {
7978
+ "version": 1,
7979
+ "context_length": 32768,
7980
+ "model_name": "qwen2.5-coder-instruct",
7981
+ "model_lang": [
7982
+ "en",
7983
+ "zh"
7984
+ ],
7985
+ "model_ability": [
7986
+ "chat",
7987
+ "tools"
7988
+ ],
7989
+ "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
7990
+ "model_specs": [
7991
+ {
7992
+ "model_format": "pytorch",
7993
+ "model_size_in_billions": "1_5",
7994
+ "quantizations": [
7995
+ "4-bit",
7996
+ "8-bit",
7997
+ "none"
7998
+ ],
7999
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct"
8000
+ },
8001
+ {
8002
+ "model_format": "pytorch",
8003
+ "model_size_in_billions": 7,
8004
+ "quantizations": [
8005
+ "4-bit",
8006
+ "8-bit",
8007
+ "none"
8008
+ ],
8009
+ "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
8010
+ },
8011
+ {
8012
+ "model_format": "ggufv2",
8013
+ "model_size_in_billions": "1_5",
8014
+ "quantizations": [
8015
+ "q2_k",
8016
+ "q3_k_m",
8017
+ "q4_0",
8018
+ "q4_k_m",
8019
+ "q5_0",
8020
+ "q5_k_m",
8021
+ "q6_k",
8022
+ "q8_0"
8023
+ ],
8024
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
8025
+ "model_file_name_template": "qwen2.5-coder-1.5b-instruct-{quantization}.gguf"
8026
+ },
8027
+ {
8028
+ "model_format": "ggufv2",
8029
+ "model_size_in_billions": 7,
8030
+ "quantizations": [
8031
+ "q2_k",
8032
+ "q3_k_m",
8033
+ "q4_0",
8034
+ "q4_k_m",
8035
+ "q5_0",
8036
+ "q5_k_m",
8037
+ "q6_k",
8038
+ "q8_0"
8039
+ ],
8040
+ "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
8041
+ "model_file_name_template": "qwen2.5-coder-7b-instruct-{quantization}.gguf",
8042
+ "model_file_name_split_template": "qwen2.5-coder-7b-instruct-{quantization}-{part}.gguf",
8043
+ "quantization_parts": {
8044
+ "q4_0": [
8045
+ "00001-of-00002",
8046
+ "00002-of-00002"
8047
+ ],
8048
+ "q4_k_m": [
8049
+ "00001-of-00002",
8050
+ "00002-of-00002"
8051
+ ],
8052
+ "q5_0": [
8053
+ "00001-of-00002",
8054
+ "00002-of-00002"
8055
+ ],
8056
+ "q5_k_m": [
8057
+ "00001-of-00002",
8058
+ "00002-of-00002"
8059
+ ],
8060
+ "q6_k": [
8061
+ "00001-of-00002",
8062
+ "00002-of-00002"
8063
+ ],
8064
+ "q8_0": [
8065
+ "00001-of-00003",
8066
+ "00002-of-00003",
8067
+ "00003-of-00003"
8068
+ ]
8069
+ }
8070
+ }
8071
+ ],
8072
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
8073
+ "stop_token_ids": [
8074
+ 151643,
8075
+ 151644,
8076
+ 151645
8077
+ ],
8078
+ "stop": [
8079
+ "<|endoftext|>",
8080
+ "<|im_start|>",
8081
+ "<|im_end|>"
8082
+ ]
7247
8083
  }
7248
8084
  ]