xinference 0.15.1__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (23)
  1. xinference/_version.py +3 -3
  2. xinference/core/model.py +2 -2
  3. xinference/model/audio/cosyvoice.py +3 -3
  4. xinference/model/image/stable_diffusion/core.py +30 -19
  5. xinference/model/llm/__init__.py +1 -1
  6. xinference/model/llm/llm_family.json +850 -2
  7. xinference/model/llm/llm_family_modelscope.json +893 -0
  8. xinference/model/llm/sglang/core.py +4 -0
  9. xinference/model/llm/vllm/core.py +5 -0
  10. xinference/web/ui/build/asset-manifest.json +3 -3
  11. xinference/web/ui/build/index.html +1 -1
  12. xinference/web/ui/build/static/js/{main.754740c0.js → main.29578905.js} +3 -3
  13. xinference/web/ui/build/static/js/main.29578905.js.map +1 -0
  14. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +1 -0
  15. {xinference-0.15.1.dist-info → xinference-0.15.2.dist-info}/METADATA +5 -5
  16. {xinference-0.15.1.dist-info → xinference-0.15.2.dist-info}/RECORD +21 -21
  17. xinference/web/ui/build/static/js/main.754740c0.js.map +0 -1
  18. xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +0 -1
  19. /xinference/web/ui/build/static/js/{main.754740c0.js.LICENSE.txt → main.29578905.js.LICENSE.txt} +0 -0
  20. {xinference-0.15.1.dist-info → xinference-0.15.2.dist-info}/LICENSE +0 -0
  21. {xinference-0.15.1.dist-info → xinference-0.15.2.dist-info}/WHEEL +0 -0
  22. {xinference-0.15.1.dist-info → xinference-0.15.2.dist-info}/entry_points.txt +0 -0
  23. {xinference-0.15.1.dist-info → xinference-0.15.2.dist-info}/top_level.txt +0 -0
@@ -6874,7 +6874,7 @@
6874
6874
  "model_id":"Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8",
6875
6875
  "model_revision":"3d152a77eaccfd72d59baedb0b183a1b8fd56e48"
6876
6876
  },
6877
- {
6877
+ {
6878
6878
  "model_format":"gptq",
6879
6879
  "model_size_in_billions":7,
6880
6880
  "quantizations":[
@@ -6883,7 +6883,7 @@
6883
6883
  "model_id":"Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4",
6884
6884
  "model_revision":"5ab897112fa83b9699826be8753ef9184585c77d"
6885
6885
  },
6886
- {
6886
+ {
6887
6887
  "model_format":"awq",
6888
6888
  "model_size_in_billions":7,
6889
6889
  "quantizations":[
@@ -6891,6 +6891,31 @@
6891
6891
  ],
6892
6892
  "model_id":"Qwen/Qwen2-VL-7B-Instruct-AWQ",
6893
6893
  "model_revision":"f94216e8b513933bccd567bcd9b7350199f32538"
6894
+ },
6895
+ {
6896
+ "model_format":"pytorch",
6897
+ "model_size_in_billions":72,
6898
+ "quantizations":[
6899
+ "none"
6900
+ ],
6901
+ "model_id":"Qwen/Qwen2-VL-72B-Instruct"
6902
+ },
6903
+ {
6904
+ "model_format":"awq",
6905
+ "model_size_in_billions":72,
6906
+ "quantizations":[
6907
+ "Int4"
6908
+ ],
6909
+ "model_id":"Qwen/Qwen2-VL-72B-Instruct-AWQ"
6910
+ },
6911
+ {
6912
+ "model_format":"gptq",
6913
+ "model_size_in_billions":72,
6914
+ "quantizations":[
6915
+ "Int4",
6916
+ "Int8"
6917
+ ],
6918
+ "model_id":"Qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}"
6894
6919
  }
6895
6920
  ],
6896
6921
  "prompt_style":{
@@ -7244,5 +7269,828 @@
7244
7269
  "model_revision": "00e59e64f47d3c78e4cfbdd345888479797e8109"
7245
7270
  }
7246
7271
  ]
7272
+ },
7273
+ {
7274
+ "version": 1,
7275
+ "context_length": 32768,
7276
+ "model_name": "qwen2.5",
7277
+ "model_lang": [
7278
+ "en",
7279
+ "zh"
7280
+ ],
7281
+ "model_ability": [
7282
+ "generate"
7283
+ ],
7284
+ "model_description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.",
7285
+ "model_specs": [
7286
+ {
7287
+ "model_format": "pytorch",
7288
+ "model_size_in_billions": "0_5",
7289
+ "quantizations": [
7290
+ "4-bit",
7291
+ "8-bit",
7292
+ "none"
7293
+ ],
7294
+ "model_id": "Qwen/Qwen2.5-0.5B",
7295
+ "model_revision": "2630d3d2321bc1f1878f702166d1b2af019a7310"
7296
+ },
7297
+ {
7298
+ "model_format": "pytorch",
7299
+ "model_size_in_billions": "1_5",
7300
+ "quantizations": [
7301
+ "4-bit",
7302
+ "8-bit",
7303
+ "none"
7304
+ ],
7305
+ "model_id": "Qwen/Qwen2.5-1.5B",
7306
+ "model_revision": "e5dfabbcffd9b0c7b31d89b82c5a6b72e663f32c"
7307
+ },
7308
+ {
7309
+ "model_format": "pytorch",
7310
+ "model_size_in_billions": 3,
7311
+ "quantizations": [
7312
+ "4-bit",
7313
+ "8-bit",
7314
+ "none"
7315
+ ],
7316
+ "model_id": "Qwen/Qwen2.5-3B",
7317
+ "model_revision": "e4aa5ac50aa507415cda96cc99eb77ad0a3d2d34"
7318
+ },
7319
+ {
7320
+ "model_format": "pytorch",
7321
+ "model_size_in_billions": 7,
7322
+ "quantizations": [
7323
+ "4-bit",
7324
+ "8-bit",
7325
+ "none"
7326
+ ],
7327
+ "model_id": "Qwen/Qwen2.5-7B",
7328
+ "model_revision": "09a0bac5707b43ec44508eab308b0846320c1ed4"
7329
+ },
7330
+ {
7331
+ "model_format": "pytorch",
7332
+ "model_size_in_billions": 14,
7333
+ "quantizations": [
7334
+ "4-bit",
7335
+ "8-bit",
7336
+ "none"
7337
+ ],
7338
+ "model_id": "Qwen/Qwen2.5-14B",
7339
+ "model_revision": "d02b64ba1ce86bf9948668a13f82709600431ccc"
7340
+ },
7341
+ {
7342
+ "model_format": "pytorch",
7343
+ "model_size_in_billions": 32,
7344
+ "quantizations": [
7345
+ "4-bit",
7346
+ "8-bit",
7347
+ "none"
7348
+ ],
7349
+ "model_id": "Qwen/Qwen2.5-32B",
7350
+ "model_revision": "ff23665d01c3665be5fdb271d18a62090b65c06d"
7351
+ },
7352
+ {
7353
+ "model_format": "pytorch",
7354
+ "model_size_in_billions": 72,
7355
+ "quantizations": [
7356
+ "4-bit",
7357
+ "8-bit",
7358
+ "none"
7359
+ ],
7360
+ "model_id": "Qwen/Qwen2.5-72B",
7361
+ "model_revision": "587cc4061cf6a7cc0d429d05c109447e5cf063af"
7362
+ }
7363
+ ]
7364
+ },
7365
+ {
7366
+ "version": 1,
7367
+ "context_length": 32768,
7368
+ "model_name": "qwen2.5-instruct",
7369
+ "model_lang": [
7370
+ "en",
7371
+ "zh"
7372
+ ],
7373
+ "model_ability": [
7374
+ "chat",
7375
+ "tools"
7376
+ ],
7377
+ "model_description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.",
7378
+ "model_specs": [
7379
+ {
7380
+ "model_format": "pytorch",
7381
+ "model_size_in_billions": "0_5",
7382
+ "quantizations": [
7383
+ "4-bit",
7384
+ "8-bit",
7385
+ "none"
7386
+ ],
7387
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct"
7388
+ },
7389
+ {
7390
+ "model_format": "pytorch",
7391
+ "model_size_in_billions": "1_5",
7392
+ "quantizations": [
7393
+ "4-bit",
7394
+ "8-bit",
7395
+ "none"
7396
+ ],
7397
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct"
7398
+ },
7399
+ {
7400
+ "model_format": "pytorch",
7401
+ "model_size_in_billions": 3,
7402
+ "quantizations": [
7403
+ "4-bit",
7404
+ "8-bit",
7405
+ "none"
7406
+ ],
7407
+ "model_id": "Qwen/Qwen2.5-3B-Instruct"
7408
+ },
7409
+ {
7410
+ "model_format": "pytorch",
7411
+ "model_size_in_billions": 7,
7412
+ "quantizations": [
7413
+ "4-bit",
7414
+ "8-bit",
7415
+ "none"
7416
+ ],
7417
+ "model_id": "Qwen/Qwen2.5-7B-Instruct"
7418
+ },
7419
+ {
7420
+ "model_format": "pytorch",
7421
+ "model_size_in_billions": 14,
7422
+ "quantizations": [
7423
+ "4-bit",
7424
+ "8-bit",
7425
+ "none"
7426
+ ],
7427
+ "model_id": "Qwen/Qwen2.5-14B-Instruct"
7428
+ },
7429
+ {
7430
+ "model_format": "pytorch",
7431
+ "model_size_in_billions": 32,
7432
+ "quantizations": [
7433
+ "4-bit",
7434
+ "8-bit",
7435
+ "none"
7436
+ ],
7437
+ "model_id": "Qwen/Qwen2.5-32B-Instruct"
7438
+ },
7439
+ {
7440
+ "model_format": "pytorch",
7441
+ "model_size_in_billions": 72,
7442
+ "quantizations": [
7443
+ "4-bit",
7444
+ "8-bit",
7445
+ "none"
7446
+ ],
7447
+ "model_id": "Qwen/Qwen2.5-72B-Instruct"
7448
+ },
7449
+ {
7450
+ "model_format": "gptq",
7451
+ "model_size_in_billions": "0_5",
7452
+ "quantizations": [
7453
+ "Int4",
7454
+ "Int8"
7455
+ ],
7456
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct-GPTQ-{quantization}"
7457
+ },
7458
+ {
7459
+ "model_format": "gptq",
7460
+ "model_size_in_billions": "1_5",
7461
+ "quantizations": [
7462
+ "Int4",
7463
+ "Int8"
7464
+ ],
7465
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct-GPTQ-{quantization}"
7466
+ },
7467
+ {
7468
+ "model_format": "gptq",
7469
+ "model_size_in_billions": 3,
7470
+ "quantizations": [
7471
+ "Int4",
7472
+ "Int8"
7473
+ ],
7474
+ "model_id": "Qwen/Qwen2.5-3B-Instruct-GPTQ-{quantization}"
7475
+ },
7476
+ {
7477
+ "model_format": "gptq",
7478
+ "model_size_in_billions": 7,
7479
+ "quantizations": [
7480
+ "Int4",
7481
+ "Int8"
7482
+ ],
7483
+ "model_id": "Qwen/Qwen2.5-7B-Instruct-GPTQ-{quantization}"
7484
+ },
7485
+ {
7486
+ "model_format": "gptq",
7487
+ "model_size_in_billions": 14,
7488
+ "quantizations": [
7489
+ "Int4",
7490
+ "Int8"
7491
+ ],
7492
+ "model_id": "Qwen/Qwen2.5-14B-Instruct-GPTQ-{quantization}"
7493
+ },
7494
+ {
7495
+ "model_format": "gptq",
7496
+ "model_size_in_billions": 32,
7497
+ "quantizations": [
7498
+ "Int4",
7499
+ "Int8"
7500
+ ],
7501
+ "model_id": "Qwen/Qwen2.5-32B-Instruct-GPTQ-{quantization}"
7502
+ },
7503
+ {
7504
+ "model_format": "gptq",
7505
+ "model_size_in_billions": 72,
7506
+ "quantizations": [
7507
+ "Int4",
7508
+ "Int8"
7509
+ ],
7510
+ "model_id": "Qwen/Qwen2.5-72B-Instruct-GPTQ-{quantization}"
7511
+ },
7512
+ {
7513
+ "model_format": "awq",
7514
+ "model_size_in_billions": "0_5",
7515
+ "quantizations": [
7516
+ "Int4"
7517
+ ],
7518
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct-AWQ"
7519
+ },
7520
+ {
7521
+ "model_format": "awq",
7522
+ "model_size_in_billions": "1_5",
7523
+ "quantizations": [
7524
+ "Int4"
7525
+ ],
7526
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct-AWQ"
7527
+ },
7528
+ {
7529
+ "model_format": "awq",
7530
+ "model_size_in_billions": 3,
7531
+ "quantizations": [
7532
+ "Int4"
7533
+ ],
7534
+ "model_id": "Qwen/Qwen2.5-3B-Instruct-AWQ"
7535
+ },
7536
+ {
7537
+ "model_format": "awq",
7538
+ "model_size_in_billions": 7,
7539
+ "quantizations": [
7540
+ "Int4"
7541
+ ],
7542
+ "model_id": "Qwen/Qwen2.5-7B-Instruct-AWQ"
7543
+ },
7544
+ {
7545
+ "model_format": "awq",
7546
+ "model_size_in_billions": 14,
7547
+ "quantizations": [
7548
+ "Int4"
7549
+ ],
7550
+ "model_id": "Qwen/Qwen2.5-14B-Instruct-AWQ"
7551
+ },
7552
+ {
7553
+ "model_format": "awq",
7554
+ "model_size_in_billions": 32,
7555
+ "quantizations": [
7556
+ "Int4"
7557
+ ],
7558
+ "model_id": "Qwen/Qwen2.5-32B-Instruct-AWQ"
7559
+ },
7560
+ {
7561
+ "model_format": "awq",
7562
+ "model_size_in_billions": 72,
7563
+ "quantizations": [
7564
+ "Int4"
7565
+ ],
7566
+ "model_id": "Qwen/Qwen2.5-72B-Instruct-AWQ"
7567
+ },
7568
+ {
7569
+ "model_format": "ggufv2",
7570
+ "model_size_in_billions": "0_5",
7571
+ "quantizations": [
7572
+ "q2_k",
7573
+ "q3_k_m",
7574
+ "q4_0",
7575
+ "q4_k_m",
7576
+ "q5_0",
7577
+ "q5_k_m",
7578
+ "q6_k",
7579
+ "q8_0"
7580
+ ],
7581
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct-GGUF",
7582
+ "model_file_name_template": "qwen2.5-0.5b-instruct-{quantization}.gguf"
7583
+ },
7584
+ {
7585
+ "model_format": "ggufv2",
7586
+ "model_size_in_billions": "1_5",
7587
+ "quantizations": [
7588
+ "q2_k",
7589
+ "q3_k_m",
7590
+ "q4_0",
7591
+ "q4_k_m",
7592
+ "q5_0",
7593
+ "q5_k_m",
7594
+ "q6_k",
7595
+ "q8_0"
7596
+ ],
7597
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct-GGUF",
7598
+ "model_file_name_template": "qwen2.5-1.5b-instruct-{quantization}.gguf"
7599
+ },
7600
+ {
7601
+ "model_format": "ggufv2",
7602
+ "model_size_in_billions": 3,
7603
+ "quantizations": [
7604
+ "q2_k",
7605
+ "q3_k_m",
7606
+ "q4_0",
7607
+ "q4_k_m",
7608
+ "q5_0",
7609
+ "q5_k_m",
7610
+ "q6_k",
7611
+ "q8_0"
7612
+ ],
7613
+ "model_id": "Qwen/Qwen2.5-3B-Instruct-GGUF",
7614
+ "model_file_name_template": "qwen2.5-3b-instruct-{quantization}.gguf"
7615
+ },
7616
+ {
7617
+ "model_format": "ggufv2",
7618
+ "model_size_in_billions": 7,
7619
+ "quantizations": [
7620
+ "q2_k",
7621
+ "q3_k_m",
7622
+ "q4_0",
7623
+ "q4_k_m",
7624
+ "q5_0",
7625
+ "q5_k_m",
7626
+ "q6_k",
7627
+ "q8_0"
7628
+ ],
7629
+ "model_id": "Qwen/Qwen2.5-7B-Instruct-GGUF",
7630
+ "model_file_name_template": "qwen2.5-7b-instruct-{quantization}.gguf",
7631
+ "model_file_name_split_template": "qwen2.5-7b-instruct-{quantization}-{part}.gguf",
7632
+ "quantization_parts": {
7633
+ "q4_0": [
7634
+ "00001-of-00002",
7635
+ "00002-of-00002"
7636
+ ],
7637
+ "q4_k_m": [
7638
+ "00001-of-00002",
7639
+ "00002-of-00002"
7640
+ ],
7641
+ "q5_0": [
7642
+ "00001-of-00002",
7643
+ "00002-of-00002"
7644
+ ],
7645
+ "q5_k_m": [
7646
+ "00001-of-00002",
7647
+ "00002-of-00002"
7648
+ ],
7649
+ "q6_k": [
7650
+ "00001-of-00002",
7651
+ "00002-of-00002"
7652
+ ],
7653
+ "q8_0": [
7654
+ "00001-of-00002",
7655
+ "00002-of-00002"
7656
+ ]
7657
+ }
7658
+ },
7659
+ {
7660
+ "model_format": "ggufv2",
7661
+ "model_size_in_billions": 14,
7662
+ "quantizations": [
7663
+ "q2_k",
7664
+ "q3_k_m",
7665
+ "q4_0",
7666
+ "q4_k_m",
7667
+ "q5_0",
7668
+ "q5_k_m",
7669
+ "q6_k",
7670
+ "q8_0"
7671
+ ],
7672
+ "model_id": "Qwen/Qwen2.5-14B-Instruct-GGUF",
7673
+ "model_file_name_template": "qwen2.5-14b-instruct-{quantization}.gguf",
7674
+ "model_file_name_split_template": "qwen2.5-14b-instruct-{quantization}-{part}.gguf",
7675
+ "quantization_parts": {
7676
+ "q2_k": [
7677
+ "00001-of-00002",
7678
+ "00002-of-00002"
7679
+ ],
7680
+ "q3_k_m": [
7681
+ "00001-of-00002",
7682
+ "00002-of-00002"
7683
+ ],
7684
+ "q4_0": [
7685
+ "00001-of-00003",
7686
+ "00002-of-00003",
7687
+ "00003-of-00003"
7688
+ ],
7689
+ "q4_k_m": [
7690
+ "00001-of-00003",
7691
+ "00002-of-00003",
7692
+ "00003-of-00003"
7693
+ ],
7694
+ "q5_0": [
7695
+ "00001-of-00003",
7696
+ "00002-of-00003",
7697
+ "00003-of-00003"
7698
+ ],
7699
+ "q5_k_m": [
7700
+ "00001-of-00003",
7701
+ "00002-of-00003",
7702
+ "00003-of-00003"
7703
+ ],
7704
+ "q6_k": [
7705
+ "00001-of-00004",
7706
+ "00002-of-00004",
7707
+ "00003-of-00004",
7708
+ "00004-of-00004"
7709
+ ],
7710
+ "q8_0": [
7711
+ "00001-of-00004",
7712
+ "00002-of-00004",
7713
+ "00003-of-00004",
7714
+ "00004-of-00004"
7715
+ ]
7716
+ }
7717
+ },
7718
+ {
7719
+ "model_format": "ggufv2",
7720
+ "model_size_in_billions": 32,
7721
+ "quantizations": [
7722
+ "q2_k",
7723
+ "q3_k_m",
7724
+ "q4_0",
7725
+ "q4_k_m",
7726
+ "q5_0",
7727
+ "q5_k_m",
7728
+ "q6_k",
7729
+ "q8_0"
7730
+ ],
7731
+ "model_id": "Qwen/Qwen2.5-32B-Instruct-GGUF",
7732
+ "model_file_name_template": "qwen2_5-32b-instruct-{quantization}.gguf",
7733
+ "model_file_name_split_template": "qwen2.5-32b-instruct-{quantization}-{part}.gguf",
7734
+ "quantization_parts": {
7735
+ "q2_k": [
7736
+ "00001-of-00004",
7737
+ "00002-of-00004",
7738
+ "00003-of-00004",
7739
+ "00004-of-00004"
7740
+ ],
7741
+ "q3_k_m": [
7742
+ "00001-of-00005",
7743
+ "00002-of-00005",
7744
+ "00003-of-00005",
7745
+ "00004-of-00005",
7746
+ "00005-of-00005"
7747
+ ],
7748
+ "q4_0": [
7749
+ "00001-of-00005",
7750
+ "00002-of-00005",
7751
+ "00003-of-00005",
7752
+ "00004-of-00005",
7753
+ "00005-of-00005"
7754
+ ],
7755
+ "q4_k_m": [
7756
+ "00001-of-00005",
7757
+ "00002-of-00005",
7758
+ "00003-of-00005",
7759
+ "00004-of-00005",
7760
+ "00005-of-00005"
7761
+ ],
7762
+ "q5_0": [
7763
+ "00001-of-00006",
7764
+ "00002-of-00006",
7765
+ "00003-of-00006",
7766
+ "00004-of-00006",
7767
+ "00005-of-00006",
7768
+ "00006-of-00006"
7769
+ ],
7770
+ "q5_k_m": [
7771
+ "00001-of-00006",
7772
+ "00002-of-00006",
7773
+ "00003-of-00006",
7774
+ "00004-of-00006",
7775
+ "00005-of-00006",
7776
+ "00006-of-00006"
7777
+ ],
7778
+ "q6_k": [
7779
+ "00001-of-00007",
7780
+ "00002-of-00007",
7781
+ "00003-of-00007",
7782
+ "00004-of-00007",
7783
+ "00005-of-00007",
7784
+ "00006-of-00007",
7785
+ "00007-of-00007"
7786
+ ],
7787
+ "q8_0": [
7788
+ "00001-of-00009",
7789
+ "00002-of-00009",
7790
+ "00003-of-00009",
7791
+ "00004-of-00009",
7792
+ "00005-of-00009",
7793
+ "00006-of-00009",
7794
+ "00007-of-00009",
7795
+ "00008-of-00009",
7796
+ "00009-of-00009"
7797
+ ]
7798
+ }
7799
+ },
7800
+ {
7801
+ "model_format": "ggufv2",
7802
+ "model_size_in_billions": 72,
7803
+ "quantizations": [
7804
+ "q2_k",
7805
+ "q3_k_m",
7806
+ "q4_0",
7807
+ "q4_k_m",
7808
+ "q5_0",
7809
+ "q5_k_m",
7810
+ "q6_k",
7811
+ "q8_0",
7812
+ "fp16"
7813
+ ],
7814
+ "model_id": "Qwen/Qwen2.5-72B-Instruct-GGUF",
7815
+ "model_file_name_template": "qwen2_5-72b-instruct-{quantization}.gguf",
7816
+ "model_file_name_split_template": "qwen2.5-72b-instruct-{quantization}-{part}.gguf",
7817
+ "quantization_parts": {
7818
+ "q2_k": [
7819
+ "00001-of-00007",
7820
+ "00002-of-00007",
7821
+ "00003-of-00007",
7822
+ "00004-of-00007",
7823
+ "00005-of-00007",
7824
+ "00006-of-00007",
7825
+ "00007-of-00007"
7826
+ ],
7827
+ "q3_k_m": [
7828
+ "00001-of-00009",
7829
+ "00002-of-00009",
7830
+ "00003-of-00009",
7831
+ "00004-of-00009",
7832
+ "00005-of-00009",
7833
+ "00006-of-00009",
7834
+ "00007-of-00009",
7835
+ "00008-of-00009",
7836
+ "00009-of-00009"
7837
+ ],
7838
+ "q4_0": [
7839
+ "00001-of-00011",
7840
+ "00002-of-00011",
7841
+ "00003-of-00011",
7842
+ "00004-of-00011",
7843
+ "00005-of-00011",
7844
+ "00006-of-00011",
7845
+ "00007-of-00011",
7846
+ "00008-of-00011",
7847
+ "00009-of-00011",
7848
+ "00010-of-00011",
7849
+ "00011-of-00011"
7850
+ ],
7851
+ "q4_k_m": [
7852
+ "00001-of-00012",
7853
+ "00002-of-00012",
7854
+ "00003-of-00012",
7855
+ "00004-of-00012",
7856
+ "00005-of-00012",
7857
+ "00006-of-00012",
7858
+ "00007-of-00012",
7859
+ "00008-of-00012",
7860
+ "00009-of-00012",
7861
+ "00010-of-00012",
7862
+ "00011-of-00012",
7863
+ "00012-of-00012"
7864
+ ],
7865
+ "q5_0": [
7866
+ "00001-of-00013",
7867
+ "00002-of-00013",
7868
+ "00003-of-00013",
7869
+ "00004-of-00013",
7870
+ "00005-of-00013",
7871
+ "00006-of-00013",
7872
+ "00007-of-00013",
7873
+ "00008-of-00013",
7874
+ "00009-of-00013",
7875
+ "00010-of-00013",
7876
+ "00011-of-00013",
7877
+ "00012-of-00013",
7878
+ "00013-of-00013"
7879
+ ],
7880
+ "q5_k_m": [
7881
+ "00001-of-00014",
7882
+ "00002-of-00014",
7883
+ "00003-of-00014",
7884
+ "00004-of-00014",
7885
+ "00005-of-00014",
7886
+ "00006-of-00014",
7887
+ "00007-of-00014",
7888
+ "00008-of-00014",
7889
+ "00009-of-00014",
7890
+ "00010-of-00014",
7891
+ "00011-of-00014",
7892
+ "00012-of-00014",
7893
+ "00013-of-00014",
7894
+ "00014-of-00014"
7895
+ ],
7896
+ "q6_k": [
7897
+ "00001-of-00016",
7898
+ "00002-of-00016",
7899
+ "00003-of-00016",
7900
+ "00004-of-00016",
7901
+ "00005-of-00016",
7902
+ "00006-of-00016",
7903
+ "00007-of-00016",
7904
+ "00008-of-00016",
7905
+ "00009-of-00016",
7906
+ "00010-of-00016",
7907
+ "00011-of-00016",
7908
+ "00012-of-00016",
7909
+ "00013-of-00016",
7910
+ "00014-of-00016",
7911
+ "00015-of-00016",
7912
+ "00016-of-00016"
7913
+ ],
7914
+ "q8_0": [
7915
+ "00001-of-00021",
7916
+ "00002-of-00021",
7917
+ "00003-of-00021",
7918
+ "00004-of-00021",
7919
+ "00005-of-00021",
7920
+ "00006-of-00021",
7921
+ "00007-of-00021",
7922
+ "00008-of-00021",
7923
+ "00009-of-00021",
7924
+ "00010-of-00021",
7925
+ "00011-of-00021",
7926
+ "00012-of-00021",
7927
+ "00013-of-00021",
7928
+ "00014-of-00021",
7929
+ "00015-of-00021",
7930
+ "00016-of-00021",
7931
+ "00017-of-00021",
7932
+ "00018-of-00021",
7933
+ "00019-of-00021",
7934
+ "00020-of-00021",
7935
+ "00021-of-00021"
7936
+ ]
7937
+ }
7938
+ }
7939
+ ],
7940
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
7941
+ "stop_token_ids": [
7942
+ 151643,
7943
+ 151644,
7944
+ 151645
7945
+ ],
7946
+ "stop": [
7947
+ "<|endoftext|>",
7948
+ "<|im_start|>",
7949
+ "<|im_end|>"
7950
+ ]
7951
+ },
7952
+ {
7953
+ "version": 1,
7954
+ "context_length": 32768,
7955
+ "model_name": "qwen2.5-coder",
7956
+ "model_lang": [
7957
+ "en",
7958
+ "zh"
7959
+ ],
7960
+ "model_ability": [
7961
+ "generate"
7962
+ ],
7963
+ "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
7964
+ "model_specs": [
7965
+ {
7966
+ "model_format": "pytorch",
7967
+ "model_size_in_billions": "1_5",
7968
+ "quantizations": [
7969
+ "4-bit",
7970
+ "8-bit",
7971
+ "none"
7972
+ ],
7973
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B",
7974
+ "model_revision": "d3586cfe793730945f8e4d7ef31032a3ee50247d"
7975
+ },
7976
+ {
7977
+ "model_format": "pytorch",
7978
+ "model_size_in_billions": 7,
7979
+ "quantizations": [
7980
+ "4-bit",
7981
+ "8-bit",
7982
+ "none"
7983
+ ],
7984
+ "model_id": "Qwen/Qwen2.5-Coder-7B",
7985
+ "model_revision": "30b6a7e874a78d46b80fa1db3194ea427dd41b08"
7986
+ }
7987
+ ]
7988
+ },
7989
+ {
7990
+ "version": 1,
7991
+ "context_length": 32768,
7992
+ "model_name": "qwen2.5-coder-instruct",
7993
+ "model_lang": [
7994
+ "en",
7995
+ "zh"
7996
+ ],
7997
+ "model_ability": [
7998
+ "chat",
7999
+ "tools"
8000
+ ],
8001
+ "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
8002
+ "model_specs": [
8003
+ {
8004
+ "model_format": "pytorch",
8005
+ "model_size_in_billions": "1_5",
8006
+ "quantizations": [
8007
+ "4-bit",
8008
+ "8-bit",
8009
+ "none"
8010
+ ],
8011
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct"
8012
+ },
8013
+ {
8014
+ "model_format": "pytorch",
8015
+ "model_size_in_billions": 7,
8016
+ "quantizations": [
8017
+ "4-bit",
8018
+ "8-bit",
8019
+ "none"
8020
+ ],
8021
+ "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
8022
+ },
8023
+ {
8024
+ "model_format": "ggufv2",
8025
+ "model_size_in_billions": "1_5",
8026
+ "quantizations": [
8027
+ "q2_k",
8028
+ "q3_k_m",
8029
+ "q4_0",
8030
+ "q4_k_m",
8031
+ "q5_0",
8032
+ "q5_k_m",
8033
+ "q6_k",
8034
+ "q8_0"
8035
+ ],
8036
+ "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
8037
+ "model_file_name_template": "qwen2.5-coder-1.5b-instruct-{quantization}.gguf"
8038
+ },
8039
+ {
8040
+ "model_format": "ggufv2",
8041
+ "model_size_in_billions": 7,
8042
+ "quantizations": [
8043
+ "q2_k",
8044
+ "q3_k_m",
8045
+ "q4_0",
8046
+ "q4_k_m",
8047
+ "q5_0",
8048
+ "q5_k_m",
8049
+ "q6_k",
8050
+ "q8_0"
8051
+ ],
8052
+ "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
8053
+ "model_file_name_template": "qwen2.5-coder-7b-instruct-{quantization}.gguf",
8054
+ "model_file_name_split_template": "qwen2.5-coder-7b-instruct-{quantization}-{part}.gguf",
8055
+ "quantization_parts": {
8056
+ "q4_0": [
8057
+ "00001-of-00002",
8058
+ "00002-of-00002"
8059
+ ],
8060
+ "q4_k_m": [
8061
+ "00001-of-00002",
8062
+ "00002-of-00002"
8063
+ ],
8064
+ "q5_0": [
8065
+ "00001-of-00002",
8066
+ "00002-of-00002"
8067
+ ],
8068
+ "q5_k_m": [
8069
+ "00001-of-00002",
8070
+ "00002-of-00002"
8071
+ ],
8072
+ "q6_k": [
8073
+ "00001-of-00002",
8074
+ "00002-of-00002"
8075
+ ],
8076
+ "q8_0": [
8077
+ "00001-of-00003",
8078
+ "00002-of-00003",
8079
+ "00003-of-00003"
8080
+ ]
8081
+ }
8082
+ }
8083
+ ],
8084
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
8085
+ "stop_token_ids": [
8086
+ 151643,
8087
+ 151644,
8088
+ 151645
8089
+ ],
8090
+ "stop": [
8091
+ "<|endoftext|>",
8092
+ "<|im_start|>",
8093
+ "<|im_end|>"
8094
+ ]
7247
8095
  }
7248
8096
  ]