xinference 0.15.0__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (84) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +204 -1
  3. xinference/client/restful/restful_client.py +4 -2
  4. xinference/core/image_interface.py +28 -0
  5. xinference/core/model.py +30 -2
  6. xinference/core/supervisor.py +6 -0
  7. xinference/model/audio/cosyvoice.py +3 -3
  8. xinference/model/audio/fish_speech.py +9 -9
  9. xinference/model/audio/model_spec.json +9 -9
  10. xinference/model/audio/whisper.py +4 -1
  11. xinference/model/image/core.py +2 -1
  12. xinference/model/image/model_spec.json +16 -4
  13. xinference/model/image/model_spec_modelscope.json +16 -4
  14. xinference/model/image/sdapi.py +136 -0
  15. xinference/model/image/stable_diffusion/core.py +163 -24
  16. xinference/model/llm/__init__.py +9 -1
  17. xinference/model/llm/llm_family.json +1241 -0
  18. xinference/model/llm/llm_family.py +3 -1
  19. xinference/model/llm/llm_family_modelscope.json +1301 -3
  20. xinference/model/llm/sglang/core.py +7 -0
  21. xinference/model/llm/transformers/chatglm.py +1 -1
  22. xinference/model/llm/transformers/core.py +6 -0
  23. xinference/model/llm/transformers/deepseek_v2.py +340 -0
  24. xinference/model/llm/transformers/qwen2_audio.py +168 -0
  25. xinference/model/llm/transformers/qwen2_vl.py +31 -5
  26. xinference/model/llm/utils.py +104 -84
  27. xinference/model/llm/vllm/core.py +13 -0
  28. xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml +2 -3
  29. xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml +1 -1
  30. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +1 -1
  31. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +1 -1
  32. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +1 -1
  33. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +1 -1
  34. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +1 -1
  35. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +2 -2
  36. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +0 -3
  37. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +169 -198
  38. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +4 -27
  39. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +9 -47
  40. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +2 -2
  41. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -0
  42. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +12 -10
  43. xinference/thirdparty/fish_speech/tools/api.py +79 -134
  44. xinference/thirdparty/fish_speech/tools/commons.py +35 -0
  45. xinference/thirdparty/fish_speech/tools/download_models.py +3 -3
  46. xinference/thirdparty/fish_speech/tools/file.py +17 -0
  47. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +1 -1
  48. xinference/thirdparty/fish_speech/tools/llama/generate.py +29 -24
  49. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +1 -1
  50. xinference/thirdparty/fish_speech/tools/llama/quantize.py +2 -2
  51. xinference/thirdparty/fish_speech/tools/msgpack_api.py +34 -0
  52. xinference/thirdparty/fish_speech/tools/post_api.py +85 -44
  53. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +1 -1
  54. xinference/thirdparty/fish_speech/tools/smart_pad.py +16 -3
  55. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +2 -2
  56. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +4 -2
  57. xinference/thirdparty/fish_speech/tools/webui.py +12 -146
  58. xinference/types.py +7 -4
  59. xinference/web/ui/build/asset-manifest.json +6 -6
  60. xinference/web/ui/build/index.html +1 -1
  61. xinference/web/ui/build/static/css/{main.632e9148.css → main.5061c4c3.css} +2 -2
  62. xinference/web/ui/build/static/css/main.5061c4c3.css.map +1 -0
  63. xinference/web/ui/build/static/js/{main.9cfafbd6.js → main.29578905.js} +3 -3
  64. xinference/web/ui/build/static/js/main.29578905.js.map +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +1 -0
  67. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/METADATA +13 -7
  68. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/RECORD +73 -75
  69. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +0 -442
  70. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +0 -44
  71. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +0 -115
  72. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +0 -225
  73. xinference/thirdparty/fish_speech/tools/auto_rerank.py +0 -159
  74. xinference/thirdparty/fish_speech/tools/gen_ref.py +0 -36
  75. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +0 -55
  76. xinference/web/ui/build/static/css/main.632e9148.css.map +0 -1
  77. xinference/web/ui/build/static/js/main.9cfafbd6.js.map +0 -1
  78. xinference/web/ui/node_modules/.cache/babel-loader/01d6d198156bacbd436c51435edbd4b2cacd47a79db929105eba30f74b67d48d.json +0 -1
  79. xinference/web/ui/node_modules/.cache/babel-loader/59eb25f514afcc4fefd1b309d192b2455f1e0aec68a9de598ca4b2333fe2c774.json +0 -1
  80. /xinference/web/ui/build/static/js/{main.9cfafbd6.js.LICENSE.txt → main.29578905.js.LICENSE.txt} +0 -0
  81. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/LICENSE +0 -0
  82. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/WHEEL +0 -0
  83. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/entry_points.txt +0 -0
  84. {xinference-0.15.0.dist-info → xinference-0.15.2.dist-info}/top_level.txt +0 -0
@@ -4522,17 +4522,216 @@
4522
4522
  "vision"
4523
4523
  ],
4524
4524
  "model_description": "Qwen2-VL: To See the World More Clearly. Qwen2-VL is the latest version of the vision language models in the Qwen model families.",
4525
+ "model_specs":[
4526
+ {
4527
+ "model_format":"pytorch",
4528
+ "model_size_in_billions":7,
4529
+ "quantizations":[
4530
+ "none"
4531
+ ],
4532
+ "model_hub": "modelscope",
4533
+ "model_id":"qwen/Qwen2-VL-7B-Instruct",
4534
+ "model_revision":"master"
4535
+ },
4536
+ {
4537
+ "model_format":"gptq",
4538
+ "model_size_in_billions":7,
4539
+ "quantizations":[
4540
+ "Int8"
4541
+ ],
4542
+ "model_hub": "modelscope",
4543
+ "model_id":"qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8",
4544
+ "model_revision":"master"
4545
+ },
4546
+ {
4547
+ "model_format":"gptq",
4548
+ "model_size_in_billions":7,
4549
+ "quantizations":[
4550
+ "Int4"
4551
+ ],
4552
+ "model_hub": "modelscope",
4553
+ "model_id":"qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4",
4554
+ "model_revision":"master"
4555
+ },
4556
+ {
4557
+ "model_format":"awq",
4558
+ "model_size_in_billions":7,
4559
+ "quantizations":[
4560
+ "Int4"
4561
+ ],
4562
+ "model_hub": "modelscope",
4563
+ "model_id":"qwen/Qwen2-VL-7B-Instruct-AWQ",
4564
+ "model_revision":"master"
4565
+ },
4566
+ {
4567
+ "model_format":"pytorch",
4568
+ "model_size_in_billions":2,
4569
+ "quantizations":[
4570
+ "none"
4571
+ ],
4572
+ "model_hub": "modelscope",
4573
+ "model_id":"qwen/Qwen2-VL-2B-Instruct",
4574
+ "model_revision":"master"
4575
+ },
4576
+ {
4577
+ "model_format":"gptq",
4578
+ "model_size_in_billions":2,
4579
+ "quantizations":[
4580
+ "Int8"
4581
+ ],
4582
+ "model_hub": "modelscope",
4583
+ "model_id":"qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8",
4584
+ "model_revision":"master"
4585
+ },
4586
+ {
4587
+ "model_format":"gptq",
4588
+ "model_size_in_billions":2,
4589
+ "quantizations":[
4590
+ "Int4"
4591
+ ],
4592
+ "model_hub": "modelscope",
4593
+ "model_id":"qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
4594
+ "model_revision":"master"
4595
+ },
4596
+ {
4597
+ "model_format":"awq",
4598
+ "model_size_in_billions":2,
4599
+ "quantizations":[
4600
+ "Int4"
4601
+ ],
4602
+ "model_hub": "modelscope",
4603
+ "model_id":"qwen/Qwen2-VL-2B-Instruct-AWQ",
4604
+ "model_revision":"master"
4605
+ },
4606
+ {
4607
+ "model_format":"pytorch",
4608
+ "model_size_in_billions":72,
4609
+ "quantizations":[
4610
+ "none"
4611
+ ],
4612
+ "model_id":"qwen/Qwen2-VL-72B-Instruct",
4613
+ "model_hub": "modelscope"
4614
+ },
4615
+ {
4616
+ "model_format":"awq",
4617
+ "model_size_in_billions":72,
4618
+ "quantizations":[
4619
+ "Int4"
4620
+ ],
4621
+ "model_id":"qwen/Qwen2-VL-72B-Instruct-AWQ",
4622
+ "model_hub": "modelscope"
4623
+ },
4624
+ {
4625
+ "model_format":"gptq",
4626
+ "model_size_in_billions":72,
4627
+ "quantizations":[
4628
+ "Int4",
4629
+ "Int8"
4630
+ ],
4631
+ "model_id":"qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}",
4632
+ "model_hub": "modelscope"
4633
+ }
4634
+ ],
4635
+ "prompt_style": {
4636
+ "style_name": "QWEN",
4637
+ "system_prompt": "You are a helpful assistant",
4638
+ "roles": [
4639
+ "user",
4640
+ "assistant"
4641
+ ]
4642
+ }
4643
+ },
4644
+ {
4645
+ "version": 1,
4646
+ "context_length": 32768,
4647
+ "model_name": "minicpm3-4b",
4648
+ "model_lang": [
4649
+ "zh"
4650
+ ],
4651
+ "model_ability": [
4652
+ "chat"
4653
+ ],
4654
+ "model_description": "MiniCPM3-4B is the 3rd generation of MiniCPM series. The overall performance of MiniCPM3-4B surpasses Phi-3.5-mini-Instruct and GPT-3.5-Turbo-0125, being comparable with many recent 7B~9B models.",
4525
4655
  "model_specs": [
4526
4656
  {
4527
4657
  "model_format": "pytorch",
4528
- "model_size_in_billions": 2,
4658
+ "model_size_in_billions": 4,
4529
4659
  "quantizations": [
4530
4660
  "none"
4531
4661
  ],
4532
4662
  "model_hub": "modelscope",
4533
- "model_id": "qwen/Qwen2-VL-2B-Instruct",
4663
+ "model_id": "OpenBMB/MiniCPM3-4B",
4534
4664
  "model_revision": "master"
4535
4665
  },
4666
+ {
4667
+ "model_format": "gptq",
4668
+ "model_size_in_billions": 4,
4669
+ "quantizations": [
4670
+ "Int4"
4671
+ ],
4672
+ "model_hub": "modelscope",
4673
+ "model_id": "OpenBMB/MiniCPM3-4B-GPTQ-Int4",
4674
+ "model_revision": "master"
4675
+ }
4676
+ ],
4677
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
4678
+ "stop_token_ids": [
4679
+ 1,
4680
+ 2
4681
+ ],
4682
+ "stop": [
4683
+ "<s>",
4684
+ "</s>"
4685
+ ]
4686
+ },
4687
+ {
4688
+ "version": 1,
4689
+ "context_length": 32768,
4690
+ "model_name": "qwen2-audio-instruct",
4691
+ "model_lang": [
4692
+ "en",
4693
+ "zh"
4694
+ ],
4695
+ "model_ability": [
4696
+ "chat",
4697
+ "audio"
4698
+ ],
4699
+ "model_description": "Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
4700
+ "model_specs": [
4701
+ {
4702
+ "model_format": "pytorch",
4703
+ "model_size_in_billions": 7,
4704
+ "quantizations": [
4705
+ "none"
4706
+ ],
4707
+ "model_hub": "modelscope",
4708
+ "model_id": "qwen/Qwen2-Audio-7B-Instruct",
4709
+ "model_revision": "master"
4710
+ }
4711
+ ],
4712
+ "prompt_style": {
4713
+ "style_name": "QWEN",
4714
+ "system_prompt": "You are a helpful assistant",
4715
+ "roles": [
4716
+ "user",
4717
+ "assistant"
4718
+ ]
4719
+ }
4720
+ },
4721
+ {
4722
+ "version": 1,
4723
+ "context_length": 32768,
4724
+ "model_name": "qwen2-audio",
4725
+ "model_lang": [
4726
+ "en",
4727
+ "zh"
4728
+ ],
4729
+ "model_ability": [
4730
+ "chat",
4731
+ "audio"
4732
+ ],
4733
+ "model_description": "Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
4734
+ "model_specs": [
4536
4735
  {
4537
4736
  "model_format": "pytorch",
4538
4737
  "model_size_in_billions": 7,
@@ -4540,7 +4739,7 @@
4540
4739
  "none"
4541
4740
  ],
4542
4741
  "model_hub": "modelscope",
4543
- "model_id": "qwen/Qwen2-VL-7B-Instruct",
4742
+ "model_id": "qwen/Qwen2-Audio-7B",
4544
4743
  "model_revision": "master"
4545
4744
  }
4546
4745
  ],
@@ -4552,5 +4751,1104 @@
4552
4751
  "assistant"
4553
4752
  ]
4554
4753
  }
4754
+ },
4755
+ {
4756
+ "version": 1,
4757
+ "context_length": 128000,
4758
+ "model_name": "deepseek-v2",
4759
+ "model_lang": [
4760
+ "en",
4761
+ "zh"
4762
+ ],
4763
+ "model_ability": [
4764
+ "chat"
4765
+ ],
4766
+ "model_description": "DeepSeek-V2, a strong Mixture-of-Experts (MoE) language model characterized by economical training and efficient inference. ",
4767
+ "model_specs": [
4768
+ {
4769
+ "model_format": "pytorch",
4770
+ "model_size_in_billions": 16,
4771
+ "quantizations": [
4772
+ "4-bit",
4773
+ "8-bit",
4774
+ "none"
4775
+ ],
4776
+ "model_id": "deepseek-ai/DeepSeek-V2-Lite",
4777
+ "model_hub": "modelscope",
4778
+ "model_revision": "master"
4779
+ },
4780
+ {
4781
+ "model_format": "pytorch",
4782
+ "model_size_in_billions": 236,
4783
+ "quantizations": [
4784
+ "4-bit",
4785
+ "8-bit",
4786
+ "none"
4787
+ ],
4788
+ "model_id": "deepseek-ai/DeepSeek-V2",
4789
+ "model_hub": "modelscope",
4790
+ "model_revision": "master"
4791
+ }
4792
+ ]
4793
+ },
4794
+ {
4795
+ "version": 1,
4796
+ "context_length": 128000,
4797
+ "model_name": "deepseek-v2-chat",
4798
+ "model_lang": [
4799
+ "en",
4800
+ "zh"
4801
+ ],
4802
+ "model_ability": [
4803
+ "chat"
4804
+ ],
4805
+ "model_description": "DeepSeek-V2, a strong Mixture-of-Experts (MoE) language model characterized by economical training and efficient inference. ",
4806
+ "model_specs": [
4807
+ {
4808
+ "model_format": "pytorch",
4809
+ "model_size_in_billions": 16,
4810
+ "quantizations": [
4811
+ "4-bit",
4812
+ "8-bit",
4813
+ "none"
4814
+ ],
4815
+ "model_id": "deepseek-ai/DeepSeek-V2-Lite-Chat",
4816
+ "model_hub": "modelscope",
4817
+ "model_revision": "master"
4818
+ },
4819
+ {
4820
+ "model_format": "pytorch",
4821
+ "model_size_in_billions": 236,
4822
+ "quantizations": [
4823
+ "4-bit",
4824
+ "8-bit",
4825
+ "none"
4826
+ ],
4827
+ "model_id": "deepseek-ai/DeepSeek-V2-Chat",
4828
+ "model_hub": "modelscope",
4829
+ "model_revision": "master"
4830
+ }
4831
+ ],
4832
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ '<|begin▁of▁sentence|>' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + '<|end▁of▁sentence|>' }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
4833
+ "stop_token_ids": [
4834
+ 100001
4835
+ ],
4836
+ "stop": [
4837
+ "<|end▁of▁sentence|>"
4838
+ ]
4839
+ },
4840
+ {
4841
+ "version": 1,
4842
+ "context_length": 128000,
4843
+ "model_name": "deepseek-v2-chat-0628",
4844
+ "model_lang": [
4845
+ "en",
4846
+ "zh"
4847
+ ],
4848
+ "model_ability": [
4849
+ "chat"
4850
+ ],
4851
+ "model_description": "DeepSeek-V2-Chat-0628 is an improved version of DeepSeek-V2-Chat. ",
4852
+ "model_specs": [
4853
+ {
4854
+ "model_format": "pytorch",
4855
+ "model_size_in_billions": 236,
4856
+ "quantizations": [
4857
+ "4-bit",
4858
+ "8-bit",
4859
+ "none"
4860
+ ],
4861
+ "model_id": "deepseek-ai/DeepSeek-V2-Chat-0628",
4862
+ "model_hub": "modelscope",
4863
+ "model_revision": "master"
4864
+ }
4865
+ ],
4866
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ '<|begin▁of▁sentence|>' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|User|>' + message['content'] }}{% elif message['role'] == 'assistant' %}{{ '<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>' }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|Assistant|>' }}{% endif %}",
4867
+ "stop_token_ids": [
4868
+ 100001
4869
+ ],
4870
+ "stop": [
4871
+ "<|end▁of▁sentence|>"
4872
+ ]
4873
+ },
4874
+ {
4875
+ "version": 1,
4876
+ "context_length": 128000,
4877
+ "model_name": "deepseek-v2.5",
4878
+ "model_lang": [
4879
+ "en",
4880
+ "zh"
4881
+ ],
4882
+ "model_ability": [
4883
+ "chat"
4884
+ ],
4885
+ "model_description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions.",
4886
+ "model_specs": [
4887
+ {
4888
+ "model_format": "pytorch",
4889
+ "model_size_in_billions": 236,
4890
+ "quantizations": [
4891
+ "4-bit",
4892
+ "8-bit",
4893
+ "none"
4894
+ ],
4895
+ "model_id": "deepseek-ai/DeepSeek-V2.5",
4896
+ "model_hub": "modelscope",
4897
+ "model_revision": "master"
4898
+ }
4899
+ ],
4900
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %} {%- if message['role'] == 'system' %} {% set ns.system_prompt = message['content'] %} {%- endif %}{%- endfor %}{{'<|begin▁of▁sentence|>'}}{{ns.system_prompt}}{%- for message in messages %} {%- if message['role'] == 'user' %} {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is none %} {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls']%} {%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}} {%- set ns.is_first = true -%} {%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {%- endif %} {%- endfor %} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is not none %} {%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- set ns.is_tool = false -%} {%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- endif %} {%- endif %} {%- if message['role'] == 'tool' %} {%- set ns.is_tool = true -%} {%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- set ns.is_output_first = false %} {%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- endif %} {%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not 
ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
4901
+ "stop_token_ids": [
4902
+ 100001
4903
+ ],
4904
+ "stop": [
4905
+ "<|end▁of▁sentence|>"
4906
+ ]
4907
+ },
4908
+ {
4909
+ "version": 1,
4910
+ "context_length": 131072,
4911
+ "model_name": "yi-coder-chat",
4912
+ "model_lang": [
4913
+ "en"
4914
+ ],
4915
+ "model_ability": [
4916
+ "chat"
4917
+ ],
4918
+ "model_description": "Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters. Excelling in long-context understanding with a maximum context length of 128K tokens. Supporting 52 major programming languages, including popular ones such as Java, Python, JavaScript, and C++.",
4919
+ "model_specs": [
4920
+ {
4921
+ "model_format": "pytorch",
4922
+ "model_size_in_billions": 9,
4923
+ "quantizations": [
4924
+ "none"
4925
+ ],
4926
+ "model_hub": "modelscope",
4927
+ "model_id": "01ai/Yi-Coder-9B-Chat",
4928
+ "model_revision": "master"
4929
+ },
4930
+ {
4931
+ "model_format": "pytorch",
4932
+ "model_size_in_billions": "1_5",
4933
+ "quantizations": [
4934
+ "none"
4935
+ ],
4936
+ "model_hub": "modelscope",
4937
+ "model_id": "01ai/Yi-Coder-1.5B-Chat",
4938
+ "model_revision": "master"
4939
+ }
4940
+ ],
4941
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}",
4942
+ "stop_token_ids": [
4943
+ 1,
4944
+ 2,
4945
+ 6,
4946
+ 7
4947
+ ],
4948
+ "stop": [
4949
+ "<|startoftext|>",
4950
+ "<|endoftext|>",
4951
+ "<|im_start|>",
4952
+ "<|im_end|>"
4953
+ ]
4954
+ },
4955
+ {
4956
+ "version": 1,
4957
+ "context_length": 131072,
4958
+ "model_name": "yi-coder",
4959
+ "model_lang": [
4960
+ "en"
4961
+ ],
4962
+ "model_ability": [
4963
+ "generate"
4964
+ ],
4965
+ "model_description": "Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters. Excelling in long-context understanding with a maximum context length of 128K tokens. Supporting 52 major programming languages, including popular ones such as Java, Python, JavaScript, and C++.",
4966
+ "model_specs": [
4967
+ {
4968
+ "model_format": "pytorch",
4969
+ "model_size_in_billions": 9,
4970
+ "quantizations": [
4971
+ "none"
4972
+ ],
4973
+ "model_hub": "modelscope",
4974
+ "model_id": "01ai/Yi-Coder-9B",
4975
+ "model_revision": "master"
4976
+ },
4977
+ {
4978
+ "model_format": "pytorch",
4979
+ "model_size_in_billions": "1_5",
4980
+ "quantizations": [
4981
+ "none"
4982
+ ],
4983
+ "model_hub": "modelscope",
4984
+ "model_id": "01ai/Yi-Coder-1.5B",
4985
+ "model_revision": "master"
4986
+ }
4987
+ ]
4988
+ },
4989
+ {
4990
+ "version": 1,
4991
+ "context_length": 32768,
4992
+ "model_name": "qwen2.5",
4993
+ "model_lang": [
4994
+ "en",
4995
+ "zh"
4996
+ ],
4997
+ "model_ability": [
4998
+ "generate"
4999
+ ],
5000
+ "model_description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.",
5001
+ "model_specs": [
5002
+ {
5003
+ "model_format": "pytorch",
5004
+ "model_size_in_billions": "0_5",
5005
+ "quantizations": [
5006
+ "4-bit",
5007
+ "8-bit",
5008
+ "none"
5009
+ ],
5010
+ "model_id": "qwen/Qwen2.5-0.5B",
5011
+ "model_revision": "master",
5012
+ "model_hub": "modelscope"
5013
+ },
5014
+ {
5015
+ "model_format": "pytorch",
5016
+ "model_size_in_billions": "1_5",
5017
+ "quantizations": [
5018
+ "4-bit",
5019
+ "8-bit",
5020
+ "none"
5021
+ ],
5022
+ "model_id": "qwen/Qwen2.5-1.5B",
5023
+ "model_revision": "master",
5024
+ "model_hub": "modelscope"
5025
+ },
5026
+ {
5027
+ "model_format": "pytorch",
5028
+ "model_size_in_billions": 3,
5029
+ "quantizations": [
5030
+ "4-bit",
5031
+ "8-bit",
5032
+ "none"
5033
+ ],
5034
+ "model_id": "qwen/Qwen2.5-3B",
5035
+ "model_revision": "master",
5036
+ "model_hub": "modelscope"
5037
+ },
5038
+ {
5039
+ "model_format": "pytorch",
5040
+ "model_size_in_billions": 7,
5041
+ "quantizations": [
5042
+ "4-bit",
5043
+ "8-bit",
5044
+ "none"
5045
+ ],
5046
+ "model_id": "qwen/Qwen2.5-7B",
5047
+ "model_revision": "master",
5048
+ "model_hub": "modelscope"
5049
+ },
5050
+ {
5051
+ "model_format": "pytorch",
5052
+ "model_size_in_billions": 14,
5053
+ "quantizations": [
5054
+ "4-bit",
5055
+ "8-bit",
5056
+ "none"
5057
+ ],
5058
+ "model_id": "qwen/Qwen2.5-14B",
5059
+ "model_revision": "master",
5060
+ "model_hub": "modelscope"
5061
+ },
5062
+ {
5063
+ "model_format": "pytorch",
5064
+ "model_size_in_billions": 32,
5065
+ "quantizations": [
5066
+ "4-bit",
5067
+ "8-bit",
5068
+ "none"
5069
+ ],
5070
+ "model_id": "qwen/Qwen2.5-32B",
5071
+ "model_revision": "master",
5072
+ "model_hub": "modelscope"
5073
+ },
5074
+ {
5075
+ "model_format": "pytorch",
5076
+ "model_size_in_billions": 72,
5077
+ "quantizations": [
5078
+ "4-bit",
5079
+ "8-bit",
5080
+ "none"
5081
+ ],
5082
+ "model_id": "qwen/Qwen2.5-72B",
5083
+ "model_revision": "master",
5084
+ "model_hub": "modelscope"
5085
+ }
5086
+ ]
5087
+ },
5088
+ {
5089
+ "version": 1,
5090
+ "context_length": 32768,
5091
+ "model_name": "qwen2.5-instruct",
5092
+ "model_lang": [
5093
+ "en",
5094
+ "zh"
5095
+ ],
5096
+ "model_ability": [
5097
+ "chat",
5098
+ "tools"
5099
+ ],
5100
+ "model_description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.",
5101
+ "model_specs": [
5102
+ {
5103
+ "model_format": "pytorch",
5104
+ "model_size_in_billions": "0_5",
5105
+ "quantizations": [
5106
+ "4-bit",
5107
+ "8-bit",
5108
+ "none"
5109
+ ],
5110
+ "model_id": "qwen/Qwen2.5-0.5B-Instruct",
5111
+ "model_hub": "modelscope"
5112
+ },
5113
+ {
5114
+ "model_format": "pytorch",
5115
+ "model_size_in_billions": "1_5",
5116
+ "quantizations": [
5117
+ "4-bit",
5118
+ "8-bit",
5119
+ "none"
5120
+ ],
5121
+ "model_id": "qwen/Qwen2.5-1.5B-Instruct",
5122
+ "model_hub": "modelscope"
5123
+ },
5124
+ {
5125
+ "model_format": "pytorch",
5126
+ "model_size_in_billions": 3,
5127
+ "quantizations": [
5128
+ "4-bit",
5129
+ "8-bit",
5130
+ "none"
5131
+ ],
5132
+ "model_id": "qwen/Qwen2.5-3B-Instruct",
5133
+ "model_hub": "modelscope"
5134
+ },
5135
+ {
5136
+ "model_format": "pytorch",
5137
+ "model_size_in_billions": 7,
5138
+ "quantizations": [
5139
+ "4-bit",
5140
+ "8-bit",
5141
+ "none"
5142
+ ],
5143
+ "model_id": "qwen/Qwen2.5-7B-Instruct",
5144
+ "model_hub": "modelscope"
5145
+ },
5146
+ {
5147
+ "model_format": "pytorch",
5148
+ "model_size_in_billions": 14,
5149
+ "quantizations": [
5150
+ "4-bit",
5151
+ "8-bit",
5152
+ "none"
5153
+ ],
5154
+ "model_id": "qwen/Qwen2.5-14B-Instruct",
5155
+ "model_hub": "modelscope"
5156
+ },
5157
+ {
5158
+ "model_format": "pytorch",
5159
+ "model_size_in_billions": 32,
5160
+ "quantizations": [
5161
+ "4-bit",
5162
+ "8-bit",
5163
+ "none"
5164
+ ],
5165
+ "model_id": "qwen/Qwen2.5-32B-Instruct",
5166
+ "model_hub": "modelscope"
5167
+ },
5168
+ {
5169
+ "model_format": "pytorch",
5170
+ "model_size_in_billions": 72,
5171
+ "quantizations": [
5172
+ "4-bit",
5173
+ "8-bit",
5174
+ "none"
5175
+ ],
5176
+ "model_id": "qwen/Qwen2.5-72B-Instruct",
5177
+ "model_hub": "modelscope"
5178
+ },
5179
+ {
5180
+ "model_format": "gptq",
5181
+ "model_size_in_billions": "0_5",
5182
+ "quantizations": [
5183
+ "Int4",
5184
+ "Int8"
5185
+ ],
5186
+ "model_id": "qwen/Qwen2.5-0.5B-Instruct-GPTQ-{quantization}",
5187
+ "model_hub": "modelscope"
5188
+ },
5189
+ {
5190
+ "model_format": "gptq",
5191
+ "model_size_in_billions": "1_5",
5192
+ "quantizations": [
5193
+ "Int4",
5194
+ "Int8"
5195
+ ],
5196
+ "model_id": "qwen/Qwen2.5-1.5B-Instruct-GPTQ-{quantization}",
5197
+ "model_hub": "modelscope"
5198
+ },
5199
+ {
5200
+ "model_format": "gptq",
5201
+ "model_size_in_billions": 3,
5202
+ "quantizations": [
5203
+ "Int4",
5204
+ "Int8"
5205
+ ],
5206
+ "model_id": "qwen/Qwen2.5-3B-Instruct-GPTQ-{quantization}",
5207
+ "model_hub": "modelscope"
5208
+ },
5209
+ {
5210
+ "model_format": "gptq",
5211
+ "model_size_in_billions": 7,
5212
+ "quantizations": [
5213
+ "Int4",
5214
+ "Int8"
5215
+ ],
5216
+ "model_id": "qwen/Qwen2.5-7B-Instruct-GPTQ-{quantization}",
5217
+ "model_hub": "modelscope"
5218
+ },
5219
+ {
5220
+ "model_format": "gptq",
5221
+ "model_size_in_billions": 14,
5222
+ "quantizations": [
5223
+ "Int4",
5224
+ "Int8"
5225
+ ],
5226
+ "model_id": "qwen/Qwen2.5-14B-Instruct-GPTQ-{quantization}",
5227
+ "model_hub": "modelscope"
5228
+ },
5229
+ {
5230
+ "model_format": "gptq",
5231
+ "model_size_in_billions": 32,
5232
+ "quantizations": [
5233
+ "Int4",
5234
+ "Int8"
5235
+ ],
5236
+ "model_id": "qwen/Qwen2.5-32B-Instruct-GPTQ-{quantization}",
5237
+ "model_hub": "modelscope"
5238
+ },
5239
+ {
5240
+ "model_format": "gptq",
5241
+ "model_size_in_billions": 72,
5242
+ "quantizations": [
5243
+ "Int4",
5244
+ "Int8"
5245
+ ],
5246
+ "model_id": "qwen/Qwen2.5-72B-Instruct-GPTQ-{quantization}",
5247
+ "model_hub": "modelscope"
5248
+ },
5249
+ {
5250
+ "model_format": "awq",
5251
+ "model_size_in_billions": "0_5",
5252
+ "quantizations": [
5253
+ "Int4"
5254
+ ],
5255
+ "model_id": "qwen/Qwen2-0.5B-Instruct-AWQ",
5256
+ "model_hub": "modelscope"
5257
+ },
5258
+ {
5259
+ "model_format": "awq",
5260
+ "model_size_in_billions": "1_5",
5261
+ "quantizations": [
5262
+ "Int4"
5263
+ ],
5264
+ "model_id": "qwen/Qwen2-1.5B-Instruct-AWQ",
5265
+ "model_hub": "modelscope"
5266
+ },
5267
+ {
5268
+ "model_format": "awq",
5269
+ "model_size_in_billions": 3,
5270
+ "quantizations": [
5271
+ "Int4"
5272
+ ],
5273
+ "model_id": "qwen/Qwen2.5-3B-Instruct-AWQ",
5274
+ "model_hub": "modelscope"
5275
+ },
5276
+ {
5277
+ "model_format": "awq",
5278
+ "model_size_in_billions": 7,
5279
+ "quantizations": [
5280
+ "Int4"
5281
+ ],
5282
+ "model_id": "qwen/Qwen2.5-7B-Instruct-AWQ",
5283
+ "model_hub": "modelscope"
5284
+ },
5285
+ {
5286
+ "model_format": "awq",
5287
+ "model_size_in_billions":14,
5288
+ "quantizations": [
5289
+ "Int4"
5290
+ ],
5291
+ "model_id": "qwen/Qwen2.5-14B-Instruct-AWQ",
5292
+ "model_hub": "modelscope"
5293
+ },
5294
+ {
5295
+ "model_format": "awq",
5296
+ "model_size_in_billions": 32,
5297
+ "quantizations": [
5298
+ "Int4"
5299
+ ],
5300
+ "model_id": "qwen/Qwen2.5-32B-Instruct-AWQ",
5301
+ "model_hub": "modelscope"
5302
+ },
5303
+ {
5304
+ "model_format": "awq",
5305
+ "model_size_in_billions": 72,
5306
+ "quantizations": [
5307
+ "Int4"
5308
+ ],
5309
+ "model_id": "qwen/Qwen2.5-72B-Instruct-AWQ",
5310
+ "model_hub": "modelscope"
5311
+ },
5312
+ {
5313
+ "model_format": "ggufv2",
5314
+ "model_size_in_billions": "0_5",
5315
+ "quantizations": [
5316
+ "q2_k",
5317
+ "q3_k_m",
5318
+ "q4_0",
5319
+ "q4_k_m",
5320
+ "q5_0",
5321
+ "q5_k_m",
5322
+ "q6_k",
5323
+ "q8_0"
5324
+ ],
5325
+ "model_id": "qwen/Qwen2.5-0.5B-Instruct-GGUF",
5326
+ "model_file_name_template": "qwen2.5-0.5b-instruct-{quantization}.gguf",
5327
+ "model_hub": "modelscope"
5328
+ },
5329
+ {
5330
+ "model_format": "ggufv2",
5331
+ "model_size_in_billions": "1_5",
5332
+ "quantizations": [
5333
+ "q2_k",
5334
+ "q3_k_m",
5335
+ "q4_0",
5336
+ "q4_k_m",
5337
+ "q5_0",
5338
+ "q5_k_m",
5339
+ "q6_k",
5340
+ "q8_0"
5341
+ ],
5342
+ "model_id": "qwen/Qwen2.5-1.5B-Instruct-GGUF",
5343
+ "model_file_name_template": "qwen2.5-1.5b-instruct-{quantization}.gguf",
5344
+ "model_hub": "modelscope"
5345
+ },
5346
+ {
5347
+ "model_format": "ggufv2",
5348
+ "model_size_in_billions": 3,
5349
+ "quantizations": [
5350
+ "q2_k",
5351
+ "q3_k_m",
5352
+ "q4_0",
5353
+ "q4_k_m",
5354
+ "q5_0",
5355
+ "q5_k_m",
5356
+ "q6_k",
5357
+ "q8_0"
5358
+ ],
5359
+ "model_id": "qwen/Qwen2.5-3B-Instruct-GGUF",
5360
+ "model_file_name_template": "qwen2.5-3b-instruct-{quantization}.gguf",
5361
+ "model_hub": "modelscope"
5362
+ },
5363
+ {
5364
+ "model_format": "ggufv2",
5365
+ "model_size_in_billions": 7,
5366
+ "quantizations": [
5367
+ "q2_k",
5368
+ "q3_k_m",
5369
+ "q4_0",
5370
+ "q4_k_m",
5371
+ "q5_0",
5372
+ "q5_k_m",
5373
+ "q6_k",
5374
+ "q8_0"
5375
+ ],
5376
+ "model_id": "qwen/Qwen2.5-7B-Instruct-GGUF",
5377
+ "model_file_name_template": "qwen2.5-7b-instruct-{quantization}.gguf",
5378
+ "model_hub": "modelscope",
5379
+ "model_file_name_split_template": "qwen2.5-7b-instruct-{quantization}-{part}.gguf",
5380
+ "quantization_parts": {
5381
+ "q4_0": [
5382
+ "00001-of-00002",
5383
+ "00002-of-00002"
5384
+ ],
5385
+ "q4_k_m": [
5386
+ "00001-of-00002",
5387
+ "00002-of-00002"
5388
+ ],
5389
+ "q5_0": [
5390
+ "00001-of-00002",
5391
+ "00002-of-00002"
5392
+ ],
5393
+ "q5_k_m": [
5394
+ "00001-of-00002",
5395
+ "00002-of-00002"
5396
+ ],
5397
+ "q6_k": [
5398
+ "00001-of-00002",
5399
+ "00002-of-00002"
5400
+ ],
5401
+ "q8_0": [
5402
+ "00001-of-00002",
5403
+ "00002-of-00002"
5404
+ ]
5405
+ }
5406
+ },
5407
+ {
5408
+ "model_format": "ggufv2",
5409
+ "model_size_in_billions": 14,
5410
+ "quantizations": [
5411
+ "q2_k",
5412
+ "q3_k_m",
5413
+ "q4_0",
5414
+ "q4_k_m",
5415
+ "q5_0",
5416
+ "q5_k_m",
5417
+ "q6_k",
5418
+ "q8_0"
5419
+ ],
5420
+ "model_id": "qwen/Qwen2.5-14B-Instruct-GGUF",
5421
+ "model_file_name_template": "qwen2.5-14b-instruct-{quantization}.gguf",
5422
+ "model_file_name_split_template": "qwen2.5-14b-instruct-{quantization}-{part}.gguf",
5423
+ "quantization_parts": {
5424
+ "q2_k": [
5425
+ "00001-of-00002",
5426
+ "00002-of-00002"
5427
+ ],
5428
+ "q3_k_m": [
5429
+ "00001-of-00002",
5430
+ "00002-of-00002"
5431
+ ],
5432
+ "q4_0": [
5433
+ "00001-of-00003",
5434
+ "00002-of-00003",
5435
+ "00003-of-00003"
5436
+ ],
5437
+ "q4_k_m": [
5438
+ "00001-of-00003",
5439
+ "00002-of-00003",
5440
+ "00003-of-00003"
5441
+ ],
5442
+ "q5_0": [
5443
+ "00001-of-00003",
5444
+ "00002-of-00003",
5445
+ "00003-of-00003"
5446
+ ],
5447
+ "q5_k_m": [
5448
+ "00001-of-00003",
5449
+ "00002-of-00003",
5450
+ "00003-of-00003"
5451
+ ],
5452
+ "q6_k": [
5453
+ "00001-of-00004",
5454
+ "00002-of-00004",
5455
+ "00003-of-00004",
5456
+ "00004-of-00004"
5457
+ ],
5458
+ "q8_0": [
5459
+ "00001-of-00004",
5460
+ "00002-of-00004",
5461
+ "00003-of-00004",
5462
+ "00004-of-00004"
5463
+ ]
5464
+ },
5465
+ "model_hub": "modelscope"
5466
+ },
5467
+ {
5468
+ "model_format": "ggufv2",
5469
+ "model_size_in_billions": 32,
5470
+ "quantizations": [
5471
+ "q2_k",
5472
+ "q3_k_m",
5473
+ "q4_0",
5474
+ "q4_k_m",
5475
+ "q5_0",
5476
+ "q5_k_m",
5477
+ "q6_k",
5478
+ "q8_0"
5479
+ ],
5480
+ "model_id": "qwen/Qwen2.5-32B-Instruct-GGUF",
5481
+ "model_file_name_template": "qwen2.5-32b-instruct-{quantization}.gguf",
5482
+ "model_file_name_split_template": "qwen2.5-32b-instruct-{quantization}-{part}.gguf",
5483
+ "quantization_parts": {
5484
+ "q2_k": [
5485
+ "00001-of-00004",
5486
+ "00002-of-00004",
5487
+ "00003-of-00004",
5488
+ "00004-of-00004"
5489
+ ],
5490
+ "q3_k_m": [
5491
+ "00001-of-00005",
5492
+ "00002-of-00005",
5493
+ "00003-of-00005",
5494
+ "00004-of-00005",
5495
+ "00005-of-00005"
5496
+ ],
5497
+ "q4_0": [
5498
+ "00001-of-00005",
5499
+ "00002-of-00005",
5500
+ "00003-of-00005",
5501
+ "00004-of-00005",
5502
+ "00005-of-00005"
5503
+ ],
5504
+ "q4_k_m": [
5505
+ "00001-of-00005",
5506
+ "00002-of-00005",
5507
+ "00003-of-00005",
5508
+ "00004-of-00005",
5509
+ "00005-of-00005"
5510
+ ],
5511
+ "q5_0": [
5512
+ "00001-of-00006",
5513
+ "00002-of-00006",
5514
+ "00003-of-00006",
5515
+ "00004-of-00006",
5516
+ "00005-of-00006",
5517
+ "00006-of-00006"
5518
+ ],
5519
+ "q5_k_m": [
5520
+ "00001-of-00006",
5521
+ "00002-of-00006",
5522
+ "00003-of-00006",
5523
+ "00004-of-00006",
5524
+ "00005-of-00006",
5525
+ "00006-of-00006"
5526
+ ],
5527
+ "q6_k": [
5528
+ "00001-of-00007",
5529
+ "00002-of-00007",
5530
+ "00003-of-00007",
5531
+ "00004-of-00007",
5532
+ "00005-of-00007",
5533
+ "00006-of-00007",
5534
+ "00007-of-00007"
5535
+ ],
5536
+ "q8_0": [
5537
+ "00001-of-00009",
5538
+ "00002-of-00009",
5539
+ "00003-of-00009",
5540
+ "00004-of-00009",
5541
+ "00005-of-00009",
5542
+ "00006-of-00009",
5543
+ "00007-of-00009",
5544
+ "00008-of-00009",
5545
+ "00009-of-00009"
5546
+ ]
5547
+ },
5548
+ "model_hub": "modelscope"
5549
+ },
5550
+ {
5551
+ "model_format": "ggufv2",
5552
+ "model_size_in_billions": 72,
5553
+ "quantizations": [
5554
+ "q2_k",
5555
+ "q3_k_m",
5556
+ "q4_0",
5557
+ "q4_k_m",
5558
+ "q5_0",
5559
+ "q5_k_m",
5560
+ "q6_k",
5561
+ "q8_0"
5562
+ ],
5563
+ "model_id": "qwen/Qwen2.5-72B-Instruct-GGUF",
5564
+ "model_hub": "modelscope",
5565
+ "model_file_name_template": "qwen2.5-72b-instruct-{quantization}.gguf",
5566
+ "model_file_name_split_template": "qwen2.5-72b-instruct-{quantization}-{part}.gguf",
5567
+ "quantization_parts": {
5568
+ "q2_k": [
5569
+ "00001-of-00007",
5570
+ "00002-of-00007",
5571
+ "00003-of-00007",
5572
+ "00004-of-00007",
5573
+ "00005-of-00007",
5574
+ "00006-of-00007",
5575
+ "00007-of-00007"
5576
+ ],
5577
+ "q3_k_m": [
5578
+ "00001-of-00009",
5579
+ "00002-of-00009",
5580
+ "00003-of-00009",
5581
+ "00004-of-00009",
5582
+ "00005-of-00009",
5583
+ "00006-of-00009",
5584
+ "00007-of-00009",
5585
+ "00008-of-00009",
5586
+ "00009-of-00009"
5587
+ ],
5588
+ "q4_0": [
5589
+ "00001-of-00011",
5590
+ "00002-of-00011",
5591
+ "00003-of-00011",
5592
+ "00004-of-00011",
5593
+ "00005-of-00011",
5594
+ "00006-of-00011",
5595
+ "00007-of-00011",
5596
+ "00008-of-00011",
5597
+ "00009-of-00011",
5598
+ "00010-of-00011",
5599
+ "00011-of-00011"
5600
+ ],
5601
+ "q4_k_m": [
5602
+ "00001-of-00012",
5603
+ "00002-of-00012",
5604
+ "00003-of-00012",
5605
+ "00004-of-00012",
5606
+ "00005-of-00012",
5607
+ "00006-of-00012",
5608
+ "00007-of-00012",
5609
+ "00008-of-00012",
5610
+ "00009-of-00012",
5611
+ "00010-of-00012",
5612
+ "00011-of-00012",
5613
+ "00012-of-00012"
5614
+ ],
5615
+ "q5_0": [
5616
+ "00001-of-00013",
5617
+ "00002-of-00013",
5618
+ "00003-of-00013",
5619
+ "00004-of-00013",
5620
+ "00005-of-00013",
5621
+ "00006-of-00013",
5622
+ "00007-of-00013",
5623
+ "00008-of-00013",
5624
+ "00009-of-00013",
5625
+ "00010-of-00013",
5626
+ "00011-of-00013",
5627
+ "00012-of-00013",
5628
+ "00013-of-00013"
5629
+ ],
5630
+ "q5_k_m": [
5631
+ "00001-of-00014",
5632
+ "00002-of-00014",
5633
+ "00003-of-00014",
5634
+ "00004-of-00014",
5635
+ "00005-of-00014",
5636
+ "00006-of-00014",
5637
+ "00007-of-00014",
5638
+ "00008-of-00014",
5639
+ "00009-of-00014",
5640
+ "00010-of-00014",
5641
+ "00011-of-00014",
5642
+ "00012-of-00014",
5643
+ "00013-of-00014",
5644
+ "00014-of-00014"
5645
+ ],
5646
+ "q6_k": [
5647
+ "00001-of-00016",
5648
+ "00002-of-00016",
5649
+ "00003-of-00016",
5650
+ "00004-of-00016",
5651
+ "00005-of-00016",
5652
+ "00006-of-00016",
5653
+ "00007-of-00016",
5654
+ "00008-of-00016",
5655
+ "00009-of-00016",
5656
+ "00010-of-00016",
5657
+ "00011-of-00016",
5658
+ "00012-of-00016",
5659
+ "00013-of-00016",
5660
+ "00014-of-00016",
5661
+ "00015-of-00016",
5662
+ "00016-of-00016"
5663
+ ],
5664
+ "q8_0": [
5665
+ "00001-of-00021",
5666
+ "00002-of-00021",
5667
+ "00003-of-00021",
5668
+ "00004-of-00021",
5669
+ "00005-of-00021",
5670
+ "00006-of-00021",
5671
+ "00007-of-00021",
5672
+ "00008-of-00021",
5673
+ "00009-of-00021",
5674
+ "00010-of-00021",
5675
+ "00011-of-00021",
5676
+ "00012-of-00021",
5677
+ "00013-of-00021",
5678
+ "00014-of-00021",
5679
+ "00015-of-00021",
5680
+ "00016-of-00021",
5681
+ "00017-of-00021",
5682
+ "00018-of-00021",
5683
+ "00019-of-00021",
5684
+ "00020-of-00021",
5685
+ "00021-of-00021"
5686
+ ]
5687
+ }
5688
+ }
5689
+ ],
5690
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
5691
+ "stop_token_ids": [
5692
+ 151643,
5693
+ 151644,
5694
+ 151645
5695
+ ],
5696
+ "stop": [
5697
+ "<|endoftext|>",
5698
+ "<|im_start|>",
5699
+ "<|im_end|>"
5700
+ ]
5701
+ },
5702
+ {
5703
+ "version": 1,
5704
+ "context_length": 32768,
5705
+ "model_name": "qwen2.5-coder",
5706
+ "model_lang": [
5707
+ "en",
5708
+ "zh"
5709
+ ],
5710
+ "model_ability": [
5711
+ "generate"
5712
+ ],
5713
+ "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
5714
+ "model_specs": [
5715
+ {
5716
+ "model_format": "pytorch",
5717
+ "model_size_in_billions": "1_5",
5718
+ "quantizations": [
5719
+ "4-bit",
5720
+ "8-bit",
5721
+ "none"
5722
+ ],
5723
+ "model_id": "qwen/Qwen2.5-Coder-1.5B",
5724
+ "model_revision": "master",
5725
+ "model_hub": "modelscope"
5726
+ },
5727
+ {
5728
+ "model_format": "pytorch",
5729
+ "model_size_in_billions": 7,
5730
+ "quantizations": [
5731
+ "4-bit",
5732
+ "8-bit",
5733
+ "none"
5734
+ ],
5735
+ "model_id": "qwen/Qwen2.5-Coder-7B",
5736
+ "model_revision": "master",
5737
+ "model_hub": "modelscope"
5738
+ }
5739
+ ]
5740
+ },
5741
+ {
5742
+ "version": 1,
5743
+ "context_length": 32768,
5744
+ "model_name": "qwen2.5-coder-instruct",
5745
+ "model_lang": [
5746
+ "en",
5747
+ "zh"
5748
+ ],
5749
+ "model_ability": [
5750
+ "chat",
5751
+ "tools"
5752
+ ],
5753
+ "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
5754
+ "model_specs": [
5755
+ {
5756
+ "model_format": "pytorch",
5757
+ "model_size_in_billions": "1_5",
5758
+ "quantizations": [
5759
+ "4-bit",
5760
+ "8-bit",
5761
+ "none"
5762
+ ],
5763
+ "model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct",
5764
+ "model_revision": "master",
5765
+ "model_hub": "modelscope"
5766
+ },
5767
+ {
5768
+ "model_format": "pytorch",
5769
+ "model_size_in_billions": 7,
5770
+ "quantizations": [
5771
+ "4-bit",
5772
+ "8-bit",
5773
+ "none"
5774
+ ],
5775
+ "model_id": "qwen/Qwen2.5-Coder-7B-Instruct",
5776
+ "model_revision": "master",
5777
+ "model_hub": "modelscope"
5778
+ },
5779
+ {
5780
+ "model_format": "ggufv2",
5781
+ "model_size_in_billions": "1_5",
5782
+ "quantizations": [
5783
+ "q2_k",
5784
+ "q3_k_m",
5785
+ "q4_0",
5786
+ "q4_k_m",
5787
+ "q5_0",
5788
+ "q5_k_m",
5789
+ "q6_k",
5790
+ "q8_0"
5791
+ ],
5792
+ "model_hub": "modelscope",
5793
+ "model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
5794
+ "model_file_name_template": "qwen2.5-coder-1.5b-instruct-{quantization}.gguf"
5795
+ },
5796
+ {
5797
+ "model_format": "ggufv2",
5798
+ "model_size_in_billions": 7,
5799
+ "quantizations": [
5800
+ "q2_k",
5801
+ "q3_k_m",
5802
+ "q4_0",
5803
+ "q4_k_m",
5804
+ "q5_0",
5805
+ "q5_k_m",
5806
+ "q6_k",
5807
+ "q8_0"
5808
+ ],
5809
+ "model_hub": "modelscope",
5810
+ "model_id": "qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
5811
+ "model_file_name_template": "qwen2.5-coder-7b-instruct-{quantization}.gguf",
5812
+ "model_file_name_split_template": "qwen2.5-coder-7b-instruct-{quantization}-{part}.gguf",
5813
+ "quantization_parts": {
5814
+ "q4_0": [
5815
+ "00001-of-00002",
5816
+ "00002-of-00002"
5817
+ ],
5818
+ "q4_k_m": [
5819
+ "00001-of-00002",
5820
+ "00002-of-00002"
5821
+ ],
5822
+ "q5_0": [
5823
+ "00001-of-00002",
5824
+ "00002-of-00002"
5825
+ ],
5826
+ "q5_k_m": [
5827
+ "00001-of-00002",
5828
+ "00002-of-00002"
5829
+ ],
5830
+ "q6_k": [
5831
+ "00001-of-00002",
5832
+ "00002-of-00002"
5833
+ ],
5834
+ "q8_0": [
5835
+ "00001-of-00003",
5836
+ "00002-of-00003",
5837
+ "00003-of-00003"
5838
+ ]
5839
+ }
5840
+ }
5841
+ ],
5842
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content 
}}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
5843
+ "stop_token_ids": [
5844
+ 151643,
5845
+ 151644,
5846
+ 151645
5847
+ ],
5848
+ "stop": [
5849
+ "<|endoftext|>",
5850
+ "<|im_start|>",
5851
+ "<|im_end|>"
5852
+ ]
4555
5853
  }
4556
5854
  ]