xinference 0.15.0__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +204 -1
- xinference/client/restful/restful_client.py +4 -2
- xinference/core/image_interface.py +28 -0
- xinference/core/model.py +28 -0
- xinference/core/supervisor.py +6 -0
- xinference/model/audio/fish_speech.py +9 -9
- xinference/model/audio/model_spec.json +9 -9
- xinference/model/audio/whisper.py +4 -1
- xinference/model/image/core.py +2 -1
- xinference/model/image/model_spec.json +16 -4
- xinference/model/image/model_spec_modelscope.json +16 -4
- xinference/model/image/sdapi.py +136 -0
- xinference/model/image/stable_diffusion/core.py +148 -20
- xinference/model/llm/__init__.py +8 -0
- xinference/model/llm/llm_family.json +393 -0
- xinference/model/llm/llm_family.py +3 -1
- xinference/model/llm/llm_family_modelscope.json +408 -3
- xinference/model/llm/sglang/core.py +3 -0
- xinference/model/llm/transformers/chatglm.py +1 -1
- xinference/model/llm/transformers/core.py +6 -0
- xinference/model/llm/transformers/deepseek_v2.py +340 -0
- xinference/model/llm/transformers/qwen2_audio.py +168 -0
- xinference/model/llm/transformers/qwen2_vl.py +31 -5
- xinference/model/llm/utils.py +104 -84
- xinference/model/llm/vllm/core.py +8 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml +2 -3
- xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +0 -3
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +169 -198
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +4 -27
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +9 -47
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +12 -10
- xinference/thirdparty/fish_speech/tools/api.py +79 -134
- xinference/thirdparty/fish_speech/tools/commons.py +35 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +3 -3
- xinference/thirdparty/fish_speech/tools/file.py +17 -0
- xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +1 -1
- xinference/thirdparty/fish_speech/tools/llama/generate.py +29 -24
- xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +1 -1
- xinference/thirdparty/fish_speech/tools/llama/quantize.py +2 -2
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +34 -0
- xinference/thirdparty/fish_speech/tools/post_api.py +85 -44
- xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +1 -1
- xinference/thirdparty/fish_speech/tools/smart_pad.py +16 -3
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +2 -2
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +4 -2
- xinference/thirdparty/fish_speech/tools/webui.py +12 -146
- xinference/types.py +7 -4
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.632e9148.css → main.5061c4c3.css} +2 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +1 -0
- xinference/web/ui/build/static/js/{main.9cfafbd6.js → main.754740c0.js} +3 -3
- xinference/web/ui/build/static/js/main.754740c0.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +1 -0
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/METADATA +9 -3
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/RECORD +72 -74
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +0 -442
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +0 -44
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +0 -115
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +0 -225
- xinference/thirdparty/fish_speech/tools/auto_rerank.py +0 -159
- xinference/thirdparty/fish_speech/tools/gen_ref.py +0 -36
- xinference/thirdparty/fish_speech/tools/merge_asr_files.py +0 -55
- xinference/web/ui/build/static/css/main.632e9148.css.map +0 -1
- xinference/web/ui/build/static/js/main.9cfafbd6.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/01d6d198156bacbd436c51435edbd4b2cacd47a79db929105eba30f74b67d48d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/59eb25f514afcc4fefd1b309d192b2455f1e0aec68a9de598ca4b2333fe2c774.json +0 -1
- /xinference/web/ui/build/static/js/{main.9cfafbd6.js.LICENSE.txt → main.754740c0.js.LICENSE.txt} +0 -0
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/LICENSE +0 -0
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/WHEEL +0 -0
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/top_level.txt +0 -0
|
@@ -4522,17 +4522,154 @@
|
|
|
4522
4522
|
"vision"
|
|
4523
4523
|
],
|
|
4524
4524
|
"model_description": "Qwen2-VL: To See the World More Clearly.Qwen2-VL is the latest version of the vision language models in the Qwen model familities.",
|
|
4525
|
+
"model_specs":[
|
|
4526
|
+
{
|
|
4527
|
+
"model_format":"pytorch",
|
|
4528
|
+
"model_size_in_billions":7,
|
|
4529
|
+
"quantizations":[
|
|
4530
|
+
"none"
|
|
4531
|
+
],
|
|
4532
|
+
"model_hub": "modelscope",
|
|
4533
|
+
"model_id":"qwen/Qwen2-VL-7B-Instruct",
|
|
4534
|
+
"model_revision":"master"
|
|
4535
|
+
},
|
|
4536
|
+
{
|
|
4537
|
+
"model_format":"gptq",
|
|
4538
|
+
"model_size_in_billions":7,
|
|
4539
|
+
"quantizations":[
|
|
4540
|
+
"Int8"
|
|
4541
|
+
],
|
|
4542
|
+
"model_hub": "modelscope",
|
|
4543
|
+
"model_id":"qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8",
|
|
4544
|
+
"model_revision":"master"
|
|
4545
|
+
},
|
|
4546
|
+
{
|
|
4547
|
+
"model_format":"gptq",
|
|
4548
|
+
"model_size_in_billions":7,
|
|
4549
|
+
"quantizations":[
|
|
4550
|
+
"Int4"
|
|
4551
|
+
],
|
|
4552
|
+
"model_hub": "modelscope",
|
|
4553
|
+
"model_id":"qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4",
|
|
4554
|
+
"model_revision":"master"
|
|
4555
|
+
},
|
|
4556
|
+
{
|
|
4557
|
+
"model_format":"awq",
|
|
4558
|
+
"model_size_in_billions":7,
|
|
4559
|
+
"quantizations":[
|
|
4560
|
+
"Int4"
|
|
4561
|
+
],
|
|
4562
|
+
"model_hub": "modelscope",
|
|
4563
|
+
"model_id":"qwen/Qwen2-VL-7B-Instruct-AWQ",
|
|
4564
|
+
"model_revision":"master"
|
|
4565
|
+
},
|
|
4566
|
+
{
|
|
4567
|
+
"model_format":"pytorch",
|
|
4568
|
+
"model_size_in_billions":2,
|
|
4569
|
+
"quantizations":[
|
|
4570
|
+
"none"
|
|
4571
|
+
],
|
|
4572
|
+
"model_hub": "modelscope",
|
|
4573
|
+
"model_id":"qwen/Qwen2-VL-2B-Instruct",
|
|
4574
|
+
"model_revision":"master"
|
|
4575
|
+
},
|
|
4576
|
+
{
|
|
4577
|
+
"model_format":"gptq",
|
|
4578
|
+
"model_size_in_billions":2,
|
|
4579
|
+
"quantizations":[
|
|
4580
|
+
"Int8"
|
|
4581
|
+
],
|
|
4582
|
+
"model_hub": "modelscope",
|
|
4583
|
+
"model_id":"qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8",
|
|
4584
|
+
"model_revision":"master"
|
|
4585
|
+
},
|
|
4586
|
+
{
|
|
4587
|
+
"model_format":"gptq",
|
|
4588
|
+
"model_size_in_billions":2,
|
|
4589
|
+
"quantizations":[
|
|
4590
|
+
"Int4"
|
|
4591
|
+
],
|
|
4592
|
+
"model_hub": "modelscope",
|
|
4593
|
+
"model_id":"qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
|
|
4594
|
+
"model_revision":"master"
|
|
4595
|
+
},
|
|
4596
|
+
{
|
|
4597
|
+
"model_format":"awq",
|
|
4598
|
+
"model_size_in_billions":2,
|
|
4599
|
+
"quantizations":[
|
|
4600
|
+
"Int4"
|
|
4601
|
+
],
|
|
4602
|
+
"model_hub": "modelscope",
|
|
4603
|
+
"model_id":"qwen/Qwen2-VL-2B-Instruct-AWQ",
|
|
4604
|
+
"model_revision":"master"
|
|
4605
|
+
}
|
|
4606
|
+
],
|
|
4607
|
+
"prompt_style": {
|
|
4608
|
+
"style_name": "QWEN",
|
|
4609
|
+
"system_prompt": "You are a helpful assistant",
|
|
4610
|
+
"roles": [
|
|
4611
|
+
"user",
|
|
4612
|
+
"assistant"
|
|
4613
|
+
]
|
|
4614
|
+
}
|
|
4615
|
+
},
|
|
4616
|
+
{
|
|
4617
|
+
"version": 1,
|
|
4618
|
+
"context_length": 32768,
|
|
4619
|
+
"model_name": "minicpm3-4b",
|
|
4620
|
+
"model_lang": [
|
|
4621
|
+
"zh"
|
|
4622
|
+
],
|
|
4623
|
+
"model_ability": [
|
|
4624
|
+
"chat"
|
|
4625
|
+
],
|
|
4626
|
+
"model_description": "MiniCPM3-4B is the 3rd generation of MiniCPM series. The overall performance of MiniCPM3-4B surpasses Phi-3.5-mini-Instruct and GPT-3.5-Turbo-0125, being comparable with many recent 7B~9B models.",
|
|
4525
4627
|
"model_specs": [
|
|
4526
4628
|
{
|
|
4527
4629
|
"model_format": "pytorch",
|
|
4528
|
-
"model_size_in_billions":
|
|
4630
|
+
"model_size_in_billions": 4,
|
|
4529
4631
|
"quantizations": [
|
|
4530
4632
|
"none"
|
|
4531
4633
|
],
|
|
4532
4634
|
"model_hub": "modelscope",
|
|
4533
|
-
"model_id": "
|
|
4635
|
+
"model_id": "OpenBMB/MiniCPM3-4B",
|
|
4534
4636
|
"model_revision": "master"
|
|
4535
4637
|
},
|
|
4638
|
+
{
|
|
4639
|
+
"model_format": "gptq",
|
|
4640
|
+
"model_size_in_billions": 4,
|
|
4641
|
+
"quantizations": [
|
|
4642
|
+
"Int4"
|
|
4643
|
+
],
|
|
4644
|
+
"model_hub": "modelscope",
|
|
4645
|
+
"model_id": "OpenBMB/MiniCPM3-4B-GPTQ-Int4",
|
|
4646
|
+
"model_revision": "master"
|
|
4647
|
+
}
|
|
4648
|
+
],
|
|
4649
|
+
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
4650
|
+
"stop_token_ids": [
|
|
4651
|
+
1,
|
|
4652
|
+
2
|
|
4653
|
+
],
|
|
4654
|
+
"stop": [
|
|
4655
|
+
"<s>",
|
|
4656
|
+
"</s>"
|
|
4657
|
+
]
|
|
4658
|
+
},
|
|
4659
|
+
{
|
|
4660
|
+
"version": 1,
|
|
4661
|
+
"context_length": 32768,
|
|
4662
|
+
"model_name": "qwen2-audio-instruct",
|
|
4663
|
+
"model_lang": [
|
|
4664
|
+
"en",
|
|
4665
|
+
"zh"
|
|
4666
|
+
],
|
|
4667
|
+
"model_ability": [
|
|
4668
|
+
"chat",
|
|
4669
|
+
"audio"
|
|
4670
|
+
],
|
|
4671
|
+
"model_description": "Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
|
|
4672
|
+
"model_specs": [
|
|
4536
4673
|
{
|
|
4537
4674
|
"model_format": "pytorch",
|
|
4538
4675
|
"model_size_in_billions": 7,
|
|
@@ -4540,7 +4677,7 @@
|
|
|
4540
4677
|
"none"
|
|
4541
4678
|
],
|
|
4542
4679
|
"model_hub": "modelscope",
|
|
4543
|
-
"model_id": "qwen/Qwen2-
|
|
4680
|
+
"model_id": "qwen/Qwen2-Audio-7B-Instruct",
|
|
4544
4681
|
"model_revision": "master"
|
|
4545
4682
|
}
|
|
4546
4683
|
],
|
|
@@ -4552,5 +4689,273 @@
|
|
|
4552
4689
|
"assistant"
|
|
4553
4690
|
]
|
|
4554
4691
|
}
|
|
4692
|
+
},
|
|
4693
|
+
{
|
|
4694
|
+
"version": 1,
|
|
4695
|
+
"context_length": 32768,
|
|
4696
|
+
"model_name": "qwen2-audio",
|
|
4697
|
+
"model_lang": [
|
|
4698
|
+
"en",
|
|
4699
|
+
"zh"
|
|
4700
|
+
],
|
|
4701
|
+
"model_ability": [
|
|
4702
|
+
"chat",
|
|
4703
|
+
"audio"
|
|
4704
|
+
],
|
|
4705
|
+
"model_description": "Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
|
|
4706
|
+
"model_specs": [
|
|
4707
|
+
{
|
|
4708
|
+
"model_format": "pytorch",
|
|
4709
|
+
"model_size_in_billions": 7,
|
|
4710
|
+
"quantizations": [
|
|
4711
|
+
"none"
|
|
4712
|
+
],
|
|
4713
|
+
"model_hub": "modelscope",
|
|
4714
|
+
"model_id": "qwen/Qwen2-Audio-7B",
|
|
4715
|
+
"model_revision": "master"
|
|
4716
|
+
}
|
|
4717
|
+
],
|
|
4718
|
+
"prompt_style": {
|
|
4719
|
+
"style_name": "QWEN",
|
|
4720
|
+
"system_prompt": "You are a helpful assistant",
|
|
4721
|
+
"roles": [
|
|
4722
|
+
"user",
|
|
4723
|
+
"assistant"
|
|
4724
|
+
]
|
|
4725
|
+
}
|
|
4726
|
+
},
|
|
4727
|
+
{
|
|
4728
|
+
"version": 1,
|
|
4729
|
+
"context_length": 128000,
|
|
4730
|
+
"model_name": "deepseek-v2",
|
|
4731
|
+
"model_lang": [
|
|
4732
|
+
"en",
|
|
4733
|
+
"zh"
|
|
4734
|
+
],
|
|
4735
|
+
"model_ability": [
|
|
4736
|
+
"chat"
|
|
4737
|
+
],
|
|
4738
|
+
"model_description": "DeepSeek-V2, a strong Mixture-of-Experts (MoE) language model characterized by economical training and efficient inference. ",
|
|
4739
|
+
"model_specs": [
|
|
4740
|
+
{
|
|
4741
|
+
"model_format": "pytorch",
|
|
4742
|
+
"model_size_in_billions": 16,
|
|
4743
|
+
"quantizations": [
|
|
4744
|
+
"4-bit",
|
|
4745
|
+
"8-bit",
|
|
4746
|
+
"none"
|
|
4747
|
+
],
|
|
4748
|
+
"model_id": "deepseek-ai/DeepSeek-V2-Lite",
|
|
4749
|
+
"model_hub": "modelscope",
|
|
4750
|
+
"model_revision": "master"
|
|
4751
|
+
},
|
|
4752
|
+
{
|
|
4753
|
+
"model_format": "pytorch",
|
|
4754
|
+
"model_size_in_billions": 236,
|
|
4755
|
+
"quantizations": [
|
|
4756
|
+
"4-bit",
|
|
4757
|
+
"8-bit",
|
|
4758
|
+
"none"
|
|
4759
|
+
],
|
|
4760
|
+
"model_id": "deepseek-ai/DeepSeek-V2",
|
|
4761
|
+
"model_hub": "modelscope",
|
|
4762
|
+
"model_revision": "master"
|
|
4763
|
+
}
|
|
4764
|
+
]
|
|
4765
|
+
},
|
|
4766
|
+
{
|
|
4767
|
+
"version": 1,
|
|
4768
|
+
"context_length": 128000,
|
|
4769
|
+
"model_name": "deepseek-v2-chat",
|
|
4770
|
+
"model_lang": [
|
|
4771
|
+
"en",
|
|
4772
|
+
"zh"
|
|
4773
|
+
],
|
|
4774
|
+
"model_ability": [
|
|
4775
|
+
"chat"
|
|
4776
|
+
],
|
|
4777
|
+
"model_description": "DeepSeek-V2, a strong Mixture-of-Experts (MoE) language model characterized by economical training and efficient inference. ",
|
|
4778
|
+
"model_specs": [
|
|
4779
|
+
{
|
|
4780
|
+
"model_format": "pytorch",
|
|
4781
|
+
"model_size_in_billions": 16,
|
|
4782
|
+
"quantizations": [
|
|
4783
|
+
"4-bit",
|
|
4784
|
+
"8-bit",
|
|
4785
|
+
"none"
|
|
4786
|
+
],
|
|
4787
|
+
"model_id": "deepseek-ai/DeepSeek-V2-Lite-Chat",
|
|
4788
|
+
"model_hub": "modelscope",
|
|
4789
|
+
"model_revision": "master"
|
|
4790
|
+
},
|
|
4791
|
+
{
|
|
4792
|
+
"model_format": "pytorch",
|
|
4793
|
+
"model_size_in_billions": 236,
|
|
4794
|
+
"quantizations": [
|
|
4795
|
+
"4-bit",
|
|
4796
|
+
"8-bit",
|
|
4797
|
+
"none"
|
|
4798
|
+
],
|
|
4799
|
+
"model_id": "deepseek-ai/DeepSeek-V2-Chat",
|
|
4800
|
+
"model_hub": "modelscope",
|
|
4801
|
+
"model_revision": "master"
|
|
4802
|
+
}
|
|
4803
|
+
],
|
|
4804
|
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ '<|begin▁of▁sentence|>' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + '<|end▁of▁sentence|>' }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
|
|
4805
|
+
"stop_token_ids": [
|
|
4806
|
+
100001
|
|
4807
|
+
],
|
|
4808
|
+
"stop": [
|
|
4809
|
+
"<|end▁of▁sentence|>"
|
|
4810
|
+
]
|
|
4811
|
+
},
|
|
4812
|
+
{
|
|
4813
|
+
"version": 1,
|
|
4814
|
+
"context_length": 128000,
|
|
4815
|
+
"model_name": "deepseek-v2-chat-0628",
|
|
4816
|
+
"model_lang": [
|
|
4817
|
+
"en",
|
|
4818
|
+
"zh"
|
|
4819
|
+
],
|
|
4820
|
+
"model_ability": [
|
|
4821
|
+
"chat"
|
|
4822
|
+
],
|
|
4823
|
+
"model_description": "DeepSeek-V2-Chat-0628 is an improved version of DeepSeek-V2-Chat. ",
|
|
4824
|
+
"model_specs": [
|
|
4825
|
+
{
|
|
4826
|
+
"model_format": "pytorch",
|
|
4827
|
+
"model_size_in_billions": 236,
|
|
4828
|
+
"quantizations": [
|
|
4829
|
+
"4-bit",
|
|
4830
|
+
"8-bit",
|
|
4831
|
+
"none"
|
|
4832
|
+
],
|
|
4833
|
+
"model_id": "deepseek-ai/DeepSeek-V2-Chat-0628",
|
|
4834
|
+
"model_hub": "modelscope",
|
|
4835
|
+
"model_revision": "master"
|
|
4836
|
+
}
|
|
4837
|
+
],
|
|
4838
|
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ '<|begin▁of▁sentence|>' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|User|>' + message['content'] }}{% elif message['role'] == 'assistant' %}{{ '<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>' }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|Assistant|>' }}{% endif %}",
|
|
4839
|
+
"stop_token_ids": [
|
|
4840
|
+
100001
|
|
4841
|
+
],
|
|
4842
|
+
"stop": [
|
|
4843
|
+
"<|end▁of▁sentence|>"
|
|
4844
|
+
]
|
|
4845
|
+
},
|
|
4846
|
+
{
|
|
4847
|
+
"version": 1,
|
|
4848
|
+
"context_length": 128000,
|
|
4849
|
+
"model_name": "deepseek-v2.5",
|
|
4850
|
+
"model_lang": [
|
|
4851
|
+
"en",
|
|
4852
|
+
"zh"
|
|
4853
|
+
],
|
|
4854
|
+
"model_ability": [
|
|
4855
|
+
"chat"
|
|
4856
|
+
],
|
|
4857
|
+
"model_description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions.",
|
|
4858
|
+
"model_specs": [
|
|
4859
|
+
{
|
|
4860
|
+
"model_format": "pytorch",
|
|
4861
|
+
"model_size_in_billions": 236,
|
|
4862
|
+
"quantizations": [
|
|
4863
|
+
"4-bit",
|
|
4864
|
+
"8-bit",
|
|
4865
|
+
"none"
|
|
4866
|
+
],
|
|
4867
|
+
"model_id": "deepseek-ai/DeepSeek-V2.5",
|
|
4868
|
+
"model_hub": "modelscope",
|
|
4869
|
+
"model_revision": "master"
|
|
4870
|
+
}
|
|
4871
|
+
],
|
|
4872
|
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %} {%- if message['role'] == 'system' %} {% set ns.system_prompt = message['content'] %} {%- endif %}{%- endfor %}{{'<|begin▁of▁sentence|>'}}{{ns.system_prompt}}{%- for message in messages %} {%- if message['role'] == 'user' %} {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is none %} {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls']%} {%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}} {%- set ns.is_first = true -%} {%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {%- endif %} {%- endfor %} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is not none %} {%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- set ns.is_tool = false -%} {%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- endif %} {%- endif %} {%- if message['role'] == 'tool' %} {%- set ns.is_tool = true -%} {%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- set ns.is_output_first = false %} {%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- endif %} {%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
|
|
4873
|
+
"stop_token_ids": [
|
|
4874
|
+
100001
|
|
4875
|
+
],
|
|
4876
|
+
"stop": [
|
|
4877
|
+
"<|end▁of▁sentence|>"
|
|
4878
|
+
]
|
|
4879
|
+
},
|
|
4880
|
+
{
|
|
4881
|
+
"version": 1,
|
|
4882
|
+
"context_length": 131072,
|
|
4883
|
+
"model_name": "yi-coder-chat",
|
|
4884
|
+
"model_lang": [
|
|
4885
|
+
"en"
|
|
4886
|
+
],
|
|
4887
|
+
"model_ability": [
|
|
4888
|
+
"chat"
|
|
4889
|
+
],
|
|
4890
|
+
"model_description": "Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.Excelling in long-context understanding with a maximum context length of 128K tokens.Supporting 52 major programming languages, including popular ones such as Java, Python, JavaScript, and C++.",
|
|
4891
|
+
"model_specs": [
|
|
4892
|
+
{
|
|
4893
|
+
"model_format": "pytorch",
|
|
4894
|
+
"model_size_in_billions": 9,
|
|
4895
|
+
"quantizations": [
|
|
4896
|
+
"none"
|
|
4897
|
+
],
|
|
4898
|
+
"model_hub": "modelscope",
|
|
4899
|
+
"model_id": "01ai/Yi-Coder-9B-Chat",
|
|
4900
|
+
"model_revision": "master"
|
|
4901
|
+
},
|
|
4902
|
+
{
|
|
4903
|
+
"model_format": "pytorch",
|
|
4904
|
+
"model_size_in_billions": "1_5",
|
|
4905
|
+
"quantizations": [
|
|
4906
|
+
"none"
|
|
4907
|
+
],
|
|
4908
|
+
"model_hub": "modelscope",
|
|
4909
|
+
"model_id": "01ai/Yi-Coder-1.5B-Chat",
|
|
4910
|
+
"model_revision": "master"
|
|
4911
|
+
}
|
|
4912
|
+
],
|
|
4913
|
+
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}",
|
|
4914
|
+
"stop_token_ids": [
|
|
4915
|
+
1,
|
|
4916
|
+
2,
|
|
4917
|
+
6,
|
|
4918
|
+
7
|
|
4919
|
+
],
|
|
4920
|
+
"stop": [
|
|
4921
|
+
"<|startoftext|>",
|
|
4922
|
+
"<|endoftext|>",
|
|
4923
|
+
"<|im_start|>",
|
|
4924
|
+
"<|im_end|>"
|
|
4925
|
+
]
|
|
4926
|
+
},
|
|
4927
|
+
{
|
|
4928
|
+
"version": 1,
|
|
4929
|
+
"context_length": 131072,
|
|
4930
|
+
"model_name": "yi-coder",
|
|
4931
|
+
"model_lang": [
|
|
4932
|
+
"en"
|
|
4933
|
+
],
|
|
4934
|
+
"model_ability": [
|
|
4935
|
+
"generate"
|
|
4936
|
+
],
|
|
4937
|
+
"model_description": "Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.Excelling in long-context understanding with a maximum context length of 128K tokens.Supporting 52 major programming languages, including popular ones such as Java, Python, JavaScript, and C++.",
|
|
4938
|
+
"model_specs": [
|
|
4939
|
+
{
|
|
4940
|
+
"model_format": "pytorch",
|
|
4941
|
+
"model_size_in_billions": 9,
|
|
4942
|
+
"quantizations": [
|
|
4943
|
+
"none"
|
|
4944
|
+
],
|
|
4945
|
+
"model_hub": "modelscope",
|
|
4946
|
+
"model_id": "01ai/Yi-Coder-9B",
|
|
4947
|
+
"model_revision": "master"
|
|
4948
|
+
},
|
|
4949
|
+
{
|
|
4950
|
+
"model_format": "pytorch",
|
|
4951
|
+
"model_size_in_billions": "1_5",
|
|
4952
|
+
"quantizations": [
|
|
4953
|
+
"none"
|
|
4954
|
+
],
|
|
4955
|
+
"model_hub": "modelscope",
|
|
4956
|
+
"model_id": "01ai/Yi-Coder-1.5B",
|
|
4957
|
+
"model_revision": "master"
|
|
4958
|
+
}
|
|
4959
|
+
]
|
|
4555
4960
|
}
|
|
4556
4961
|
]
|
|
@@ -317,7 +317,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):
|
|
|
317
317
|
|
|
318
318
|
@staticmethod
|
|
319
319
|
def _get_generate_kwargs(generate_config):
|
|
320
|
-
kwargs: Dict[str, Any] = {}
|
|
320
|
+
kwargs: Dict[str, Any] = {} # type: ignore
|
|
321
321
|
generate_config = generate_config or {}
|
|
322
322
|
temperature = generate_config.get("temperature")
|
|
323
323
|
if temperature is not None:
|