xinference 0.15.0__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +204 -1
- xinference/client/restful/restful_client.py +4 -2
- xinference/core/image_interface.py +28 -0
- xinference/core/model.py +28 -0
- xinference/core/supervisor.py +6 -0
- xinference/model/audio/fish_speech.py +9 -9
- xinference/model/audio/model_spec.json +9 -9
- xinference/model/audio/whisper.py +4 -1
- xinference/model/image/core.py +2 -1
- xinference/model/image/model_spec.json +16 -4
- xinference/model/image/model_spec_modelscope.json +16 -4
- xinference/model/image/sdapi.py +136 -0
- xinference/model/image/stable_diffusion/core.py +148 -20
- xinference/model/llm/__init__.py +8 -0
- xinference/model/llm/llm_family.json +393 -0
- xinference/model/llm/llm_family.py +3 -1
- xinference/model/llm/llm_family_modelscope.json +408 -3
- xinference/model/llm/sglang/core.py +3 -0
- xinference/model/llm/transformers/chatglm.py +1 -1
- xinference/model/llm/transformers/core.py +6 -0
- xinference/model/llm/transformers/deepseek_v2.py +340 -0
- xinference/model/llm/transformers/qwen2_audio.py +168 -0
- xinference/model/llm/transformers/qwen2_vl.py +31 -5
- xinference/model/llm/utils.py +104 -84
- xinference/model/llm/vllm/core.py +8 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml +2 -3
- xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +0 -3
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +169 -198
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +4 -27
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +9 -47
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +12 -10
- xinference/thirdparty/fish_speech/tools/api.py +79 -134
- xinference/thirdparty/fish_speech/tools/commons.py +35 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +3 -3
- xinference/thirdparty/fish_speech/tools/file.py +17 -0
- xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +1 -1
- xinference/thirdparty/fish_speech/tools/llama/generate.py +29 -24
- xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +1 -1
- xinference/thirdparty/fish_speech/tools/llama/quantize.py +2 -2
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +34 -0
- xinference/thirdparty/fish_speech/tools/post_api.py +85 -44
- xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +1 -1
- xinference/thirdparty/fish_speech/tools/smart_pad.py +16 -3
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +2 -2
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +4 -2
- xinference/thirdparty/fish_speech/tools/webui.py +12 -146
- xinference/types.py +7 -4
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.632e9148.css → main.5061c4c3.css} +2 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +1 -0
- xinference/web/ui/build/static/js/{main.9cfafbd6.js → main.754740c0.js} +3 -3
- xinference/web/ui/build/static/js/main.754740c0.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +1 -0
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/METADATA +9 -3
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/RECORD +72 -74
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +0 -442
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +0 -44
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +0 -115
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +0 -225
- xinference/thirdparty/fish_speech/tools/auto_rerank.py +0 -159
- xinference/thirdparty/fish_speech/tools/gen_ref.py +0 -36
- xinference/thirdparty/fish_speech/tools/merge_asr_files.py +0 -55
- xinference/web/ui/build/static/css/main.632e9148.css.map +0 -1
- xinference/web/ui/build/static/js/main.9cfafbd6.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/01d6d198156bacbd436c51435edbd4b2cacd47a79db929105eba30f74b67d48d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/59eb25f514afcc4fefd1b309d192b2455f1e0aec68a9de598ca4b2333fe2c774.json +0 -1
- /xinference/web/ui/build/static/js/{main.9cfafbd6.js.LICENSE.txt → main.754740c0.js.LICENSE.txt} +0 -0
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/LICENSE +0 -0
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/WHEEL +0 -0
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/top_level.txt +0 -0
|
@@ -6828,6 +6828,33 @@
|
|
|
6828
6828
|
],
|
|
6829
6829
|
"model_id":"Qwen/Qwen2-VL-2B-Instruct",
|
|
6830
6830
|
"model_revision":"096da3b96240e3d66d35be0e5ccbe282eea8d6b1"
|
|
6831
|
+
},
|
|
6832
|
+
{
|
|
6833
|
+
"model_format":"gptq",
|
|
6834
|
+
"model_size_in_billions":2,
|
|
6835
|
+
"quantizations":[
|
|
6836
|
+
"Int8"
|
|
6837
|
+
],
|
|
6838
|
+
"model_id":"Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8",
|
|
6839
|
+
"model_revision":"d15fb11857ccc566903e2e71341f9db7babb567b"
|
|
6840
|
+
},
|
|
6841
|
+
{
|
|
6842
|
+
"model_format":"gptq",
|
|
6843
|
+
"model_size_in_billions":2,
|
|
6844
|
+
"quantizations":[
|
|
6845
|
+
"Int4"
|
|
6846
|
+
],
|
|
6847
|
+
"model_id":"Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
|
|
6848
|
+
"model_revision":"800d396518c82960ce6d231adecd07bbc474f0a9"
|
|
6849
|
+
},
|
|
6850
|
+
{
|
|
6851
|
+
"model_format":"awq",
|
|
6852
|
+
"model_size_in_billions":2,
|
|
6853
|
+
"quantizations":[
|
|
6854
|
+
"Int4"
|
|
6855
|
+
],
|
|
6856
|
+
"model_id":"Qwen/Qwen2-VL-2B-Instruct-AWQ",
|
|
6857
|
+
"model_revision":"ea8c5854c0044e28626719292de0d9b1a671f6fc"
|
|
6831
6858
|
},
|
|
6832
6859
|
{
|
|
6833
6860
|
"model_format":"pytorch",
|
|
@@ -6837,6 +6864,33 @@
|
|
|
6837
6864
|
],
|
|
6838
6865
|
"model_id":"Qwen/Qwen2-VL-7B-Instruct",
|
|
6839
6866
|
"model_revision":"6010982c1010c3b222fa98afc81575f124aa9bd6"
|
|
6867
|
+
},
|
|
6868
|
+
{
|
|
6869
|
+
"model_format":"gptq",
|
|
6870
|
+
"model_size_in_billions":7,
|
|
6871
|
+
"quantizations":[
|
|
6872
|
+
"Int8"
|
|
6873
|
+
],
|
|
6874
|
+
"model_id":"Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8",
|
|
6875
|
+
"model_revision":"3d152a77eaccfd72d59baedb0b183a1b8fd56e48"
|
|
6876
|
+
},
|
|
6877
|
+
{
|
|
6878
|
+
"model_format":"gptq",
|
|
6879
|
+
"model_size_in_billions":7,
|
|
6880
|
+
"quantizations":[
|
|
6881
|
+
"Int4"
|
|
6882
|
+
],
|
|
6883
|
+
"model_id":"Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4",
|
|
6884
|
+
"model_revision":"5ab897112fa83b9699826be8753ef9184585c77d"
|
|
6885
|
+
},
|
|
6886
|
+
{
|
|
6887
|
+
"model_format":"awq",
|
|
6888
|
+
"model_size_in_billions":7,
|
|
6889
|
+
"quantizations":[
|
|
6890
|
+
"Int4"
|
|
6891
|
+
],
|
|
6892
|
+
"model_id":"Qwen/Qwen2-VL-7B-Instruct-AWQ",
|
|
6893
|
+
"model_revision":"f94216e8b513933bccd567bcd9b7350199f32538"
|
|
6840
6894
|
}
|
|
6841
6895
|
],
|
|
6842
6896
|
"prompt_style":{
|
|
@@ -6851,5 +6905,344 @@
|
|
|
6851
6905
|
"<|endoftext|>"
|
|
6852
6906
|
]
|
|
6853
6907
|
}
|
|
6908
|
+
},
|
|
6909
|
+
{
|
|
6910
|
+
"version": 1,
|
|
6911
|
+
"context_length": 32768,
|
|
6912
|
+
"model_name": "minicpm3-4b",
|
|
6913
|
+
"model_lang": [
|
|
6914
|
+
"zh"
|
|
6915
|
+
],
|
|
6916
|
+
"model_ability": [
|
|
6917
|
+
"chat"
|
|
6918
|
+
],
|
|
6919
|
+
"model_description": "MiniCPM3-4B is the 3rd generation of MiniCPM series. The overall performance of MiniCPM3-4B surpasses Phi-3.5-mini-Instruct and GPT-3.5-Turbo-0125, being comparable with many recent 7B~9B models.",
|
|
6920
|
+
"model_specs": [
|
|
6921
|
+
{
|
|
6922
|
+
"model_format": "pytorch",
|
|
6923
|
+
"model_size_in_billions": 4,
|
|
6924
|
+
"quantizations": [
|
|
6925
|
+
"none"
|
|
6926
|
+
],
|
|
6927
|
+
"model_id": "openbmb/MiniCPM3-4B",
|
|
6928
|
+
"model_revision": "75f9f1097d9d66d11f37fff49210bf940455f8ac"
|
|
6929
|
+
},
|
|
6930
|
+
{
|
|
6931
|
+
"model_format": "gptq",
|
|
6932
|
+
"model_size_in_billions": 4,
|
|
6933
|
+
"quantizations": [
|
|
6934
|
+
"none"
|
|
6935
|
+
],
|
|
6936
|
+
"model_id": "openbmb/MiniCPM3-4B-GPTQ-Int4",
|
|
6937
|
+
"model_revision": "97a66a62f7d09c1ee35b087b42694716a8113dce"
|
|
6938
|
+
}
|
|
6939
|
+
],
|
|
6940
|
+
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
6941
|
+
"stop_token_ids": [
|
|
6942
|
+
1,
|
|
6943
|
+
2
|
|
6944
|
+
],
|
|
6945
|
+
"stop": [
|
|
6946
|
+
"<s>",
|
|
6947
|
+
"</s>"
|
|
6948
|
+
]
|
|
6949
|
+
},
|
|
6950
|
+
{
|
|
6951
|
+
"version":1,
|
|
6952
|
+
"context_length":32768,
|
|
6953
|
+
"model_name":"qwen2-audio-instruct",
|
|
6954
|
+
"model_lang":[
|
|
6955
|
+
"en",
|
|
6956
|
+
"zh"
|
|
6957
|
+
],
|
|
6958
|
+
"model_ability":[
|
|
6959
|
+
"chat",
|
|
6960
|
+
"audio"
|
|
6961
|
+
],
|
|
6962
|
+
"model_description":"Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
|
|
6963
|
+
"model_specs":[
|
|
6964
|
+
{
|
|
6965
|
+
"model_format":"pytorch",
|
|
6966
|
+
"model_size_in_billions":7,
|
|
6967
|
+
"quantizations":[
|
|
6968
|
+
"none"
|
|
6969
|
+
],
|
|
6970
|
+
"model_id":"Qwen/Qwen2-Audio-7B-Instruct",
|
|
6971
|
+
"model_revision":"bac62d2c6808845904c709c17a0402d817558c64"
|
|
6972
|
+
}
|
|
6973
|
+
],
|
|
6974
|
+
"prompt_style":{
|
|
6975
|
+
"style_name":"QWEN",
|
|
6976
|
+
"system_prompt":"You are a helpful assistant",
|
|
6977
|
+
"roles":[
|
|
6978
|
+
"user",
|
|
6979
|
+
"assistant"
|
|
6980
|
+
],
|
|
6981
|
+
"stop": [
|
|
6982
|
+
"<|im_end|>",
|
|
6983
|
+
"<|endoftext|>"
|
|
6984
|
+
]
|
|
6985
|
+
}
|
|
6986
|
+
},
|
|
6987
|
+
{
|
|
6988
|
+
"version":1,
|
|
6989
|
+
"context_length":32768,
|
|
6990
|
+
"model_name":"qwen2-audio",
|
|
6991
|
+
"model_lang":[
|
|
6992
|
+
"en",
|
|
6993
|
+
"zh"
|
|
6994
|
+
],
|
|
6995
|
+
"model_ability":[
|
|
6996
|
+
"chat",
|
|
6997
|
+
"audio"
|
|
6998
|
+
],
|
|
6999
|
+
"model_description":"Qwen2-Audio: A large-scale audio-language model which is capable of accepting various audio signal inputs and performing audio analysis or direct textual responses with regard to speech instructions.",
|
|
7000
|
+
"model_specs":[
|
|
7001
|
+
{
|
|
7002
|
+
"model_format":"pytorch",
|
|
7003
|
+
"model_size_in_billions":7,
|
|
7004
|
+
"quantizations":[
|
|
7005
|
+
"none"
|
|
7006
|
+
],
|
|
7007
|
+
"model_id":"Qwen/Qwen2-Audio-7B",
|
|
7008
|
+
"model_revision":"8577bc71d330c8fa32ffe9f8a1374100759f2466"
|
|
7009
|
+
}
|
|
7010
|
+
],
|
|
7011
|
+
"prompt_style":{
|
|
7012
|
+
"style_name":"QWEN",
|
|
7013
|
+
"system_prompt":"You are a helpful assistant",
|
|
7014
|
+
"roles":[
|
|
7015
|
+
"user",
|
|
7016
|
+
"assistant"
|
|
7017
|
+
],
|
|
7018
|
+
"stop": [
|
|
7019
|
+
"<|im_end|>",
|
|
7020
|
+
"<|endoftext|>"
|
|
7021
|
+
]
|
|
7022
|
+
}
|
|
7023
|
+
},
|
|
7024
|
+
{
|
|
7025
|
+
"version": 1,
|
|
7026
|
+
"context_length": 128000,
|
|
7027
|
+
"model_name": "deepseek-v2",
|
|
7028
|
+
"model_lang": [
|
|
7029
|
+
"en",
|
|
7030
|
+
"zh"
|
|
7031
|
+
],
|
|
7032
|
+
"model_ability": [
|
|
7033
|
+
"generate"
|
|
7034
|
+
],
|
|
7035
|
+
"model_description": "DeepSeek-V2, a strong Mixture-of-Experts (MoE) language model characterized by economical training and efficient inference. ",
|
|
7036
|
+
"model_specs": [
|
|
7037
|
+
{
|
|
7038
|
+
"model_format": "pytorch",
|
|
7039
|
+
"model_size_in_billions": 16,
|
|
7040
|
+
"quantizations": [
|
|
7041
|
+
"4-bit",
|
|
7042
|
+
"8-bit",
|
|
7043
|
+
"none"
|
|
7044
|
+
],
|
|
7045
|
+
"model_id": "deepseek-ai/DeepSeek-V2-Lite",
|
|
7046
|
+
"model_revision": "604d5664dddd88a0433dbae533b7fe9472482de0"
|
|
7047
|
+
},
|
|
7048
|
+
{
|
|
7049
|
+
"model_format": "pytorch",
|
|
7050
|
+
"model_size_in_billions": 236,
|
|
7051
|
+
"quantizations": [
|
|
7052
|
+
"4-bit",
|
|
7053
|
+
"8-bit",
|
|
7054
|
+
"none"
|
|
7055
|
+
],
|
|
7056
|
+
"model_id": "deepseek-ai/DeepSeek-V2",
|
|
7057
|
+
"model_revision": "4461458f186c35188585855f28f77af5661ad489"
|
|
7058
|
+
}
|
|
7059
|
+
]
|
|
7060
|
+
},
|
|
7061
|
+
{
|
|
7062
|
+
"version": 1,
|
|
7063
|
+
"context_length": 128000,
|
|
7064
|
+
"model_name": "deepseek-v2-chat",
|
|
7065
|
+
"model_lang": [
|
|
7066
|
+
"en",
|
|
7067
|
+
"zh"
|
|
7068
|
+
],
|
|
7069
|
+
"model_ability": [
|
|
7070
|
+
"chat"
|
|
7071
|
+
],
|
|
7072
|
+
"model_description": "DeepSeek-V2, a strong Mixture-of-Experts (MoE) language model characterized by economical training and efficient inference. ",
|
|
7073
|
+
"model_specs": [
|
|
7074
|
+
{
|
|
7075
|
+
"model_format": "pytorch",
|
|
7076
|
+
"model_size_in_billions": 16,
|
|
7077
|
+
"quantizations": [
|
|
7078
|
+
"4-bit",
|
|
7079
|
+
"8-bit",
|
|
7080
|
+
"none"
|
|
7081
|
+
],
|
|
7082
|
+
"model_id": "deepseek-ai/DeepSeek-V2-Lite-Chat",
|
|
7083
|
+
"model_revision": "85864749cd611b4353ce1decdb286193298f64c7"
|
|
7084
|
+
},
|
|
7085
|
+
{
|
|
7086
|
+
"model_format": "pytorch",
|
|
7087
|
+
"model_size_in_billions": 236,
|
|
7088
|
+
"quantizations": [
|
|
7089
|
+
"4-bit",
|
|
7090
|
+
"8-bit",
|
|
7091
|
+
"none"
|
|
7092
|
+
],
|
|
7093
|
+
"model_id": "deepseek-ai/DeepSeek-V2-Chat",
|
|
7094
|
+
"model_revision": "8e3f5f6c2226787e41ba3e9283a06389d178c926"
|
|
7095
|
+
}
|
|
7096
|
+
],
|
|
7097
|
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ '<|begin▁of▁sentence|>' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + '<|end▁of▁sentence|>' }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
|
|
7098
|
+
"stop_token_ids": [
|
|
7099
|
+
100001
|
|
7100
|
+
],
|
|
7101
|
+
"stop": [
|
|
7102
|
+
"<|end▁of▁sentence|>"
|
|
7103
|
+
]
|
|
7104
|
+
},
|
|
7105
|
+
{
|
|
7106
|
+
"version": 1,
|
|
7107
|
+
"context_length": 128000,
|
|
7108
|
+
"model_name": "deepseek-v2-chat-0628",
|
|
7109
|
+
"model_lang": [
|
|
7110
|
+
"en",
|
|
7111
|
+
"zh"
|
|
7112
|
+
],
|
|
7113
|
+
"model_ability": [
|
|
7114
|
+
"chat"
|
|
7115
|
+
],
|
|
7116
|
+
"model_description": "DeepSeek-V2-Chat-0628 is an improved version of DeepSeek-V2-Chat. ",
|
|
7117
|
+
"model_specs": [
|
|
7118
|
+
{
|
|
7119
|
+
"model_format": "pytorch",
|
|
7120
|
+
"model_size_in_billions": 236,
|
|
7121
|
+
"quantizations": [
|
|
7122
|
+
"4-bit",
|
|
7123
|
+
"8-bit",
|
|
7124
|
+
"none"
|
|
7125
|
+
],
|
|
7126
|
+
"model_id": "deepseek-ai/DeepSeek-V2-Chat-0628",
|
|
7127
|
+
"model_revision": "5d09e272c2b223830f4e84359cd9dd047a5d7c78"
|
|
7128
|
+
}
|
|
7129
|
+
],
|
|
7130
|
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ '<|begin▁of▁sentence|>' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|User|>' + message['content'] }}{% elif message['role'] == 'assistant' %}{{ '<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>' }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|Assistant|>' }}{% endif %}",
|
|
7131
|
+
"stop_token_ids": [
|
|
7132
|
+
100001
|
|
7133
|
+
],
|
|
7134
|
+
"stop": [
|
|
7135
|
+
"<|end▁of▁sentence|>"
|
|
7136
|
+
]
|
|
7137
|
+
},
|
|
7138
|
+
{
|
|
7139
|
+
"version": 1,
|
|
7140
|
+
"context_length": 128000,
|
|
7141
|
+
"model_name": "deepseek-v2.5",
|
|
7142
|
+
"model_lang": [
|
|
7143
|
+
"en",
|
|
7144
|
+
"zh"
|
|
7145
|
+
],
|
|
7146
|
+
"model_ability": [
|
|
7147
|
+
"chat"
|
|
7148
|
+
],
|
|
7149
|
+
"model_description": "DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions.",
|
|
7150
|
+
"model_specs": [
|
|
7151
|
+
{
|
|
7152
|
+
"model_format": "pytorch",
|
|
7153
|
+
"model_size_in_billions": 236,
|
|
7154
|
+
"quantizations": [
|
|
7155
|
+
"4-bit",
|
|
7156
|
+
"8-bit",
|
|
7157
|
+
"none"
|
|
7158
|
+
],
|
|
7159
|
+
"model_id": "deepseek-ai/DeepSeek-V2.5",
|
|
7160
|
+
"model_revision": "24b08cb750e0c2757de112d2e16327cb21ed4833"
|
|
7161
|
+
}
|
|
7162
|
+
],
|
|
7163
|
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %} {%- if message['role'] == 'system' %} {% set ns.system_prompt = message['content'] %} {%- endif %}{%- endfor %}{{'<|begin▁of▁sentence|>'}}{{ns.system_prompt}}{%- for message in messages %} {%- if message['role'] == 'user' %} {%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is none %} {%- set ns.is_tool = false -%} {%- for tool in message['tool_calls']%} {%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}} {%- set ns.is_first = true -%} {%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}} {%- endif %} {%- endfor %} {%- endif %} {%- if message['role'] == 'assistant' and message['content'] is not none %} {%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- set ns.is_tool = false -%} {%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}} {%- endif %} {%- endif %} {%- if message['role'] == 'tool' %} {%- set ns.is_tool = true -%} {%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- set ns.is_output_first = false %} {%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}} {%- endif %} {%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
|
|
7164
|
+
"stop_token_ids": [
|
|
7165
|
+
100001
|
|
7166
|
+
],
|
|
7167
|
+
"stop": [
|
|
7168
|
+
"<|end▁of▁sentence|>"
|
|
7169
|
+
]
|
|
7170
|
+
},
|
|
7171
|
+
{
|
|
7172
|
+
"version": 1,
|
|
7173
|
+
"context_length": 131072,
|
|
7174
|
+
"model_name": "yi-coder-chat",
|
|
7175
|
+
"model_lang": [
|
|
7176
|
+
"en"
|
|
7177
|
+
],
|
|
7178
|
+
"model_ability": [
|
|
7179
|
+
"chat"
|
|
7180
|
+
],
|
|
7181
|
+
"model_description": "Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.Excelling in long-context understanding with a maximum context length of 128K tokens.Supporting 52 major programming languages, including popular ones such as Java, Python, JavaScript, and C++.",
|
|
7182
|
+
"model_specs": [
|
|
7183
|
+
{
|
|
7184
|
+
"model_format": "pytorch",
|
|
7185
|
+
"model_size_in_billions": 9,
|
|
7186
|
+
"quantizations": [
|
|
7187
|
+
"none"
|
|
7188
|
+
],
|
|
7189
|
+
"model_id": "01ai/Yi-Coder-9B-Chat",
|
|
7190
|
+
"model_revision": "356a1f8d4e4a606d0b879e54191ca809918576b8"
|
|
7191
|
+
},
|
|
7192
|
+
{
|
|
7193
|
+
"model_format": "pytorch",
|
|
7194
|
+
"model_size_in_billions": "1_5",
|
|
7195
|
+
"quantizations": [
|
|
7196
|
+
"none"
|
|
7197
|
+
],
|
|
7198
|
+
"model_id": "01ai/Yi-Coder-1.5B-Chat",
|
|
7199
|
+
"model_revision": "92fdd1b2f1539ac990e7f4a921db5601da2f0299"
|
|
7200
|
+
}
|
|
7201
|
+
],
|
|
7202
|
+
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}",
|
|
7203
|
+
"stop_token_ids": [
|
|
7204
|
+
1,
|
|
7205
|
+
2,
|
|
7206
|
+
6,
|
|
7207
|
+
7
|
|
7208
|
+
],
|
|
7209
|
+
"stop": [
|
|
7210
|
+
"<|startoftext|>",
|
|
7211
|
+
"<|endoftext|>",
|
|
7212
|
+
"<|im_start|>",
|
|
7213
|
+
"<|im_end|>"
|
|
7214
|
+
]
|
|
7215
|
+
},
|
|
7216
|
+
{
|
|
7217
|
+
"version": 1,
|
|
7218
|
+
"context_length": 131072,
|
|
7219
|
+
"model_name": "yi-coder",
|
|
7220
|
+
"model_lang": [
|
|
7221
|
+
"en"
|
|
7222
|
+
],
|
|
7223
|
+
"model_ability": [
|
|
7224
|
+
"generate"
|
|
7225
|
+
],
|
|
7226
|
+
"model_description": "Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.Excelling in long-context understanding with a maximum context length of 128K tokens.Supporting 52 major programming languages, including popular ones such as Java, Python, JavaScript, and C++.",
|
|
7227
|
+
"model_specs": [
|
|
7228
|
+
{
|
|
7229
|
+
"model_format": "pytorch",
|
|
7230
|
+
"model_size_in_billions": 9,
|
|
7231
|
+
"quantizations": [
|
|
7232
|
+
"none"
|
|
7233
|
+
],
|
|
7234
|
+
"model_id": "01-ai/Yi-Coder-9B",
|
|
7235
|
+
"model_revision": "e20f8087a9507ac8bce409dc5db5d0c608124238"
|
|
7236
|
+
},
|
|
7237
|
+
{
|
|
7238
|
+
"model_format": "pytorch",
|
|
7239
|
+
"model_size_in_billions": "1_5",
|
|
7240
|
+
"quantizations": [
|
|
7241
|
+
"none"
|
|
7242
|
+
],
|
|
7243
|
+
"model_id": "01-ai/Yi-Coder-1.5B",
|
|
7244
|
+
"model_revision": "00e59e64f47d3c78e4cfbdd345888479797e8109"
|
|
7245
|
+
}
|
|
7246
|
+
]
|
|
6854
7247
|
}
|
|
6855
7248
|
]
|
|
@@ -132,7 +132,9 @@ class LLMFamilyV1(BaseModel):
|
|
|
132
132
|
context_length: Optional[int] = DEFAULT_CONTEXT_LENGTH
|
|
133
133
|
model_name: str
|
|
134
134
|
model_lang: List[str]
|
|
135
|
-
model_ability: List[
|
|
135
|
+
model_ability: List[
|
|
136
|
+
Literal["embed", "generate", "chat", "tools", "vision", "audio"]
|
|
137
|
+
]
|
|
136
138
|
model_description: Optional[str]
|
|
137
139
|
# reason for not required str here: legacy registration
|
|
138
140
|
model_family: Optional[str]
|