xinference 0.9.4__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/oauth2/auth_service.py +47 -18
- xinference/api/oauth2/types.py +1 -0
- xinference/api/restful_api.py +34 -7
- xinference/client/oscar/actor_client.py +4 -3
- xinference/client/restful/restful_client.py +20 -4
- xinference/conftest.py +13 -2
- xinference/core/supervisor.py +48 -1
- xinference/core/worker.py +139 -20
- xinference/deploy/cmdline.py +119 -20
- xinference/model/embedding/core.py +1 -2
- xinference/model/llm/__init__.py +4 -6
- xinference/model/llm/ggml/llamacpp.py +2 -10
- xinference/model/llm/llm_family.json +877 -13
- xinference/model/llm/llm_family.py +15 -0
- xinference/model/llm/llm_family_modelscope.json +571 -0
- xinference/model/llm/pytorch/chatglm.py +2 -0
- xinference/model/llm/pytorch/core.py +22 -26
- xinference/model/llm/pytorch/deepseek_vl.py +232 -0
- xinference/model/llm/pytorch/internlm2.py +2 -0
- xinference/model/llm/pytorch/omnilmm.py +153 -0
- xinference/model/llm/pytorch/qwen_vl.py +2 -0
- xinference/model/llm/pytorch/yi_vl.py +4 -2
- xinference/model/llm/utils.py +53 -5
- xinference/model/llm/vllm/core.py +54 -6
- xinference/model/rerank/core.py +3 -0
- xinference/thirdparty/deepseek_vl/__init__.py +31 -0
- xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
- xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
- xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
- xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
- xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
- xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
- xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
- xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
- xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
- xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
- xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
- xinference/thirdparty/omnilmm/__init__.py +0 -0
- xinference/thirdparty/omnilmm/chat.py +216 -0
- xinference/thirdparty/omnilmm/constants.py +4 -0
- xinference/thirdparty/omnilmm/conversation.py +332 -0
- xinference/thirdparty/omnilmm/model/__init__.py +1 -0
- xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
- xinference/thirdparty/omnilmm/model/resampler.py +166 -0
- xinference/thirdparty/omnilmm/model/utils.py +563 -0
- xinference/thirdparty/omnilmm/train/__init__.py +13 -0
- xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
- xinference/thirdparty/omnilmm/utils.py +134 -0
- xinference/types.py +15 -19
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.76ef2b17.js +3 -0
- xinference/web/ui/build/static/js/main.76ef2b17.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/15e2cf8cd8d0989719b6349428ff576f9009ff4c2dcc52378be0bd938e82495e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/35d0e4a317e5582cbb79d901302e9d706520ac53f8a734c2fd8bfde6eb5a4f02.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3c2f277c93c5f1638e08db38df0d0fb4e58d1c5571aea03241a5c04ff4094704.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/59ce49eae0f486af4c5034d4d2f9ca77c3ec3a32ecc560085caf5ef482b5f4c9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9cbcb6d77ba21b22c6950b6fb5b305d23c19cf747f99f7d48b6b046f8f7b1b0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d076fd56cf3b15ed2433e3744b98c6b4e4410a19903d1db4de5bba0e1a1b3347.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/daad8131d91134f6d7aef895a0c9c32e1cb928277cb5aa66c01028126d215be0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e6eccc9aa641e7da833492e27846dc965f9750281420977dc84654ca6ed221e4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f16aec63602a77bd561d0e67fa00b76469ac54b8033754bba114ec5eb3257964.json +1 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/METADATA +25 -12
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/RECORD +79 -58
- xinference/model/llm/ggml/ctransformers.py +0 -281
- xinference/model/llm/ggml/ctransformers_util.py +0 -161
- xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
- xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0bd70b1ecf307e2681318e864f4692305b6350c8683863007f4caf2f9ac33b6e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/18e5d5422e2464abf4a3e6d38164570e2e426e0a921e9a2628bbae81b18da353.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3e055de705e397e1d413d7f429589b1a98dd78ef378b97f0cdb462c5f2487d5e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/60c4b98d8ea7479fb0c94cfd19c8128f17bd7e27a1e73e6dd9adf6e9d88d18eb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/7e094845f611802b024b57439cbf911038169d06cdf6c34a72a7277f35aa71a4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/98b7ef307f436affe13d75a4f265b27e828ccc2b10ffae6513abe2681bc11971.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b400cfc9db57fa6c70cd2bad055b73c5079fde0ed37974009d898083f6af8cd8.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e1d9b2ae4e1248658704bc6bfc5d6160dcd1a9e771ea4ae8c1fed0aaddeedd29.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +0 -1
- /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.76ef2b17.js.LICENSE.txt} +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/LICENSE +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/WHEEL +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.py
@@ -199,6 +199,21 @@ class CustomLLMFamilyV1(LLMFamilyV1):
                 )
             llm_spec.prompt_style = BUILTIN_LLM_PROMPT_STYLE[prompt_style_name]
 
+        # check model ability, registering LLM only provides generate and chat
+        # but for vision models, we add back the abilities so that
+        # gradio chat interface can be generated properly
+        if (
+            llm_spec.model_family != "other"
+            and llm_spec.model_family
+            in {
+                family.model_name
+                for family in BUILTIN_LLM_FAMILIES
+                if "vision" in family.model_ability
+            }
+            and "vision" not in llm_spec.model_ability
+        ):
+            llm_spec.model_ability.append("vision")
+
         return llm_spec
 
 
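The hunk above makes CustomLLMFamilyV1 restore the "vision" ability when a registered custom model reuses a builtin vision family. As a rough illustration (not part of the diff), a registration like the following would now get "vision" added back even though it only declares "chat"; the endpoint, model name, and weight path below are placeholders:

    import json
    from xinference.client import Client

    custom_model = {
        "version": 1,
        "context_length": 2048,
        "model_name": "my-qwen-vl",        # hypothetical custom model
        "model_lang": ["en", "zh"],
        "model_ability": ["chat"],         # "vision" is appended automatically
        "model_family": "qwen-vl-chat",    # builtin family with vision ability
        "model_specs": [
            {
                "model_format": "pytorch",
                "model_size_in_billions": 7,
                "quantizations": ["none"],
                "model_uri": "file:///path/to/weights"
            }
        ],
        "prompt_style": "QWEN"             # resolved via BUILTIN_LLM_PROMPT_STYLE
    }

    client = Client("http://localhost:9997")  # assumed local endpoint
    client.register_model(model_type="LLM", model=json.dumps(custom_model), persist=False)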
xinference/model/llm/llm_family_modelscope.json
@@ -388,6 +388,50 @@
             ]
         }
     },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "chatglm3-128k",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 6,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "ZhipuAI/chatglm3-6b-128k",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "CHATGLM3",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                64795,
+                64797,
+                2
+            ],
+            "stop": [
+                "<|user|>",
+                "<|observation|>"
+            ]
+        }
+    },
     {
         "version": 1,
         "context_length": 2048,
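The new chatglm3-128k entry adds a 128K-context ChatGLM3 variant to the ModelScope registry. A minimal launch sketch, assuming a local supervisor on the default port with ModelScope configured as the model source:

    from xinference.client import Client

    client = Client("http://localhost:9997")  # assumed local endpoint
    model_uid = client.launch_model(
        model_name="chatglm3-128k",
        model_format="pytorch",
        model_size_in_billions=6,
        quantization="none",
    )
    model = client.get_model(model_uid)
    print(model.chat("Summarize this very long report ...")["choices"][0]["message"]["content"])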
@@ -1781,6 +1825,17 @@
                 "model_id": "qwen/Qwen1.5-14B-Chat",
                 "model_hub": "modelscope"
             },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "qwen/Qwen1.5-32B-Chat",
+                "model_hub": "modelscope"
+            },
             {
                 "model_format": "pytorch",
                 "model_size_in_billions": 72,
@@ -1842,6 +1897,15 @@
                 "model_id": "qwen/Qwen1.5-14B-Chat-GPTQ-{quantization}",
                 "model_hub": "modelscope"
             },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "qwen/Qwen1.5-32B-Chat-GPTQ-{quantization}",
+                "model_hub": "modelscope"
+            },
             {
                 "model_format": "gptq",
                 "model_size_in_billions": 72,
@@ -1897,6 +1961,15 @@
                 "model_id": "qwen/Qwen1.5-14B-Chat-AWQ",
                 "model_hub": "modelscope"
             },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "qwen/Qwen1.5-32B-Chat-AWQ",
+                "model_hub": "modelscope"
+            },
             {
                 "model_format": "awq",
                 "model_size_in_billions": 72,
@@ -1991,6 +2064,23 @@
                 "model_hub": "modelscope",
                 "model_file_name_template": "qwen1_5-14b-chat-{quantization}.gguf"
             },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "q2_k",
+                    "q3_k_m",
+                    "q4_0",
+                    "q4_k_m",
+                    "q5_0",
+                    "q5_k_m",
+                    "q6_k",
+                    "q8_0"
+                ],
+                "model_id": "qwen/Qwen1.5-32B-Chat-GGUF",
+                "model_hub": "modelscope",
+                "model_file_name_template": "qwen1_5-32b-chat-{quantization}.gguf"
+            },
             {
                 "model_format": "ggufv2",
                 "model_size_in_billions": 72,
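Taken together, the four hunks above register Qwen1.5-32B-Chat on ModelScope in every format xinference supports (pytorch, gptq, awq, ggufv2). The format and quantization arguments select one of these specs at launch time; a sketch, with a placeholder endpoint:

    from xinference.client import Client

    client = Client("http://localhost:9997")  # assumed local endpoint
    model_uid = client.launch_model(
        model_name="qwen1.5-chat",
        model_size_in_billions=32,
        model_format="ggufv2",   # or "pytorch", "gptq", "awq"
        quantization="q4_k_m",   # must be one of the chosen format's quantizations
    )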
@@ -2031,6 +2121,107 @@
             ]
         }
     },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "qwen1.5-moe-chat",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "Qwen1.5-MoE is a transformer-based MoE decoder-only language model pretrained on a large amount of data.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "2_7",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "qwen/Qwen1.5-MoE-A2.7B-Chat",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": "2_7",
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4",
+                "model_hub": "modelscope"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "QWEN",
+            "system_prompt": "You are a helpful assistant.",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "intra_message_sep": "\n",
+            "stop_token_ids": [
+                151643,
+                151644,
+                151645
+            ],
+            "stop": [
+                "<|endoftext|>",
+                "<|im_start|>",
+                "<|im_end|>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "deepseek-vl-chat",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "1_3",
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "deepseek-ai/deepseek-vl-7b-chat",
+                "model_hub": "modelscope"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "DEEPSEEK_CHAT",
+            "system_prompt": "<|begin▁of▁sentence|>",
+            "roles": [
+                "User",
+                "Assistant"
+            ],
+            "intra_message_sep": "\n\n",
+            "inter_message_sep": "<|end▁of▁sentence|>",
+            "stop": [
+                "<|end▁of▁sentence|>"
+            ]
+        }
+    },
     {
         "version": 1,
         "context_length": 4096,
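deepseek-vl-chat is registered with both "chat" and "vision" abilities, so it should accept OpenAI-style multimodal message content. A sketch of an image-grounded chat, assuming a local endpoint, a reachable image URL, and the content-part prompt format xinference uses for its other vision models:

    from xinference.client import Client

    client = Client("http://localhost:9997")  # assumed local endpoint
    model_uid = client.launch_model(model_name="deepseek-vl-chat", model_format="pytorch")
    model = client.get_model(model_uid)

    response = model.chat(
        prompt=[
            {"type": "text", "text": "What is shown in this image?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/demo.png"}},
        ]
    )
    print(response["choices"][0]["message"]["content"])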
@@ -2474,5 +2665,385 @@
                 "<start_of_turn>"
             ]
         }
+    },
+    {
+        "version":1,
+        "context_length":2048,
+        "model_name":"OmniLMM",
+        "model_lang":[
+            "en",
+            "zh"
+        ],
+        "model_ability":[
+            "chat",
+            "vision"
+        ],
+        "model_description":"mniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
+        "model_specs":[
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":3,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"OpenBMB/MiniCPM-V",
+                "model_hub":"modelscope",
+                "model_revision":"master"
+            },
+            {
+                "model_format":"pytorch",
+                "model_size_in_billions":12,
+                "quantizations":[
+                    "none"
+                ],
+                "model_id":"OpenBMB/OmniLMM-12B",
+                "model_hub":"modelscope",
+                "model_revision":"master"
+            }
+        ],
+        "prompt_style":{
+            "style_name":"OmniLMM",
+            "system_prompt":"The role of first msg should be user",
+            "roles":[
+                "user",
+                "assistant"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "minicpm-2b-sft-bf16",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "OpenBMB/miniCPM-bf16",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "MINICPM-2B",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                1,
+                2
+            ],
+            "stop": [
+                "<s>",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "minicpm-2b-sft-fp32",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "OpenBMB/MiniCPM-2B-sft-fp32",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "MINICPM-2B",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                1,
+                2
+            ],
+            "stop": [
+                "<s>",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "minicpm-2b-dpo-bf16",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "OpenBMB/MiniCPM-2B-dpo-bf16",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "MINICPM-2B",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                1,
+                2
+            ],
+            "stop": [
+                "<s>",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "minicpm-2b-dpo-fp16",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "OpenBMB/MiniCPM-2B-dpo-fp16",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "MINICPM-2B",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                1,
+                2
+            ],
+            "stop": [
+                "<s>",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "minicpm-2b-dpo-fp32",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "OpenBMB/MiniCPM-2B-dpo-fp32",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "MINICPM-2B",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                1,
+                2
+            ],
+            "stop": [
+                "<s>",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 2048,
+        "model_name": "aquila2",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "generate"
+        ],
+        "model_description": "Aquila2 series models are the base language models",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 34,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/Aquila2-34B",
+                "model_revision": "master"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/Aquila2-70B-Expr",
+                "model_revision": "master"
+            }
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 2048,
+        "model_name": "aquila2-chat",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "generate"
+        ],
+        "model_description": "Aquila2-chat series models are the chat models",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 34,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/AquilaChat2-34B",
+                "model_revision": "master"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": 34,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/AquilaChat2-34B-Int4-GPTQ",
+                "model_revision": "master"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/AquilaChat2-70B-Expr",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "ADD_COLON_SINGLE",
+            "intra_message_sep": "\n",
+            "system_prompt": "",
+            "roles": [
+                "USER",
+                "ASSISTANT"
+            ],
+            "stop_token_ids": [
+                100006,
+                100007
+            ],
+            "stop": [
+                "[CLS]",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 16384,
+        "model_name": "aquila2-chat-16k",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "generate"
+        ],
+        "model_description": "AquilaChat2-16k series models are the long-text chat models",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 34,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/AquilaChat2-34B-16K",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "ADD_COLON_SINGLE",
+            "intra_message_sep": "\n",
+            "system_prompt": "",
+            "roles": [
+                "USER",
+                "ASSISTANT"
+            ],
+            "stop_token_ids": [
+                100006,
+                100007
+            ],
+            "stop": [
+                "[CLS]",
+                "</s>"
+            ]
+        }
     }
 ]
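The five MiniCPM-2B entries above differ only in alignment stage and checkpoint precision (sft vs. dpo; bf16/fp16/fp32), so each variant is exposed as its own model name rather than as a quantization option. A launch sketch for one variant, with a placeholder endpoint:

    from xinference.client import Client

    client = Client("http://localhost:9997")  # assumed local endpoint
    model_uid = client.launch_model(
        model_name="minicpm-2b-dpo-bf16",
        model_format="pytorch",
        model_size_in_billions=2,
    )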
xinference/model/llm/pytorch/chatglm.py
@@ -135,6 +135,8 @@ class ChatglmPytorchChatModel(PytorchChatModel):
             chat_history = [h for h in chat_history if not h.get("tool_calls")]
         if not chat_history:
             chat_history = []
+        if system_prompt:
+            chat_history.append({"role": "system", "content": system_prompt})
         if tools:
             msg = self._model.chat(
                 self._tokenizer, prompt, [tools] + chat_history, **kwargs
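With this fix, a system prompt passed to a ChatGLM pytorch model is injected as a leading {"role": "system"} history entry instead of being silently dropped. A sketch, assuming a chatglm3 instance is already running under a hypothetical uid:

    from xinference.client import Client

    client = Client("http://localhost:9997")     # assumed local endpoint
    model = client.get_model("my-chatglm3-uid")  # hypothetical running model uid

    response = model.chat(
        prompt="Which tone will you answer in?",
        system_prompt="Answer in the style of a formal auditor.",
        chat_history=[],
    )
    print(response["choices"][0]["message"]["content"])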
xinference/model/llm/pytorch/core.py
@@ -42,6 +42,25 @@ from ..utils import ChatModelMixin
 
 logger = logging.getLogger(__name__)
 
+NON_DEFAULT_MODEL_LIST: List[str] = [
+    "baichuan-chat",
+    "baichuan-2-chat",
+    "vicuna-v1.3",
+    "falcon",
+    "falcon-instruct",
+    "chatglm",
+    "chatglm2",
+    "chatglm2-32k",
+    "chatglm2-128k",
+    "llama-2",
+    "llama-2-chat",
+    "internlm2-chat",
+    "qwen-vl-chat",
+    "OmniLMM",
+    "yi-vl-chat",
+    "deepseek-vl-chat",
+]
+
 
 class PytorchModel(LLM):
     def __init__(
@@ -233,17 +252,7 @@ class PytorchModel(LLM):
         if llm_spec.model_format not in ["pytorch", "gptq", "awq"]:
             return False
         model_family = llm_family.model_family or llm_family.model_name
-        if model_family in [
-            "baichuan-chat",
-            "vicuna-v1.3",
-            "falcon",
-            "falcon-instruct",
-            "chatglm",
-            "chatglm2",
-            "chatglm2-32k",
-            "llama-2",
-            "llama-2-chat",
-        ]:
+        if model_family in NON_DEFAULT_MODEL_LIST:
             return False
         if "generate" not in llm_family.model_ability:
             return False
@@ -452,21 +461,8 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
     ) -> bool:
         if llm_spec.model_format not in ["pytorch", "gptq", "awq"]:
             return False
-        if llm_family.model_name in [
-            "baichuan-chat",
-            "baichuan-2-chat",
-            "vicuna-v1.3",
-            "falcon",
-            "falcon-instruct",
-            "chatglm",
-            "chatglm2",
-            "chatglm2-32k",
-            "llama-2",
-            "llama-2-chat",
-            "internlm2-chat",
-            "qwen-vl-chat",
-            "yi-vl-chat",
-        ]:
+        model_family = llm_family.model_family or llm_family.model_name
+        if model_family in NON_DEFAULT_MODEL_LIST:
             return False
         if "chat" not in llm_family.model_ability:
             return False
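The two hunks above replace the duplicated inline lists with the shared NON_DEFAULT_MODEL_LIST, so PytorchModel and PytorchChatModel defer to dedicated subclasses for the same set of families. A condensed, self-contained sketch of the resulting selection rule (abridged list, illustrative function name):

    from typing import List

    NON_DEFAULT_MODEL_LIST: List[str] = ["qwen-vl-chat", "deepseek-vl-chat"]  # abridged

    def default_chat_class_matches(
        model_family: str, model_format: str, abilities: List[str]
    ) -> bool:
        # Mirrors PytorchChatModel.match: reject unsupported formats, families
        # that have their own implementation, and models without chat ability.
        if model_format not in ["pytorch", "gptq", "awq"]:
            return False
        if model_family in NON_DEFAULT_MODEL_LIST:
            return False
        return "chat" in abilities

    assert not default_chat_class_matches("deepseek-vl-chat", "pytorch", ["chat", "vision"])
    assert default_chat_class_matches("qwen1.5-chat", "gptq", ["chat"])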