xinference 0.9.4__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (103)
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/auth_service.py +47 -18
  3. xinference/api/oauth2/types.py +1 -0
  4. xinference/api/restful_api.py +34 -7
  5. xinference/client/oscar/actor_client.py +4 -3
  6. xinference/client/restful/restful_client.py +20 -4
  7. xinference/conftest.py +13 -2
  8. xinference/core/supervisor.py +48 -1
  9. xinference/core/worker.py +139 -20
  10. xinference/deploy/cmdline.py +119 -20
  11. xinference/model/embedding/core.py +1 -2
  12. xinference/model/llm/__init__.py +4 -6
  13. xinference/model/llm/ggml/llamacpp.py +2 -10
  14. xinference/model/llm/llm_family.json +877 -13
  15. xinference/model/llm/llm_family.py +15 -0
  16. xinference/model/llm/llm_family_modelscope.json +571 -0
  17. xinference/model/llm/pytorch/chatglm.py +2 -0
  18. xinference/model/llm/pytorch/core.py +22 -26
  19. xinference/model/llm/pytorch/deepseek_vl.py +232 -0
  20. xinference/model/llm/pytorch/internlm2.py +2 -0
  21. xinference/model/llm/pytorch/omnilmm.py +153 -0
  22. xinference/model/llm/pytorch/qwen_vl.py +2 -0
  23. xinference/model/llm/pytorch/yi_vl.py +4 -2
  24. xinference/model/llm/utils.py +53 -5
  25. xinference/model/llm/vllm/core.py +54 -6
  26. xinference/model/rerank/core.py +3 -0
  27. xinference/thirdparty/deepseek_vl/__init__.py +31 -0
  28. xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
  29. xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
  30. xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
  31. xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
  32. xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
  33. xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
  34. xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
  35. xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
  36. xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
  37. xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
  38. xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
  39. xinference/thirdparty/omnilmm/__init__.py +0 -0
  40. xinference/thirdparty/omnilmm/chat.py +216 -0
  41. xinference/thirdparty/omnilmm/constants.py +4 -0
  42. xinference/thirdparty/omnilmm/conversation.py +332 -0
  43. xinference/thirdparty/omnilmm/model/__init__.py +1 -0
  44. xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
  45. xinference/thirdparty/omnilmm/model/resampler.py +166 -0
  46. xinference/thirdparty/omnilmm/model/utils.py +563 -0
  47. xinference/thirdparty/omnilmm/train/__init__.py +13 -0
  48. xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
  49. xinference/thirdparty/omnilmm/utils.py +134 -0
  50. xinference/types.py +15 -19
  51. xinference/web/ui/build/asset-manifest.json +3 -3
  52. xinference/web/ui/build/index.html +1 -1
  53. xinference/web/ui/build/static/js/main.76ef2b17.js +3 -0
  54. xinference/web/ui/build/static/js/main.76ef2b17.js.map +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/15e2cf8cd8d0989719b6349428ff576f9009ff4c2dcc52378be0bd938e82495e.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/35d0e4a317e5582cbb79d901302e9d706520ac53f8a734c2fd8bfde6eb5a4f02.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/3c2f277c93c5f1638e08db38df0d0fb4e58d1c5571aea03241a5c04ff4094704.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +1 -0
  59. xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +1 -0
  60. xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +1 -0
  61. xinference/web/ui/node_modules/.cache/babel-loader/59ce49eae0f486af4c5034d4d2f9ca77c3ec3a32ecc560085caf5ef482b5f4c9.json +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/b9cbcb6d77ba21b22c6950b6fb5b305d23c19cf747f99f7d48b6b046f8f7b1b0.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/d076fd56cf3b15ed2433e3744b98c6b4e4410a19903d1db4de5bba0e1a1b3347.json +1 -0
  67. xinference/web/ui/node_modules/.cache/babel-loader/daad8131d91134f6d7aef895a0c9c32e1cb928277cb5aa66c01028126d215be0.json +1 -0
  68. xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +1 -0
  69. xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +1 -0
  70. xinference/web/ui/node_modules/.cache/babel-loader/e6eccc9aa641e7da833492e27846dc965f9750281420977dc84654ca6ed221e4.json +1 -0
  71. xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +1 -0
  72. xinference/web/ui/node_modules/.cache/babel-loader/f16aec63602a77bd561d0e67fa00b76469ac54b8033754bba114ec5eb3257964.json +1 -0
  73. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/METADATA +25 -12
  74. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/RECORD +79 -58
  75. xinference/model/llm/ggml/ctransformers.py +0 -281
  76. xinference/model/llm/ggml/ctransformers_util.py +0 -161
  77. xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
  78. xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
  79. xinference/web/ui/node_modules/.cache/babel-loader/0bd70b1ecf307e2681318e864f4692305b6350c8683863007f4caf2f9ac33b6e.json +0 -1
  80. xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +0 -1
  81. xinference/web/ui/node_modules/.cache/babel-loader/18e5d5422e2464abf4a3e6d38164570e2e426e0a921e9a2628bbae81b18da353.json +0 -1
  82. xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/3e055de705e397e1d413d7f429589b1a98dd78ef378b97f0cdb462c5f2487d5e.json +0 -1
  84. xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +0 -1
  85. xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +0 -1
  86. xinference/web/ui/node_modules/.cache/babel-loader/60c4b98d8ea7479fb0c94cfd19c8128f17bd7e27a1e73e6dd9adf6e9d88d18eb.json +0 -1
  87. xinference/web/ui/node_modules/.cache/babel-loader/7e094845f611802b024b57439cbf911038169d06cdf6c34a72a7277f35aa71a4.json +0 -1
  88. xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +0 -1
  89. xinference/web/ui/node_modules/.cache/babel-loader/98b7ef307f436affe13d75a4f265b27e828ccc2b10ffae6513abe2681bc11971.json +0 -1
  90. xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +0 -1
  91. xinference/web/ui/node_modules/.cache/babel-loader/b400cfc9db57fa6c70cd2bad055b73c5079fde0ed37974009d898083f6af8cd8.json +0 -1
  92. xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +0 -1
  93. xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
  94. xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +0 -1
  95. xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +0 -1
  96. xinference/web/ui/node_modules/.cache/babel-loader/e1d9b2ae4e1248658704bc6bfc5d6160dcd1a9e771ea4ae8c1fed0aaddeedd29.json +0 -1
  97. xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
  98. xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +0 -1
  99. /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.76ef2b17.js.LICENSE.txt} +0 -0
  100. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/LICENSE +0 -0
  101. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/WHEEL +0 -0
  102. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/entry_points.txt +0 -0
  103. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.py
@@ -199,6 +199,21 @@ class CustomLLMFamilyV1(LLMFamilyV1):
                 )
             llm_spec.prompt_style = BUILTIN_LLM_PROMPT_STYLE[prompt_style_name]
 
+        # check model ability, registering LLM only provides generate and chat
+        # but for vision models, we add back the abilities so that
+        # gradio chat interface can be generated properly
+        if (
+            llm_spec.model_family != "other"
+            and llm_spec.model_family
+            in {
+                family.model_name
+                for family in BUILTIN_LLM_FAMILIES
+                if "vision" in family.model_ability
+            }
+            and "vision" not in llm_spec.model_ability
+        ):
+            llm_spec.model_ability.append("vision")
+
         return llm_spec
 
 
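For context, the added check backfills the "vision" ability for custom registrations whose model_family names a built-in vision family. A minimal standalone sketch of the same logic (the family set below is illustrative and stands in for BUILTIN_LLM_FAMILIES; it is not taken from the diff):

# Illustrative sketch of the ability backfill above; names are hypothetical.
BUILTIN_VISION_FAMILIES = {"qwen-vl-chat", "yi-vl-chat", "deepseek-vl-chat", "OmniLMM"}

def backfill_vision_ability(model_family: str, model_ability: list) -> list:
    # Only backfill when the declared family is a built-in vision family
    # and "vision" was not part of the registration.
    if (
        model_family != "other"
        and model_family in BUILTIN_VISION_FAMILIES
        and "vision" not in model_ability
    ):
        model_ability.append("vision")
    return model_ability

print(backfill_vision_ability("deepseek-vl-chat", ["chat"]))  # ['chat', 'vision']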
xinference/model/llm/llm_family_modelscope.json
@@ -388,6 +388,50 @@
             ]
         }
     },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "chatglm3-128k",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 6,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "ZhipuAI/chatglm3-6b-128k",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "CHATGLM3",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                64795,
+                64797,
+                2
+            ],
+            "stop": [
+                "<|user|>",
+                "<|observation|>"
+            ]
+        }
+    },
     {
         "version": 1,
         "context_length": 2048,
xinference/model/llm/llm_family_modelscope.json
@@ -1781,6 +1825,17 @@
             "model_id": "qwen/Qwen1.5-14B-Chat",
             "model_hub": "modelscope"
         },
+        {
+            "model_format": "pytorch",
+            "model_size_in_billions": 32,
+            "quantizations": [
+                "4-bit",
+                "8-bit",
+                "none"
+            ],
+            "model_id": "qwen/Qwen1.5-32B-Chat",
+            "model_hub": "modelscope"
+        },
         {
             "model_format": "pytorch",
             "model_size_in_billions": 72,
@@ -1842,6 +1897,15 @@
             "model_id": "qwen/Qwen1.5-14B-Chat-GPTQ-{quantization}",
             "model_hub": "modelscope"
         },
+        {
+            "model_format": "gptq",
+            "model_size_in_billions": 32,
+            "quantizations": [
+                "Int4"
+            ],
+            "model_id": "qwen/Qwen1.5-32B-Chat-GPTQ-{quantization}",
+            "model_hub": "modelscope"
+        },
         {
             "model_format": "gptq",
             "model_size_in_billions": 72,
@@ -1897,6 +1961,15 @@
             "model_id": "qwen/Qwen1.5-14B-Chat-AWQ",
             "model_hub": "modelscope"
         },
+        {
+            "model_format": "awq",
+            "model_size_in_billions": 32,
+            "quantizations": [
+                "Int4"
+            ],
+            "model_id": "qwen/Qwen1.5-32B-Chat-AWQ",
+            "model_hub": "modelscope"
+        },
         {
             "model_format": "awq",
             "model_size_in_billions": 72,
@@ -1991,6 +2064,23 @@
             "model_hub": "modelscope",
             "model_file_name_template": "qwen1_5-14b-chat-{quantization}.gguf"
         },
+        {
+            "model_format": "ggufv2",
+            "model_size_in_billions": 32,
+            "quantizations": [
+                "q2_k",
+                "q3_k_m",
+                "q4_0",
+                "q4_k_m",
+                "q5_0",
+                "q5_k_m",
+                "q6_k",
+                "q8_0"
+            ],
+            "model_id": "qwen/Qwen1.5-32B-Chat-GGUF",
+            "model_hub": "modelscope",
+            "model_file_name_template": "qwen1_5-32b-chat-{quantization}.gguf"
+        },
         {
             "model_format": "ggufv2",
             "model_size_in_billions": 72,
@@ -2031,6 +2121,107 @@
             ]
         }
     },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "qwen1.5-moe-chat",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "Qwen1.5-MoE is a transformer-based MoE decoder-only language model pretrained on a large amount of data.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "2_7",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "qwen/Qwen1.5-MoE-A2.7B-Chat",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": "2_7",
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4",
+                "model_hub": "modelscope"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "QWEN",
+            "system_prompt": "You are a helpful assistant.",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "intra_message_sep": "\n",
+            "stop_token_ids": [
+                151643,
+                151644,
+                151645
+            ],
+            "stop": [
+                "<|endoftext|>",
+                "<|im_start|>",
+                "<|im_end|>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "deepseek-vl-chat",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "DeepSeek-VL possesses general multimodal understanding capabilities, capable of processing logical diagrams, web pages, formula recognition, scientific literature, natural images, and embodied intelligence in complex scenarios.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "1_3",
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "deepseek-ai/deepseek-vl-1.3b-chat",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "deepseek-ai/deepseek-vl-7b-chat",
+                "model_hub": "modelscope"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "DEEPSEEK_CHAT",
+            "system_prompt": "<|begin▁of▁sentence|>",
+            "roles": [
+                "User",
+                "Assistant"
+            ],
+            "intra_message_sep": "\n\n",
+            "inter_message_sep": "<|end▁of▁sentence|>",
+            "stop": [
+                "<|end▁of▁sentence|>"
+            ]
+        }
+    },
     {
         "version": 1,
         "context_length": 4096,
@@ -2474,5 +2665,385 @@
                 "<start_of_turn>"
             ]
         }
+    },
+    {
+        "version": 1,
+        "context_length": 2048,
+        "model_name": "OmniLMM",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "OmniLMM is a family of open-source large multimodal models (LMMs) adept at vision & language modeling.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 3,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "OpenBMB/MiniCPM-V",
+                "model_hub": "modelscope",
+                "model_revision": "master"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 12,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "OpenBMB/OmniLMM-12B",
+                "model_hub": "modelscope",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "OmniLMM",
+            "system_prompt": "The role of first msg should be user",
+            "roles": [
+                "user",
+                "assistant"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "minicpm-2b-sft-bf16",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "OpenBMB/miniCPM-bf16",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "MINICPM-2B",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                1,
+                2
+            ],
+            "stop": [
+                "<s>",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "minicpm-2b-sft-fp32",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "OpenBMB/MiniCPM-2B-sft-fp32",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "MINICPM-2B",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                1,
+                2
+            ],
+            "stop": [
+                "<s>",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "minicpm-2b-dpo-bf16",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "OpenBMB/MiniCPM-2B-dpo-bf16",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "MINICPM-2B",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                1,
+                2
+            ],
+            "stop": [
+                "<s>",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "minicpm-2b-dpo-fp16",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "OpenBMB/MiniCPM-2B-dpo-fp16",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "MINICPM-2B",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                1,
+                2
+            ],
+            "stop": [
+                "<s>",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "minicpm-2b-dpo-fp32",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 2,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "OpenBMB/MiniCPM-2B-dpo-fp32",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "MINICPM-2B",
+            "system_prompt": "",
+            "roles": [
+                "user",
+                "assistant"
+            ],
+            "stop_token_ids": [
+                1,
+                2
+            ],
+            "stop": [
+                "<s>",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 2048,
+        "model_name": "aquila2",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "generate"
+        ],
+        "model_description": "Aquila2 series models are the base language models",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 34,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/Aquila2-34B",
+                "model_revision": "master"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/Aquila2-70B-Expr",
+                "model_revision": "master"
+            }
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 2048,
+        "model_name": "aquila2-chat",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "generate"
+        ],
+        "model_description": "Aquila2-chat series models are the chat models",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 34,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/AquilaChat2-34B",
+                "model_revision": "master"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": 34,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/AquilaChat2-34B-Int4-GPTQ",
+                "model_revision": "master"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 70,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/AquilaChat2-70B-Expr",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "ADD_COLON_SINGLE",
+            "intra_message_sep": "\n",
+            "system_prompt": "",
+            "roles": [
+                "USER",
+                "ASSISTANT"
+            ],
+            "stop_token_ids": [
+                100006,
+                100007
+            ],
+            "stop": [
+                "[CLS]",
+                "</s>"
+            ]
+        }
+    },
+    {
+        "version": 1,
+        "context_length": 16384,
+        "model_name": "aquila2-chat-16k",
+        "model_lang": [
+            "zh"
+        ],
+        "model_ability": [
+            "generate"
+        ],
+        "model_description": "AquilaChat2-16k series models are the long-text chat models",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 34,
+                "quantizations": [
+                    "none"
+                ],
+                "model_hub": "modelscope",
+                "model_id": "BAAI/AquilaChat2-34B-16K",
+                "model_revision": "master"
+            }
+        ],
+        "prompt_style": {
+            "style_name": "ADD_COLON_SINGLE",
+            "intra_message_sep": "\n",
+            "system_prompt": "",
+            "roles": [
+                "USER",
+                "ASSISTANT"
+            ],
+            "stop_token_ids": [
+                100006,
+                100007
+            ],
+            "stop": [
+                "[CLS]",
+                "</s>"
+            ]
+        }
     }
 ]
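All of the entries above declare "model_hub": "modelscope", i.e. they belong to llm_family_modelscope.json. Downloads are routed to ModelScope when the model source is set accordingly; a sketch using the documented XINFERENCE_MODEL_SRC environment variable, which must be set in the environment of the worker that performs the download:

import os

# Set before starting the Xinference worker so that the modelscope
# model_id fields above are used for downloads.
os.environ["XINFERENCE_MODEL_SRC"] = "modelscope"

from xinference.client import Client

client = Client("http://localhost:9997")  # assumed local endpoint
client.launch_model(
    model_name="minicpm-2b-dpo-bf16",
    model_format="pytorch",
    model_size_in_billions=2,
    quantization="none",
)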
xinference/model/llm/pytorch/chatglm.py
@@ -135,6 +135,8 @@ class ChatglmPytorchChatModel(PytorchChatModel):
         chat_history = [h for h in chat_history if not h.get("tool_calls")]
         if not chat_history:
             chat_history = []
+        if system_prompt:
+            chat_history.append({"role": "system", "content": system_prompt})
         if tools:
             msg = self._model.chat(
                 self._tokenizer, prompt, [tools] + chat_history, **kwargs
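The two added lines mean a caller-supplied system prompt now reaches ChatGLM3's history. A standalone sketch of the preprocessing, mirroring the lines shown (the inputs are made up):

def prepare_history(chat_history, system_prompt):
    # Drop tool-call messages, then append the system prompt, as in the diff.
    chat_history = [h for h in (chat_history or []) if not h.get("tool_calls")]
    if system_prompt:
        chat_history.append({"role": "system", "content": system_prompt})
    return chat_history

print(prepare_history(
    [{"role": "assistant", "tool_calls": [{"function": {"name": "f"}}]}],
    "You are a helpful assistant.",
))
# -> [{'role': 'system', 'content': 'You are a helpful assistant.'}]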
xinference/model/llm/pytorch/core.py
@@ -42,6 +42,25 @@ from ..utils import ChatModelMixin
 
 logger = logging.getLogger(__name__)
 
+NON_DEFAULT_MODEL_LIST: List[str] = [
+    "baichuan-chat",
+    "baichuan-2-chat",
+    "vicuna-v1.3",
+    "falcon",
+    "falcon-instruct",
+    "chatglm",
+    "chatglm2",
+    "chatglm2-32k",
+    "chatglm2-128k",
+    "llama-2",
+    "llama-2-chat",
+    "internlm2-chat",
+    "qwen-vl-chat",
+    "OmniLMM",
+    "yi-vl-chat",
+    "deepseek-vl-chat",
+]
+
 
 class PytorchModel(LLM):
     def __init__(
@@ -233,17 +252,7 @@ class PytorchModel(LLM):
         if llm_spec.model_format not in ["pytorch", "gptq", "awq"]:
             return False
         model_family = llm_family.model_family or llm_family.model_name
-        if model_family in [
-            "baichuan-chat",
-            "vicuna-v1.3",
-            "falcon",
-            "falcon-instruct",
-            "chatglm",
-            "chatglm2",
-            "chatglm2-32k",
-            "llama-2",
-            "llama-2-chat",
-        ]:
+        if model_family in NON_DEFAULT_MODEL_LIST:
             return False
         if "generate" not in llm_family.model_ability:
             return False
@@ -452,21 +461,8 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
     ) -> bool:
         if llm_spec.model_format not in ["pytorch", "gptq", "awq"]:
             return False
-        if llm_family.model_name in [
-            "baichuan-chat",
-            "baichuan-2-chat",
-            "vicuna-v1.3",
-            "falcon",
-            "falcon-instruct",
-            "chatglm",
-            "chatglm2",
-            "chatglm2-32k",
-            "llama-2",
-            "llama-2-chat",
-            "internlm2-chat",
-            "qwen-vl-chat",
-            "yi-vl-chat",
-        ]:
+        model_family = llm_family.model_family or llm_family.model_name
+        if model_family in NON_DEFAULT_MODEL_LIST:
             return False
         if "chat" not in llm_family.model_ability:
             return False
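Net effect of the last three hunks: both match() implementations now resolve the effective family (model_family, falling back to model_name) and consult the shared NON_DEFAULT_MODEL_LIST, so custom models derived from families like deepseek-vl-chat are kept off the generic PyTorch path and routed to their dedicated implementations. A condensed sketch (simplified signature, list truncated):

NON_DEFAULT_MODEL_LIST = ["chatglm2-128k", "OmniLMM", "deepseek-vl-chat"]  # truncated

def match_chat(model_name, model_family, model_format, model_ability):
    if model_format not in ["pytorch", "gptq", "awq"]:
        return False
    family = model_family or model_name  # new: fall back to the family
    if family in NON_DEFAULT_MODEL_LIST:
        return False  # handled by a dedicated model class instead
    return "chat" in model_ability

# A custom model whose family is deepseek-vl-chat no longer matches:
print(match_chat("my-vl", "deepseek-vl-chat", "pytorch", ["chat", "vision"]))  # False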