xinference 0.7.5__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (120)
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/__init__.py +13 -0
  3. xinference/api/oauth2/common.py +14 -0
  4. xinference/api/oauth2/core.py +93 -0
  5. xinference/api/oauth2/types.py +36 -0
  6. xinference/api/oauth2/utils.py +44 -0
  7. xinference/api/restful_api.py +216 -27
  8. xinference/client/oscar/actor_client.py +18 -18
  9. xinference/client/restful/restful_client.py +96 -33
  10. xinference/conftest.py +63 -1
  11. xinference/constants.py +1 -0
  12. xinference/core/chat_interface.py +143 -3
  13. xinference/core/metrics.py +83 -0
  14. xinference/core/model.py +244 -181
  15. xinference/core/status_guard.py +86 -0
  16. xinference/core/supervisor.py +57 -7
  17. xinference/core/worker.py +134 -13
  18. xinference/deploy/cmdline.py +142 -16
  19. xinference/deploy/local.py +39 -7
  20. xinference/deploy/supervisor.py +2 -0
  21. xinference/deploy/worker.py +33 -5
  22. xinference/fields.py +4 -1
  23. xinference/model/core.py +8 -1
  24. xinference/model/embedding/core.py +3 -2
  25. xinference/model/embedding/model_spec_modelscope.json +60 -18
  26. xinference/model/image/stable_diffusion/core.py +4 -3
  27. xinference/model/llm/__init__.py +7 -0
  28. xinference/model/llm/ggml/llamacpp.py +3 -2
  29. xinference/model/llm/llm_family.json +87 -3
  30. xinference/model/llm/llm_family.py +15 -5
  31. xinference/model/llm/llm_family_modelscope.json +92 -3
  32. xinference/model/llm/pytorch/chatglm.py +70 -28
  33. xinference/model/llm/pytorch/core.py +11 -30
  34. xinference/model/llm/pytorch/internlm2.py +155 -0
  35. xinference/model/llm/pytorch/utils.py +0 -153
  36. xinference/model/llm/utils.py +37 -8
  37. xinference/model/llm/vllm/core.py +15 -3
  38. xinference/model/multimodal/__init__.py +15 -8
  39. xinference/model/multimodal/core.py +8 -1
  40. xinference/model/multimodal/model_spec.json +9 -0
  41. xinference/model/multimodal/model_spec_modelscope.json +45 -0
  42. xinference/model/multimodal/qwen_vl.py +5 -9
  43. xinference/model/utils.py +7 -2
  44. xinference/types.py +2 -0
  45. xinference/web/ui/build/asset-manifest.json +3 -3
  46. xinference/web/ui/build/index.html +1 -1
  47. xinference/web/ui/build/static/js/main.b83095c2.js +3 -0
  48. xinference/web/ui/build/static/js/{main.236e72e7.js.LICENSE.txt → main.b83095c2.js.LICENSE.txt} +7 -0
  49. xinference/web/ui/build/static/js/main.b83095c2.js.map +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/0a853b2fa1902551e262a2f1a4b7894341f27b3dd9587f2ef7aaea195af89518.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/101923c539819f26ad11fbcbd6f6e56436b285efbb090dcc7dd648c6e924c4a8.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/193e7ba39e70d4bb2895a5cb317f6f293a5fd02e7e324c02a1eba2f83216419c.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/22858de5265f2d279fca9f2f54dfb147e4b2704200dfb5d2ad3ec9769417328f.json +1 -0
  54. xinference/web/ui/node_modules/.cache/babel-loader/27696db5fcd4fcf0e7974cadf1e4a2ab89690474045c3188eafd586323ad13bb.json +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/27bdbe25deab8cf08f7fab8f05f8f26cf84a98809527a37986a4ab73a57ba96a.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/2bee7b8bd3d52976a45d6068e1333df88b943e0e679403c809e45382e3818037.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/30670751f55508ef3b861e13dd71b9e5a10d2561373357a12fc3831a0b77fd93.json +1 -0
  59. xinference/web/ui/node_modules/.cache/babel-loader/3605cd3a96ff2a3b443c70a101575482279ad26847924cab0684d165ba0d2492.json +1 -0
  60. xinference/web/ui/node_modules/.cache/babel-loader/3789ef437d3ecbf945bb9cea39093d1f16ebbfa32dbe6daf35abcfb6d48de6f1.json +1 -0
  61. xinference/web/ui/node_modules/.cache/babel-loader/4942da6bc03bf7373af068e22f916341aabc5b5df855d73c1d348c696724ce37.json +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/4d933e35e0fe79867d3aa6c46db28804804efddf5490347cb6c2c2879762a157.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/4d96f071168af43965e0fab2ded658fa0a15b8d9ca03789a5ef9c5c16a4e3cee.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/52a6136cb2dbbf9c51d461724d9b283ebe74a73fb19d5df7ba8e13c42bd7174d.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/5c408307c982f07f9c09c85c98212d1b1c22548a9194c69548750a3016b91b88.json +1 -0
  67. xinference/web/ui/node_modules/.cache/babel-loader/663adbcb60b942e9cf094c8d9fabe57517f5e5e6e722d28b4948a40b7445a3b8.json +1 -0
  68. xinference/web/ui/node_modules/.cache/babel-loader/666bb2e1b250dc731311a7e4880886177885dfa768508d2ed63e02630cc78725.json +1 -0
  69. xinference/web/ui/node_modules/.cache/babel-loader/71493aadd34d568fbe605cacaba220aa69bd09273251ee4ba27930f8d01fccd8.json +1 -0
  70. xinference/web/ui/node_modules/.cache/babel-loader/8b071db2a5a9ef68dc14d5f606540bd23d9785e365a11997c510656764d2dccf.json +1 -0
  71. xinference/web/ui/node_modules/.cache/babel-loader/8b246d79cd3f6fc78f11777e6a6acca6a2c5d4ecce7f2dd4dcf9a48126440d3c.json +1 -0
  72. xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +1 -0
  73. xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +1 -0
  74. xinference/web/ui/node_modules/.cache/babel-loader/a4d72d3b806ba061919115f0c513738726872e3c79cf258f007519d3f91d1a16.json +1 -0
  75. xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +1 -0
  76. xinference/web/ui/node_modules/.cache/babel-loader/b4e4fccaf8f2489a29081f0bf3b191656bd452fb3c8b5e3c6d92d94f680964d5.json +1 -0
  77. xinference/web/ui/node_modules/.cache/babel-loader/b53eb7c7967f6577bd3e678293c44204fb03ffa7fdc1dd59d3099015c68f6f7f.json +1 -0
  78. xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +1 -0
  79. xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +1 -0
  80. xinference/web/ui/node_modules/.cache/babel-loader/d06af85a84e5c5a29d3acf2dbb5b30c0cf75c8aec4ab5f975e6096f944ee4324.json +1 -0
  81. xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +1 -0
  82. xinference/web/ui/node_modules/.cache/babel-loader/d5e150bff31715977d8f537c970f06d4fe3de9909d7e8342244a83a9f6447121.json +1 -0
  83. xinference/web/ui/node_modules/.cache/babel-loader/de36e5c08fd524e341d664883dda6cb1745acc852a4f1b011a35a0b4615f72fa.json +1 -0
  84. xinference/web/ui/node_modules/.cache/babel-loader/f037ffef5992af0892d6d991053c1dace364cd39a3f11f1a41f92776e8a59459.json +1 -0
  85. xinference/web/ui/node_modules/.cache/babel-loader/f23ab356a8603d4a2aaa74388c2f381675c207d37c4d1c832df922e9655c9a6b.json +1 -0
  86. xinference/web/ui/node_modules/.cache/babel-loader/f7c23b0922f4087b9e2e3e46f15c946b772daa46c28c3a12426212ecaf481deb.json +1 -0
  87. xinference/web/ui/node_modules/.cache/babel-loader/f95a8bd358eeb55fa2f49f1224cc2f4f36006359856744ff09ae4bb295f59ec1.json +1 -0
  88. xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +1 -0
  89. xinference/web/ui/node_modules/.package-lock.json +36 -0
  90. xinference/web/ui/node_modules/@types/cookie/package.json +30 -0
  91. xinference/web/ui/node_modules/@types/hoist-non-react-statics/package.json +33 -0
  92. xinference/web/ui/node_modules/react-cookie/package.json +55 -0
  93. xinference/web/ui/node_modules/universal-cookie/package.json +48 -0
  94. xinference/web/ui/package-lock.json +37 -0
  95. xinference/web/ui/package.json +3 -2
  96. {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/METADATA +17 -6
  97. {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/RECORD +101 -66
  98. xinference/web/ui/build/static/js/main.236e72e7.js +0 -3
  99. xinference/web/ui/build/static/js/main.236e72e7.js.map +0 -1
  100. xinference/web/ui/node_modules/.cache/babel-loader/0cccfbe5d963b8e31eb679f9d9677392839cedd04aa2956ac6b33cf19599d597.json +0 -1
  101. xinference/web/ui/node_modules/.cache/babel-loader/0f3b6cc71b7c83bdc85aa4835927aeb86af2ce0d2ac241917ecfbf90f75c6d27.json +0 -1
  102. xinference/web/ui/node_modules/.cache/babel-loader/2f651cf60b1bde50c0601c7110f77dd44819fb6e2501ff748a631724d91445d4.json +0 -1
  103. xinference/web/ui/node_modules/.cache/babel-loader/42bb623f337ad08ed076484185726e072ca52bb88e373d72c7b052db4c273342.json +0 -1
  104. xinference/web/ui/node_modules/.cache/babel-loader/57af83639c604bd3362d0f03f7505e81c6f67ff77bee7c6bb31f6e5523eba185.json +0 -1
  105. xinference/web/ui/node_modules/.cache/babel-loader/667753ce39ce1d4bcbf9a5f1a103d653be1d19d42f4e1fbaceb9b507679a52c7.json +0 -1
  106. xinference/web/ui/node_modules/.cache/babel-loader/66ed1bd4c06748c1b176a625c25c856997edc787856c73162f82f2b465c5d956.json +0 -1
  107. xinference/web/ui/node_modules/.cache/babel-loader/78f2521da2e2a98b075a2666cb782c7e2c019cd3c72199eecd5901c82d8655df.json +0 -1
  108. xinference/web/ui/node_modules/.cache/babel-loader/8d2b0b3c6988d1894694dcbbe708ef91cfe62d62dac317031f09915ced637953.json +0 -1
  109. xinference/web/ui/node_modules/.cache/babel-loader/9427ae7f1e94ae8dcd2333fb361e381f4054fde07394fe5448658e3417368476.json +0 -1
  110. xinference/web/ui/node_modules/.cache/babel-loader/bcee2b4e76b07620f9087989eb86d43c645ba3c7a74132cf926260af1164af0e.json +0 -1
  111. xinference/web/ui/node_modules/.cache/babel-loader/cc2ddd02ccc1dad1a2737ac247c79e6f6ed2c7836c6b68e511e3048f666b64af.json +0 -1
  112. xinference/web/ui/node_modules/.cache/babel-loader/d2e8e6665a7efc832b43907dadf4e3c896a59eaf8129f9a520882466c8f2e489.json +0 -1
  113. xinference/web/ui/node_modules/.cache/babel-loader/d8a42e9df7157de9f28eecefdf178fd113bf2280d28471b6e32a8a45276042df.json +0 -1
  114. xinference/web/ui/node_modules/.cache/babel-loader/e26750d9556e9741912333349e4da454c53dbfddbfc6002ab49518dcf02af745.json +0 -1
  115. xinference/web/ui/node_modules/.cache/babel-loader/ef42ec014d7bc373b874b2a1ff0dcd785490f125e913698bc049b0bd778e4d66.json +0 -1
  116. xinference/web/ui/node_modules/.cache/babel-loader/fe3eb4d76c79ca98833f686d642224eeeb94cc83ad14300d281623796d087f0a.json +0 -1
  117. {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/LICENSE +0 -0
  118. {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/WHEEL +0 -0
  119. {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/entry_points.txt +0 -0
  120. {xinference-0.7.5.dist-info → xinference-0.8.1.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,8 @@
5
5
  "max_tokens": 512,
6
6
  "language": ["en"],
7
7
  "model_id": "Xorbits/bge-large-en",
8
- "model_revision": "v0.0.1"
8
+ "model_revision": "v0.0.1",
9
+ "model_hub": "modelscope"
9
10
  },
10
11
  {
11
12
  "model_name": "bge-base-en",
@@ -13,7 +14,8 @@
13
14
  "max_tokens": 512,
14
15
  "language": ["en"],
15
16
  "model_id": "Xorbits/bge-base-en",
16
- "model_revision": "v0.0.1"
17
+ "model_revision": "v0.0.1",
18
+ "model_hub": "modelscope"
17
19
  },
18
20
  {
19
21
  "model_name": "gte-large",
@@ -21,7 +23,8 @@
21
23
  "max_tokens": 512,
22
24
  "language": ["en"],
23
25
  "model_id": "Xorbits/gte-large",
24
- "model_revision": "v0.0.1"
26
+ "model_revision": "v0.0.1",
27
+ "model_hub": "modelscope"
25
28
  },
26
29
  {
27
30
  "model_name": "gte-base",
@@ -29,7 +32,8 @@
29
32
  "max_tokens": 512,
30
33
  "language": ["en"],
31
34
  "model_id": "Xorbits/gte-base",
32
- "model_revision": "v0.0.1"
35
+ "model_revision": "v0.0.1",
36
+ "model_hub": "modelscope"
33
37
  },
34
38
  {
35
39
  "model_name": "e5-large-v2",
@@ -37,7 +41,8 @@
37
41
  "max_tokens": 512,
38
42
  "language": ["en"],
39
43
  "model_id": "Xorbits/e5-large-v2",
40
- "model_revision": "v0.0.1"
44
+ "model_revision": "v0.0.1",
45
+ "model_hub": "modelscope"
41
46
  },
42
47
  {
43
48
  "model_name": "bge-large-zh",
@@ -45,7 +50,8 @@
45
50
  "max_tokens": 512,
46
51
  "language": ["zh"],
47
52
  "model_id": "Xorbits/bge-large-zh",
48
- "model_revision": "v0.0.1"
53
+ "model_revision": "v0.0.1",
54
+ "model_hub": "modelscope"
49
55
  },
50
56
  {
51
57
  "model_name": "bge-large-zh-noinstruct",
@@ -53,7 +59,8 @@
53
59
  "max_tokens": 512,
54
60
  "language": ["zh"],
55
61
  "model_id": "Xorbits/bge-large-zh-noinstruct",
56
- "model_revision": "v0.0.1"
62
+ "model_revision": "v0.0.1",
63
+ "model_hub": "modelscope"
57
64
  },
58
65
  {
59
66
  "model_name": "bge-base-zh",
@@ -61,7 +68,8 @@
61
68
  "max_tokens": 512,
62
69
  "language": ["zh"],
63
70
  "model_id": "Xorbits/bge-base-zh",
64
- "model_revision": "v0.0.2"
71
+ "model_revision": "v0.0.2",
72
+ "model_hub": "modelscope"
65
73
  },
66
74
  {
67
75
  "model_name": "multilingual-e5-large",
@@ -69,7 +77,8 @@
69
77
  "max_tokens": 514,
70
78
  "language": ["zh"],
71
79
  "model_id": "Xorbits/multilingual-e5-large",
72
- "model_revision": "v0.0.1"
80
+ "model_revision": "v0.0.1",
81
+ "model_hub": "modelscope"
73
82
  },
74
83
  {
75
84
  "model_name": "bge-small-zh",
@@ -77,7 +86,8 @@
77
86
  "max_tokens": 512,
78
87
  "language": ["zh"],
79
88
  "model_id": "Xorbits/bge-small-zh",
80
- "model_revision": "v0.0.1"
89
+ "model_revision": "v0.0.1",
90
+ "model_hub": "modelscope"
81
91
  },
82
92
  {
83
93
  "model_name": "bge-small-zh-v1.5",
@@ -85,7 +95,8 @@
85
95
  "max_tokens": 512,
86
96
  "language": ["zh"],
87
97
  "model_id": "Xorbits/bge-small-zh-v1.5",
88
- "model_revision": "v0.0.2"
98
+ "model_revision": "v0.0.2",
99
+ "model_hub": "modelscope"
89
100
  },
90
101
  {
91
102
  "model_name": "bge-base-zh-v1.5",
@@ -93,7 +104,8 @@
93
104
  "max_tokens": 512,
94
105
  "language": ["zh"],
95
106
  "model_id": "Xorbits/bge-base-zh-v1.5",
96
- "model_revision": "v0.0.1"
107
+ "model_revision": "v0.0.1",
108
+ "model_hub": "modelscope"
97
109
  },
98
110
  {
99
111
  "model_name": "bge-large-zh-v1.5",
@@ -101,7 +113,8 @@
101
113
  "max_tokens": 512,
102
114
  "language": ["zh"],
103
115
  "model_id": "Xorbits/bge-large-zh-v1.5",
104
- "model_revision": "v0.0.1"
116
+ "model_revision": "v0.0.1",
117
+ "model_hub": "modelscope"
105
118
  },
106
119
  {
107
120
  "model_name": "bge-small-en-v1.5",
@@ -109,7 +122,8 @@
109
122
  "max_tokens": 512,
110
123
  "language": ["en"],
111
124
  "model_id": "Xorbits/bge-small-en-v1.5",
112
- "model_revision": "v0.0.2"
125
+ "model_revision": "v0.0.2",
126
+ "model_hub": "modelscope"
113
127
  },
114
128
  {
115
129
  "model_name": "bge-base-en-v1.5",
@@ -117,7 +131,8 @@
117
131
  "max_tokens": 512,
118
132
  "language": ["en"],
119
133
  "model_id": "Xorbits/bge-base-en-v1.5",
120
- "model_revision": "v0.0.1"
134
+ "model_revision": "v0.0.1",
135
+ "model_hub": "modelscope"
121
136
  },
122
137
  {
123
138
  "model_name": "bge-large-en-v1.5",
@@ -125,7 +140,8 @@
125
140
  "max_tokens": 512,
126
141
  "language": ["en"],
127
142
  "model_id": "Xorbits/bge-large-en-v1.5",
128
- "model_revision": "v0.0.1"
143
+ "model_revision": "v0.0.1",
144
+ "model_hub": "modelscope"
129
145
  },
130
146
  {
131
147
  "model_name": "jina-embeddings-v2-small-en",
@@ -133,7 +149,8 @@
133
149
  "max_tokens": 8192,
134
150
  "language": ["en"],
135
151
  "model_id": "Xorbits/jina-embeddings-v2-small-en",
136
- "model_revision": "v0.0.1"
152
+ "model_revision": "v0.0.1",
153
+ "model_hub": "modelscope"
137
154
  },
138
155
  {
139
156
  "model_name": "jina-embeddings-v2-base-en",
@@ -141,6 +158,31 @@
141
158
  "max_tokens": 8192,
142
159
  "language": ["en"],
143
160
  "model_id": "Xorbits/jina-embeddings-v2-base-en",
144
- "model_revision": "v0.0.1"
161
+ "model_revision": "v0.0.1",
162
+ "model_hub": "modelscope"
163
+ },
164
+ {
165
+ "model_name": "text2vec-large-chinese",
166
+ "dimensions": 1024,
167
+ "max_tokens": 256,
168
+ "language": ["zh"],
169
+ "model_id": "Jerry0/text2vec-large-chinese",
170
+ "model_hub": "modelscope"
171
+ },
172
+ {
173
+ "model_name": "text2vec-base-chinese",
174
+ "dimensions": 768,
175
+ "max_tokens": 128,
176
+ "language": ["zh"],
177
+ "model_id": "Jerry0/text2vec-base-chinese",
178
+ "model_hub": "modelscope"
179
+ },
180
+ {
181
+ "model_name": "text2vec-base-chinese-paraphrase",
182
+ "dimensions": 768,
183
+ "max_tokens": 256,
184
+ "language": ["zh"],
185
+ "model_id": "mwei23/text2vec-base-chinese-paraphrase",
186
+ "model_hub": "modelscope"
145
187
  }
146
188
  ]
@@ -15,6 +15,7 @@
15
15
  import base64
16
16
  import logging
17
17
  import os
18
+ import re
18
19
  import time
19
20
  import uuid
20
21
  from concurrent.futures import ThreadPoolExecutor
@@ -101,7 +102,7 @@ class DiffusionModel:
101
102
  def _gen_base64_image(_img):
102
103
  buffered = BytesIO()
103
104
  _img.save(buffered, format="jpeg")
104
- return base64.b64encode(buffered.getvalue())
105
+ return base64.b64encode(buffered.getvalue()).decode()
105
106
 
106
107
  with ThreadPoolExecutor() as executor:
107
108
  results = list(map(partial(executor.submit, _gen_base64_image), images))
@@ -120,7 +121,7 @@ class DiffusionModel:
120
121
  ):
121
122
  # References:
122
123
  # https://huggingface.co/docs/diffusers/main/en/api/pipelines/controlnet_sdxl
123
- width, height = map(int, size.split("*"))
124
+ width, height = map(int, re.split(r"[^\d]+", size))
124
125
  return self._call_model(
125
126
  prompt=prompt,
126
127
  height=height,
@@ -140,7 +141,7 @@ class DiffusionModel:
140
141
  response_format: str = "url",
141
142
  **kwargs,
142
143
  ):
143
- width, height = map(int, size.split("*"))
144
+ width, height = map(int, re.split(r"[^\d]+", size))
144
145
  return self._call_model(
145
146
  image=image,
146
147
  prompt=prompt,
@@ -21,6 +21,7 @@ from .llm_family import (
21
21
  BUILTIN_LLM_FAMILIES,
22
22
  BUILTIN_LLM_MODEL_CHAT_FAMILIES,
23
23
  BUILTIN_LLM_MODEL_GENERATE_FAMILIES,
24
+ BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES,
24
25
  BUILTIN_LLM_PROMPT_STYLE,
25
26
  BUILTIN_MODELSCOPE_LLM_FAMILIES,
26
27
  LLM_CLASSES,
@@ -47,6 +48,7 @@ def _install():
47
48
  from .pytorch.chatglm import ChatglmPytorchChatModel
48
49
  from .pytorch.core import PytorchChatModel, PytorchModel
49
50
  from .pytorch.falcon import FalconPytorchChatModel, FalconPytorchModel
51
+ from .pytorch.internlm2 import Internlm2PytorchChatModel
50
52
  from .pytorch.llama_2 import LlamaPytorchChatModel, LlamaPytorchModel
51
53
  from .pytorch.vicuna import VicunaPytorchChatModel
52
54
  from .vllm.core import VLLMChatModel, VLLMModel
@@ -79,6 +81,7 @@ def _install():
79
81
  LlamaPytorchChatModel,
80
82
  PytorchChatModel,
81
83
  FalconPytorchModel,
84
+ Internlm2PytorchChatModel,
82
85
  PytorchModel,
83
86
  ]
84
87
  )
@@ -102,6 +105,8 @@ def _install():
102
105
  BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
103
106
  else:
104
107
  BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
108
+ if "tool_call" in model_spec.model_ability:
109
+ BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
105
110
 
106
111
  modelscope_json_path = os.path.join(
107
112
  os.path.dirname(os.path.abspath(__file__)), "llm_family_modelscope.json"
@@ -123,6 +128,8 @@ def _install():
123
128
  BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
124
129
  else:
125
130
  BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
131
+ if "tool_call" in model_spec.model_ability:
132
+ BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
126
133
 
127
134
  from ...constants import XINFERENCE_MODEL_DIR
128
135
 
@@ -306,7 +306,8 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
306
306
 
307
307
  generate_config = self._sanitize_generate_config(generate_config)
308
308
  # TODO(codingl2k1): qwen hacky to set stop for function call.
309
- if tools and self.model_family.model_name == "qwen-chat":
309
+ model_family = self.model_family.model_family or self.model_family.model_name
310
+ if tools and "qwen-chat" == model_family:
310
311
  stop = generate_config.get("stop")
311
312
  if isinstance(stop, str):
312
313
  generate_config["stop"] = [stop, "Observation:"]
@@ -326,6 +327,6 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
326
327
  assert not isinstance(c, Iterator)
327
328
  if tools:
328
329
  return self._tool_calls_completion(
329
- self.model_family.model_name, self.model_uid, c, tools
330
+ self.model_family, self.model_uid, c, tools
330
331
  )
331
332
  return self._to_chat_completion(c)
@@ -535,7 +535,8 @@
535
535
  "zh"
536
536
  ],
537
537
  "model_ability": [
538
- "chat"
538
+ "chat",
539
+ "tools"
539
540
  ],
540
541
  "model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
541
542
  "model_specs": [
@@ -609,6 +610,15 @@
609
610
  "roles": [
610
611
  "user",
611
612
  "assistant"
613
+ ],
614
+ "stop_token_ids": [
615
+ 64795,
616
+ 64797,
617
+ 2
618
+ ],
619
+ "stop":[
620
+ "<|user|>",
621
+ "<|observation|>"
612
622
  ]
613
623
  }
614
624
  },
@@ -1139,14 +1149,15 @@
1139
1149
  },
1140
1150
  {
1141
1151
  "version": 1,
1142
- "context_length": 2048,
1152
+ "context_length": 32768,
1143
1153
  "model_name": "qwen-chat",
1144
1154
  "model_lang": [
1145
1155
  "en",
1146
1156
  "zh"
1147
1157
  ],
1148
1158
  "model_ability": [
1149
- "chat"
1159
+ "chat",
1160
+ "tools"
1150
1161
  ],
1151
1162
  "model_description": "Qwen-chat is a fine-tuned version of the Qwen LLM trained with alignment techniques, specializing in chatting.",
1152
1163
  "model_specs": [
@@ -1172,6 +1183,8 @@
1172
1183
  "model_format": "pytorch",
1173
1184
  "model_size_in_billions": "1_8",
1174
1185
  "quantizations": [
1186
+ "4-bit",
1187
+ "8-bit",
1175
1188
  "none"
1176
1189
  ],
1177
1190
  "model_id": "Qwen/Qwen-1_8B-Chat",
@@ -1181,6 +1194,8 @@
1181
1194
  "model_format": "pytorch",
1182
1195
  "model_size_in_billions": 7,
1183
1196
  "quantizations": [
1197
+ "4-bit",
1198
+ "8-bit",
1184
1199
  "none"
1185
1200
  ],
1186
1201
  "model_id": "Qwen/Qwen-7B-Chat",
@@ -1190,6 +1205,8 @@
1190
1205
  "model_format": "pytorch",
1191
1206
  "model_size_in_billions": 14,
1192
1207
  "quantizations": [
1208
+ "4-bit",
1209
+ "8-bit",
1193
1210
  "none"
1194
1211
  ],
1195
1212
  "model_id": "Qwen/Qwen-14B-Chat",
@@ -1199,6 +1216,8 @@
1199
1216
  "model_format": "pytorch",
1200
1217
  "model_size_in_billions": 72,
1201
1218
  "quantizations": [
1219
+ "4-bit",
1220
+ "8-bit",
1202
1221
  "none"
1203
1222
  ],
1204
1223
  "model_id": "Qwen/Qwen-72B-Chat",
@@ -1213,6 +1232,15 @@
1213
1232
  ],
1214
1233
  "model_id": "Qwen/Qwen-7B-Chat-{quantization}"
1215
1234
  },
1235
+ {
1236
+ "model_format": "gptq",
1237
+ "model_size_in_billions": "1_8",
1238
+ "quantizations": [
1239
+ "Int4",
1240
+ "Int8"
1241
+ ],
1242
+ "model_id": "Qwen/Qwen-1_8B-Chat-{quantization}"
1243
+ },
1216
1244
  {
1217
1245
  "model_format": "gptq",
1218
1246
  "model_size_in_billions": 14,
@@ -2468,6 +2496,14 @@
2468
2496
  ],
2469
2497
  "model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.",
2470
2498
  "model_specs": [
2499
+ {
2500
+ "model_format": "gptq",
2501
+ "model_size_in_billions": 34,
2502
+ "quantizations": [
2503
+ "8bits"
2504
+ ],
2505
+ "model_id": "01-ai/Yi-34B-Chat-{quantization}"
2506
+ },
2471
2507
  {
2472
2508
  "model_format": "pytorch",
2473
2509
  "model_size_in_billions": 34,
@@ -3127,5 +3163,53 @@
3127
3163
  "model_revision": "70d1740208c8ba39f9ba250b22117ec25311ab33"
3128
3164
  }
3129
3165
  ]
3166
+ },
3167
+ {
3168
+ "version": 1,
3169
+ "context_length": 204800,
3170
+ "model_name": "internlm2-chat",
3171
+ "model_lang": [
3172
+ "en",
3173
+ "zh"
3174
+ ],
3175
+ "model_ability": [
3176
+ "chat"
3177
+ ],
3178
+ "model_description": "The second generation of the InternLM model, InternLM2.",
3179
+ "model_specs": [
3180
+ {
3181
+ "model_format": "pytorch",
3182
+ "model_size_in_billions": 7,
3183
+ "quantizations": [
3184
+ "none"
3185
+ ],
3186
+ "model_id": "internlm/internlm2-chat-7b",
3187
+ "model_revision": "5797f79825bab7013932d57e2babaac1b8de6b4f"
3188
+ },
3189
+ {
3190
+ "model_format": "pytorch",
3191
+ "model_size_in_billions": 20,
3192
+ "quantizations": [
3193
+ "none"
3194
+ ],
3195
+ "model_id": "internlm/internlm2-chat-20b",
3196
+ "model_revision": "3ccaf3ae82d5d01c0a95eecf40ee550f9c543635"
3197
+ }
3198
+ ],
3199
+ "prompt_style": {
3200
+ "style_name": "INTERNLM2",
3201
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
3202
+ "roles": [
3203
+ "[UNUSED_TOKEN_146]user",
3204
+ "[UNUSED_TOKEN_146]assistant"
3205
+ ],
3206
+ "intra_message_sep": "[UNUSED_TOKEN_145]",
3207
+ "stop_token_ids": [
3208
+ 92542
3209
+ ],
3210
+ "stop": [
3211
+ "[UNUSED_TOKEN_145]"
3212
+ ]
3213
+ }
3130
3214
  }
3131
3215
  ]
@@ -43,6 +43,7 @@ DEFAULT_CONTEXT_LENGTH = 2048
43
43
  BUILTIN_LLM_PROMPT_STYLE: Dict[str, "PromptStyleV1"] = {}
44
44
  BUILTIN_LLM_MODEL_CHAT_FAMILIES: Set[str] = set()
45
45
  BUILTIN_LLM_MODEL_GENERATE_FAMILIES: Set[str] = set()
46
+ BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES: Set[str] = set()
46
47
 
47
48
 
48
49
  class GgmlLLMSpecV1(BaseModel):
@@ -50,7 +51,7 @@ class GgmlLLMSpecV1(BaseModel):
50
51
  # Must in order that `str` first, then `int`
51
52
  model_size_in_billions: Union[str, int]
52
53
  quantizations: List[str]
53
- model_id: str
54
+ model_id: Optional[str]
54
55
  model_file_name_template: str
55
56
  model_hub: str = "huggingface"
56
57
  model_uri: Optional[str]
@@ -73,7 +74,7 @@ class PytorchLLMSpecV1(BaseModel):
73
74
  # Must in order that `str` first, then `int`
74
75
  model_size_in_billions: Union[str, int]
75
76
  quantizations: List[str]
76
- model_id: str
77
+ model_id: Optional[str]
77
78
  model_hub: str = "huggingface"
78
79
  model_uri: Optional[str]
79
80
  model_revision: Optional[str]
@@ -105,7 +106,7 @@ class LLMFamilyV1(BaseModel):
105
106
  context_length: Optional[int] = DEFAULT_CONTEXT_LENGTH
106
107
  model_name: str
107
108
  model_lang: List[str]
108
- model_ability: List[Literal["embed", "generate", "chat"]]
109
+ model_ability: List[Literal["embed", "generate", "chat", "tools"]]
109
110
  model_description: Optional[str]
110
111
  # reason for not required str here: legacy registration
111
112
  model_family: Optional[str]
@@ -155,6 +156,15 @@ class CustomLLMFamilyV1(LLMFamilyV1):
155
156
  f"`model_family` for chat model must be `other` or one of the following values: \n"
156
157
  f"{', '.join(list(BUILTIN_LLM_MODEL_CHAT_FAMILIES))}"
157
158
  )
159
+ if (
160
+ llm_spec.model_family != "other"
161
+ and "tool_call" in llm_spec.model_ability
162
+ and llm_spec.model_family not in BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES
163
+ ):
164
+ raise ValueError(
165
+ f"`model_family` for tool call model must be `other` or one of the following values: \n"
166
+ f"{', '.join(list(BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES))}"
167
+ )
158
168
  if (
159
169
  llm_spec.model_family != "other"
160
170
  and "chat" not in llm_spec.model_ability
@@ -413,7 +423,7 @@ def _get_cache_dir(
413
423
  # quantization a dedicated cache dir.
414
424
  quant_suffix = ""
415
425
  for q in llm_spec.quantizations:
416
- if q in llm_spec.model_id:
426
+ if llm_spec.model_id and q in llm_spec.model_id:
417
427
  quant_suffix = q
418
428
  break
419
429
  cache_dir_name = (
@@ -726,7 +736,7 @@ def match_llm(
726
736
  def _apply_format_to_model_id(spec: LLMSpecV1, q: str) -> LLMSpecV1:
727
737
  # Different quantized versions of some models use different model ids,
728
738
  # Here we check the `{}` in the model id to format the id.
729
- if "{" in spec.model_id:
739
+ if spec.model_id and "{" in spec.model_id:
730
740
  spec.model_id = spec.model_id.format(quantization=q)
731
741
  return spec
732
742
 
@@ -297,7 +297,8 @@
297
297
  "zh"
298
298
  ],
299
299
  "model_ability": [
300
- "chat"
300
+ "chat",
301
+ "tools"
301
302
  ],
302
303
  "model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
303
304
  "model_specs": [
@@ -375,6 +376,15 @@
375
376
  "roles": [
376
377
  "user",
377
378
  "assistant"
379
+ ],
380
+ "stop_token_ids": [
381
+ 64795,
382
+ 64797,
383
+ 2
384
+ ],
385
+ "stop":[
386
+ "<|user|>",
387
+ "<|observation|>"
378
388
  ]
379
389
  }
380
390
  },
@@ -1108,6 +1118,15 @@
1108
1118
  ],
1109
1119
  "model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.",
1110
1120
  "model_specs": [
1121
+ {
1122
+ "model_format": "gptq",
1123
+ "model_size_in_billions": 34,
1124
+ "quantizations": [
1125
+ "8bits"
1126
+ ],
1127
+ "model_id": "01ai/Yi-34B-Chat-{quantization}",
1128
+ "model_revision": "master"
1129
+ },
1111
1130
  {
1112
1131
  "model_format": "pytorch",
1113
1132
  "model_size_in_billions": 34,
@@ -1452,14 +1471,15 @@
1452
1471
  },
1453
1472
  {
1454
1473
  "version": 1,
1455
- "context_length": 2048,
1474
+ "context_length": 32768,
1456
1475
  "model_name": "qwen-chat",
1457
1476
  "model_lang": [
1458
1477
  "en",
1459
1478
  "zh"
1460
1479
  ],
1461
1480
  "model_ability": [
1462
- "chat"
1481
+ "chat",
1482
+ "tools"
1463
1483
  ],
1464
1484
  "model_description": "Qwen-chat is a fine-tuned version of the Qwen LLM trained with alignment techniques, specializing in chatting.",
1465
1485
  "model_specs": [
@@ -1489,6 +1509,8 @@
1489
1509
  "model_format": "pytorch",
1490
1510
  "model_size_in_billions": "1_8",
1491
1511
  "quantizations": [
1512
+ "4-bit",
1513
+ "8-bit",
1492
1514
  "none"
1493
1515
  ],
1494
1516
  "model_hub": "modelscope",
@@ -1499,6 +1521,8 @@
1499
1521
  "model_format": "pytorch",
1500
1522
  "model_size_in_billions": 7,
1501
1523
  "quantizations": [
1524
+ "4-bit",
1525
+ "8-bit",
1502
1526
  "none"
1503
1527
  ],
1504
1528
  "model_hub": "modelscope",
@@ -1509,6 +1533,8 @@
1509
1533
  "model_format": "pytorch",
1510
1534
  "model_size_in_billions": 72,
1511
1535
  "quantizations": [
1536
+ "4-bit",
1537
+ "8-bit",
1512
1538
  "none"
1513
1539
  ],
1514
1540
  "model_hub": "modelscope",
@@ -1519,12 +1545,25 @@
1519
1545
  "model_format": "pytorch",
1520
1546
  "model_size_in_billions": 14,
1521
1547
  "quantizations": [
1548
+ "4-bit",
1549
+ "8-bit",
1522
1550
  "none"
1523
1551
  ],
1524
1552
  "model_id": "qwen/Qwen-14B-Chat",
1525
1553
  "model_hub": "modelscope",
1526
1554
  "model_revision": "v1.0.7"
1527
1555
  },
1556
+ {
1557
+ "model_format": "gptq",
1558
+ "model_size_in_billions": "1_8",
1559
+ "quantizations": [
1560
+ "Int4",
1561
+ "Int8"
1562
+ ],
1563
+ "model_id": "qwen/Qwen-1_8B-Chat-{quantization}",
1564
+ "model_hub": "modelscope",
1565
+ "model_revision": "master"
1566
+ },
1528
1567
  {
1529
1568
  "model_format": "gptq",
1530
1569
  "model_size_in_billions": 7,
@@ -1739,5 +1778,55 @@
1739
1778
  "model_revision": "master"
1740
1779
  }
1741
1780
  ]
1781
+ },
1782
+ {
1783
+ "version": 1,
1784
+ "context_length": 204800,
1785
+ "model_name": "internlm2-chat",
1786
+ "model_lang": [
1787
+ "en",
1788
+ "zh"
1789
+ ],
1790
+ "model_ability": [
1791
+ "chat"
1792
+ ],
1793
+ "model_description": "The second generation of the InternLM model, InternLM2.",
1794
+ "model_specs": [
1795
+ {
1796
+ "model_format": "pytorch",
1797
+ "model_size_in_billions": 7,
1798
+ "quantizations": [
1799
+ "none"
1800
+ ],
1801
+ "model_id": "Shanghai_AI_Laboratory/internlm2-chat-7b",
1802
+ "model_hub": "modelscope",
1803
+ "model_revision": "master"
1804
+ },
1805
+ {
1806
+ "model_format": "pytorch",
1807
+ "model_size_in_billions": 20,
1808
+ "quantizations": [
1809
+ "none"
1810
+ ],
1811
+ "model_id": "Shanghai_AI_Laboratory/internlm2-chat-20b",
1812
+ "model_hub": "modelscope",
1813
+ "model_revision": "master"
1814
+ }
1815
+ ],
1816
+ "prompt_style": {
1817
+ "style_name": "INTERNLM2",
1818
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
1819
+ "roles": [
1820
+ "[UNUSED_TOKEN_146]user",
1821
+ "[UNUSED_TOKEN_146]assistant"
1822
+ ],
1823
+ "intra_message_sep": "[UNUSED_TOKEN_145]",
1824
+ "stop_token_ids": [
1825
+ 92542
1826
+ ],
1827
+ "stop": [
1828
+ "[UNUSED_TOKEN_145]"
1829
+ ]
1830
+ }
1742
1831
  }
1743
1832
  ]