xinference 0.6.4__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (258) hide show
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +49 -62
  3. xinference/client/restful/restful_client.py +10 -1
  4. xinference/conftest.py +2 -2
  5. xinference/constants.py +10 -0
  6. xinference/core/model.py +33 -10
  7. xinference/core/resource.py +12 -11
  8. xinference/core/supervisor.py +22 -2
  9. xinference/core/worker.py +44 -16
  10. xinference/deploy/cmdline.py +19 -9
  11. xinference/deploy/local.py +9 -1
  12. xinference/deploy/supervisor.py +16 -3
  13. xinference/deploy/utils.py +1 -0
  14. xinference/deploy/worker.py +1 -1
  15. xinference/model/embedding/__init__.py +10 -0
  16. xinference/model/embedding/core.py +3 -0
  17. xinference/model/embedding/custom.py +5 -4
  18. xinference/model/embedding/model_spec.json +16 -0
  19. xinference/model/embedding/model_spec_modelscope.json +16 -0
  20. xinference/model/llm/__init__.py +22 -2
  21. xinference/model/llm/core.py +2 -2
  22. xinference/model/llm/ggml/chatglm.py +79 -15
  23. xinference/model/llm/ggml/llamacpp.py +2 -2
  24. xinference/model/llm/llm_family.json +99 -4
  25. xinference/model/llm/llm_family.py +54 -8
  26. xinference/model/llm/llm_family_modelscope.json +81 -2
  27. xinference/model/llm/pytorch/chatglm.py +95 -2
  28. xinference/model/llm/utils.py +12 -8
  29. xinference/model/llm/vllm/core.py +26 -5
  30. xinference/model/utils.py +25 -0
  31. xinference/types.py +64 -5
  32. xinference/utils.py +20 -0
  33. xinference/web/ui/build/asset-manifest.json +3 -3
  34. xinference/web/ui/build/index.html +1 -1
  35. xinference/web/ui/build/static/js/main.8126d441.js +3 -0
  36. xinference/web/ui/build/static/js/main.8126d441.js.map +1 -0
  37. xinference/web/ui/node_modules/.cache/babel-loader/06eb9846159adb398d44df0b0debc256a9fd9e8171a7d68f5c4ee4d655acfa45.json +1 -0
  38. xinference/web/ui/node_modules/.cache/babel-loader/0cccfbe5d963b8e31eb679f9d9677392839cedd04aa2956ac6b33cf19599d597.json +1 -0
  39. xinference/web/ui/node_modules/.cache/babel-loader/2f651cf60b1bde50c0601c7110f77dd44819fb6e2501ff748a631724d91445d4.json +1 -0
  40. xinference/web/ui/node_modules/.cache/babel-loader/3bda436576ecb05f81f7b6ec475d1cfaf03e2b3066e3a75902fe6e8c4773b43b.json +1 -0
  41. xinference/web/ui/node_modules/.cache/babel-loader/47887a9524ffeecdc2a7839dace146b24f97a5564fc3d431d6179ad2b153cf1f.json +1 -0
  42. xinference/web/ui/node_modules/.cache/babel-loader/48878f5178bad1a47757e011af41c974a7946efa29485506c4d19f25bf5d522d.json +1 -0
  43. xinference/web/ui/node_modules/.cache/babel-loader/59574eb63cfe9ed2e58d2f5a420e1ae54354e243a602e9bc73deae3147ed4f98.json +1 -0
  44. xinference/web/ui/node_modules/.cache/babel-loader/6a60ae66b29c2f3634fd081d369b9e63b4522fe18eb9e43e9979d1ff264b68ad.json +1 -0
  45. xinference/web/ui/node_modules/.cache/babel-loader/75a5abcbc92da335fdde530f5689194ec79a4b2345b8cba594f8904d3b88e3c6.json +1 -0
  46. xinference/web/ui/node_modules/.cache/babel-loader/84bfe7afede38da1f8ad569d891276fe4d66cfb87bf5c9ff7a113788ba62bb88.json +1 -0
  47. xinference/web/ui/node_modules/.cache/babel-loader/940ed05006583b955894e2b8f65a4a5ebf34f8149d747f59fae5131f17d65482.json +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/9c5f03db9aa88582a9b69b25c7f1acc78ba7fc61f743c9ed7399abb292d5dbde.json +1 -0
  49. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/a5e2e9f707eb7039bea096ca117d996b8f9cbc2a5613fd8e0c5b0094444ce23c.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/c02e70e9b9efcf3bd056606308104308d6a6ac559f2bc0b4454c11fb5874457c.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/e26750d9556e9741912333349e4da454c53dbfddbfc6002ab49518dcf02af745.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/e610aefd7000a3f8542a25cb66c64671cc8da18350de4e5b577102ba4bb78d65.json +1 -0
  54. xinference/web/ui/node_modules/.cache/babel-loader/ef42ec014d7bc373b874b2a1ff0dcd785490f125e913698bc049b0bd778e4d66.json +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/fe3eb4d76c79ca98833f686d642224eeeb94cc83ad14300d281623796d087f0a.json +1 -0
  56. xinference/web/ui/node_modules/.package-lock.json +1077 -405
  57. xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/globals/globals.json +163 -3
  58. xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/globals/package.json +1 -1
  59. xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/ignore/package.json +64 -0
  60. xinference/web/ui/node_modules/@eslint/eslintrc/package.json +18 -37
  61. xinference/web/ui/node_modules/@eslint/js/package.json +1 -1
  62. xinference/web/ui/node_modules/@eslint-community/regexpp/package.json +9 -4
  63. xinference/web/ui/node_modules/@humanwhocodes/config-array/package.json +14 -14
  64. xinference/web/ui/node_modules/@rushstack/eslint-patch/package.json +6 -4
  65. xinference/web/ui/node_modules/@types/semver/package.json +15 -15
  66. xinference/web/ui/node_modules/@ungap/structured-clone/cjs/package.json +1 -0
  67. xinference/web/ui/node_modules/@ungap/structured-clone/package.json +53 -0
  68. xinference/web/ui/node_modules/ansi-colors/package.json +129 -0
  69. xinference/web/ui/node_modules/array-includes/package.json +8 -8
  70. xinference/web/ui/node_modules/array.prototype.findlastindex/package.json +120 -0
  71. xinference/web/ui/node_modules/array.prototype.flat/package.json +8 -8
  72. xinference/web/ui/node_modules/array.prototype.flatmap/package.json +8 -8
  73. xinference/web/ui/node_modules/arraybuffer.prototype.slice/package.json +103 -0
  74. xinference/web/ui/node_modules/ast-types-flow/package.json +2 -2
  75. xinference/web/ui/node_modules/astral-regex/package.json +33 -0
  76. xinference/web/ui/node_modules/asynciterator.prototype/package.json +72 -0
  77. xinference/web/ui/node_modules/axe-core/locales/_template.json +0 -12
  78. xinference/web/ui/node_modules/axe-core/package.json +1 -2
  79. xinference/web/ui/node_modules/axe-core/sri-history.json +0 -8
  80. xinference/web/ui/node_modules/call-bind/package.json +33 -23
  81. xinference/web/ui/node_modules/define-data-property/package.json +113 -0
  82. xinference/web/ui/node_modules/define-data-property/tsconfig.json +59 -0
  83. xinference/web/ui/node_modules/define-properties/package.json +5 -4
  84. xinference/web/ui/node_modules/enquirer/package.json +112 -0
  85. xinference/web/ui/node_modules/es-abstract/helpers/caseFolding.json +1430 -0
  86. xinference/web/ui/node_modules/es-abstract/package.json +29 -23
  87. xinference/web/ui/node_modules/es-iterator-helpers/index.json +17 -0
  88. xinference/web/ui/node_modules/es-iterator-helpers/package.json +185 -0
  89. xinference/web/ui/node_modules/eslint/conf/{rule-type-list.json → category-list.json} +9 -6
  90. xinference/web/ui/node_modules/eslint/node_modules/@babel/code-frame/package.json +25 -0
  91. xinference/web/ui/node_modules/eslint/node_modules/eslint-visitor-keys/lib/visitor-keys.json +289 -0
  92. xinference/web/ui/node_modules/eslint/node_modules/eslint-visitor-keys/package.json +39 -0
  93. xinference/web/ui/node_modules/eslint/node_modules/glob-parent/package.json +48 -0
  94. xinference/web/ui/node_modules/eslint/node_modules/ignore/package.json +64 -0
  95. xinference/web/ui/node_modules/eslint/package.json +53 -82
  96. xinference/web/ui/node_modules/eslint-config-prettier/package.json +13 -0
  97. xinference/web/ui/node_modules/eslint-import-resolver-node/package.json +3 -3
  98. xinference/web/ui/node_modules/eslint-plugin-import/package.json +22 -17
  99. xinference/web/ui/node_modules/eslint-plugin-jsx-a11y/package.json +25 -24
  100. xinference/web/ui/node_modules/eslint-plugin-simple-import-sort/package.json +23 -0
  101. xinference/web/ui/node_modules/eslint-plugin-testing-library/package.json +1 -1
  102. xinference/web/ui/node_modules/eslint-scope/package.json +19 -34
  103. xinference/web/ui/node_modules/eslint-utils/node_modules/eslint-visitor-keys/lib/visitor-keys.json +284 -0
  104. xinference/web/ui/node_modules/eslint-utils/node_modules/eslint-visitor-keys/package.json +40 -0
  105. xinference/web/ui/node_modules/eslint-utils/package.json +65 -0
  106. xinference/web/ui/node_modules/eslint-visitor-keys/package.json +15 -15
  107. xinference/web/ui/node_modules/espree/node_modules/acorn/package.json +35 -0
  108. xinference/web/ui/node_modules/espree/node_modules/eslint-visitor-keys/lib/visitor-keys.json +284 -0
  109. xinference/web/ui/node_modules/espree/node_modules/eslint-visitor-keys/package.json +40 -0
  110. xinference/web/ui/node_modules/espree/package.json +27 -51
  111. xinference/web/ui/node_modules/function-bind/package.json +38 -14
  112. xinference/web/ui/node_modules/function.prototype.name/package.json +32 -13
  113. xinference/web/ui/node_modules/functional-red-black-tree/package.json +40 -0
  114. xinference/web/ui/node_modules/get-intrinsic/package.json +11 -11
  115. xinference/web/ui/node_modules/hasown/package.json +91 -0
  116. xinference/web/ui/node_modules/hasown/tsconfig.json +49 -0
  117. xinference/web/ui/node_modules/is-async-function/package.json +86 -0
  118. xinference/web/ui/node_modules/is-core-module/core.json +3 -3
  119. xinference/web/ui/node_modules/is-core-module/package.json +7 -7
  120. xinference/web/ui/node_modules/is-finalizationregistry/package.json +67 -0
  121. xinference/web/ui/node_modules/is-generator-function/package.json +87 -0
  122. xinference/web/ui/node_modules/is-typed-array/package.json +8 -10
  123. xinference/web/ui/node_modules/iterator.prototype/package.json +73 -0
  124. xinference/web/ui/node_modules/jsx-ast-utils/package.json +5 -5
  125. xinference/web/ui/node_modules/language-tags/package.json +48 -8
  126. xinference/web/ui/node_modules/lodash.truncate/package.json +17 -0
  127. xinference/web/ui/node_modules/object-inspect/package.json +8 -6
  128. xinference/web/ui/node_modules/object.entries/package.json +7 -7
  129. xinference/web/ui/node_modules/object.fromentries/package.json +7 -7
  130. xinference/web/ui/node_modules/object.groupby/package.json +83 -0
  131. xinference/web/ui/node_modules/object.values/package.json +7 -7
  132. xinference/web/ui/node_modules/prettier/package.json +21 -0
  133. xinference/web/ui/node_modules/progress/package.json +26 -0
  134. xinference/web/ui/node_modules/react-scripts/node_modules/@eslint/eslintrc/package.json +82 -0
  135. xinference/web/ui/node_modules/react-scripts/node_modules/@humanwhocodes/config-array/package.json +61 -0
  136. xinference/web/ui/node_modules/react-scripts/node_modules/@humanwhocodes/object-schema/package.json +33 -0
  137. xinference/web/ui/node_modules/react-scripts/node_modules/ansi-styles/package.json +56 -0
  138. xinference/web/ui/node_modules/react-scripts/node_modules/chalk/package.json +68 -0
  139. xinference/web/ui/node_modules/react-scripts/node_modules/color-convert/package.json +48 -0
  140. xinference/web/ui/node_modules/react-scripts/node_modules/color-name/package.json +28 -0
  141. xinference/web/ui/node_modules/react-scripts/node_modules/escape-string-regexp/package.json +38 -0
  142. xinference/web/ui/node_modules/react-scripts/node_modules/eslint/conf/replacements.json +22 -0
  143. xinference/web/ui/node_modules/react-scripts/node_modules/eslint/conf/rule-type-list.json +28 -0
  144. xinference/web/ui/node_modules/react-scripts/node_modules/eslint/package.json +179 -0
  145. xinference/web/ui/node_modules/react-scripts/node_modules/eslint-scope/package.json +63 -0
  146. xinference/web/ui/node_modules/react-scripts/node_modules/espree/package.json +88 -0
  147. xinference/web/ui/node_modules/react-scripts/node_modules/globals/globals.json +1974 -0
  148. xinference/web/ui/node_modules/react-scripts/node_modules/globals/package.json +55 -0
  149. xinference/web/ui/node_modules/react-scripts/node_modules/has-flag/package.json +46 -0
  150. xinference/web/ui/node_modules/react-scripts/node_modules/supports-color/package.json +53 -0
  151. xinference/web/ui/node_modules/react-scripts/node_modules/type-fest/package.json +58 -0
  152. xinference/web/ui/node_modules/reflect.getprototypeof/package.json +99 -0
  153. xinference/web/ui/node_modules/regexp.prototype.flags/package.json +8 -7
  154. xinference/web/ui/node_modules/regexpp/package.json +91 -0
  155. xinference/web/ui/node_modules/resolve/lib/core.json +4 -1
  156. xinference/web/ui/node_modules/resolve/package.json +9 -8
  157. xinference/web/ui/node_modules/resolve/test/resolver/multirepo/package.json +1 -1
  158. xinference/web/ui/node_modules/safe-array-concat/package.json +5 -5
  159. xinference/web/ui/node_modules/set-function-length/package.json +84 -0
  160. xinference/web/ui/node_modules/set-function-name/package.json +80 -0
  161. xinference/web/ui/node_modules/slice-ansi/node_modules/ansi-styles/package.json +56 -0
  162. xinference/web/ui/node_modules/slice-ansi/node_modules/color-convert/package.json +48 -0
  163. xinference/web/ui/node_modules/slice-ansi/node_modules/color-name/package.json +28 -0
  164. xinference/web/ui/node_modules/slice-ansi/package.json +52 -0
  165. xinference/web/ui/node_modules/string.prototype.trim/package.json +7 -7
  166. xinference/web/ui/node_modules/string.prototype.trimend/package.json +7 -7
  167. xinference/web/ui/node_modules/string.prototype.trimstart/package.json +7 -7
  168. xinference/web/ui/node_modules/table/dist/src/schemas/config.json +95 -0
  169. xinference/web/ui/node_modules/table/dist/src/schemas/shared.json +139 -0
  170. xinference/web/ui/node_modules/table/dist/src/schemas/streamConfig.json +25 -0
  171. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/data.json +13 -0
  172. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/applicator.json +53 -0
  173. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/content.json +17 -0
  174. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/core.json +57 -0
  175. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/format.json +14 -0
  176. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/meta-data.json +37 -0
  177. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/validation.json +90 -0
  178. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/schema.json +39 -0
  179. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/applicator.json +48 -0
  180. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/content.json +17 -0
  181. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/core.json +51 -0
  182. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/format-annotation.json +14 -0
  183. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/meta-data.json +37 -0
  184. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/unevaluated.json +15 -0
  185. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/validation.json +90 -0
  186. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/schema.json +55 -0
  187. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-draft-06.json +137 -0
  188. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-draft-07.json +151 -0
  189. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-secure.json +88 -0
  190. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/data.json +13 -0
  191. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/applicator.json +53 -0
  192. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/content.json +17 -0
  193. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/core.json +57 -0
  194. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/format.json +14 -0
  195. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/meta-data.json +37 -0
  196. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/validation.json +90 -0
  197. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/schema.json +39 -0
  198. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/applicator.json +48 -0
  199. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/content.json +17 -0
  200. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/core.json +51 -0
  201. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/format-annotation.json +14 -0
  202. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/meta-data.json +37 -0
  203. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/unevaluated.json +15 -0
  204. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/validation.json +90 -0
  205. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/schema.json +55 -0
  206. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-draft-06.json +137 -0
  207. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-draft-07.json +151 -0
  208. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-secure.json +88 -0
  209. xinference/web/ui/node_modules/table/node_modules/ajv/package.json +126 -0
  210. xinference/web/ui/node_modules/table/node_modules/json-schema-traverse/package.json +43 -0
  211. xinference/web/ui/node_modules/table/package.json +77 -0
  212. xinference/web/ui/node_modules/typed-array-buffer/package.json +73 -0
  213. xinference/web/ui/node_modules/typed-array-byte-length/package.json +98 -0
  214. xinference/web/ui/node_modules/v8-compile-cache/package.json +34 -0
  215. xinference/web/ui/node_modules/which-builtin-type/package.json +93 -0
  216. xinference/web/ui/node_modules/which-typed-array/package.json +4 -5
  217. xinference/web/ui/package-lock.json +1085 -406
  218. xinference/web/ui/package.json +10 -2
  219. {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/METADATA +53 -36
  220. {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/RECORD +232 -124
  221. {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/WHEEL +1 -1
  222. xinference/web/ui/build/static/js/main.8ae3b6d9.js +0 -3
  223. xinference/web/ui/build/static/js/main.8ae3b6d9.js.map +0 -1
  224. xinference/web/ui/node_modules/.cache/babel-loader/06363becf51869c421a8b3e34b4e3f50aa0aac3d590446044f9412e379f4ebbe.json +0 -1
  225. xinference/web/ui/node_modules/.cache/babel-loader/2849edddeb99a8ecdda577e810eead74b8f8a291cdfbd987839d604666ed79d0.json +0 -1
  226. xinference/web/ui/node_modules/.cache/babel-loader/2c774712d327cdf0b192aaa22785ec380e9427c587350c33289828d99e9c4abc.json +0 -1
  227. xinference/web/ui/node_modules/.cache/babel-loader/34c578e50d3040519ca8dc28bf0f7fec8674c2d6c0fcc3e98401c0a3f9f013cf.json +0 -1
  228. xinference/web/ui/node_modules/.cache/babel-loader/5933910e7c33febbabc0297ef7ba80f5e53ed96aa125b6a44ff2910aec29ced1.json +0 -1
  229. xinference/web/ui/node_modules/.cache/babel-loader/5e18a8354ea03d22a967fd8cb2171aa798edcb3da5d66ab1fd3b9663affd0abe.json +0 -1
  230. xinference/web/ui/node_modules/.cache/babel-loader/717cd7c186ace4812d1e602bdd299d8dc507f072670cc43974d53aac2574df5d.json +0 -1
  231. xinference/web/ui/node_modules/.cache/babel-loader/82dd896a6674286c48c1ab9f9147dd6e542dccd99848d5b3133a38efba8bd7ee.json +0 -1
  232. xinference/web/ui/node_modules/.cache/babel-loader/a178cfde289ffd15fd54b1c80fd9d231ae0f9644db33acb02084e69b32bfee37.json +0 -1
  233. xinference/web/ui/node_modules/.cache/babel-loader/adaec65f73accce3171b51b0fbcbfd8d0cd83f81a2e1b28eb34148644875499a.json +0 -1
  234. xinference/web/ui/node_modules/.cache/babel-loader/ae8f44c77c2e6f79680fe32fb00174183cd867093ebbda967b8985c33cc10fa2.json +0 -1
  235. xinference/web/ui/node_modules/.cache/babel-loader/b10bd04b4d6e28bfcaaaab37b0a4c1986e87a5b7e62e5ce4d56019880ef26990.json +0 -1
  236. xinference/web/ui/node_modules/.cache/babel-loader/cfc5da1cedee985a556e04865affccb72d0f624cbfb73da348bbe8693e8a4983.json +0 -1
  237. xinference/web/ui/node_modules/.cache/babel-loader/eebd0123c4b4396737e56b9181406a9fd76b107dd32971d23b0de99f51dd38d6.json +0 -1
  238. xinference/web/ui/node_modules/@nicolo-ribaudo/eslint-scope-5-internals/node_modules/eslint-scope/package.json +0 -48
  239. xinference/web/ui/node_modules/@typescript-eslint/utils/node_modules/eslint-scope/package.json +0 -48
  240. xinference/web/ui/node_modules/@typescript-eslint/utils/node_modules/estraverse/package.json +0 -40
  241. xinference/web/ui/node_modules/eslint/node_modules/argparse/package.json +0 -31
  242. xinference/web/ui/node_modules/eslint/node_modules/js-yaml/package.json +0 -66
  243. xinference/web/ui/node_modules/eslint-plugin-jsx-a11y/node_modules/semver/package.json +0 -38
  244. xinference/web/ui/node_modules/function-bind/.jscs.json +0 -176
  245. xinference/web/ui/node_modules/resolve/test/resolver/malformed_package_json/package.json +0 -1
  246. xinference/web/ui/node_modules/webpack/node_modules/eslint-scope/package.json +0 -48
  247. xinference/web/ui/node_modules/webpack/node_modules/estraverse/package.json +0 -40
  248. /xinference/web/ui/build/static/js/{main.8ae3b6d9.js.LICENSE.txt → main.8126d441.js.LICENSE.txt} +0 -0
  249. /xinference/web/ui/node_modules/{@nicolo-ribaudo/eslint-scope-5-internals → eslint-scope}/node_modules/estraverse/package.json +0 -0
  250. /xinference/web/ui/node_modules/{@eslint/eslintrc → react-scripts}/node_modules/argparse/package.json +0 -0
  251. /xinference/web/ui/node_modules/{eslint → react-scripts/node_modules/eslint}/lib/cli-engine/formatters/formatters-meta.json +0 -0
  252. /xinference/web/ui/node_modules/{eslint-config-react-app → react-scripts/node_modules/eslint-config-react-app}/package.json +0 -0
  253. /xinference/web/ui/node_modules/{eslint-plugin-flowtype → react-scripts/node_modules/eslint-plugin-flowtype}/dist/configs/recommended.json +0 -0
  254. /xinference/web/ui/node_modules/{eslint-plugin-flowtype → react-scripts/node_modules/eslint-plugin-flowtype}/package.json +0 -0
  255. /xinference/web/ui/node_modules/{@eslint/eslintrc → react-scripts}/node_modules/js-yaml/package.json +0 -0
  256. {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/LICENSE +0 -0
  257. {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/entry_points.txt +0 -0
  258. {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/top_level.txt +0 -0
@@ -22,6 +22,10 @@ from typing import Dict, Optional
22
22
  import xoscar as xo
23
23
  from xoscar.utils import get_next_port
24
24
 
25
+ from ..constants import (
26
+ XINFERENCE_HEALTH_CHECK_ATTEMPTS,
27
+ XINFERENCE_HEALTH_CHECK_INTERVAL,
28
+ )
25
29
  from ..core.supervisor import SupervisorActor
26
30
  from .utils import health_check
27
31
 
@@ -66,11 +70,20 @@ def run_in_subprocess(
66
70
  return p
67
71
 
68
72
 
69
- def main(host: str, port: int, logging_conf: Optional[Dict] = None):
70
- supervisor_address = f"{host}:{get_next_port()}"
73
+ def main(
74
+ host: str,
75
+ port: int,
76
+ supervisor_port: Optional[int],
77
+ logging_conf: Optional[Dict] = None,
78
+ ):
79
+ supervisor_address = f"{host}:{supervisor_port or get_next_port()}"
71
80
  local_cluster = run_in_subprocess(supervisor_address, logging_conf)
72
81
 
73
- if not health_check(address=supervisor_address, max_attempts=3, sleep_interval=1):
82
+ if not health_check(
83
+ address=supervisor_address,
84
+ max_attempts=XINFERENCE_HEALTH_CHECK_ATTEMPTS,
85
+ sleep_interval=XINFERENCE_HEALTH_CHECK_INTERVAL,
86
+ ):
74
87
  raise RuntimeError("Supervisor is not available after multiple attempts")
75
88
 
76
89
  try:
@@ -110,6 +110,7 @@ async def create_worker_actor_pool(
110
110
  return await xo.create_actor_pool(
111
111
  address=address,
112
112
  n_process=0,
113
+ auto_recover="process",
113
114
  subprocess_start_method=subprocess_start_method,
114
115
  logging_conf={"dict": logging_conf},
115
116
  )
@@ -18,10 +18,10 @@ import os
18
18
  from typing import Any, Optional
19
19
 
20
20
  import xoscar as xo
21
- from xorbits._mars.resource import cuda_count
22
21
  from xoscar import MainActorPoolType
23
22
 
24
23
  from ..core.worker import WorkerActor
24
+ from ..utils import cuda_count
25
25
 
26
26
  logger = logging.getLogger(__name__)
27
27
 
@@ -33,5 +33,15 @@ MODELSCOPE_EMBEDDING_MODELS = dict(
33
33
  codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
34
34
  )
35
35
  )
36
+
37
+ from ...constants import XINFERENCE_MODEL_DIR
38
+
39
+ user_defined_llm_dir = os.path.join(XINFERENCE_MODEL_DIR, "embedding")
40
+ if os.path.isdir(user_defined_llm_dir):
41
+ for f in os.listdir(user_defined_llm_dir):
42
+ with codecs.open(os.path.join(user_defined_llm_dir, f), encoding="utf-8") as fd:
43
+ user_defined_llm_family = CustomEmbeddingModelSpec.parse_obj(json.load(fd))
44
+ register_embedding(user_defined_llm_family, persist=False)
45
+
36
46
  del _model_spec_json
37
47
  del _model_spec_modelscope_json
@@ -220,6 +220,9 @@ class EmbeddingModel:
220
220
  ]
221
221
 
222
222
  raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
223
+ from ..utils import patch_trust_remote_code
224
+
225
+ patch_trust_remote_code()
223
226
  self._model = SentenceTransformer(self._model_path, device=self._device)
224
227
 
225
228
  def create_embedding(self, sentences: Union[str, List[str]], **kwargs):
@@ -49,10 +49,6 @@ def register_embedding(model_spec: CustomEmbeddingModelSpec, persist: bool):
49
49
  f" or a digit, and can only contain letters, digits, underscores, or dashes."
50
50
  )
51
51
 
52
- model_uri = model_spec.model_uri
53
- if model_uri and not is_valid_model_uri(model_uri):
54
- raise ValueError(f"Invalid model URI {model_uri}.")
55
-
56
52
  with UD_EMBEDDING_LOCK:
57
53
  for model_name in (
58
54
  list(BUILTIN_EMBEDDING_MODELS.keys())
@@ -67,6 +63,11 @@ def register_embedding(model_spec: CustomEmbeddingModelSpec, persist: bool):
67
63
  UD_EMBEDDINGS.append(model_spec)
68
64
 
69
65
  if persist:
66
+ # We only validate model URL when persist is True.
67
+ model_uri = model_spec.model_uri
68
+ if model_uri and not is_valid_model_uri(model_uri):
69
+ raise ValueError(f"Invalid model URI {model_uri}.")
70
+
70
71
  persist_path = os.path.join(
71
72
  XINFERENCE_MODEL_DIR, "embedding", f"{model_spec.model_name}.json"
72
73
  )
@@ -126,5 +126,21 @@
126
126
  "language": ["en"],
127
127
  "model_id": "BAAI/bge-large-en-v1.5",
128
128
  "model_revision": "5888da4a3a013e65d33dd6f612ecd4625eb87a7d"
129
+ },
130
+ {
131
+ "model_name": "jina-embeddings-v2-small-en",
132
+ "dimensions": 512,
133
+ "max_tokens": 8192,
134
+ "language": ["en"],
135
+ "model_id": "jinaai/jina-embeddings-v2-small-en",
136
+ "model_revision": "b811f03af3d4d7ea72a7c25c802b21fc675a5d99"
137
+ },
138
+ {
139
+ "model_name": "jina-embeddings-v2-base-en",
140
+ "dimensions": 512,
141
+ "max_tokens": 8192,
142
+ "language": ["en"],
143
+ "model_id": "jinaai/jina-embeddings-v2-base-en",
144
+ "model_revision": "7302ac470bed880590f9344bfeee32ff8722d0e5"
129
145
  }
130
146
  ]
@@ -126,5 +126,21 @@
126
126
  "language": ["en"],
127
127
  "model_id": "Xorbits/bge-large-en-v1.5",
128
128
  "model_revision": "v0.0.1"
129
+ },
130
+ {
131
+ "model_name": "jina-embeddings-v2-small-en",
132
+ "dimensions": 512,
133
+ "max_tokens": 8192,
134
+ "language": ["en"],
135
+ "model_id": "Xorbits/jina-embeddings-v2-small-en",
136
+ "model_revision": "v0.0.1"
137
+ },
138
+ {
139
+ "model_name": "jina-embeddings-v2-base-en",
140
+ "dimensions": 512,
141
+ "max_tokens": 8192,
142
+ "language": ["en"],
143
+ "model_id": "Xorbits/jina-embeddings-v2-base-en",
144
+ "model_revision": "v0.0.1"
129
145
  }
130
146
  ]
@@ -19,6 +19,7 @@ import os
19
19
  from .core import LLM
20
20
  from .llm_family import (
21
21
  BUILTIN_LLM_FAMILIES,
22
+ BUILTIN_LLM_PROMPT_STYLE,
22
23
  BUILTIN_MODELSCOPE_LLM_FAMILIES,
23
24
  LLM_CLASSES,
24
25
  GgmlLLMSpecV1,
@@ -89,13 +90,32 @@ def _install():
89
90
  os.path.dirname(os.path.abspath(__file__)), "llm_family.json"
90
91
  )
91
92
  for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
92
- BUILTIN_LLM_FAMILIES.append(LLMFamilyV1.parse_obj(json_obj))
93
+ model_spec = LLMFamilyV1.parse_obj(json_obj)
94
+ BUILTIN_LLM_FAMILIES.append(model_spec)
95
+
96
+ # register prompt style
97
+ if "chat" in model_spec.model_ability and isinstance(
98
+ model_spec.prompt_style, PromptStyleV1
99
+ ):
100
+ # note that the key is the model name,
101
+ # since there are multiple representations of the same prompt style name in json.
102
+ BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = model_spec.prompt_style
93
103
 
94
104
  modelscope_json_path = os.path.join(
95
105
  os.path.dirname(os.path.abspath(__file__)), "llm_family_modelscope.json"
96
106
  )
97
107
  for json_obj in json.load(codecs.open(modelscope_json_path, "r", encoding="utf-8")):
98
- BUILTIN_MODELSCOPE_LLM_FAMILIES.append(LLMFamilyV1.parse_obj(json_obj))
108
+ model_spec = LLMFamilyV1.parse_obj(json_obj)
109
+ BUILTIN_MODELSCOPE_LLM_FAMILIES.append(model_spec)
110
+
111
+ # register prompt style, in case that we have something missed
112
+ # if duplicated with huggingface json, keep it as the huggingface style
113
+ if (
114
+ "chat" in model_spec.model_ability
115
+ and isinstance(model_spec.prompt_style, PromptStyleV1)
116
+ and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
117
+ ):
118
+ BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = model_spec.prompt_style
99
119
 
100
120
  from ...constants import XINFERENCE_MODEL_DIR
101
121
 
@@ -61,13 +61,13 @@ class LLM(abc.ABC):
61
61
 
62
62
  @staticmethod
63
63
  def _has_cuda_device():
64
- from xorbits._mars.resource import cuda_count
64
+ from ...utils import cuda_count
65
65
 
66
66
  return cuda_count() > 0
67
67
 
68
68
  @staticmethod
69
69
  def _get_cuda_count():
70
- from xorbits._mars.resource import cuda_count
70
+ from ...utils import cuda_count
71
71
 
72
72
  cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES", None)
73
73
  if cuda_visible_devices is None:
@@ -11,13 +11,13 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
-
14
+ import json
15
15
  import logging
16
16
  import os
17
17
  import time
18
18
  import uuid
19
19
  from pathlib import Path
20
- from typing import TYPE_CHECKING, Iterator, List, Optional, Union
20
+ from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Union
21
21
 
22
22
  from ....types import (
23
23
  ChatCompletion,
@@ -107,7 +107,7 @@ class ChatglmCppChatModel(LLM):
107
107
 
108
108
  @staticmethod
109
109
  def _convert_raw_text_chunks_to_chat(
110
- tokens: Iterator[str], model_name: str
110
+ tokens: Iterator[Any], model_name: str
111
111
  ) -> Iterator[ChatCompletionChunk]:
112
112
  yield {
113
113
  "id": "chat" + f"cmpl-{str(uuid.uuid4())}",
@@ -124,7 +124,7 @@ class ChatglmCppChatModel(LLM):
124
124
  }
125
125
  ],
126
126
  }
127
- for token in enumerate(tokens):
127
+ for token in tokens:
128
128
  yield {
129
129
  "id": "chat" + f"cmpl-{str(uuid.uuid4())}",
130
130
  "model": model_name,
@@ -134,30 +134,30 @@ class ChatglmCppChatModel(LLM):
134
134
  {
135
135
  "index": 0,
136
136
  "delta": {
137
- "content": token[1],
137
+ "content": token
138
+ if isinstance(token, str)
139
+ else token.content,
138
140
  },
139
141
  "finish_reason": None,
140
142
  }
141
143
  ],
142
144
  }
143
145
 
144
- @staticmethod
146
+ @classmethod
145
147
  def _convert_raw_text_completion_to_chat(
146
- text: str, model_name: str
148
+ cls, text: Any, model_name: str
147
149
  ) -> ChatCompletion:
150
+ _id = str(uuid.uuid4())
148
151
  return {
149
- "id": "chat" + f"cmpl-{str(uuid.uuid4())}",
152
+ "id": "chat" + f"cmpl-{_id}",
150
153
  "model": model_name,
151
154
  "object": "chat.completion",
152
155
  "created": int(time.time()),
153
156
  "choices": [
154
157
  {
155
158
  "index": 0,
156
- "message": {
157
- "role": "assistant",
158
- "content": text,
159
- },
160
- "finish_reason": None,
159
+ "message": cls._message_to_json_string(_id, text),
160
+ "finish_reason": cls._finish_reason_from_msg(text),
161
161
  }
162
162
  ],
163
163
  "usage": {
@@ -167,6 +167,66 @@ class ChatglmCppChatModel(LLM):
167
167
  },
168
168
  }
169
169
 
170
+ @staticmethod
171
+ def _finish_reason_from_msg(msg):
172
+ if isinstance(msg, str):
173
+ return None
174
+ else:
175
+ return "tool_calls" if msg.tool_calls else "stop"
176
+
177
+ @staticmethod
178
+ def _eval_arguments(arguments):
179
+ def tool_call(**kwargs):
180
+ return kwargs
181
+
182
+ try:
183
+ return json.dumps(eval(arguments, dict(tool_call=tool_call)))
184
+ except Exception:
185
+ return f"Invalid arguments {arguments}"
186
+
187
+ @classmethod
188
+ def _message_to_json_string(cls, _id, msg) -> ChatCompletionMessage:
189
+ if isinstance(msg, str):
190
+ return {
191
+ "role": "assistant",
192
+ "content": msg,
193
+ }
194
+ else:
195
+ return {
196
+ "role": msg.role,
197
+ "content": msg.content,
198
+ "tool_calls": [
199
+ {
200
+ "id": f"call_{_id}",
201
+ "type": tc.type,
202
+ "function": {
203
+ "name": tc.function.name,
204
+ "arguments": cls._eval_arguments(tc.function.arguments),
205
+ },
206
+ }
207
+ for tc in msg.tool_calls
208
+ ],
209
+ }
210
+
211
+ @staticmethod
212
+ def _handle_tools(generate_config) -> Optional[ChatCompletionMessage]:
213
+ """Convert openai tools to ChatGLM tools."""
214
+ if generate_config is None:
215
+ return None
216
+ tools = generate_config.pop("tools", None)
217
+ if tools is None:
218
+ return None
219
+ chatglm_tools = []
220
+ for elem in tools:
221
+ if elem.get("type") != "function" or "function" not in elem:
222
+ raise ValueError("ChatGLM tools only support function type.")
223
+ chatglm_tools.append(elem["function"])
224
+ return {
225
+ "role": "system",
226
+ "content": f"Answer the following questions as best as you can. You have access to the following tools:\n"
227
+ f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}",
228
+ }
229
+
170
230
  def chat(
171
231
  self,
172
232
  prompt: str,
@@ -174,11 +234,15 @@ class ChatglmCppChatModel(LLM):
174
234
  generate_config: Optional[ChatglmCppGenerateConfig] = None,
175
235
  ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
176
236
  if chat_history is not None:
177
- chat_history_list = [message["content"] for message in chat_history]
237
+ chat_history_list = chat_history
178
238
  else:
179
239
  chat_history_list = []
180
240
 
181
- chat_history_list.append(prompt)
241
+ tool_message = self._handle_tools(generate_config)
242
+ if tool_message is not None:
243
+ chat_history_list.insert(0, tool_message)
244
+
245
+ chat_history_list.append({"role": "user", "content": prompt})
182
246
  logger.debug("Full conversation history:\n%s", str(chat_history_list))
183
247
 
184
248
  generate_config = self._sanitize_generate_config(generate_config)
@@ -68,7 +68,7 @@ class LlamaCppModel(LLM):
68
68
  self._llm = None
69
69
 
70
70
  def _can_apply_metal(self):
71
- return self.quantization in ["q4_0", "q4_1"]
71
+ return self.quantization.lower() in ["q4_0", "q4_1", "q4_k_s", "q4_k_m"]
72
72
 
73
73
  def _can_apply_cublas(self):
74
74
  # TODO: figure out the quantizations supported.
@@ -189,7 +189,7 @@ class LlamaCppModel(LLM):
189
189
  try:
190
190
  self._llm = Llama(
191
191
  model_path=model_path,
192
- verbose=False,
192
+ verbose=True,
193
193
  **self._llamacpp_model_config,
194
194
  )
195
195
  except AssertionError:
@@ -512,7 +512,7 @@
512
512
  "none"
513
513
  ],
514
514
  "model_id": "THUDM/chatglm3-6b",
515
- "model_revision": "fc3235f807ef5527af598c05f04f2ffd17f48bab"
515
+ "model_revision": "e46a14881eae613281abbd266ee918e93a56018f"
516
516
  }
517
517
  ],
518
518
  "prompt_style": {
@@ -1136,6 +1136,17 @@
1136
1136
  "model_id": "Qwen/Qwen-14B-Chat",
1137
1137
  "model_revision": "fab8385c8f7e7980ef61944729fe134ccbbca263"
1138
1138
  },
1139
+ {
1140
+ "model_format": "pytorch",
1141
+ "model_size_in_billions": 72,
1142
+ "quantizations": [
1143
+ "4-bit",
1144
+ "8-bit",
1145
+ "none"
1146
+ ],
1147
+ "model_id": "Qwen/Qwen-72B-Chat",
1148
+ "model_revision": "2cd9f76279337941ec1a4abeec6f8eb3c38d0f55"
1149
+ },
1139
1150
  {
1140
1151
  "model_format": "gptq",
1141
1152
  "model_size_in_billions": 7,
@@ -1153,6 +1164,15 @@
1153
1164
  "Int8"
1154
1165
  ],
1155
1166
  "model_id": "Qwen/Qwen-14B-Chat-{quantization}"
1167
+ },
1168
+ {
1169
+ "model_format": "gptq",
1170
+ "model_size_in_billions": 72,
1171
+ "quantizations": [
1172
+ "Int4",
1173
+ "Int8"
1174
+ ],
1175
+ "model_id": "Qwen/Qwen-72B-Chat-{quantization}"
1156
1176
  }
1157
1177
  ],
1158
1178
  "prompt_style": {
@@ -1164,7 +1184,14 @@
1164
1184
  ],
1165
1185
  "intra_message_sep": "\n",
1166
1186
  "stop_token_ids": [
1167
- 151643
1187
+ 151643,
1188
+ 151644,
1189
+ 151645
1190
+ ],
1191
+ "stop": [
1192
+ "<|endoftext|>",
1193
+ "<|im_start|>",
1194
+ "<|im_end|>"
1168
1195
  ]
1169
1196
  }
1170
1197
  },
@@ -2077,7 +2104,7 @@
2077
2104
  "model_ability": [
2078
2105
  "generate"
2079
2106
  ],
2080
- "model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI. The first public release contains two bilingual (English/Chinese) base models with the parameter sizes of 6B and 34B. Both of them are trained with 4K sequence length and can be extended to 32K during inference time.",
2107
+ "model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.",
2081
2108
  "model_specs": [
2082
2109
  {
2083
2110
  "model_format": "ggufv2",
@@ -2134,7 +2161,7 @@
2134
2161
  "model_ability": [
2135
2162
  "generate"
2136
2163
  ],
2137
- "model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI. The first public release contains two bilingual (English/Chinese) base models with the parameter sizes of 6B and 34B. Both of them are trained with 4K sequence length and can be extended to 32K during inference time.",
2164
+ "model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.",
2138
2165
  "model_specs": [
2139
2166
  {
2140
2167
  "model_format": "pytorch",
@@ -2160,6 +2187,74 @@
2160
2187
  }
2161
2188
  ]
2162
2189
  },
2190
+ {
2191
+ "version": 1,
2192
+ "context_length": 204800,
2193
+ "model_name": "Yi-chat",
2194
+ "model_lang": [
2195
+ "en",
2196
+ "zh"
2197
+ ],
2198
+ "model_ability": [
2199
+ "chat"
2200
+ ],
2201
+ "model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.",
2202
+ "model_specs": [
2203
+ {
2204
+ "model_format": "pytorch",
2205
+ "model_size_in_billions": 34,
2206
+ "quantizations": [
2207
+ "4-bit",
2208
+ "8-bit",
2209
+ "none"
2210
+ ],
2211
+ "model_id": "01-ai/Yi-34B-Chat",
2212
+ "model_revision": "a99ec35331cbfc9da596af7d4538fe2efecff03c"
2213
+ },
2214
+ {
2215
+ "model_format": "ggufv2",
2216
+ "model_size_in_billions": 34,
2217
+ "quantizations": [
2218
+ "Q2_K",
2219
+ "Q3_K_L",
2220
+ "Q3_K_M",
2221
+ "Q3_K_S",
2222
+ "Q4_0",
2223
+ "Q4_K_M",
2224
+ "Q4_K_S",
2225
+ "Q5_0",
2226
+ "Q5_K_M",
2227
+ "Q5_K_S",
2228
+ "Q6_K",
2229
+ "Q8_0"
2230
+ ],
2231
+ "model_id": "TheBloke/Yi-34B-Chat-GGUF",
2232
+ "model_file_name_template": "yi-34b-chat.{quantization}.gguf"
2233
+ }
2234
+ ],
2235
+ "prompt_style": {
2236
+ "style_name": "CHATML",
2237
+ "system_prompt": "",
2238
+ "roles": [
2239
+ "<|im_start|>user",
2240
+ "<|im_start|>assistant"
2241
+ ],
2242
+ "intra_message_sep": "<|im_end|>",
2243
+ "inter_message_sep": "",
2244
+ "stop_token_ids": [
2245
+ 2,
2246
+ 6,
2247
+ 7,
2248
+ 8
2249
+ ],
2250
+ "stop": [
2251
+ "<|endoftext|>",
2252
+ "<|im_start|>",
2253
+ "<|im_end|>",
2254
+ "<|im_sep|>"
2255
+ ]
2256
+ }
2257
+ },
2163
2258
  {
2164
2259
  "version": 1,
2165
2260
  "context_length": 2048,
@@ -17,9 +17,13 @@ import os
17
17
  import platform
18
18
  import shutil
19
19
  from threading import Lock
20
- from typing import List, Optional, Tuple, Type, Union
20
+ from typing import Any, Dict, List, Optional, Tuple, Type, Union
21
21
 
22
- from pydantic import BaseModel, Field
22
+ from pydantic import BaseModel, Field, Protocol, ValidationError
23
+ from pydantic.error_wrappers import ErrorWrapper
24
+ from pydantic.parse import load_str_bytes
25
+ from pydantic.types import StrBytes
26
+ from pydantic.utils import ROOT_KEY
23
27
  from typing_extensions import Annotated, Literal
24
28
 
25
29
  from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
@@ -36,6 +40,7 @@ from . import LLM
36
40
  logger = logging.getLogger(__name__)
37
41
 
38
42
  DEFAULT_CONTEXT_LENGTH = 2048
43
+ BUILTIN_LLM_PROMPT_STYLE: Dict[str, "PromptStyleV1"] = {}
39
44
 
40
45
 
41
46
  class GgmlLLMSpecV1(BaseModel):
@@ -80,12 +85,52 @@ class LLMFamilyV1(BaseModel):
80
85
  prompt_style: Optional["PromptStyleV1"]
81
86
 
82
87
 
88
+ class CustomLLMFamilyV1(LLMFamilyV1):
89
+ prompt_style: Optional[Union["PromptStyleV1", str]] # type: ignore
90
+
91
+ @classmethod
92
+ def parse_raw(
93
+ cls: Any,
94
+ b: StrBytes,
95
+ *,
96
+ content_type: Optional[str] = None,
97
+ encoding: str = "utf8",
98
+ proto: Protocol = None,
99
+ allow_pickle: bool = False,
100
+ ) -> LLMFamilyV1:
101
+ # See source code of BaseModel.parse_raw
102
+ try:
103
+ obj = load_str_bytes(
104
+ b,
105
+ proto=proto,
106
+ content_type=content_type,
107
+ encoding=encoding,
108
+ allow_pickle=allow_pickle,
109
+ json_loads=cls.__config__.json_loads,
110
+ )
111
+ except (ValueError, TypeError, UnicodeDecodeError) as e:
112
+ raise ValidationError([ErrorWrapper(e, loc=ROOT_KEY)], cls)
113
+ llm_spec = cls.parse_obj(obj)
114
+
115
+ # handle prompt style when user choose existing style
116
+ if llm_spec.prompt_style is not None and isinstance(llm_spec.prompt_style, str):
117
+ prompt_style_name = llm_spec.prompt_style
118
+ if prompt_style_name not in BUILTIN_LLM_PROMPT_STYLE:
119
+ raise ValueError(
120
+ f"Xinference does not support the prompt style name: {prompt_style_name}"
121
+ )
122
+ llm_spec.prompt_style = BUILTIN_LLM_PROMPT_STYLE[prompt_style_name]
123
+
124
+ return llm_spec
125
+
126
+
83
127
  LLMSpecV1 = Annotated[
84
128
  Union[GgmlLLMSpecV1, PytorchLLMSpecV1],
85
129
  Field(discriminator="model_format"),
86
130
  ]
87
131
 
88
132
  LLMFamilyV1.update_forward_refs()
133
+ CustomLLMFamilyV1.update_forward_refs()
89
134
 
90
135
 
91
136
  LLM_CLASSES: List[Type[LLM]] = []
@@ -580,7 +625,7 @@ def _is_linux():
580
625
  def _has_cuda_device():
581
626
  # `cuda_count` method already contains the logic for the
582
627
  # number of GPUs specified by `CUDA_VISIBLE_DEVICES`.
583
- from xorbits._mars.resource import cuda_count
628
+ from ...utils import cuda_count
584
629
 
585
630
  return cuda_count() > 0
586
631
 
@@ -677,11 +722,6 @@ def register_llm(llm_family: LLMFamilyV1, persist: bool):
677
722
  f" or a digit, and can only contain letters, digits, underscores, or dashes."
678
723
  )
679
724
 
680
- for spec in llm_family.model_specs:
681
- model_uri = spec.model_uri
682
- if model_uri and not is_valid_model_uri(model_uri):
683
- raise ValueError(f"Invalid model URI {model_uri}.")
684
-
685
725
  with UD_LLM_FAMILIES_LOCK:
686
726
  for family in BUILTIN_LLM_FAMILIES + UD_LLM_FAMILIES:
687
727
  if llm_family.model_name == family.model_name:
@@ -692,6 +732,12 @@ def register_llm(llm_family: LLMFamilyV1, persist: bool):
692
732
  UD_LLM_FAMILIES.append(llm_family)
693
733
 
694
734
  if persist:
735
+ # We only validate model URL when persist is True.
736
+ for spec in llm_family.model_specs:
737
+ model_uri = spec.model_uri
738
+ if model_uri and not is_valid_model_uri(model_uri):
739
+ raise ValueError(f"Invalid model URI {model_uri}.")
740
+
695
741
  persist_path = os.path.join(
696
742
  XINFERENCE_MODEL_DIR, "llm", f"{llm_family.model_name}.json"
697
743
  )