xinference 0.6.5__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (243)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +18 -4
  3. xinference/core/model.py +33 -10
  4. xinference/core/supervisor.py +10 -0
  5. xinference/core/worker.py +40 -13
  6. xinference/deploy/utils.py +4 -1
  7. xinference/model/embedding/__init__.py +5 -1
  8. xinference/model/embedding/core.py +10 -7
  9. xinference/model/llm/ggml/chatglm.py +7 -5
  10. xinference/model/llm/llm_family.py +41 -15
  11. xinference/model/llm/pytorch/core.py +1 -0
  12. xinference/model/llm/pytorch/utils.py +3 -0
  13. xinference/model/llm/vllm/core.py +8 -2
  14. xinference/model/rerank/__init__.py +5 -1
  15. xinference/model/rerank/core.py +7 -6
  16. xinference/model/utils.py +38 -2
  17. xinference/types.py +4 -4
  18. xinference/web/ui/build/asset-manifest.json +3 -3
  19. xinference/web/ui/build/index.html +1 -1
  20. xinference/web/ui/build/static/js/main.778615cc.js +3 -0
  21. xinference/web/ui/build/static/js/main.778615cc.js.map +1 -0
  22. xinference/web/ui/node_modules/.cache/babel-loader/0cccfbe5d963b8e31eb679f9d9677392839cedd04aa2956ac6b33cf19599d597.json +1 -0
  23. xinference/web/ui/node_modules/.cache/babel-loader/2f651cf60b1bde50c0601c7110f77dd44819fb6e2501ff748a631724d91445d4.json +1 -0
  24. xinference/web/ui/node_modules/.cache/babel-loader/35204b265d70210394b0a8571e5b01c8c0f9a748437b8a923961e3560ae3d550.json +1 -0
  25. xinference/web/ui/node_modules/.cache/babel-loader/47887a9524ffeecdc2a7839dace146b24f97a5564fc3d431d6179ad2b153cf1f.json +1 -0
  26. xinference/web/ui/node_modules/.cache/babel-loader/483eb7e5f01e34c6a42ad7c64dad550ff945ee21053a52c2e5e7ebe108b85411.json +1 -0
  27. xinference/web/ui/node_modules/.cache/babel-loader/667753ce39ce1d4bcbf9a5f1a103d653be1d19d42f4e1fbaceb9b507679a52c7.json +1 -0
  28. xinference/web/ui/node_modules/.cache/babel-loader/6a60ae66b29c2f3634fd081d369b9e63b4522fe18eb9e43e9979d1ff264b68ad.json +1 -0
  29. xinference/web/ui/node_modules/.cache/babel-loader/6e63957e4e0801705c6cb258379bfda0007ce6c3ddd2e3b62898b68455c3edf4.json +1 -0
  30. xinference/web/ui/node_modules/.cache/babel-loader/8d2b0b3c6988d1894694dcbbe708ef91cfe62d62dac317031f09915ced637953.json +1 -0
  31. xinference/web/ui/node_modules/.cache/babel-loader/8d77975a2735d67a618407026e5325608ccd66f1b379a74faf35b4087db536f3.json +1 -0
  32. xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +1 -0
  33. xinference/web/ui/node_modules/.cache/babel-loader/a5e2e9f707eb7039bea096ca117d996b8f9cbc2a5613fd8e0c5b0094444ce23c.json +1 -0
  34. xinference/web/ui/node_modules/.cache/babel-loader/b374bf2be1eac3cff5e0a8528a8e816e266ece911f714c123110961798a93a3b.json +1 -0
  35. xinference/web/ui/node_modules/.cache/babel-loader/d8a42e9df7157de9f28eecefdf178fd113bf2280d28471b6e32a8a45276042df.json +1 -0
  36. xinference/web/ui/node_modules/.cache/babel-loader/e26750d9556e9741912333349e4da454c53dbfddbfc6002ab49518dcf02af745.json +1 -0
  37. xinference/web/ui/node_modules/.cache/babel-loader/ed010102f476cd1a22b49be031a7f94f2ab3dd7ba8bf58839a771d46e28ff559.json +1 -0
  38. xinference/web/ui/node_modules/.cache/babel-loader/ef42ec014d7bc373b874b2a1ff0dcd785490f125e913698bc049b0bd778e4d66.json +1 -0
  39. xinference/web/ui/node_modules/.cache/babel-loader/fe3eb4d76c79ca98833f686d642224eeeb94cc83ad14300d281623796d087f0a.json +1 -0
  40. xinference/web/ui/node_modules/.cache/babel-loader/fe653ca0ca4297b415c0be4013574870d0465a657ae0f3d3f5b66ef6a831390c.json +1 -0
  41. xinference/web/ui/node_modules/.package-lock.json +1077 -405
  42. xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/globals/globals.json +163 -3
  43. xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/globals/package.json +1 -1
  44. xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/ignore/package.json +64 -0
  45. xinference/web/ui/node_modules/@eslint/eslintrc/package.json +18 -37
  46. xinference/web/ui/node_modules/@eslint/js/package.json +1 -1
  47. xinference/web/ui/node_modules/@eslint-community/regexpp/package.json +9 -4
  48. xinference/web/ui/node_modules/@humanwhocodes/config-array/package.json +14 -14
  49. xinference/web/ui/node_modules/@rushstack/eslint-patch/package.json +6 -4
  50. xinference/web/ui/node_modules/@types/semver/package.json +15 -15
  51. xinference/web/ui/node_modules/@ungap/structured-clone/cjs/package.json +1 -0
  52. xinference/web/ui/node_modules/@ungap/structured-clone/package.json +53 -0
  53. xinference/web/ui/node_modules/ansi-colors/package.json +129 -0
  54. xinference/web/ui/node_modules/array-includes/package.json +8 -8
  55. xinference/web/ui/node_modules/array.prototype.findlastindex/package.json +120 -0
  56. xinference/web/ui/node_modules/array.prototype.flat/package.json +8 -8
  57. xinference/web/ui/node_modules/array.prototype.flatmap/package.json +8 -8
  58. xinference/web/ui/node_modules/arraybuffer.prototype.slice/package.json +103 -0
  59. xinference/web/ui/node_modules/ast-types-flow/package.json +2 -2
  60. xinference/web/ui/node_modules/astral-regex/package.json +33 -0
  61. xinference/web/ui/node_modules/asynciterator.prototype/package.json +72 -0
  62. xinference/web/ui/node_modules/axe-core/locales/_template.json +0 -12
  63. xinference/web/ui/node_modules/axe-core/package.json +1 -2
  64. xinference/web/ui/node_modules/axe-core/sri-history.json +0 -8
  65. xinference/web/ui/node_modules/call-bind/package.json +33 -23
  66. xinference/web/ui/node_modules/define-data-property/package.json +113 -0
  67. xinference/web/ui/node_modules/define-data-property/tsconfig.json +59 -0
  68. xinference/web/ui/node_modules/define-properties/package.json +5 -4
  69. xinference/web/ui/node_modules/enquirer/package.json +112 -0
  70. xinference/web/ui/node_modules/es-abstract/helpers/caseFolding.json +1430 -0
  71. xinference/web/ui/node_modules/es-abstract/package.json +29 -23
  72. xinference/web/ui/node_modules/es-iterator-helpers/index.json +17 -0
  73. xinference/web/ui/node_modules/es-iterator-helpers/package.json +185 -0
  74. xinference/web/ui/node_modules/eslint/conf/{rule-type-list.json → category-list.json} +9 -6
  75. xinference/web/ui/node_modules/eslint/node_modules/@babel/code-frame/package.json +25 -0
  76. xinference/web/ui/node_modules/eslint/node_modules/eslint-visitor-keys/lib/visitor-keys.json +289 -0
  77. xinference/web/ui/node_modules/eslint/node_modules/eslint-visitor-keys/package.json +39 -0
  78. xinference/web/ui/node_modules/eslint/node_modules/glob-parent/package.json +48 -0
  79. xinference/web/ui/node_modules/eslint/node_modules/ignore/package.json +64 -0
  80. xinference/web/ui/node_modules/eslint/package.json +53 -82
  81. xinference/web/ui/node_modules/eslint-config-prettier/package.json +13 -0
  82. xinference/web/ui/node_modules/eslint-import-resolver-node/package.json +3 -3
  83. xinference/web/ui/node_modules/eslint-plugin-import/package.json +22 -17
  84. xinference/web/ui/node_modules/eslint-plugin-jsx-a11y/package.json +25 -24
  85. xinference/web/ui/node_modules/eslint-plugin-simple-import-sort/package.json +23 -0
  86. xinference/web/ui/node_modules/eslint-plugin-testing-library/package.json +1 -1
  87. xinference/web/ui/node_modules/eslint-scope/package.json +19 -34
  88. xinference/web/ui/node_modules/eslint-utils/node_modules/eslint-visitor-keys/lib/visitor-keys.json +284 -0
  89. xinference/web/ui/node_modules/eslint-utils/node_modules/eslint-visitor-keys/package.json +40 -0
  90. xinference/web/ui/node_modules/eslint-utils/package.json +65 -0
  91. xinference/web/ui/node_modules/eslint-visitor-keys/package.json +15 -15
  92. xinference/web/ui/node_modules/espree/node_modules/acorn/package.json +35 -0
  93. xinference/web/ui/node_modules/espree/node_modules/eslint-visitor-keys/lib/visitor-keys.json +284 -0
  94. xinference/web/ui/node_modules/espree/node_modules/eslint-visitor-keys/package.json +40 -0
  95. xinference/web/ui/node_modules/espree/package.json +27 -51
  96. xinference/web/ui/node_modules/function-bind/package.json +38 -14
  97. xinference/web/ui/node_modules/function.prototype.name/package.json +32 -13
  98. xinference/web/ui/node_modules/functional-red-black-tree/package.json +40 -0
  99. xinference/web/ui/node_modules/get-intrinsic/package.json +11 -11
  100. xinference/web/ui/node_modules/hasown/package.json +91 -0
  101. xinference/web/ui/node_modules/hasown/tsconfig.json +49 -0
  102. xinference/web/ui/node_modules/is-async-function/package.json +86 -0
  103. xinference/web/ui/node_modules/is-core-module/core.json +3 -3
  104. xinference/web/ui/node_modules/is-core-module/package.json +7 -7
  105. xinference/web/ui/node_modules/is-finalizationregistry/package.json +67 -0
  106. xinference/web/ui/node_modules/is-generator-function/package.json +87 -0
  107. xinference/web/ui/node_modules/is-typed-array/package.json +8 -10
  108. xinference/web/ui/node_modules/iterator.prototype/package.json +73 -0
  109. xinference/web/ui/node_modules/jsx-ast-utils/package.json +5 -5
  110. xinference/web/ui/node_modules/language-tags/package.json +48 -8
  111. xinference/web/ui/node_modules/lodash.truncate/package.json +17 -0
  112. xinference/web/ui/node_modules/object-inspect/package.json +8 -6
  113. xinference/web/ui/node_modules/object.entries/package.json +7 -7
  114. xinference/web/ui/node_modules/object.fromentries/package.json +7 -7
  115. xinference/web/ui/node_modules/object.groupby/package.json +83 -0
  116. xinference/web/ui/node_modules/object.values/package.json +7 -7
  117. xinference/web/ui/node_modules/prettier/package.json +21 -0
  118. xinference/web/ui/node_modules/progress/package.json +26 -0
  119. xinference/web/ui/node_modules/react-scripts/node_modules/@eslint/eslintrc/package.json +82 -0
  120. xinference/web/ui/node_modules/react-scripts/node_modules/@humanwhocodes/config-array/package.json +61 -0
  121. xinference/web/ui/node_modules/react-scripts/node_modules/@humanwhocodes/object-schema/package.json +33 -0
  122. xinference/web/ui/node_modules/react-scripts/node_modules/ansi-styles/package.json +56 -0
  123. xinference/web/ui/node_modules/react-scripts/node_modules/chalk/package.json +68 -0
  124. xinference/web/ui/node_modules/react-scripts/node_modules/color-convert/package.json +48 -0
  125. xinference/web/ui/node_modules/react-scripts/node_modules/color-name/package.json +28 -0
  126. xinference/web/ui/node_modules/react-scripts/node_modules/escape-string-regexp/package.json +38 -0
  127. xinference/web/ui/node_modules/react-scripts/node_modules/eslint/conf/replacements.json +22 -0
  128. xinference/web/ui/node_modules/react-scripts/node_modules/eslint/conf/rule-type-list.json +28 -0
  129. xinference/web/ui/node_modules/react-scripts/node_modules/eslint/package.json +179 -0
  130. xinference/web/ui/node_modules/react-scripts/node_modules/eslint-scope/package.json +63 -0
  131. xinference/web/ui/node_modules/react-scripts/node_modules/espree/package.json +88 -0
  132. xinference/web/ui/node_modules/react-scripts/node_modules/globals/globals.json +1974 -0
  133. xinference/web/ui/node_modules/react-scripts/node_modules/globals/package.json +55 -0
  134. xinference/web/ui/node_modules/react-scripts/node_modules/has-flag/package.json +46 -0
  135. xinference/web/ui/node_modules/react-scripts/node_modules/supports-color/package.json +53 -0
  136. xinference/web/ui/node_modules/react-scripts/node_modules/type-fest/package.json +58 -0
  137. xinference/web/ui/node_modules/reflect.getprototypeof/package.json +99 -0
  138. xinference/web/ui/node_modules/regexp.prototype.flags/package.json +8 -7
  139. xinference/web/ui/node_modules/regexpp/package.json +91 -0
  140. xinference/web/ui/node_modules/resolve/lib/core.json +4 -1
  141. xinference/web/ui/node_modules/resolve/package.json +9 -8
  142. xinference/web/ui/node_modules/resolve/test/resolver/multirepo/package.json +1 -1
  143. xinference/web/ui/node_modules/safe-array-concat/package.json +5 -5
  144. xinference/web/ui/node_modules/set-function-length/package.json +84 -0
  145. xinference/web/ui/node_modules/set-function-name/package.json +80 -0
  146. xinference/web/ui/node_modules/slice-ansi/node_modules/ansi-styles/package.json +56 -0
  147. xinference/web/ui/node_modules/slice-ansi/node_modules/color-convert/package.json +48 -0
  148. xinference/web/ui/node_modules/slice-ansi/node_modules/color-name/package.json +28 -0
  149. xinference/web/ui/node_modules/slice-ansi/package.json +52 -0
  150. xinference/web/ui/node_modules/string.prototype.trim/package.json +7 -7
  151. xinference/web/ui/node_modules/string.prototype.trimend/package.json +7 -7
  152. xinference/web/ui/node_modules/string.prototype.trimstart/package.json +7 -7
  153. xinference/web/ui/node_modules/table/dist/src/schemas/config.json +95 -0
  154. xinference/web/ui/node_modules/table/dist/src/schemas/shared.json +139 -0
  155. xinference/web/ui/node_modules/table/dist/src/schemas/streamConfig.json +25 -0
  156. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/data.json +13 -0
  157. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/applicator.json +53 -0
  158. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/content.json +17 -0
  159. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/core.json +57 -0
  160. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/format.json +14 -0
  161. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/meta-data.json +37 -0
  162. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/validation.json +90 -0
  163. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/schema.json +39 -0
  164. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/applicator.json +48 -0
  165. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/content.json +17 -0
  166. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/core.json +51 -0
  167. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/format-annotation.json +14 -0
  168. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/meta-data.json +37 -0
  169. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/unevaluated.json +15 -0
  170. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/validation.json +90 -0
  171. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/schema.json +55 -0
  172. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-draft-06.json +137 -0
  173. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-draft-07.json +151 -0
  174. xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-secure.json +88 -0
  175. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/data.json +13 -0
  176. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/applicator.json +53 -0
  177. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/content.json +17 -0
  178. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/core.json +57 -0
  179. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/format.json +14 -0
  180. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/meta-data.json +37 -0
  181. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/validation.json +90 -0
  182. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/schema.json +39 -0
  183. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/applicator.json +48 -0
  184. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/content.json +17 -0
  185. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/core.json +51 -0
  186. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/format-annotation.json +14 -0
  187. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/meta-data.json +37 -0
  188. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/unevaluated.json +15 -0
  189. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/validation.json +90 -0
  190. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/schema.json +55 -0
  191. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-draft-06.json +137 -0
  192. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-draft-07.json +151 -0
  193. xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-secure.json +88 -0
  194. xinference/web/ui/node_modules/table/node_modules/ajv/package.json +126 -0
  195. xinference/web/ui/node_modules/table/node_modules/json-schema-traverse/package.json +43 -0
  196. xinference/web/ui/node_modules/table/package.json +77 -0
  197. xinference/web/ui/node_modules/typed-array-buffer/package.json +73 -0
  198. xinference/web/ui/node_modules/typed-array-byte-length/package.json +98 -0
  199. xinference/web/ui/node_modules/v8-compile-cache/package.json +34 -0
  200. xinference/web/ui/node_modules/which-builtin-type/package.json +93 -0
  201. xinference/web/ui/node_modules/which-typed-array/package.json +4 -5
  202. xinference/web/ui/package-lock.json +1085 -406
  203. xinference/web/ui/package.json +10 -2
  204. {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/METADATA +46 -35
  205. {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/RECORD +217 -110
  206. xinference/web/ui/build/static/js/main.9b46963e.js +0 -3
  207. xinference/web/ui/build/static/js/main.9b46963e.js.map +0 -1
  208. xinference/web/ui/node_modules/.cache/babel-loader/06363becf51869c421a8b3e34b4e3f50aa0aac3d590446044f9412e379f4ebbe.json +0 -1
  209. xinference/web/ui/node_modules/.cache/babel-loader/2849edddeb99a8ecdda577e810eead74b8f8a291cdfbd987839d604666ed79d0.json +0 -1
  210. xinference/web/ui/node_modules/.cache/babel-loader/2c774712d327cdf0b192aaa22785ec380e9427c587350c33289828d99e9c4abc.json +0 -1
  211. xinference/web/ui/node_modules/.cache/babel-loader/34c578e50d3040519ca8dc28bf0f7fec8674c2d6c0fcc3e98401c0a3f9f013cf.json +0 -1
  212. xinference/web/ui/node_modules/.cache/babel-loader/5933910e7c33febbabc0297ef7ba80f5e53ed96aa125b6a44ff2910aec29ced1.json +0 -1
  213. xinference/web/ui/node_modules/.cache/babel-loader/5e18a8354ea03d22a967fd8cb2171aa798edcb3da5d66ab1fd3b9663affd0abe.json +0 -1
  214. xinference/web/ui/node_modules/.cache/babel-loader/717cd7c186ace4812d1e602bdd299d8dc507f072670cc43974d53aac2574df5d.json +0 -1
  215. xinference/web/ui/node_modules/.cache/babel-loader/a178cfde289ffd15fd54b1c80fd9d231ae0f9644db33acb02084e69b32bfee37.json +0 -1
  216. xinference/web/ui/node_modules/.cache/babel-loader/aad919619ddb400e30bf9886f3ad7f59d5ec7cdbb98ed8b8442565bb19164237.json +0 -1
  217. xinference/web/ui/node_modules/.cache/babel-loader/adaec65f73accce3171b51b0fbcbfd8d0cd83f81a2e1b28eb34148644875499a.json +0 -1
  218. xinference/web/ui/node_modules/.cache/babel-loader/ae8f44c77c2e6f79680fe32fb00174183cd867093ebbda967b8985c33cc10fa2.json +0 -1
  219. xinference/web/ui/node_modules/.cache/babel-loader/b10bd04b4d6e28bfcaaaab37b0a4c1986e87a5b7e62e5ce4d56019880ef26990.json +0 -1
  220. xinference/web/ui/node_modules/.cache/babel-loader/cfc5da1cedee985a556e04865affccb72d0f624cbfb73da348bbe8693e8a4983.json +0 -1
  221. xinference/web/ui/node_modules/.cache/babel-loader/eebd0123c4b4396737e56b9181406a9fd76b107dd32971d23b0de99f51dd38d6.json +0 -1
  222. xinference/web/ui/node_modules/@nicolo-ribaudo/eslint-scope-5-internals/node_modules/eslint-scope/package.json +0 -48
  223. xinference/web/ui/node_modules/@typescript-eslint/utils/node_modules/eslint-scope/package.json +0 -48
  224. xinference/web/ui/node_modules/@typescript-eslint/utils/node_modules/estraverse/package.json +0 -40
  225. xinference/web/ui/node_modules/eslint/node_modules/argparse/package.json +0 -31
  226. xinference/web/ui/node_modules/eslint/node_modules/js-yaml/package.json +0 -66
  227. xinference/web/ui/node_modules/eslint-plugin-jsx-a11y/node_modules/semver/package.json +0 -38
  228. xinference/web/ui/node_modules/function-bind/.jscs.json +0 -176
  229. xinference/web/ui/node_modules/resolve/test/resolver/malformed_package_json/package.json +0 -1
  230. xinference/web/ui/node_modules/webpack/node_modules/eslint-scope/package.json +0 -48
  231. xinference/web/ui/node_modules/webpack/node_modules/estraverse/package.json +0 -40
  232. /xinference/web/ui/build/static/js/{main.9b46963e.js.LICENSE.txt → main.778615cc.js.LICENSE.txt} +0 -0
  233. /xinference/web/ui/node_modules/{@nicolo-ribaudo/eslint-scope-5-internals → eslint-scope}/node_modules/estraverse/package.json +0 -0
  234. /xinference/web/ui/node_modules/{@eslint/eslintrc → react-scripts}/node_modules/argparse/package.json +0 -0
  235. /xinference/web/ui/node_modules/{eslint → react-scripts/node_modules/eslint}/lib/cli-engine/formatters/formatters-meta.json +0 -0
  236. /xinference/web/ui/node_modules/{eslint-config-react-app → react-scripts/node_modules/eslint-config-react-app}/package.json +0 -0
  237. /xinference/web/ui/node_modules/{eslint-plugin-flowtype → react-scripts/node_modules/eslint-plugin-flowtype}/dist/configs/recommended.json +0 -0
  238. /xinference/web/ui/node_modules/{eslint-plugin-flowtype → react-scripts/node_modules/eslint-plugin-flowtype}/package.json +0 -0
  239. /xinference/web/ui/node_modules/{@eslint/eslintrc → react-scripts}/node_modules/js-yaml/package.json +0 -0
  240. {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/LICENSE +0 -0
  241. {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/WHEEL +0 -0
  242. {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/entry_points.txt +0 -0
  243. {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
  version_json = '''
  {
- "date": "2023-12-01T18:35:32+0800",
+ "date": "2023-12-12T19:35:36+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "909a428f4762a5e397123a42b8d2abff02eada83",
- "version": "0.6.5"
+ "full-revisionid": "91f5f13c3914e1943977c80281ce485e8e3502cf",
+ "version": "0.7.1"
  }
  ''' # END VERSION_JSON
 
xinference/api/restful_api.py CHANGED
@@ -158,6 +158,9 @@ class RESTfulAPI:
  self._router.add_api_route(
  "/v1/models/prompts", self._get_builtin_prompts, methods=["GET"]
  )
+ self._router.add_api_route(
+ "/v1/cluster/devices", self._get_devices_count, methods=["GET"]
+ )
  self._router.add_api_route(
  "/v1/models/{model_uid}", self.describe_model, methods=["GET"]
  )
@@ -255,9 +258,6 @@ class RESTfulAPI:
  f"{pprint.pformat(invalid_routes)}"
  )
 
- for tp in [CreateChatCompletion, CreateCompletion]:
- logger.debug("Dump request model fields:\n%s", tp.__fields__)
-
  class SPAStaticFiles(StaticFiles):
  async def get_response(self, path: str, scope):
  response = await super().get_response(path, scope)
@@ -310,6 +310,17 @@ class RESTfulAPI:
  logger.error(e, exc_info=True)
  raise HTTPException(status_code=500, detail=str(e))
 
+ async def _get_devices_count(self) -> JSONResponse:
+ """
+ For internal usage
+ """
+ try:
+ data = await (await self._get_supervisor_ref()).get_devices_count()
+ return JSONResponse(content=data)
+ except Exception as e:
+ logger.error(e, exc_info=True)
+ raise HTTPException(status_code=500, detail=str(e))
+
  async def get_status(self) -> JSONResponse:
  try:
  data = await (await self._get_supervisor_ref()).get_status()
@@ -707,7 +718,10 @@ class RESTfulAPI:
 
  if (
  not body.messages
- or body.messages[-1].get("role") != "user"
+ or (
+ body.messages[-1].get("role") != "user"
+ and body.messages[-1].get("role") != "system"
+ )
  or not body.messages[-1].get("content")
  ):
  raise HTTPException(
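
The new "/v1/cluster/devices" route registered above is marked for internal use and simply returns the supervisor's device count as JSON. A minimal sketch of calling it from a client, assuming a default local endpoint at http://127.0.0.1:9997 (host and port are assumptions, not part of the diff):

    import requests

    # Hypothetical local endpoint; point this at wherever xinference is serving.
    BASE_URL = "http://127.0.0.1:9997"

    resp = requests.get(f"{BASE_URL}/v1/cluster/devices")
    resp.raise_for_status()
    print("device count:", resp.json())  # an integer reported by the supervisor
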
xinference/core/model.py CHANGED
@@ -14,6 +14,7 @@
 
  import asyncio
  import inspect
+ import os
  import uuid
  from typing import (
  TYPE_CHECKING,
@@ -44,6 +45,15 @@ from .utils import json_dumps, log_async
 
  T = TypeVar("T")
 
+ try:
+ from torch.cuda import OutOfMemoryError
+ except ImportError:
+
+ class _OutOfMemoryError(Exception):
+ pass
+
+ OutOfMemoryError = _OutOfMemoryError
+
 
  def request_limit(fn):
  """
@@ -192,18 +202,30 @@ class ModelActor(xo.StatelessActor):
  return ret
 
  async def _call_wrapper(self, _wrapper: Callable):
- assert not (
- inspect.iscoroutinefunction(_wrapper)
- or inspect.isasyncgenfunction(_wrapper)
- )
- if self._lock is None:
- return await asyncio.to_thread(_wrapper)
- else:
- async with self._lock:
+ try:
+ assert not (
+ inspect.iscoroutinefunction(_wrapper)
+ or inspect.isasyncgenfunction(_wrapper)
+ )
+ if self._lock is None:
  return await asyncio.to_thread(_wrapper)
+ else:
+ async with self._lock:
+ return await asyncio.to_thread(_wrapper)
+ except OutOfMemoryError:
+ logger.exception(
+ "Model actor is out of memory, model id: %s", self.model_uid()
+ )
+ os._exit(1)
 
  async def _call_async_wrapper(self, _wrapper: Callable):
- return await asyncio.create_task(_wrapper())
+ try:
+ return await asyncio.create_task(_wrapper())
+ except OutOfMemoryError:
+ logger.exception(
+ "Model actor is out of memory, model id: %s", self.model_uid()
+ )
+ os._exit(1)
 
  @log_async(logger=logger)
  @request_limit
@@ -365,7 +387,8 @@ class ModelActor(xo.StatelessActor):
 
  async def _async_wrapper():
  try:
- return await anext(gen) # noqa: F821
+ # anext is only available for Python >= 3.10
+ return await gen.__anext__() # noqa: F821
  except StopAsyncIteration:
  return stop
 
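
The OutOfMemoryError import fallback and the hard os._exit(1) in the hunks above are meant to work with the auto-recovering worker pool added further down in xinference/deploy/utils.py: on a CUDA OOM the model subprocess kills itself and the pool can bring it back. A standalone sketch of the same pattern, with guarded_call as an illustrative helper that is not part of the diff:

    import logging
    import os

    logger = logging.getLogger(__name__)

    try:
        # Real CUDA OOM exception when torch is installed.
        from torch.cuda import OutOfMemoryError
    except ImportError:

        class _OutOfMemoryError(Exception):
            """Placeholder so `except OutOfMemoryError` still works without torch."""

        OutOfMemoryError = _OutOfMemoryError


    def guarded_call(fn, *args, **kwargs):
        """Run a model call; on CUDA OOM, log and exit the whole subprocess."""
        try:
            return fn(*args, **kwargs)
        except OutOfMemoryError:
            # Exiting hard lets an auto-recovering actor pool restart the process.
            logger.exception("Model is out of memory")
            os._exit(1)
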
xinference/core/supervisor.py CHANGED
@@ -106,6 +106,16 @@ class SupervisorActor(xo.StatelessActor):
  data[k] = v.dict()
  return data
 
+ async def get_devices_count(self) -> int:
+ from ..utils import cuda_count
+
+ if self.is_local_deployment():
+ return cuda_count()
+ # distributed deployment, choose a worker and return its cuda_count.
+ # Assume that each worker has the same count of cards.
+ worker_ref = await self._choose_worker()
+ return await worker_ref.get_devices_count()
+
  async def _choose_worker(self) -> xo.ActorRefType["WorkerActor"]:
  # TODO: better allocation strategy.
  min_running_model_count = None
xinference/core/worker.py CHANGED
@@ -48,6 +48,7 @@ class WorkerActor(xo.StatelessActor):
  self._supervisor_address = supervisor_address
  self._supervisor_ref = None
  self._main_pool = main_pool
+ self._main_pool.recover_sub_pool = self.recover_sub_pool
 
  # internal states.
  self._model_uid_to_model: Dict[str, xo.ActorRefType["ModelActor"]] = {}
@@ -55,9 +56,22 @@ class WorkerActor(xo.StatelessActor):
  self._gpu_to_model_uid: Dict[int, str] = {}
  self._gpu_to_embedding_model_uids: Dict[int, Set[str]] = defaultdict(set)
  self._model_uid_to_addr: Dict[str, str] = {}
+ self._model_uid_to_launch_args: Dict[str, Dict] = {}
 
  self._lock = asyncio.Lock()
 
+ async def recover_sub_pool(self, address):
+ logger.warning("Process %s is down, create model.", address)
+ for model_uid, addr in self._model_uid_to_addr.items():
+ if addr == address:
+ launch_args = self._model_uid_to_launch_args.get(model_uid)
+ try:
+ await self.terminate_model(model_uid)
+ except Exception:
+ pass
+ await self.launch_builtin_model(**launch_args)
+ break
+
  @classmethod
  def uid(cls) -> str:
  return "worker"
@@ -94,6 +108,12 @@ class WorkerActor(xo.StatelessActor):
  async def __pre_destroy__(self):
  self._upload_task.cancel()
 
+ @staticmethod
+ def get_devices_count():
+ from ..utils import cuda_count
+
+ return cuda_count()
+
  @log_sync(logger=logger)
  def get_model_count(self) -> int:
  return len(self._model_uid_to_model)
@@ -175,7 +195,7 @@ class WorkerActor(xo.StatelessActor):
  gpu_cnt = n_gpu if isinstance(n_gpu, int) else 1
  devices = (
  [await self.allocate_devices_for_embedding(model_uid)]
- if model_type == "embedding"
+ if model_type in ["embedding", "rerank"]
  else self.allocate_devices(model_uid=model_uid, n_gpu=gpu_cnt)
  )
  env["CUDA_VISIBLE_DEVICES"] = ",".join([str(dev) for dev in devices])
@@ -278,7 +298,6 @@ class WorkerActor(xo.StatelessActor):
  for dev in devices:
  self._gpu_to_model_uid[int(dev)] = model_uid
  self._model_uid_to_addr[model_uid] = subpool_address
- return model_ref
 
  @log_async(logger=logger)
  async def launch_builtin_model(
@@ -292,7 +311,9 @@ class WorkerActor(xo.StatelessActor):
  n_gpu: Optional[Union[int, str]] = "auto",
  request_limits: Optional[int] = None,
  **kwargs,
- ) -> xo.ActorRefType["ModelActor"]:
+ ):
+ launch_args = locals()
+ launch_args.pop("self")
  if n_gpu is not None:
  if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu > cuda_count()):
  raise ValueError(
@@ -343,7 +364,7 @@ class WorkerActor(xo.StatelessActor):
  self._model_uid_to_model[model_uid] = model_ref
  self._model_uid_to_model_spec[model_uid] = model_description
  self._model_uid_to_addr[model_uid] = subpool_address
- return model_ref
+ self._model_uid_to_launch_args[model_uid] = launch_args
 
  @log_async(logger=logger)
  async def terminate_model(self, model_uid: str):
@@ -351,15 +372,21 @@ class WorkerActor(xo.StatelessActor):
  if model_ref is None:
  raise ValueError(f"Model not found in the model list, uid: {model_uid}")
 
- await xo.destroy_actor(model_ref)
- del self._model_uid_to_model[model_uid]
- del self._model_uid_to_model_spec[model_uid]
-
- self.release_devices(model_uid)
-
- subpool_address = self._model_uid_to_addr[model_uid]
- await self._main_pool.remove_sub_pool(subpool_address)
- del self._model_uid_to_addr[model_uid]
+ try:
+ await xo.destroy_actor(model_ref)
+ except Exception as e:
+ logger.debug(
+ "Destroy model actor failed, model uid: %s, error: %s", model_uid, e
+ )
+ try:
+ subpool_address = self._model_uid_to_addr[model_uid]
+ await self._main_pool.remove_sub_pool(subpool_address)
+ finally:
+ del self._model_uid_to_model[model_uid]
+ del self._model_uid_to_model_spec[model_uid]
+ self.release_devices(model_uid)
+ del self._model_uid_to_addr[model_uid]
+ del self._model_uid_to_launch_args[model_uid]
 
  @log_async(logger=logger)
  async def list_models(self) -> Dict[str, Dict[str, Any]]:
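
Taken together, the worker changes above record each model's launch arguments and, when a model subprocess dies, drop the stale bookkeeping and relaunch with the recorded arguments. A framework-free sketch of that bookkeeping; TinyWorker and its fake addresses are illustrative stand-ins, not the xoscar-based WorkerActor:

    from typing import Any, Dict


    class TinyWorker:
        """Illustrative stand-in for the relaunch bookkeeping in WorkerActor."""

        def __init__(self) -> None:
            self._model_uid_to_addr: Dict[str, str] = {}
            self._model_uid_to_launch_args: Dict[str, Dict[str, Any]] = {}

        async def launch_builtin_model(self, model_uid: str, **kwargs: Any) -> None:
            # Record how the model was launched before doing the real work.
            self._model_uid_to_launch_args[model_uid] = {"model_uid": model_uid, **kwargs}
            self._model_uid_to_addr[model_uid] = f"127.0.0.1:{40000 + len(self._model_uid_to_addr)}"

        async def terminate_model(self, model_uid: str) -> None:
            self._model_uid_to_addr.pop(model_uid, None)
            self._model_uid_to_launch_args.pop(model_uid, None)

        async def recover_sub_pool(self, address: str) -> None:
            # Called when the subprocess at `address` dies: grab the saved args,
            # clean up, then relaunch the model with them.
            for model_uid, addr in list(self._model_uid_to_addr.items()):
                if addr == address:
                    launch_args = dict(self._model_uid_to_launch_args.get(model_uid, {}))
                    await self.terminate_model(model_uid)
                    await self.launch_builtin_model(**launch_args)
                    break
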
xinference/deploy/utils.py CHANGED
@@ -60,7 +60,9 @@ def get_config_dict(
  "disable_existing_loggers": False,
  "formatters": {
  "formatter": {
- "format": "%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s"
+ "format": (
+ "%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s"
+ )
  },
  },
  "filters": {
@@ -110,6 +112,7 @@ async def create_worker_actor_pool(
  return await xo.create_actor_pool(
  address=address,
  n_process=0,
+ auto_recover="process",
  subprocess_start_method=subprocess_start_method,
  logging_conf={"dict": logging_conf},
  )
xinference/model/embedding/__init__.py CHANGED
@@ -16,7 +16,7 @@ import codecs
  import json
  import os
 
- from .core import EmbeddingModelSpec, get_cache_status
+ from .core import MODEL_NAME_TO_REVISION, EmbeddingModelSpec, get_cache_status
  from .custom import CustomEmbeddingModelSpec, register_embedding, unregister_embedding
 
  _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
@@ -27,12 +27,16 @@ BUILTIN_EMBEDDING_MODELS = dict(
  (spec["model_name"], EmbeddingModelSpec(**spec))
  for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
  )
+ for model_name, model_spec in BUILTIN_EMBEDDING_MODELS.items():
+ MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
  MODELSCOPE_EMBEDDING_MODELS = dict(
  (spec["model_name"], EmbeddingModelSpec(**spec))
  for spec in json.load(
  codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
  )
  )
+ for model_name, model_spec in MODELSCOPE_EMBEDDING_MODELS.items():
+ MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 
  from ...constants import XINFERENCE_MODEL_DIR
 
xinference/model/embedding/core.py CHANGED
@@ -15,7 +15,8 @@
  import logging
  import os
  import shutil
- from typing import List, Optional, Tuple, Union, no_type_check
+ from collections import defaultdict
+ from typing import Dict, List, Optional, Tuple, Union, no_type_check
 
  import numpy as np
  from pydantic import BaseModel
@@ -23,11 +24,14 @@ from pydantic import BaseModel
  from ...constants import XINFERENCE_CACHE_DIR
  from ...types import Embedding, EmbeddingData, EmbeddingUsage
  from ..core import ModelDescription
- from ..utils import valid_model_revision
+ from ..utils import is_model_cached, valid_model_revision
 
  logger = logging.getLogger(__name__)
 
  SUPPORTED_SCHEMES = ["s3"]
+ # Used for check whether the model is cached.
+ # Init when registering all the builtin models.
+ MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
 
 
  class EmbeddingModelSpec(BaseModel):
@@ -195,11 +199,7 @@ def cache(model_spec: EmbeddingModelSpec):
  def get_cache_status(
  model_spec: EmbeddingModelSpec,
  ) -> bool:
- cache_dir = os.path.realpath(
- os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
- )
- meta_path = os.path.join(cache_dir, "__valid_download")
- return valid_model_revision(meta_path, model_spec.model_revision)
+ return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
 
 
  class EmbeddingModel:
@@ -220,6 +220,9 @@ class EmbeddingModel:
  ]
 
  raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+ from ..utils import patch_trust_remote_code
+
+ patch_trust_remote_code()
  self._model = SentenceTransformer(self._model_path, device=self._device)
 
  def create_embedding(self, sentences: Union[str, List[str]], **kwargs):
xinference/model/llm/ggml/chatglm.py CHANGED
@@ -134,9 +134,9 @@ class ChatglmCppChatModel(LLM):
  {
  "index": 0,
  "delta": {
- "content": token
- if isinstance(token, str)
- else token.content,
+ "content": (
+ token if isinstance(token, str) else token.content
+ ),
  },
  "finish_reason": None,
  }
@@ -223,8 +223,10 @@ class ChatglmCppChatModel(LLM):
  chatglm_tools.append(elem["function"])
  return {
  "role": "system",
- "content": f"Answer the following questions as best as you can. You have access to the following tools:\n"
- f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}",
+ "content": (
+ f"Answer the following questions as best as you can. You have access to the following tools:\n"
+ f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}"
+ ),
  }
 
  def chat(
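
For a sense of what the reformatted system message above ends up containing, here is a tiny illustration of the json.dumps call with one invented tool (the get_weather entry is made up for the example):

    import json

    chatglm_tools = [
        {
            "name": "get_weather",  # invented tool, for illustration only
            "description": "Query the weather for a city",
            "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
        }
    ]
    content = (
        f"Answer the following questions as best as you can. You have access to the following tools:\n"
        f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}"
    )
    print(content)
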
xinference/model/llm/llm_family.py CHANGED
@@ -588,31 +588,57 @@ def cache_from_huggingface(
  return cache_dir
 
 
+ def _check_revision(
+ llm_family: LLMFamilyV1,
+ llm_spec: "LLMSpecV1",
+ builtin: list,
+ meta_path: str,
+ ) -> bool:
+ for family in builtin:
+ if llm_family.model_name == family.model_name:
+ specs = family.model_specs
+ for spec in specs:
+ if (
+ spec.model_format == "pytorch"
+ and spec.model_size_in_billions == llm_spec.model_size_in_billions
+ ):
+ return valid_model_revision(meta_path, spec.model_revision)
+ return False
+
+
  def get_cache_status(
  llm_family: LLMFamilyV1,
  llm_spec: "LLMSpecV1",
  ) -> Union[bool, List[bool]]:
+ """
+ When calling this function from above, `llm_family` is constructed only from BUILTIN_LLM_FAMILIES,
+ so we should check both huggingface and modelscope cache files.
+ """
  cache_dir = _get_cache_dir(llm_family, llm_spec, create_if_not_exist=False)
+ # check revision for pytorch model
  if llm_spec.model_format == "pytorch":
- return _skip_download(
- cache_dir,
- llm_spec.model_format,
- llm_spec.model_hub,
- llm_spec.model_revision,
- "none",
- )
+ hf_meta_path = _get_meta_path(cache_dir, "pytorch", "huggingface", "none")
+ ms_meta_path = _get_meta_path(cache_dir, "pytorch", "modelscope", "none")
+ revisions = [
+ _check_revision(llm_family, llm_spec, BUILTIN_LLM_FAMILIES, hf_meta_path),
+ _check_revision(
+ llm_family, llm_spec, BUILTIN_MODELSCOPE_LLM_FAMILIES, ms_meta_path
+ ),
+ ]
+ return any(revisions)
+ # just check meta file for ggml and gptq model
  elif llm_spec.model_format in ["ggmlv3", "ggufv2", "gptq"]:
  ret = []
  for q in llm_spec.quantizations:
- ret.append(
- _skip_download(
- cache_dir,
- llm_spec.model_format,
- llm_spec.model_hub,
- llm_spec.model_revision,
- q,
- )
+ assert q is not None
+ hf_meta_path = _get_meta_path(
+ cache_dir, llm_spec.model_format, "huggingface", q
+ )
+ ms_meta_path = _get_meta_path(
+ cache_dir, llm_spec.model_format, "modelscope", q
  )
+ results = [os.path.exists(hf_meta_path), os.path.exists(ms_meta_path)]
+ ret.append(any(results))
  return ret
  else:
  raise ValueError(f"Unsupported model format: {llm_spec.model_format}")
xinference/model/llm/pytorch/core.py CHANGED
@@ -345,6 +345,7 @@ class PytorchModel(LLM):
  inputs = input
 
  tokenizer = self._tokenizer
+ tokenizer.pad_token = tokenizer.eos_token
  is_llama = "llama" in str(type(self._model)) # llama supports batch inference
  is_chatglm = "chatglm" in str(type(self._model))
  if is_llama:
xinference/model/llm/pytorch/utils.py CHANGED
@@ -259,6 +259,7 @@ def generate_stream(
  raise ValueError("Invalid stop field type.")
 
  if stream:
+ output = output.strip("�")
  tmp_output_length = len(output)
  output = output[last_output_length:]
  last_output_length = tmp_output_length
@@ -424,6 +425,7 @@ def generate_stream_falcon(
  raise ValueError("Invalid stop field type.")
 
  if stream:
+ output = output.strip("�")
  tmp_output_length = len(output)
  output = output[last_output_length:]
  last_output_length = tmp_output_length
@@ -552,6 +554,7 @@ def generate_stream_chatglm(
  response = process_response(response)
 
  if stream:
+ response = response.strip("�")
  tmp_response_length = len(response)
  response = response[last_response_length:]
  last_response_length = tmp_response_length
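
Background for the strip("�") lines above: when a streamed response is decoded in chunks, a multi-byte UTF-8 character can be cut at a chunk boundary, and the partial bytes surface as the U+FFFD replacement character at the end of the text. A small self-contained illustration of that effect:

    # Decoding a byte stream in chunks can split a multi-byte character,
    # which shows up as U+FFFD ("�") at the chunk boundary.
    data = "你好".encode("utf-8")          # 6 bytes, 3 bytes per character
    partial = data[:4].decode("utf-8", errors="replace")
    print(partial)                          # '你�' (trailing replacement char)

    # Stripping the replacement character avoids sending it to the client;
    # the full character appears once the remaining bytes arrive.
    print(partial.strip("�"))               # '你'
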
xinference/model/llm/vllm/core.py CHANGED
@@ -79,6 +79,10 @@ VLLM_SUPPORTED_CHAT_MODELS = [
  "internlm-chat-20b",
  "qwen-chat",
  "Yi",
+ "Yi-chat",
+ "code-llama",
+ "code-llama-python",
+ "code-llama-instruct",
  "mistral-instruct-v0.1",
  "chatglm3",
  ]
@@ -319,7 +323,9 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
  if not generate_config:
  generate_config = {}
  if self.model_family.prompt_style:
- if (not generate_config["stop"]) and self.model_family.prompt_style.stop:
+ if (
+ not generate_config.get("stop")
+ ) and self.model_family.prompt_style.stop:
  generate_config["stop"] = self.model_family.prompt_style.stop.copy()
  if self.model_family.prompt_style.stop_token_ids:
  generate_config.setdefault(
@@ -343,7 +349,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
  full_prompt = self.get_prompt(prompt, chat_history, prompt_style)
 
  sanitized = self._sanitize_chat_config(generate_config)
- stream = sanitized["stream"]
+ stream = sanitized.get("stream", None)
 
  if stream:
  agen = await self.async_generate(full_prompt, sanitized)
xinference/model/rerank/__init__.py CHANGED
@@ -16,7 +16,7 @@ import codecs
  import json
  import os
 
- from .core import RerankModelSpec, get_cache_status
+ from .core import MODEL_NAME_TO_REVISION, RerankModelSpec, get_cache_status
 
  _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
  _model_spec_modelscope_json = os.path.join(
@@ -26,11 +26,15 @@ BUILTIN_RERANK_MODELS = dict(
  (spec["model_name"], RerankModelSpec(**spec))
  for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
  )
+ for model_name, model_spec in BUILTIN_RERANK_MODELS.items():
+ MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
  MODELSCOPE_RERANK_MODELS = dict(
  (spec["model_name"], RerankModelSpec(**spec))
  for spec in json.load(
  codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
  )
  )
+ for model_name, model_spec in MODELSCOPE_RERANK_MODELS.items():
+ MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
  del _model_spec_json
  del _model_spec_modelscope_json
xinference/model/rerank/core.py CHANGED
@@ -15,6 +15,7 @@
  import logging
  import os
  import uuid
+ from collections import defaultdict
  from typing import Dict, List, Optional, Tuple
 
  import numpy as np
@@ -23,10 +24,14 @@ from pydantic import BaseModel
  from ...constants import XINFERENCE_CACHE_DIR
  from ...types import Document, DocumentObj, Rerank
  from ..core import ModelDescription
- from ..utils import valid_model_revision
+ from ..utils import is_model_cached, valid_model_revision
 
  logger = logging.getLogger(__name__)
 
+ # Used for check whether the model is cached.
+ # Init when registering all the builtin models.
+ MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
+
 
  class RerankModelSpec(BaseModel):
  model_name: str
@@ -126,11 +131,7 @@ class RerankModel:
  def get_cache_status(
  model_spec: RerankModelSpec,
  ) -> bool:
- cache_dir = os.path.realpath(
- os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
- )
- meta_path = os.path.join(cache_dir, "__valid_download")
- return valid_model_revision(meta_path, model_spec.model_revision)
+ return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
 
 
  def cache(model_spec: RerankModelSpec):
xinference/model/utils.py CHANGED
@@ -16,11 +16,11 @@ import logging
  import os
  from json import JSONDecodeError
  from pathlib import Path
- from typing import Callable, Dict, Optional, Tuple
+ from typing import Any, Callable, Dict, Optional, Tuple
 
  from fsspec import AbstractFileSystem
 
- from ..constants import XINFERENCE_ENV_MODEL_SRC
+ from ..constants import XINFERENCE_CACHE_DIR, XINFERENCE_ENV_MODEL_SRC
 
  logger = logging.getLogger(__name__)
  MAX_ATTEMPTS = 3
@@ -132,6 +132,17 @@ def valid_model_revision(
  return real_revision == expected_model_revision
 
 
+ def is_model_cached(model_spec: Any, name_to_revisions_mapping: Dict):
+ cache_dir = os.path.realpath(
+ os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
+ )
+ meta_path = os.path.join(cache_dir, "__valid_download")
+ revisions = name_to_revisions_mapping[model_spec.model_name]
+ if model_spec.model_revision not in revisions: # Usually for UT
+ revisions.append(model_spec.model_revision)
+ return any([valid_model_revision(meta_path, revision) for revision in revisions])
+
+
  def is_valid_model_name(model_name: str) -> bool:
  import re
 
@@ -211,3 +222,28 @@ def copy_from_src_to_dst(
  )
  if attempt + 1 == max_attempt:
  raise
+
+
+ def patch_trust_remote_code():
+ """sentence-transformers calls transformers without the trust_remote_code=True, some embedding
+ models will fail to load, e.g. jina-embeddings-v2-base-en
+
+ :return:
+ """
+ try:
+ from transformers.dynamic_module_utils import resolve_trust_remote_code
+ except ImportError:
+ logger.error("Patch transformers trust_remote_code failed.")
+ else:
+
+ def _patched_resolve_trust_remote_code(*args, **kwargs):
+ logger.info("Patched resolve_trust_remote_code: %s %s", args, kwargs)
+ return True
+
+ if (
+ resolve_trust_remote_code.__code__
+ != _patched_resolve_trust_remote_code.__code__
+ ):
+ resolve_trust_remote_code.__code__ = (
+ _patched_resolve_trust_remote_code.__code__
+ )
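
The is_model_cached helper above takes a mapping from model name to every known revision, so a cache directory written under an older revision still counts as valid. A small, self-contained illustration of that lookup logic; ToySpec, the toy meta-file format, and the revision strings are invented for the example and are simpler than the real valid_model_revision:

    import json
    import os
    import tempfile
    from collections import defaultdict
    from dataclasses import dataclass


    @dataclass
    class ToySpec:  # stand-in for EmbeddingModelSpec / RerankModelSpec
        model_name: str
        model_revision: str


    def toy_valid_model_revision(meta_path: str, revision: str) -> bool:
        if not os.path.exists(meta_path):
            return False
        with open(meta_path) as f:
            return json.load(f).get("revision") == revision


    def toy_is_model_cached(spec: ToySpec, name_to_revisions, cache_root: str) -> bool:
        meta_path = os.path.join(cache_root, spec.model_name, "__valid_download")
        revisions = name_to_revisions[spec.model_name]
        if spec.model_revision not in revisions:
            revisions.append(spec.model_revision)
        return any(toy_valid_model_revision(meta_path, rev) for rev in revisions)


    # A cache downloaded under an old revision is still recognized.
    name_to_revisions = defaultdict(list, {"toy-embedding": ["rev-old"]})
    with tempfile.TemporaryDirectory() as root:
        os.makedirs(os.path.join(root, "toy-embedding"))
        with open(os.path.join(root, "toy-embedding", "__valid_download"), "w") as f:
            json.dump({"revision": "rev-old"}, f)
        spec = ToySpec(model_name="toy-embedding", model_revision="rev-new")
        print(toy_is_model_cached(spec, name_to_revisions, root))  # True
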
xinference/types.py CHANGED
@@ -289,7 +289,7 @@ def get_pydantic_model_from_method(
  model.__fields__.pop(key)
  if exclude_fields is not None:
  for key in exclude_fields:
- model.__fields__.pop(key)
+ model.__fields__.pop(key, None)
  if include_fields is not None:
  dummy_model = create_model("DummyModel", **include_fields)
  model.__fields__.update(dummy_model.__fields__)
@@ -307,10 +307,10 @@ def fix_forward_ref(model):
  if isinstance(field.annotation, ForwardRef):
  exclude_fields.append(key)
  include_fields[key] = (Optional[Any], None)
- if exclude_fields is not None:
+ if exclude_fields:
  for key in exclude_fields:
- model.__fields__.pop(key)
- if include_fields is not None:
+ model.__fields__.pop(key, None)
+ if include_fields:
  dummy_model = create_model("DummyModel", **include_fields)
  model.__fields__.update(dummy_model.__fields__)
  return model
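
The types.py change swaps pop(key) for pop(key, None) so that excluding a field which is not actually present no longer raises KeyError (in pydantic v1, __fields__ is an ordinary dict). The same behavior in isolation, on a plain dict:

    fields = {"prompt": "...", "stream": "..."}

    # pop without a default raises when the key is missing:
    try:
        fields.pop("logit_bias")
    except KeyError:
        print("KeyError without a default")

    # pop with a default is a no-op for missing keys:
    fields.pop("logit_bias", None)
    print(fields)  # {'prompt': '...', 'stream': '...'}
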