xinference 0.6.5__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +18 -4
- xinference/core/model.py +33 -10
- xinference/core/supervisor.py +10 -0
- xinference/core/worker.py +40 -13
- xinference/deploy/utils.py +4 -1
- xinference/model/embedding/__init__.py +5 -1
- xinference/model/embedding/core.py +10 -7
- xinference/model/llm/ggml/chatglm.py +7 -5
- xinference/model/llm/llm_family.py +41 -15
- xinference/model/llm/pytorch/core.py +1 -0
- xinference/model/llm/pytorch/utils.py +3 -0
- xinference/model/llm/vllm/core.py +8 -2
- xinference/model/rerank/__init__.py +5 -1
- xinference/model/rerank/core.py +7 -6
- xinference/model/utils.py +38 -2
- xinference/types.py +4 -4
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.778615cc.js +3 -0
- xinference/web/ui/build/static/js/main.778615cc.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0cccfbe5d963b8e31eb679f9d9677392839cedd04aa2956ac6b33cf19599d597.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2f651cf60b1bde50c0601c7110f77dd44819fb6e2501ff748a631724d91445d4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/35204b265d70210394b0a8571e5b01c8c0f9a748437b8a923961e3560ae3d550.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/47887a9524ffeecdc2a7839dace146b24f97a5564fc3d431d6179ad2b153cf1f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/483eb7e5f01e34c6a42ad7c64dad550ff945ee21053a52c2e5e7ebe108b85411.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/667753ce39ce1d4bcbf9a5f1a103d653be1d19d42f4e1fbaceb9b507679a52c7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6a60ae66b29c2f3634fd081d369b9e63b4522fe18eb9e43e9979d1ff264b68ad.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6e63957e4e0801705c6cb258379bfda0007ce6c3ddd2e3b62898b68455c3edf4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8d2b0b3c6988d1894694dcbbe708ef91cfe62d62dac317031f09915ced637953.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8d77975a2735d67a618407026e5325608ccd66f1b379a74faf35b4087db536f3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a5e2e9f707eb7039bea096ca117d996b8f9cbc2a5613fd8e0c5b0094444ce23c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b374bf2be1eac3cff5e0a8528a8e816e266ece911f714c123110961798a93a3b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d8a42e9df7157de9f28eecefdf178fd113bf2280d28471b6e32a8a45276042df.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e26750d9556e9741912333349e4da454c53dbfddbfc6002ab49518dcf02af745.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ed010102f476cd1a22b49be031a7f94f2ab3dd7ba8bf58839a771d46e28ff559.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ef42ec014d7bc373b874b2a1ff0dcd785490f125e913698bc049b0bd778e4d66.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fe3eb4d76c79ca98833f686d642224eeeb94cc83ad14300d281623796d087f0a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fe653ca0ca4297b415c0be4013574870d0465a657ae0f3d3f5b66ef6a831390c.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +1077 -405
- xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/globals/globals.json +163 -3
- xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/globals/package.json +1 -1
- xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/ignore/package.json +64 -0
- xinference/web/ui/node_modules/@eslint/eslintrc/package.json +18 -37
- xinference/web/ui/node_modules/@eslint/js/package.json +1 -1
- xinference/web/ui/node_modules/@eslint-community/regexpp/package.json +9 -4
- xinference/web/ui/node_modules/@humanwhocodes/config-array/package.json +14 -14
- xinference/web/ui/node_modules/@rushstack/eslint-patch/package.json +6 -4
- xinference/web/ui/node_modules/@types/semver/package.json +15 -15
- xinference/web/ui/node_modules/@ungap/structured-clone/cjs/package.json +1 -0
- xinference/web/ui/node_modules/@ungap/structured-clone/package.json +53 -0
- xinference/web/ui/node_modules/ansi-colors/package.json +129 -0
- xinference/web/ui/node_modules/array-includes/package.json +8 -8
- xinference/web/ui/node_modules/array.prototype.findlastindex/package.json +120 -0
- xinference/web/ui/node_modules/array.prototype.flat/package.json +8 -8
- xinference/web/ui/node_modules/array.prototype.flatmap/package.json +8 -8
- xinference/web/ui/node_modules/arraybuffer.prototype.slice/package.json +103 -0
- xinference/web/ui/node_modules/ast-types-flow/package.json +2 -2
- xinference/web/ui/node_modules/astral-regex/package.json +33 -0
- xinference/web/ui/node_modules/asynciterator.prototype/package.json +72 -0
- xinference/web/ui/node_modules/axe-core/locales/_template.json +0 -12
- xinference/web/ui/node_modules/axe-core/package.json +1 -2
- xinference/web/ui/node_modules/axe-core/sri-history.json +0 -8
- xinference/web/ui/node_modules/call-bind/package.json +33 -23
- xinference/web/ui/node_modules/define-data-property/package.json +113 -0
- xinference/web/ui/node_modules/define-data-property/tsconfig.json +59 -0
- xinference/web/ui/node_modules/define-properties/package.json +5 -4
- xinference/web/ui/node_modules/enquirer/package.json +112 -0
- xinference/web/ui/node_modules/es-abstract/helpers/caseFolding.json +1430 -0
- xinference/web/ui/node_modules/es-abstract/package.json +29 -23
- xinference/web/ui/node_modules/es-iterator-helpers/index.json +17 -0
- xinference/web/ui/node_modules/es-iterator-helpers/package.json +185 -0
- xinference/web/ui/node_modules/eslint/conf/{rule-type-list.json → category-list.json} +9 -6
- xinference/web/ui/node_modules/eslint/node_modules/@babel/code-frame/package.json +25 -0
- xinference/web/ui/node_modules/eslint/node_modules/eslint-visitor-keys/lib/visitor-keys.json +289 -0
- xinference/web/ui/node_modules/eslint/node_modules/eslint-visitor-keys/package.json +39 -0
- xinference/web/ui/node_modules/eslint/node_modules/glob-parent/package.json +48 -0
- xinference/web/ui/node_modules/eslint/node_modules/ignore/package.json +64 -0
- xinference/web/ui/node_modules/eslint/package.json +53 -82
- xinference/web/ui/node_modules/eslint-config-prettier/package.json +13 -0
- xinference/web/ui/node_modules/eslint-import-resolver-node/package.json +3 -3
- xinference/web/ui/node_modules/eslint-plugin-import/package.json +22 -17
- xinference/web/ui/node_modules/eslint-plugin-jsx-a11y/package.json +25 -24
- xinference/web/ui/node_modules/eslint-plugin-simple-import-sort/package.json +23 -0
- xinference/web/ui/node_modules/eslint-plugin-testing-library/package.json +1 -1
- xinference/web/ui/node_modules/eslint-scope/package.json +19 -34
- xinference/web/ui/node_modules/eslint-utils/node_modules/eslint-visitor-keys/lib/visitor-keys.json +284 -0
- xinference/web/ui/node_modules/eslint-utils/node_modules/eslint-visitor-keys/package.json +40 -0
- xinference/web/ui/node_modules/eslint-utils/package.json +65 -0
- xinference/web/ui/node_modules/eslint-visitor-keys/package.json +15 -15
- xinference/web/ui/node_modules/espree/node_modules/acorn/package.json +35 -0
- xinference/web/ui/node_modules/espree/node_modules/eslint-visitor-keys/lib/visitor-keys.json +284 -0
- xinference/web/ui/node_modules/espree/node_modules/eslint-visitor-keys/package.json +40 -0
- xinference/web/ui/node_modules/espree/package.json +27 -51
- xinference/web/ui/node_modules/function-bind/package.json +38 -14
- xinference/web/ui/node_modules/function.prototype.name/package.json +32 -13
- xinference/web/ui/node_modules/functional-red-black-tree/package.json +40 -0
- xinference/web/ui/node_modules/get-intrinsic/package.json +11 -11
- xinference/web/ui/node_modules/hasown/package.json +91 -0
- xinference/web/ui/node_modules/hasown/tsconfig.json +49 -0
- xinference/web/ui/node_modules/is-async-function/package.json +86 -0
- xinference/web/ui/node_modules/is-core-module/core.json +3 -3
- xinference/web/ui/node_modules/is-core-module/package.json +7 -7
- xinference/web/ui/node_modules/is-finalizationregistry/package.json +67 -0
- xinference/web/ui/node_modules/is-generator-function/package.json +87 -0
- xinference/web/ui/node_modules/is-typed-array/package.json +8 -10
- xinference/web/ui/node_modules/iterator.prototype/package.json +73 -0
- xinference/web/ui/node_modules/jsx-ast-utils/package.json +5 -5
- xinference/web/ui/node_modules/language-tags/package.json +48 -8
- xinference/web/ui/node_modules/lodash.truncate/package.json +17 -0
- xinference/web/ui/node_modules/object-inspect/package.json +8 -6
- xinference/web/ui/node_modules/object.entries/package.json +7 -7
- xinference/web/ui/node_modules/object.fromentries/package.json +7 -7
- xinference/web/ui/node_modules/object.groupby/package.json +83 -0
- xinference/web/ui/node_modules/object.values/package.json +7 -7
- xinference/web/ui/node_modules/prettier/package.json +21 -0
- xinference/web/ui/node_modules/progress/package.json +26 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/@eslint/eslintrc/package.json +82 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/@humanwhocodes/config-array/package.json +61 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/@humanwhocodes/object-schema/package.json +33 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/ansi-styles/package.json +56 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/chalk/package.json +68 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/color-convert/package.json +48 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/color-name/package.json +28 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/escape-string-regexp/package.json +38 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint/conf/replacements.json +22 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint/conf/rule-type-list.json +28 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint/package.json +179 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint-scope/package.json +63 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/espree/package.json +88 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/globals/globals.json +1974 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/globals/package.json +55 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/has-flag/package.json +46 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/supports-color/package.json +53 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/type-fest/package.json +58 -0
- xinference/web/ui/node_modules/reflect.getprototypeof/package.json +99 -0
- xinference/web/ui/node_modules/regexp.prototype.flags/package.json +8 -7
- xinference/web/ui/node_modules/regexpp/package.json +91 -0
- xinference/web/ui/node_modules/resolve/lib/core.json +4 -1
- xinference/web/ui/node_modules/resolve/package.json +9 -8
- xinference/web/ui/node_modules/resolve/test/resolver/multirepo/package.json +1 -1
- xinference/web/ui/node_modules/safe-array-concat/package.json +5 -5
- xinference/web/ui/node_modules/set-function-length/package.json +84 -0
- xinference/web/ui/node_modules/set-function-name/package.json +80 -0
- xinference/web/ui/node_modules/slice-ansi/node_modules/ansi-styles/package.json +56 -0
- xinference/web/ui/node_modules/slice-ansi/node_modules/color-convert/package.json +48 -0
- xinference/web/ui/node_modules/slice-ansi/node_modules/color-name/package.json +28 -0
- xinference/web/ui/node_modules/slice-ansi/package.json +52 -0
- xinference/web/ui/node_modules/string.prototype.trim/package.json +7 -7
- xinference/web/ui/node_modules/string.prototype.trimend/package.json +7 -7
- xinference/web/ui/node_modules/string.prototype.trimstart/package.json +7 -7
- xinference/web/ui/node_modules/table/dist/src/schemas/config.json +95 -0
- xinference/web/ui/node_modules/table/dist/src/schemas/shared.json +139 -0
- xinference/web/ui/node_modules/table/dist/src/schemas/streamConfig.json +25 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/data.json +13 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/applicator.json +53 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/core.json +57 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/format.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/schema.json +39 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/applicator.json +48 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/core.json +51 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/format-annotation.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/unevaluated.json +15 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/schema.json +55 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-draft-06.json +137 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-draft-07.json +151 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-secure.json +88 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/data.json +13 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/applicator.json +53 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/core.json +57 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/format.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/schema.json +39 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/applicator.json +48 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/core.json +51 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/format-annotation.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/unevaluated.json +15 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/schema.json +55 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-draft-06.json +137 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-draft-07.json +151 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-secure.json +88 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/package.json +126 -0
- xinference/web/ui/node_modules/table/node_modules/json-schema-traverse/package.json +43 -0
- xinference/web/ui/node_modules/table/package.json +77 -0
- xinference/web/ui/node_modules/typed-array-buffer/package.json +73 -0
- xinference/web/ui/node_modules/typed-array-byte-length/package.json +98 -0
- xinference/web/ui/node_modules/v8-compile-cache/package.json +34 -0
- xinference/web/ui/node_modules/which-builtin-type/package.json +93 -0
- xinference/web/ui/node_modules/which-typed-array/package.json +4 -5
- xinference/web/ui/package-lock.json +1085 -406
- xinference/web/ui/package.json +10 -2
- {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/METADATA +46 -35
- {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/RECORD +217 -110
- xinference/web/ui/build/static/js/main.9b46963e.js +0 -3
- xinference/web/ui/build/static/js/main.9b46963e.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/06363becf51869c421a8b3e34b4e3f50aa0aac3d590446044f9412e379f4ebbe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2849edddeb99a8ecdda577e810eead74b8f8a291cdfbd987839d604666ed79d0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2c774712d327cdf0b192aaa22785ec380e9427c587350c33289828d99e9c4abc.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/34c578e50d3040519ca8dc28bf0f7fec8674c2d6c0fcc3e98401c0a3f9f013cf.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5933910e7c33febbabc0297ef7ba80f5e53ed96aa125b6a44ff2910aec29ced1.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5e18a8354ea03d22a967fd8cb2171aa798edcb3da5d66ab1fd3b9663affd0abe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/717cd7c186ace4812d1e602bdd299d8dc507f072670cc43974d53aac2574df5d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a178cfde289ffd15fd54b1c80fd9d231ae0f9644db33acb02084e69b32bfee37.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/aad919619ddb400e30bf9886f3ad7f59d5ec7cdbb98ed8b8442565bb19164237.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/adaec65f73accce3171b51b0fbcbfd8d0cd83f81a2e1b28eb34148644875499a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ae8f44c77c2e6f79680fe32fb00174183cd867093ebbda967b8985c33cc10fa2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b10bd04b4d6e28bfcaaaab37b0a4c1986e87a5b7e62e5ce4d56019880ef26990.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/cfc5da1cedee985a556e04865affccb72d0f624cbfb73da348bbe8693e8a4983.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/eebd0123c4b4396737e56b9181406a9fd76b107dd32971d23b0de99f51dd38d6.json +0 -1
- xinference/web/ui/node_modules/@nicolo-ribaudo/eslint-scope-5-internals/node_modules/eslint-scope/package.json +0 -48
- xinference/web/ui/node_modules/@typescript-eslint/utils/node_modules/eslint-scope/package.json +0 -48
- xinference/web/ui/node_modules/@typescript-eslint/utils/node_modules/estraverse/package.json +0 -40
- xinference/web/ui/node_modules/eslint/node_modules/argparse/package.json +0 -31
- xinference/web/ui/node_modules/eslint/node_modules/js-yaml/package.json +0 -66
- xinference/web/ui/node_modules/eslint-plugin-jsx-a11y/node_modules/semver/package.json +0 -38
- xinference/web/ui/node_modules/function-bind/.jscs.json +0 -176
- xinference/web/ui/node_modules/resolve/test/resolver/malformed_package_json/package.json +0 -1
- xinference/web/ui/node_modules/webpack/node_modules/eslint-scope/package.json +0 -48
- xinference/web/ui/node_modules/webpack/node_modules/estraverse/package.json +0 -40
- /xinference/web/ui/build/static/js/{main.9b46963e.js.LICENSE.txt → main.778615cc.js.LICENSE.txt} +0 -0
- /xinference/web/ui/node_modules/{@nicolo-ribaudo/eslint-scope-5-internals → eslint-scope}/node_modules/estraverse/package.json +0 -0
- /xinference/web/ui/node_modules/{@eslint/eslintrc → react-scripts}/node_modules/argparse/package.json +0 -0
- /xinference/web/ui/node_modules/{eslint → react-scripts/node_modules/eslint}/lib/cli-engine/formatters/formatters-meta.json +0 -0
- /xinference/web/ui/node_modules/{eslint-config-react-app → react-scripts/node_modules/eslint-config-react-app}/package.json +0 -0
- /xinference/web/ui/node_modules/{eslint-plugin-flowtype → react-scripts/node_modules/eslint-plugin-flowtype}/dist/configs/recommended.json +0 -0
- /xinference/web/ui/node_modules/{eslint-plugin-flowtype → react-scripts/node_modules/eslint-plugin-flowtype}/package.json +0 -0
- /xinference/web/ui/node_modules/{@eslint/eslintrc → react-scripts}/node_modules/js-yaml/package.json +0 -0
- {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/LICENSE +0 -0
- {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/WHEEL +0 -0
- {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.6.5.dist-info → xinference-0.7.1.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2023-12-
+ "date": "2023-12-12T19:35:36+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "
- "version": "0.
+ "full-revisionid": "91f5f13c3914e1943977c80281ce485e8e3502cf",
+ "version": "0.7.1"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py
CHANGED
@@ -158,6 +158,9 @@ class RESTfulAPI:
         self._router.add_api_route(
             "/v1/models/prompts", self._get_builtin_prompts, methods=["GET"]
         )
+        self._router.add_api_route(
+            "/v1/cluster/devices", self._get_devices_count, methods=["GET"]
+        )
         self._router.add_api_route(
             "/v1/models/{model_uid}", self.describe_model, methods=["GET"]
         )
@@ -255,9 +258,6 @@ class RESTfulAPI:
                 f"{pprint.pformat(invalid_routes)}"
             )
 
-        for tp in [CreateChatCompletion, CreateCompletion]:
-            logger.debug("Dump request model fields:\n%s", tp.__fields__)
-
         class SPAStaticFiles(StaticFiles):
             async def get_response(self, path: str, scope):
                 response = await super().get_response(path, scope)
@@ -310,6 +310,17 @@ class RESTfulAPI:
             logger.error(e, exc_info=True)
             raise HTTPException(status_code=500, detail=str(e))
 
+    async def _get_devices_count(self) -> JSONResponse:
+        """
+        For internal usage
+        """
+        try:
+            data = await (await self._get_supervisor_ref()).get_devices_count()
+            return JSONResponse(content=data)
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            raise HTTPException(status_code=500, detail=str(e))
+
     async def get_status(self) -> JSONResponse:
         try:
             data = await (await self._get_supervisor_ref()).get_status()
@@ -707,7 +718,10 @@ class RESTfulAPI:
 
         if (
             not body.messages
-            or
+            or (
+                body.messages[-1].get("role") != "user"
+                and body.messages[-1].get("role") != "system"
+            )
             or not body.messages[-1].get("content")
         ):
             raise HTTPException(
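The restful_api.py hunks above expose the supervisor's device count through a new internal GET endpoint, /v1/cluster/devices, and tighten chat validation so the last message must carry a user or system role. A minimal client sketch for the new endpoint (the host and port below are placeholders for a running Xinference server, not values from the diff):

    import requests  # third-party HTTP client, assumed to be installed

    def get_cluster_device_count(base_url: str = "http://127.0.0.1:9997") -> int:
        # The handler wraps the integer returned by the supervisor in a JSONResponse,
        # so the body decodes to a plain JSON number.
        resp = requests.get(f"{base_url}/v1/cluster/devices", timeout=10)
        resp.raise_for_status()
        return int(resp.json())

    if __name__ == "__main__":
        print(get_cluster_device_count())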
xinference/core/model.py
CHANGED
@@ -14,6 +14,7 @@
 
 import asyncio
 import inspect
+import os
 import uuid
 from typing import (
     TYPE_CHECKING,
@@ -44,6 +45,15 @@ from .utils import json_dumps, log_async
 
 T = TypeVar("T")
 
+try:
+    from torch.cuda import OutOfMemoryError
+except ImportError:
+
+    class _OutOfMemoryError(Exception):
+        pass
+
+    OutOfMemoryError = _OutOfMemoryError
+
 
 def request_limit(fn):
     """
@@ -192,18 +202,30 @@ class ModelActor(xo.StatelessActor):
         return ret
 
     async def _call_wrapper(self, _wrapper: Callable):
-
-
-
-
-
-
-        else:
-            async with self._lock:
+        try:
+            assert not (
+                inspect.iscoroutinefunction(_wrapper)
+                or inspect.isasyncgenfunction(_wrapper)
+            )
+            if self._lock is None:
                 return await asyncio.to_thread(_wrapper)
+            else:
+                async with self._lock:
+                    return await asyncio.to_thread(_wrapper)
+        except OutOfMemoryError:
+            logger.exception(
+                "Model actor is out of memory, model id: %s", self.model_uid()
+            )
+            os._exit(1)
 
     async def _call_async_wrapper(self, _wrapper: Callable):
-
+        try:
+            return await asyncio.create_task(_wrapper())
+        except OutOfMemoryError:
+            logger.exception(
+                "Model actor is out of memory, model id: %s", self.model_uid()
+            )
+            os._exit(1)
 
     @log_async(logger=logger)
     @request_limit
@@ -365,7 +387,8 @@ class ModelActor(xo.StatelessActor):
 
         async def _async_wrapper():
             try:
-
+                # anext is only available for Python >= 3.10
+                return await gen.__anext__()  # noqa: F821
             except StopAsyncIteration:
                 return stop
 
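The model.py hunks wrap every model call in a handler for torch.cuda.OutOfMemoryError and terminate the subprocess with os._exit(1) on OOM so the worker can detect the crash and recover. The import fallback at the top keeps the except clause valid even when torch is absent; a self-contained sketch of that pattern:

    # Import-fallback sketch: if torch is missing, substitute a private Exception
    # subclass so `except OutOfMemoryError` below never raises NameError.
    try:
        from torch.cuda import OutOfMemoryError
    except ImportError:

        class _OutOfMemoryError(Exception):
            pass

        OutOfMemoryError = _OutOfMemoryError


    def risky_call():
        # Stand-in for a model invocation that runs out of GPU memory.
        raise OutOfMemoryError("simulated OOM")


    try:
        risky_call()
    except OutOfMemoryError as err:
        # The actor logs here and calls os._exit(1); printing keeps the sketch harmless.
        print(f"caught: {err}")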
xinference/core/supervisor.py
CHANGED
@@ -106,6 +106,16 @@ class SupervisorActor(xo.StatelessActor):
             data[k] = v.dict()
         return data
 
+    async def get_devices_count(self) -> int:
+        from ..utils import cuda_count
+
+        if self.is_local_deployment():
+            return cuda_count()
+        # distributed deployment, choose a worker and return its cuda_count.
+        # Assume that each worker has the same count of cards.
+        worker_ref = await self._choose_worker()
+        return await worker_ref.get_devices_count()
+
     async def _choose_worker(self) -> xo.ActorRefType["WorkerActor"]:
         # TODO: better allocation strategy.
         min_running_model_count = None
xinference/core/worker.py
CHANGED
@@ -48,6 +48,7 @@ class WorkerActor(xo.StatelessActor):
         self._supervisor_address = supervisor_address
         self._supervisor_ref = None
         self._main_pool = main_pool
+        self._main_pool.recover_sub_pool = self.recover_sub_pool
 
         # internal states.
         self._model_uid_to_model: Dict[str, xo.ActorRefType["ModelActor"]] = {}
@@ -55,9 +56,22 @@
         self._gpu_to_model_uid: Dict[int, str] = {}
         self._gpu_to_embedding_model_uids: Dict[int, Set[str]] = defaultdict(set)
         self._model_uid_to_addr: Dict[str, str] = {}
+        self._model_uid_to_launch_args: Dict[str, Dict] = {}
 
         self._lock = asyncio.Lock()
 
+    async def recover_sub_pool(self, address):
+        logger.warning("Process %s is down, create model.", address)
+        for model_uid, addr in self._model_uid_to_addr.items():
+            if addr == address:
+                launch_args = self._model_uid_to_launch_args.get(model_uid)
+                try:
+                    await self.terminate_model(model_uid)
+                except Exception:
+                    pass
+                await self.launch_builtin_model(**launch_args)
+                break
+
     @classmethod
     def uid(cls) -> str:
         return "worker"
@@ -94,6 +108,12 @@
     async def __pre_destroy__(self):
         self._upload_task.cancel()
 
+    @staticmethod
+    def get_devices_count():
+        from ..utils import cuda_count
+
+        return cuda_count()
+
     @log_sync(logger=logger)
     def get_model_count(self) -> int:
         return len(self._model_uid_to_model)
@@ -175,7 +195,7 @@
         gpu_cnt = n_gpu if isinstance(n_gpu, int) else 1
         devices = (
             [await self.allocate_devices_for_embedding(model_uid)]
-            if model_type
+            if model_type in ["embedding", "rerank"]
             else self.allocate_devices(model_uid=model_uid, n_gpu=gpu_cnt)
         )
         env["CUDA_VISIBLE_DEVICES"] = ",".join([str(dev) for dev in devices])
@@ -278,7 +298,6 @@
         for dev in devices:
             self._gpu_to_model_uid[int(dev)] = model_uid
         self._model_uid_to_addr[model_uid] = subpool_address
-        return model_ref
 
     @log_async(logger=logger)
     async def launch_builtin_model(
@@ -292,7 +311,9 @@
         n_gpu: Optional[Union[int, str]] = "auto",
         request_limits: Optional[int] = None,
         **kwargs,
-    )
+    ):
+        launch_args = locals()
+        launch_args.pop("self")
         if n_gpu is not None:
             if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu > cuda_count()):
                 raise ValueError(
@@ -343,7 +364,7 @@
         self._model_uid_to_model[model_uid] = model_ref
         self._model_uid_to_model_spec[model_uid] = model_description
         self._model_uid_to_addr[model_uid] = subpool_address
-
+        self._model_uid_to_launch_args[model_uid] = launch_args
 
     @log_async(logger=logger)
     async def terminate_model(self, model_uid: str):
@@ -351,15 +372,21 @@
         if model_ref is None:
             raise ValueError(f"Model not found in the model list, uid: {model_uid}")
 
-
-
-
-
-
-
-
-
-
+        try:
+            await xo.destroy_actor(model_ref)
+        except Exception as e:
+            logger.debug(
+                "Destroy model actor failed, model uid: %s, error: %s", model_uid, e
+            )
+        try:
+            subpool_address = self._model_uid_to_addr[model_uid]
+            await self._main_pool.remove_sub_pool(subpool_address)
+        finally:
+            del self._model_uid_to_model[model_uid]
+            del self._model_uid_to_model_spec[model_uid]
+            self.release_devices(model_uid)
+            del self._model_uid_to_addr[model_uid]
+            del self._model_uid_to_launch_args[model_uid]
 
     @log_async(logger=logger)
     async def list_models(self) -> Dict[str, Dict[str, Any]]:
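The worker.py hunks add crash recovery: launch_builtin_model records its own arguments via locals() (minus self), and recover_sub_pool replays them when a model subprocess address goes down; terminate_model now also removes the sub-pool and clears all bookkeeping maps. A toy illustration of the capture-and-replay idea (the names below are illustrative, not from the diff):

    _launch_args_by_uid = {}  # model_uid -> arguments used for the original launch

    def launch(model_uid, model_name, n_gpu="auto", **kwargs):
        # Snapshot the call's arguments before doing any real work.
        args = dict(locals())
        args.pop("kwargs")
        args.update(kwargs)
        _launch_args_by_uid[model_uid] = args
        print(f"launched {model_uid} with {args}")

    def recover(model_uid):
        # Replay the recorded arguments to relaunch an identical model.
        launch(**_launch_args_by_uid[model_uid])

    launch("m1", "llama-2-chat", n_gpu=1, quantization="q4_0")
    recover("m1")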
xinference/deploy/utils.py
CHANGED
@@ -60,7 +60,9 @@ def get_config_dict(
         "disable_existing_loggers": False,
         "formatters": {
             "formatter": {
-                "format":
+                "format": (
+                    "%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s"
+                )
             },
         },
         "filters": {
@@ -110,6 +112,7 @@ async def create_worker_actor_pool(
     return await xo.create_actor_pool(
         address=address,
         n_process=0,
+        auto_recover="process",
         subprocess_start_method=subprocess_start_method,
         logging_conf={"dict": logging_conf},
     )
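In deploy/utils.py the log format string is only re-wrapped for readability, while auto_recover="process" tells the worker actor pool to restart crashed subprocesses, which is what recover_sub_pool above hooks into. The formatter itself is plain logging.config material; a runnable sketch (the handler and logger names here are placeholders):

    import logging
    import logging.config

    LOGGING_CONF = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "formatter": {
                # Same format string as in the diff above.
                "format": "%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s"
            },
        },
        "handlers": {
            "console": {"class": "logging.StreamHandler", "formatter": "formatter"},
        },
        "root": {"handlers": ["console"], "level": "INFO"},
    }

    logging.config.dictConfig(LOGGING_CONF)
    logging.getLogger("demo").info("formatter demo")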
xinference/model/embedding/__init__.py
CHANGED
@@ -16,7 +16,7 @@ import codecs
 import json
 import os
 
-from .core import EmbeddingModelSpec, get_cache_status
+from .core import MODEL_NAME_TO_REVISION, EmbeddingModelSpec, get_cache_status
 from .custom import CustomEmbeddingModelSpec, register_embedding, unregister_embedding
 
 _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
@@ -27,12 +27,16 @@ BUILTIN_EMBEDDING_MODELS = dict(
     (spec["model_name"], EmbeddingModelSpec(**spec))
     for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
 )
+for model_name, model_spec in BUILTIN_EMBEDDING_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 MODELSCOPE_EMBEDDING_MODELS = dict(
     (spec["model_name"], EmbeddingModelSpec(**spec))
     for spec in json.load(
         codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
     )
 )
+for model_name, model_spec in MODELSCOPE_EMBEDDING_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 
 from ...constants import XINFERENCE_MODEL_DIR
 
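Both embedding spec tables now register every known revision of a model name in MODEL_NAME_TO_REVISION, so a copy cached from either Hugging Face or ModelScope counts as cached. A reduced sketch of the registry pattern with made-up spec values:

    from collections import defaultdict
    from dataclasses import dataclass
    from typing import Dict, List

    MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)

    @dataclass
    class Spec:
        model_name: str
        model_revision: str

    BUILTIN = {"bge-base-en": Spec("bge-base-en", "rev-hf-1")}      # hypothetical values
    MODELSCOPE = {"bge-base-en": Spec("bge-base-en", "rev-ms-1")}   # hypothetical values

    for name, spec in BUILTIN.items():
        MODEL_NAME_TO_REVISION[name].append(spec.model_revision)
    for name, spec in MODELSCOPE.items():
        MODEL_NAME_TO_REVISION[name].append(spec.model_revision)

    # A cached copy matching either revision is now a cache hit.
    print(MODEL_NAME_TO_REVISION["bge-base-en"])  # ['rev-hf-1', 'rev-ms-1']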
xinference/model/embedding/core.py
CHANGED
@@ -15,7 +15,8 @@
 import logging
 import os
 import shutil
-from
+from collections import defaultdict
+from typing import Dict, List, Optional, Tuple, Union, no_type_check
 
 import numpy as np
 from pydantic import BaseModel
@@ -23,11 +24,14 @@ from pydantic import BaseModel
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import Embedding, EmbeddingData, EmbeddingUsage
 from ..core import ModelDescription
-from ..utils import valid_model_revision
+from ..utils import is_model_cached, valid_model_revision
 
 logger = logging.getLogger(__name__)
 
 SUPPORTED_SCHEMES = ["s3"]
+# Used for check whether the model is cached.
+# Init when registering all the builtin models.
+MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
 
 
 class EmbeddingModelSpec(BaseModel):
@@ -195,11 +199,7 @@ def cache(model_spec: EmbeddingModelSpec):
 def get_cache_status(
     model_spec: EmbeddingModelSpec,
 ) -> bool:
-
-        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-    )
-    meta_path = os.path.join(cache_dir, "__valid_download")
-    return valid_model_revision(meta_path, model_spec.model_revision)
+    return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
 
 
 class EmbeddingModel:
@@ -220,6 +220,9 @@ class EmbeddingModel:
             ]
 
             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+        from ..utils import patch_trust_remote_code
+
+        patch_trust_remote_code()
         self._model = SentenceTransformer(self._model_path, device=self._device)
 
     def create_embedding(self, sentences: Union[str, List[str]], **kwargs):
xinference/model/llm/ggml/chatglm.py
CHANGED
@@ -134,9 +134,9 @@ class ChatglmCppChatModel(LLM):
                 {
                     "index": 0,
                     "delta": {
-                        "content":
-
-
+                        "content": (
+                            token if isinstance(token, str) else token.content
+                        ),
                     },
                     "finish_reason": None,
                 }
@@ -223,8 +223,10 @@ class ChatglmCppChatModel(LLM):
             chatglm_tools.append(elem["function"])
         return {
             "role": "system",
-            "content":
-
+            "content": (
+                f"Answer the following questions as best as you can. You have access to the following tools:\n"
+                f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}"
+            ),
         }
 
     def chat(
xinference/model/llm/llm_family.py
CHANGED
@@ -588,31 +588,57 @@ def cache_from_huggingface(
     return cache_dir
 
 
+def _check_revision(
+    llm_family: LLMFamilyV1,
+    llm_spec: "LLMSpecV1",
+    builtin: list,
+    meta_path: str,
+) -> bool:
+    for family in builtin:
+        if llm_family.model_name == family.model_name:
+            specs = family.model_specs
+            for spec in specs:
+                if (
+                    spec.model_format == "pytorch"
+                    and spec.model_size_in_billions == llm_spec.model_size_in_billions
+                ):
+                    return valid_model_revision(meta_path, spec.model_revision)
+    return False
+
+
 def get_cache_status(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
 ) -> Union[bool, List[bool]]:
+    """
+    When calling this function from above, `llm_family` is constructed only from BUILTIN_LLM_FAMILIES,
+    so we should check both huggingface and modelscope cache files.
+    """
     cache_dir = _get_cache_dir(llm_family, llm_spec, create_if_not_exist=False)
+    # check revision for pytorch model
     if llm_spec.model_format == "pytorch":
-
-
-
-                llm_spec
-
-
-
+        hf_meta_path = _get_meta_path(cache_dir, "pytorch", "huggingface", "none")
+        ms_meta_path = _get_meta_path(cache_dir, "pytorch", "modelscope", "none")
+        revisions = [
+            _check_revision(llm_family, llm_spec, BUILTIN_LLM_FAMILIES, hf_meta_path),
+            _check_revision(
+                llm_family, llm_spec, BUILTIN_MODELSCOPE_LLM_FAMILIES, ms_meta_path
+            ),
+        ]
+        return any(revisions)
+    # just check meta file for ggml and gptq model
     elif llm_spec.model_format in ["ggmlv3", "ggufv2", "gptq"]:
         ret = []
         for q in llm_spec.quantizations:
-
-
-
-
-
-
-                q,
-            )
+            assert q is not None
+            hf_meta_path = _get_meta_path(
+                cache_dir, llm_spec.model_format, "huggingface", q
+            )
+            ms_meta_path = _get_meta_path(
+                cache_dir, llm_spec.model_format, "modelscope", q
             )
+            results = [os.path.exists(hf_meta_path), os.path.exists(ms_meta_path)]
+            ret.append(any(results))
         return ret
     else:
         raise ValueError(f"Unsupported model format: {llm_spec.model_format}")
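The llm_family.py change makes get_cache_status treat a pytorch model as cached when either the Hugging Face or the ModelScope meta file validates, and likewise checks both meta files per quantization for ggml/gguf/gptq. A stripped-down sketch of the "any source counts" check; the meta file names and JSON layout below are placeholders, not the real _get_meta_path output:

    import json
    import os

    def _revision_matches(meta_path: str, expected_revision: str) -> bool:
        # Loosely mirrors valid_model_revision: the meta file must exist and
        # record the expected revision.
        if not os.path.exists(meta_path):
            return False
        with open(meta_path) as f:
            return json.load(f).get("revision") == expected_revision

    def is_cached(cache_dir: str, expected_revision: str) -> bool:
        candidates = [
            os.path.join(cache_dir, "__valid_download_huggingface"),  # hypothetical name
            os.path.join(cache_dir, "__valid_download_modelscope"),   # hypothetical name
        ]
        # Cached as soon as any hub's meta file checks out.
        return any(_revision_matches(p, expected_revision) for p in candidates)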
xinference/model/llm/pytorch/core.py
CHANGED
@@ -345,6 +345,7 @@ class PytorchModel(LLM):
         inputs = input
 
         tokenizer = self._tokenizer
+        tokenizer.pad_token = tokenizer.eos_token
         is_llama = "llama" in str(type(self._model))  # llama supports batch inference
         is_chatglm = "chatglm" in str(type(self._model))
         if is_llama:
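The one-line pytorch/core.py change sets the tokenizer's pad token to its EOS token, since many causal LLM tokenizers ship without a pad token and batched inference needs one. A hedged transformers sketch (gpt2 is just an example model, downloaded on first use):

    from transformers import AutoTokenizer  # assumes `transformers` is installed

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # gpt2 defines no pad token
    if tokenizer.pad_token is None:
        # Reuse EOS as padding so padded batch encoding works.
        tokenizer.pad_token = tokenizer.eos_token

    batch = tokenizer(["hello", "a much longer prompt"], padding=True)
    print(batch["input_ids"])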
xinference/model/llm/pytorch/utils.py
CHANGED
@@ -259,6 +259,7 @@ def generate_stream(
                 raise ValueError("Invalid stop field type.")
 
         if stream:
+            output = output.strip("�")
             tmp_output_length = len(output)
             output = output[last_output_length:]
             last_output_length = tmp_output_length
@@ -424,6 +425,7 @@ def generate_stream_falcon(
                 raise ValueError("Invalid stop field type.")
 
         if stream:
+            output = output.strip("�")
             tmp_output_length = len(output)
             output = output[last_output_length:]
             last_output_length = tmp_output_length
@@ -552,6 +554,7 @@ def generate_stream_chatglm(
         response = process_response(response)
 
         if stream:
+            response = response.strip("�")
             tmp_response_length = len(response)
             response = response[last_response_length:]
             last_response_length = tmp_response_length
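All three streaming paths in pytorch/utils.py now strip trailing U+FFFD from the partially decoded text, which shows up when a multi-byte UTF-8 character is split across decode steps; stripping it keeps the replacement character out of streamed chunks until the full character arrives. The effect in the tokenizer-based loop is analogous to this byte-level illustration:

    data = "你好".encode("utf-8")  # 6 bytes, 3 per character

    # Decode a prefix that cuts the second character in half.
    partial = data[:4].decode("utf-8", errors="replace")
    print(repr(partial))             # '你\ufffd', trailing replacement character
    print(repr(partial.strip("�")))  # '你', what the stream would emit for now

    # Once the remaining bytes arrive, the text decodes cleanly.
    print(repr(data.decode("utf-8")))  # '你好'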
xinference/model/llm/vllm/core.py
CHANGED
@@ -79,6 +79,10 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "internlm-chat-20b",
     "qwen-chat",
     "Yi",
+    "Yi-chat",
+    "code-llama",
+    "code-llama-python",
+    "code-llama-instruct",
     "mistral-instruct-v0.1",
     "chatglm3",
 ]
@@ -319,7 +323,9 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         if not generate_config:
             generate_config = {}
         if self.model_family.prompt_style:
-            if (
+            if (
+                not generate_config.get("stop")
+            ) and self.model_family.prompt_style.stop:
                 generate_config["stop"] = self.model_family.prompt_style.stop.copy()
             if self.model_family.prompt_style.stop_token_ids:
                 generate_config.setdefault(
@@ -343,7 +349,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         full_prompt = self.get_prompt(prompt, chat_history, prompt_style)
 
         sanitized = self._sanitize_chat_config(generate_config)
-        stream = sanitized
+        stream = sanitized.get("stream", None)
 
         if stream:
             agen = await self.async_generate(full_prompt, sanitized)
xinference/model/rerank/__init__.py
CHANGED
@@ -16,7 +16,7 @@ import codecs
 import json
 import os
 
-from .core import RerankModelSpec, get_cache_status
+from .core import MODEL_NAME_TO_REVISION, RerankModelSpec, get_cache_status
 
 _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
 _model_spec_modelscope_json = os.path.join(
@@ -26,11 +26,15 @@ BUILTIN_RERANK_MODELS = dict(
     (spec["model_name"], RerankModelSpec(**spec))
     for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
 )
+for model_name, model_spec in BUILTIN_RERANK_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 MODELSCOPE_RERANK_MODELS = dict(
     (spec["model_name"], RerankModelSpec(**spec))
     for spec in json.load(
         codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
     )
 )
+for model_name, model_spec in MODELSCOPE_RERANK_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 del _model_spec_json
 del _model_spec_modelscope_json
xinference/model/rerank/core.py
CHANGED
@@ -15,6 +15,7 @@
 import logging
 import os
 import uuid
+from collections import defaultdict
 from typing import Dict, List, Optional, Tuple
 
 import numpy as np
@@ -23,10 +24,14 @@ from pydantic import BaseModel
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import Document, DocumentObj, Rerank
 from ..core import ModelDescription
-from ..utils import valid_model_revision
+from ..utils import is_model_cached, valid_model_revision
 
 logger = logging.getLogger(__name__)
 
+# Used for check whether the model is cached.
+# Init when registering all the builtin models.
+MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
+
 
 class RerankModelSpec(BaseModel):
     model_name: str
@@ -126,11 +131,7 @@ class RerankModel:
 def get_cache_status(
     model_spec: RerankModelSpec,
 ) -> bool:
-
-        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-    )
-    meta_path = os.path.join(cache_dir, "__valid_download")
-    return valid_model_revision(meta_path, model_spec.model_revision)
+    return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
 
 
 def cache(model_spec: RerankModelSpec):
xinference/model/utils.py
CHANGED
@@ -16,11 +16,11 @@ import logging
 import os
 from json import JSONDecodeError
 from pathlib import Path
-from typing import Callable, Dict, Optional, Tuple
+from typing import Any, Callable, Dict, Optional, Tuple
 
 from fsspec import AbstractFileSystem
 
-from ..constants import XINFERENCE_ENV_MODEL_SRC
+from ..constants import XINFERENCE_CACHE_DIR, XINFERENCE_ENV_MODEL_SRC
 
 logger = logging.getLogger(__name__)
 MAX_ATTEMPTS = 3
@@ -132,6 +132,17 @@ def valid_model_revision(
     return real_revision == expected_model_revision
 
 
+def is_model_cached(model_spec: Any, name_to_revisions_mapping: Dict):
+    cache_dir = os.path.realpath(
+        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
+    )
+    meta_path = os.path.join(cache_dir, "__valid_download")
+    revisions = name_to_revisions_mapping[model_spec.model_name]
+    if model_spec.model_revision not in revisions:  # Usually for UT
+        revisions.append(model_spec.model_revision)
+    return any([valid_model_revision(meta_path, revision) for revision in revisions])
+
+
 def is_valid_model_name(model_name: str) -> bool:
     import re
 
@@ -211,3 +222,28 @@ def copy_from_src_to_dst(
         )
         if attempt + 1 == max_attempt:
             raise
+
+
+def patch_trust_remote_code():
+    """sentence-transformers calls transformers without the trust_remote_code=True, some embedding
+    models will fail to load, e.g. jina-embeddings-v2-base-en
+
+    :return:
+    """
+    try:
+        from transformers.dynamic_module_utils import resolve_trust_remote_code
+    except ImportError:
+        logger.error("Patch transformers trust_remote_code failed.")
+    else:
+
+        def _patched_resolve_trust_remote_code(*args, **kwargs):
+            logger.info("Patched resolve_trust_remote_code: %s %s", args, kwargs)
+            return True
+
+        if (
+            resolve_trust_remote_code.__code__
+            != _patched_resolve_trust_remote_code.__code__
+        ):
+            resolve_trust_remote_code.__code__ = (
+                _patched_resolve_trust_remote_code.__code__
+            )
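patch_trust_remote_code works by swapping the __code__ object of transformers' resolve_trust_remote_code for one that always returns True, so every dynamic-module load behaves as if trust_remote_code=True had been passed. A toy demonstration of the same __code__ swap on a local function (no transformers involved):

    def resolve(flag, default):
        # Stand-in for the library function being patched.
        return flag if flag is not None else default

    def _patched(*args, **kwargs):
        # Ignore the arguments and always allow.
        return True

    print(resolve(None, False))  # False: original behaviour

    if resolve.__code__ != _patched.__code__:
        # Rebinding __code__ changes the function body in place, so every existing
        # reference to `resolve` picks up the patched behaviour.
        resolve.__code__ = _patched.__code__

    print(resolve(None, False))  # True: patched behaviour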
xinference/types.py
CHANGED
@@ -289,7 +289,7 @@ def get_pydantic_model_from_method(
             model.__fields__.pop(key)
     if exclude_fields is not None:
         for key in exclude_fields:
-            model.__fields__.pop(key)
+            model.__fields__.pop(key, None)
     if include_fields is not None:
         dummy_model = create_model("DummyModel", **include_fields)
         model.__fields__.update(dummy_model.__fields__)
@@ -307,10 +307,10 @@ def fix_forward_ref(model):
         if isinstance(field.annotation, ForwardRef):
             exclude_fields.append(key)
             include_fields[key] = (Optional[Any], None)
-    if exclude_fields
+    if exclude_fields:
         for key in exclude_fields:
-            model.__fields__.pop(key)
-    if include_fields
+            model.__fields__.pop(key, None)
+    if include_fields:
         dummy_model = create_model("DummyModel", **include_fields)
         model.__fields__.update(dummy_model.__fields__)
     return model
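Both pop calls in types.py now pass a default, so removing a field that is absent (for example one already popped by an earlier pass) no longer raises KeyError. The underlying dict behaviour in one small example:

    fields = {"temperature": 0.7, "stop": None}

    fields.pop("stop")               # key exists: removed normally
    print(fields.pop("stop", None))  # already gone: returns the default instead of raising
    try:
        fields.pop("stop")
    except KeyError:
        print("pop() without a default raises KeyError")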