xinference 0.6.4__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +49 -62
- xinference/client/restful/restful_client.py +10 -1
- xinference/conftest.py +2 -2
- xinference/constants.py +10 -0
- xinference/core/model.py +33 -10
- xinference/core/resource.py +12 -11
- xinference/core/supervisor.py +22 -2
- xinference/core/worker.py +44 -16
- xinference/deploy/cmdline.py +19 -9
- xinference/deploy/local.py +9 -1
- xinference/deploy/supervisor.py +16 -3
- xinference/deploy/utils.py +1 -0
- xinference/deploy/worker.py +1 -1
- xinference/model/embedding/__init__.py +10 -0
- xinference/model/embedding/core.py +3 -0
- xinference/model/embedding/custom.py +5 -4
- xinference/model/embedding/model_spec.json +16 -0
- xinference/model/embedding/model_spec_modelscope.json +16 -0
- xinference/model/llm/__init__.py +22 -2
- xinference/model/llm/core.py +2 -2
- xinference/model/llm/ggml/chatglm.py +79 -15
- xinference/model/llm/ggml/llamacpp.py +2 -2
- xinference/model/llm/llm_family.json +99 -4
- xinference/model/llm/llm_family.py +54 -8
- xinference/model/llm/llm_family_modelscope.json +81 -2
- xinference/model/llm/pytorch/chatglm.py +95 -2
- xinference/model/llm/utils.py +12 -8
- xinference/model/llm/vllm/core.py +26 -5
- xinference/model/utils.py +25 -0
- xinference/types.py +64 -5
- xinference/utils.py +20 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.8126d441.js +3 -0
- xinference/web/ui/build/static/js/main.8126d441.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/06eb9846159adb398d44df0b0debc256a9fd9e8171a7d68f5c4ee4d655acfa45.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0cccfbe5d963b8e31eb679f9d9677392839cedd04aa2956ac6b33cf19599d597.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2f651cf60b1bde50c0601c7110f77dd44819fb6e2501ff748a631724d91445d4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3bda436576ecb05f81f7b6ec475d1cfaf03e2b3066e3a75902fe6e8c4773b43b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/47887a9524ffeecdc2a7839dace146b24f97a5564fc3d431d6179ad2b153cf1f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/48878f5178bad1a47757e011af41c974a7946efa29485506c4d19f25bf5d522d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/59574eb63cfe9ed2e58d2f5a420e1ae54354e243a602e9bc73deae3147ed4f98.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6a60ae66b29c2f3634fd081d369b9e63b4522fe18eb9e43e9979d1ff264b68ad.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/75a5abcbc92da335fdde530f5689194ec79a4b2345b8cba594f8904d3b88e3c6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/84bfe7afede38da1f8ad569d891276fe4d66cfb87bf5c9ff7a113788ba62bb88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/940ed05006583b955894e2b8f65a4a5ebf34f8149d747f59fae5131f17d65482.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9c5f03db9aa88582a9b69b25c7f1acc78ba7fc61f743c9ed7399abb292d5dbde.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a5e2e9f707eb7039bea096ca117d996b8f9cbc2a5613fd8e0c5b0094444ce23c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c02e70e9b9efcf3bd056606308104308d6a6ac559f2bc0b4454c11fb5874457c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e26750d9556e9741912333349e4da454c53dbfddbfc6002ab49518dcf02af745.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e610aefd7000a3f8542a25cb66c64671cc8da18350de4e5b577102ba4bb78d65.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ef42ec014d7bc373b874b2a1ff0dcd785490f125e913698bc049b0bd778e4d66.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fe3eb4d76c79ca98833f686d642224eeeb94cc83ad14300d281623796d087f0a.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +1077 -405
- xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/globals/globals.json +163 -3
- xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/globals/package.json +1 -1
- xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/ignore/package.json +64 -0
- xinference/web/ui/node_modules/@eslint/eslintrc/package.json +18 -37
- xinference/web/ui/node_modules/@eslint/js/package.json +1 -1
- xinference/web/ui/node_modules/@eslint-community/regexpp/package.json +9 -4
- xinference/web/ui/node_modules/@humanwhocodes/config-array/package.json +14 -14
- xinference/web/ui/node_modules/@rushstack/eslint-patch/package.json +6 -4
- xinference/web/ui/node_modules/@types/semver/package.json +15 -15
- xinference/web/ui/node_modules/@ungap/structured-clone/cjs/package.json +1 -0
- xinference/web/ui/node_modules/@ungap/structured-clone/package.json +53 -0
- xinference/web/ui/node_modules/ansi-colors/package.json +129 -0
- xinference/web/ui/node_modules/array-includes/package.json +8 -8
- xinference/web/ui/node_modules/array.prototype.findlastindex/package.json +120 -0
- xinference/web/ui/node_modules/array.prototype.flat/package.json +8 -8
- xinference/web/ui/node_modules/array.prototype.flatmap/package.json +8 -8
- xinference/web/ui/node_modules/arraybuffer.prototype.slice/package.json +103 -0
- xinference/web/ui/node_modules/ast-types-flow/package.json +2 -2
- xinference/web/ui/node_modules/astral-regex/package.json +33 -0
- xinference/web/ui/node_modules/asynciterator.prototype/package.json +72 -0
- xinference/web/ui/node_modules/axe-core/locales/_template.json +0 -12
- xinference/web/ui/node_modules/axe-core/package.json +1 -2
- xinference/web/ui/node_modules/axe-core/sri-history.json +0 -8
- xinference/web/ui/node_modules/call-bind/package.json +33 -23
- xinference/web/ui/node_modules/define-data-property/package.json +113 -0
- xinference/web/ui/node_modules/define-data-property/tsconfig.json +59 -0
- xinference/web/ui/node_modules/define-properties/package.json +5 -4
- xinference/web/ui/node_modules/enquirer/package.json +112 -0
- xinference/web/ui/node_modules/es-abstract/helpers/caseFolding.json +1430 -0
- xinference/web/ui/node_modules/es-abstract/package.json +29 -23
- xinference/web/ui/node_modules/es-iterator-helpers/index.json +17 -0
- xinference/web/ui/node_modules/es-iterator-helpers/package.json +185 -0
- xinference/web/ui/node_modules/eslint/conf/{rule-type-list.json → category-list.json} +9 -6
- xinference/web/ui/node_modules/eslint/node_modules/@babel/code-frame/package.json +25 -0
- xinference/web/ui/node_modules/eslint/node_modules/eslint-visitor-keys/lib/visitor-keys.json +289 -0
- xinference/web/ui/node_modules/eslint/node_modules/eslint-visitor-keys/package.json +39 -0
- xinference/web/ui/node_modules/eslint/node_modules/glob-parent/package.json +48 -0
- xinference/web/ui/node_modules/eslint/node_modules/ignore/package.json +64 -0
- xinference/web/ui/node_modules/eslint/package.json +53 -82
- xinference/web/ui/node_modules/eslint-config-prettier/package.json +13 -0
- xinference/web/ui/node_modules/eslint-import-resolver-node/package.json +3 -3
- xinference/web/ui/node_modules/eslint-plugin-import/package.json +22 -17
- xinference/web/ui/node_modules/eslint-plugin-jsx-a11y/package.json +25 -24
- xinference/web/ui/node_modules/eslint-plugin-simple-import-sort/package.json +23 -0
- xinference/web/ui/node_modules/eslint-plugin-testing-library/package.json +1 -1
- xinference/web/ui/node_modules/eslint-scope/package.json +19 -34
- xinference/web/ui/node_modules/eslint-utils/node_modules/eslint-visitor-keys/lib/visitor-keys.json +284 -0
- xinference/web/ui/node_modules/eslint-utils/node_modules/eslint-visitor-keys/package.json +40 -0
- xinference/web/ui/node_modules/eslint-utils/package.json +65 -0
- xinference/web/ui/node_modules/eslint-visitor-keys/package.json +15 -15
- xinference/web/ui/node_modules/espree/node_modules/acorn/package.json +35 -0
- xinference/web/ui/node_modules/espree/node_modules/eslint-visitor-keys/lib/visitor-keys.json +284 -0
- xinference/web/ui/node_modules/espree/node_modules/eslint-visitor-keys/package.json +40 -0
- xinference/web/ui/node_modules/espree/package.json +27 -51
- xinference/web/ui/node_modules/function-bind/package.json +38 -14
- xinference/web/ui/node_modules/function.prototype.name/package.json +32 -13
- xinference/web/ui/node_modules/functional-red-black-tree/package.json +40 -0
- xinference/web/ui/node_modules/get-intrinsic/package.json +11 -11
- xinference/web/ui/node_modules/hasown/package.json +91 -0
- xinference/web/ui/node_modules/hasown/tsconfig.json +49 -0
- xinference/web/ui/node_modules/is-async-function/package.json +86 -0
- xinference/web/ui/node_modules/is-core-module/core.json +3 -3
- xinference/web/ui/node_modules/is-core-module/package.json +7 -7
- xinference/web/ui/node_modules/is-finalizationregistry/package.json +67 -0
- xinference/web/ui/node_modules/is-generator-function/package.json +87 -0
- xinference/web/ui/node_modules/is-typed-array/package.json +8 -10
- xinference/web/ui/node_modules/iterator.prototype/package.json +73 -0
- xinference/web/ui/node_modules/jsx-ast-utils/package.json +5 -5
- xinference/web/ui/node_modules/language-tags/package.json +48 -8
- xinference/web/ui/node_modules/lodash.truncate/package.json +17 -0
- xinference/web/ui/node_modules/object-inspect/package.json +8 -6
- xinference/web/ui/node_modules/object.entries/package.json +7 -7
- xinference/web/ui/node_modules/object.fromentries/package.json +7 -7
- xinference/web/ui/node_modules/object.groupby/package.json +83 -0
- xinference/web/ui/node_modules/object.values/package.json +7 -7
- xinference/web/ui/node_modules/prettier/package.json +21 -0
- xinference/web/ui/node_modules/progress/package.json +26 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/@eslint/eslintrc/package.json +82 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/@humanwhocodes/config-array/package.json +61 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/@humanwhocodes/object-schema/package.json +33 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/ansi-styles/package.json +56 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/chalk/package.json +68 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/color-convert/package.json +48 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/color-name/package.json +28 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/escape-string-regexp/package.json +38 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint/conf/replacements.json +22 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint/conf/rule-type-list.json +28 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint/package.json +179 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint-scope/package.json +63 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/espree/package.json +88 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/globals/globals.json +1974 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/globals/package.json +55 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/has-flag/package.json +46 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/supports-color/package.json +53 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/type-fest/package.json +58 -0
- xinference/web/ui/node_modules/reflect.getprototypeof/package.json +99 -0
- xinference/web/ui/node_modules/regexp.prototype.flags/package.json +8 -7
- xinference/web/ui/node_modules/regexpp/package.json +91 -0
- xinference/web/ui/node_modules/resolve/lib/core.json +4 -1
- xinference/web/ui/node_modules/resolve/package.json +9 -8
- xinference/web/ui/node_modules/resolve/test/resolver/multirepo/package.json +1 -1
- xinference/web/ui/node_modules/safe-array-concat/package.json +5 -5
- xinference/web/ui/node_modules/set-function-length/package.json +84 -0
- xinference/web/ui/node_modules/set-function-name/package.json +80 -0
- xinference/web/ui/node_modules/slice-ansi/node_modules/ansi-styles/package.json +56 -0
- xinference/web/ui/node_modules/slice-ansi/node_modules/color-convert/package.json +48 -0
- xinference/web/ui/node_modules/slice-ansi/node_modules/color-name/package.json +28 -0
- xinference/web/ui/node_modules/slice-ansi/package.json +52 -0
- xinference/web/ui/node_modules/string.prototype.trim/package.json +7 -7
- xinference/web/ui/node_modules/string.prototype.trimend/package.json +7 -7
- xinference/web/ui/node_modules/string.prototype.trimstart/package.json +7 -7
- xinference/web/ui/node_modules/table/dist/src/schemas/config.json +95 -0
- xinference/web/ui/node_modules/table/dist/src/schemas/shared.json +139 -0
- xinference/web/ui/node_modules/table/dist/src/schemas/streamConfig.json +25 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/data.json +13 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/applicator.json +53 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/core.json +57 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/format.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/schema.json +39 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/applicator.json +48 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/core.json +51 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/format-annotation.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/unevaluated.json +15 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/schema.json +55 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-draft-06.json +137 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-draft-07.json +151 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-secure.json +88 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/data.json +13 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/applicator.json +53 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/core.json +57 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/format.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/schema.json +39 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/applicator.json +48 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/core.json +51 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/format-annotation.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/unevaluated.json +15 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/schema.json +55 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-draft-06.json +137 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-draft-07.json +151 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-secure.json +88 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/package.json +126 -0
- xinference/web/ui/node_modules/table/node_modules/json-schema-traverse/package.json +43 -0
- xinference/web/ui/node_modules/table/package.json +77 -0
- xinference/web/ui/node_modules/typed-array-buffer/package.json +73 -0
- xinference/web/ui/node_modules/typed-array-byte-length/package.json +98 -0
- xinference/web/ui/node_modules/v8-compile-cache/package.json +34 -0
- xinference/web/ui/node_modules/which-builtin-type/package.json +93 -0
- xinference/web/ui/node_modules/which-typed-array/package.json +4 -5
- xinference/web/ui/package-lock.json +1085 -406
- xinference/web/ui/package.json +10 -2
- {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/METADATA +53 -36
- {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/RECORD +232 -124
- {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/WHEEL +1 -1
- xinference/web/ui/build/static/js/main.8ae3b6d9.js +0 -3
- xinference/web/ui/build/static/js/main.8ae3b6d9.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/06363becf51869c421a8b3e34b4e3f50aa0aac3d590446044f9412e379f4ebbe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2849edddeb99a8ecdda577e810eead74b8f8a291cdfbd987839d604666ed79d0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2c774712d327cdf0b192aaa22785ec380e9427c587350c33289828d99e9c4abc.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/34c578e50d3040519ca8dc28bf0f7fec8674c2d6c0fcc3e98401c0a3f9f013cf.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5933910e7c33febbabc0297ef7ba80f5e53ed96aa125b6a44ff2910aec29ced1.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5e18a8354ea03d22a967fd8cb2171aa798edcb3da5d66ab1fd3b9663affd0abe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/717cd7c186ace4812d1e602bdd299d8dc507f072670cc43974d53aac2574df5d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/82dd896a6674286c48c1ab9f9147dd6e542dccd99848d5b3133a38efba8bd7ee.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a178cfde289ffd15fd54b1c80fd9d231ae0f9644db33acb02084e69b32bfee37.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/adaec65f73accce3171b51b0fbcbfd8d0cd83f81a2e1b28eb34148644875499a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ae8f44c77c2e6f79680fe32fb00174183cd867093ebbda967b8985c33cc10fa2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b10bd04b4d6e28bfcaaaab37b0a4c1986e87a5b7e62e5ce4d56019880ef26990.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/cfc5da1cedee985a556e04865affccb72d0f624cbfb73da348bbe8693e8a4983.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/eebd0123c4b4396737e56b9181406a9fd76b107dd32971d23b0de99f51dd38d6.json +0 -1
- xinference/web/ui/node_modules/@nicolo-ribaudo/eslint-scope-5-internals/node_modules/eslint-scope/package.json +0 -48
- xinference/web/ui/node_modules/@typescript-eslint/utils/node_modules/eslint-scope/package.json +0 -48
- xinference/web/ui/node_modules/@typescript-eslint/utils/node_modules/estraverse/package.json +0 -40
- xinference/web/ui/node_modules/eslint/node_modules/argparse/package.json +0 -31
- xinference/web/ui/node_modules/eslint/node_modules/js-yaml/package.json +0 -66
- xinference/web/ui/node_modules/eslint-plugin-jsx-a11y/node_modules/semver/package.json +0 -38
- xinference/web/ui/node_modules/function-bind/.jscs.json +0 -176
- xinference/web/ui/node_modules/resolve/test/resolver/malformed_package_json/package.json +0 -1
- xinference/web/ui/node_modules/webpack/node_modules/eslint-scope/package.json +0 -48
- xinference/web/ui/node_modules/webpack/node_modules/estraverse/package.json +0 -40
- /xinference/web/ui/build/static/js/{main.8ae3b6d9.js.LICENSE.txt → main.8126d441.js.LICENSE.txt} +0 -0
- /xinference/web/ui/node_modules/{@nicolo-ribaudo/eslint-scope-5-internals → eslint-scope}/node_modules/estraverse/package.json +0 -0
- /xinference/web/ui/node_modules/{@eslint/eslintrc → react-scripts}/node_modules/argparse/package.json +0 -0
- /xinference/web/ui/node_modules/{eslint → react-scripts/node_modules/eslint}/lib/cli-engine/formatters/formatters-meta.json +0 -0
- /xinference/web/ui/node_modules/{eslint-config-react-app → react-scripts/node_modules/eslint-config-react-app}/package.json +0 -0
- /xinference/web/ui/node_modules/{eslint-plugin-flowtype → react-scripts/node_modules/eslint-plugin-flowtype}/dist/configs/recommended.json +0 -0
- /xinference/web/ui/node_modules/{eslint-plugin-flowtype → react-scripts/node_modules/eslint-plugin-flowtype}/package.json +0 -0
- /xinference/web/ui/node_modules/{@eslint/eslintrc → react-scripts}/node_modules/js-yaml/package.json +0 -0
- {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/LICENSE +0 -0
- {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/top_level.txt +0 -0
xinference/deploy/supervisor.py
CHANGED
|
@@ -22,6 +22,10 @@ from typing import Dict, Optional
|
|
|
22
22
|
import xoscar as xo
|
|
23
23
|
from xoscar.utils import get_next_port
|
|
24
24
|
|
|
25
|
+
from ..constants import (
|
|
26
|
+
XINFERENCE_HEALTH_CHECK_ATTEMPTS,
|
|
27
|
+
XINFERENCE_HEALTH_CHECK_INTERVAL,
|
|
28
|
+
)
|
|
25
29
|
from ..core.supervisor import SupervisorActor
|
|
26
30
|
from .utils import health_check
|
|
27
31
|
|
|
@@ -66,11 +70,20 @@ def run_in_subprocess(
|
|
|
66
70
|
return p
|
|
67
71
|
|
|
68
72
|
|
|
69
|
-
def main(
|
|
70
|
-
|
|
73
|
+
def main(
|
|
74
|
+
host: str,
|
|
75
|
+
port: int,
|
|
76
|
+
supervisor_port: Optional[int],
|
|
77
|
+
logging_conf: Optional[Dict] = None,
|
|
78
|
+
):
|
|
79
|
+
supervisor_address = f"{host}:{supervisor_port or get_next_port()}"
|
|
71
80
|
local_cluster = run_in_subprocess(supervisor_address, logging_conf)
|
|
72
81
|
|
|
73
|
-
if not health_check(
|
|
82
|
+
if not health_check(
|
|
83
|
+
address=supervisor_address,
|
|
84
|
+
max_attempts=XINFERENCE_HEALTH_CHECK_ATTEMPTS,
|
|
85
|
+
sleep_interval=XINFERENCE_HEALTH_CHECK_INTERVAL,
|
|
86
|
+
):
|
|
74
87
|
raise RuntimeError("Supervisor is not available after multiple attempts")
|
|
75
88
|
|
|
76
89
|
try:
|
xinference/deploy/utils.py
CHANGED
xinference/deploy/worker.py
CHANGED
|
@@ -18,10 +18,10 @@ import os
|
|
|
18
18
|
from typing import Any, Optional
|
|
19
19
|
|
|
20
20
|
import xoscar as xo
|
|
21
|
-
from xorbits._mars.resource import cuda_count
|
|
22
21
|
from xoscar import MainActorPoolType
|
|
23
22
|
|
|
24
23
|
from ..core.worker import WorkerActor
|
|
24
|
+
from ..utils import cuda_count
|
|
25
25
|
|
|
26
26
|
logger = logging.getLogger(__name__)
|
|
27
27
|
|
|
@@ -33,5 +33,15 @@ MODELSCOPE_EMBEDDING_MODELS = dict(
|
|
|
33
33
|
codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
|
|
34
34
|
)
|
|
35
35
|
)
|
|
36
|
+
|
|
37
|
+
from ...constants import XINFERENCE_MODEL_DIR
|
|
38
|
+
|
|
39
|
+
user_defined_llm_dir = os.path.join(XINFERENCE_MODEL_DIR, "embedding")
|
|
40
|
+
if os.path.isdir(user_defined_llm_dir):
|
|
41
|
+
for f in os.listdir(user_defined_llm_dir):
|
|
42
|
+
with codecs.open(os.path.join(user_defined_llm_dir, f), encoding="utf-8") as fd:
|
|
43
|
+
user_defined_llm_family = CustomEmbeddingModelSpec.parse_obj(json.load(fd))
|
|
44
|
+
register_embedding(user_defined_llm_family, persist=False)
|
|
45
|
+
|
|
36
46
|
del _model_spec_json
|
|
37
47
|
del _model_spec_modelscope_json
|
|
@@ -220,6 +220,9 @@ class EmbeddingModel:
|
|
|
220
220
|
]
|
|
221
221
|
|
|
222
222
|
raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
|
|
223
|
+
from ..utils import patch_trust_remote_code
|
|
224
|
+
|
|
225
|
+
patch_trust_remote_code()
|
|
223
226
|
self._model = SentenceTransformer(self._model_path, device=self._device)
|
|
224
227
|
|
|
225
228
|
def create_embedding(self, sentences: Union[str, List[str]], **kwargs):
|
|
@@ -49,10 +49,6 @@ def register_embedding(model_spec: CustomEmbeddingModelSpec, persist: bool):
|
|
|
49
49
|
f" or a digit, and can only contain letters, digits, underscores, or dashes."
|
|
50
50
|
)
|
|
51
51
|
|
|
52
|
-
model_uri = model_spec.model_uri
|
|
53
|
-
if model_uri and not is_valid_model_uri(model_uri):
|
|
54
|
-
raise ValueError(f"Invalid model URI {model_uri}.")
|
|
55
|
-
|
|
56
52
|
with UD_EMBEDDING_LOCK:
|
|
57
53
|
for model_name in (
|
|
58
54
|
list(BUILTIN_EMBEDDING_MODELS.keys())
|
|
@@ -67,6 +63,11 @@ def register_embedding(model_spec: CustomEmbeddingModelSpec, persist: bool):
|
|
|
67
63
|
UD_EMBEDDINGS.append(model_spec)
|
|
68
64
|
|
|
69
65
|
if persist:
|
|
66
|
+
# We only validate model URL when persist is True.
|
|
67
|
+
model_uri = model_spec.model_uri
|
|
68
|
+
if model_uri and not is_valid_model_uri(model_uri):
|
|
69
|
+
raise ValueError(f"Invalid model URI {model_uri}.")
|
|
70
|
+
|
|
70
71
|
persist_path = os.path.join(
|
|
71
72
|
XINFERENCE_MODEL_DIR, "embedding", f"{model_spec.model_name}.json"
|
|
72
73
|
)
|
|
@@ -126,5 +126,21 @@
|
|
|
126
126
|
"language": ["en"],
|
|
127
127
|
"model_id": "BAAI/bge-large-en-v1.5",
|
|
128
128
|
"model_revision": "5888da4a3a013e65d33dd6f612ecd4625eb87a7d"
|
|
129
|
+
},
|
|
130
|
+
{
|
|
131
|
+
"model_name": "jina-embeddings-v2-small-en",
|
|
132
|
+
"dimensions": 512,
|
|
133
|
+
"max_tokens": 8192,
|
|
134
|
+
"language": ["en"],
|
|
135
|
+
"model_id": "jinaai/jina-embeddings-v2-small-en",
|
|
136
|
+
"model_revision": "b811f03af3d4d7ea72a7c25c802b21fc675a5d99"
|
|
137
|
+
},
|
|
138
|
+
{
|
|
139
|
+
"model_name": "jina-embeddings-v2-base-en",
|
|
140
|
+
"dimensions": 512,
|
|
141
|
+
"max_tokens": 8192,
|
|
142
|
+
"language": ["en"],
|
|
143
|
+
"model_id": "jinaai/jina-embeddings-v2-base-en",
|
|
144
|
+
"model_revision": "7302ac470bed880590f9344bfeee32ff8722d0e5"
|
|
129
145
|
}
|
|
130
146
|
]
|
|
@@ -126,5 +126,21 @@
|
|
|
126
126
|
"language": ["en"],
|
|
127
127
|
"model_id": "Xorbits/bge-large-en-v1.5",
|
|
128
128
|
"model_revision": "v0.0.1"
|
|
129
|
+
},
|
|
130
|
+
{
|
|
131
|
+
"model_name": "jina-embeddings-v2-small-en",
|
|
132
|
+
"dimensions": 512,
|
|
133
|
+
"max_tokens": 8192,
|
|
134
|
+
"language": ["en"],
|
|
135
|
+
"model_id": "Xorbits/jina-embeddings-v2-small-en",
|
|
136
|
+
"model_revision": "v0.0.1"
|
|
137
|
+
},
|
|
138
|
+
{
|
|
139
|
+
"model_name": "jina-embeddings-v2-base-en",
|
|
140
|
+
"dimensions": 512,
|
|
141
|
+
"max_tokens": 8192,
|
|
142
|
+
"language": ["en"],
|
|
143
|
+
"model_id": "Xorbits/jina-embeddings-v2-base-en",
|
|
144
|
+
"model_revision": "v0.0.1"
|
|
129
145
|
}
|
|
130
146
|
]
|
xinference/model/llm/__init__.py
CHANGED
|
@@ -19,6 +19,7 @@ import os
|
|
|
19
19
|
from .core import LLM
|
|
20
20
|
from .llm_family import (
|
|
21
21
|
BUILTIN_LLM_FAMILIES,
|
|
22
|
+
BUILTIN_LLM_PROMPT_STYLE,
|
|
22
23
|
BUILTIN_MODELSCOPE_LLM_FAMILIES,
|
|
23
24
|
LLM_CLASSES,
|
|
24
25
|
GgmlLLMSpecV1,
|
|
@@ -89,13 +90,32 @@ def _install():
|
|
|
89
90
|
os.path.dirname(os.path.abspath(__file__)), "llm_family.json"
|
|
90
91
|
)
|
|
91
92
|
for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
|
|
92
|
-
|
|
93
|
+
model_spec = LLMFamilyV1.parse_obj(json_obj)
|
|
94
|
+
BUILTIN_LLM_FAMILIES.append(model_spec)
|
|
95
|
+
|
|
96
|
+
# register prompt style
|
|
97
|
+
if "chat" in model_spec.model_ability and isinstance(
|
|
98
|
+
model_spec.prompt_style, PromptStyleV1
|
|
99
|
+
):
|
|
100
|
+
# note that the key is the model name,
|
|
101
|
+
# since there are multiple representations of the same prompt style name in json.
|
|
102
|
+
BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = model_spec.prompt_style
|
|
93
103
|
|
|
94
104
|
modelscope_json_path = os.path.join(
|
|
95
105
|
os.path.dirname(os.path.abspath(__file__)), "llm_family_modelscope.json"
|
|
96
106
|
)
|
|
97
107
|
for json_obj in json.load(codecs.open(modelscope_json_path, "r", encoding="utf-8")):
|
|
98
|
-
|
|
108
|
+
model_spec = LLMFamilyV1.parse_obj(json_obj)
|
|
109
|
+
BUILTIN_MODELSCOPE_LLM_FAMILIES.append(model_spec)
|
|
110
|
+
|
|
111
|
+
# register prompt style, in case that we have something missed
|
|
112
|
+
# if duplicated with huggingface json, keep it as the huggingface style
|
|
113
|
+
if (
|
|
114
|
+
"chat" in model_spec.model_ability
|
|
115
|
+
and isinstance(model_spec.prompt_style, PromptStyleV1)
|
|
116
|
+
and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
|
|
117
|
+
):
|
|
118
|
+
BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = model_spec.prompt_style
|
|
99
119
|
|
|
100
120
|
from ...constants import XINFERENCE_MODEL_DIR
|
|
101
121
|
|
xinference/model/llm/core.py
CHANGED
|
@@ -61,13 +61,13 @@ class LLM(abc.ABC):
|
|
|
61
61
|
|
|
62
62
|
@staticmethod
|
|
63
63
|
def _has_cuda_device():
|
|
64
|
-
from
|
|
64
|
+
from ...utils import cuda_count
|
|
65
65
|
|
|
66
66
|
return cuda_count() > 0
|
|
67
67
|
|
|
68
68
|
@staticmethod
|
|
69
69
|
def _get_cuda_count():
|
|
70
|
-
from
|
|
70
|
+
from ...utils import cuda_count
|
|
71
71
|
|
|
72
72
|
cuda_visible_devices = os.getenv("CUDA_VISIBLE_DEVICES", None)
|
|
73
73
|
if cuda_visible_devices is None:
|
|
@@ -11,13 +11,13 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
14
|
+
import json
|
|
15
15
|
import logging
|
|
16
16
|
import os
|
|
17
17
|
import time
|
|
18
18
|
import uuid
|
|
19
19
|
from pathlib import Path
|
|
20
|
-
from typing import TYPE_CHECKING, Iterator, List, Optional, Union
|
|
20
|
+
from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Union
|
|
21
21
|
|
|
22
22
|
from ....types import (
|
|
23
23
|
ChatCompletion,
|
|
@@ -107,7 +107,7 @@ class ChatglmCppChatModel(LLM):
|
|
|
107
107
|
|
|
108
108
|
@staticmethod
|
|
109
109
|
def _convert_raw_text_chunks_to_chat(
|
|
110
|
-
tokens: Iterator[
|
|
110
|
+
tokens: Iterator[Any], model_name: str
|
|
111
111
|
) -> Iterator[ChatCompletionChunk]:
|
|
112
112
|
yield {
|
|
113
113
|
"id": "chat" + f"cmpl-{str(uuid.uuid4())}",
|
|
@@ -124,7 +124,7 @@ class ChatglmCppChatModel(LLM):
|
|
|
124
124
|
}
|
|
125
125
|
],
|
|
126
126
|
}
|
|
127
|
-
for token in
|
|
127
|
+
for token in tokens:
|
|
128
128
|
yield {
|
|
129
129
|
"id": "chat" + f"cmpl-{str(uuid.uuid4())}",
|
|
130
130
|
"model": model_name,
|
|
@@ -134,30 +134,30 @@ class ChatglmCppChatModel(LLM):
|
|
|
134
134
|
{
|
|
135
135
|
"index": 0,
|
|
136
136
|
"delta": {
|
|
137
|
-
"content": token
|
|
137
|
+
"content": token
|
|
138
|
+
if isinstance(token, str)
|
|
139
|
+
else token.content,
|
|
138
140
|
},
|
|
139
141
|
"finish_reason": None,
|
|
140
142
|
}
|
|
141
143
|
],
|
|
142
144
|
}
|
|
143
145
|
|
|
144
|
-
@
|
|
146
|
+
@classmethod
|
|
145
147
|
def _convert_raw_text_completion_to_chat(
|
|
146
|
-
text:
|
|
148
|
+
cls, text: Any, model_name: str
|
|
147
149
|
) -> ChatCompletion:
|
|
150
|
+
_id = str(uuid.uuid4())
|
|
148
151
|
return {
|
|
149
|
-
"id": "chat" + f"cmpl-{
|
|
152
|
+
"id": "chat" + f"cmpl-{_id}",
|
|
150
153
|
"model": model_name,
|
|
151
154
|
"object": "chat.completion",
|
|
152
155
|
"created": int(time.time()),
|
|
153
156
|
"choices": [
|
|
154
157
|
{
|
|
155
158
|
"index": 0,
|
|
156
|
-
"message":
|
|
157
|
-
|
|
158
|
-
"content": text,
|
|
159
|
-
},
|
|
160
|
-
"finish_reason": None,
|
|
159
|
+
"message": cls._message_to_json_string(_id, text),
|
|
160
|
+
"finish_reason": cls._finish_reason_from_msg(text),
|
|
161
161
|
}
|
|
162
162
|
],
|
|
163
163
|
"usage": {
|
|
@@ -167,6 +167,66 @@ class ChatglmCppChatModel(LLM):
|
|
|
167
167
|
},
|
|
168
168
|
}
|
|
169
169
|
|
|
170
|
+
@staticmethod
|
|
171
|
+
def _finish_reason_from_msg(msg):
|
|
172
|
+
if isinstance(msg, str):
|
|
173
|
+
return None
|
|
174
|
+
else:
|
|
175
|
+
return "tool_calls" if msg.tool_calls else "stop"
|
|
176
|
+
|
|
177
|
+
@staticmethod
|
|
178
|
+
def _eval_arguments(arguments):
|
|
179
|
+
def tool_call(**kwargs):
|
|
180
|
+
return kwargs
|
|
181
|
+
|
|
182
|
+
try:
|
|
183
|
+
return json.dumps(eval(arguments, dict(tool_call=tool_call)))
|
|
184
|
+
except Exception:
|
|
185
|
+
return f"Invalid arguments {arguments}"
|
|
186
|
+
|
|
187
|
+
@classmethod
|
|
188
|
+
def _message_to_json_string(cls, _id, msg) -> ChatCompletionMessage:
|
|
189
|
+
if isinstance(msg, str):
|
|
190
|
+
return {
|
|
191
|
+
"role": "assistant",
|
|
192
|
+
"content": msg,
|
|
193
|
+
}
|
|
194
|
+
else:
|
|
195
|
+
return {
|
|
196
|
+
"role": msg.role,
|
|
197
|
+
"content": msg.content,
|
|
198
|
+
"tool_calls": [
|
|
199
|
+
{
|
|
200
|
+
"id": f"call_{_id}",
|
|
201
|
+
"type": tc.type,
|
|
202
|
+
"function": {
|
|
203
|
+
"name": tc.function.name,
|
|
204
|
+
"arguments": cls._eval_arguments(tc.function.arguments),
|
|
205
|
+
},
|
|
206
|
+
}
|
|
207
|
+
for tc in msg.tool_calls
|
|
208
|
+
],
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
@staticmethod
|
|
212
|
+
def _handle_tools(generate_config) -> Optional[ChatCompletionMessage]:
|
|
213
|
+
"""Convert openai tools to ChatGLM tools."""
|
|
214
|
+
if generate_config is None:
|
|
215
|
+
return None
|
|
216
|
+
tools = generate_config.pop("tools", None)
|
|
217
|
+
if tools is None:
|
|
218
|
+
return None
|
|
219
|
+
chatglm_tools = []
|
|
220
|
+
for elem in tools:
|
|
221
|
+
if elem.get("type") != "function" or "function" not in elem:
|
|
222
|
+
raise ValueError("ChatGLM tools only support function type.")
|
|
223
|
+
chatglm_tools.append(elem["function"])
|
|
224
|
+
return {
|
|
225
|
+
"role": "system",
|
|
226
|
+
"content": f"Answer the following questions as best as you can. You have access to the following tools:\n"
|
|
227
|
+
f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}",
|
|
228
|
+
}
|
|
229
|
+
|
|
170
230
|
def chat(
|
|
171
231
|
self,
|
|
172
232
|
prompt: str,
|
|
@@ -174,11 +234,15 @@ class ChatglmCppChatModel(LLM):
|
|
|
174
234
|
generate_config: Optional[ChatglmCppGenerateConfig] = None,
|
|
175
235
|
) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
|
|
176
236
|
if chat_history is not None:
|
|
177
|
-
chat_history_list =
|
|
237
|
+
chat_history_list = chat_history
|
|
178
238
|
else:
|
|
179
239
|
chat_history_list = []
|
|
180
240
|
|
|
181
|
-
|
|
241
|
+
tool_message = self._handle_tools(generate_config)
|
|
242
|
+
if tool_message is not None:
|
|
243
|
+
chat_history_list.insert(0, tool_message)
|
|
244
|
+
|
|
245
|
+
chat_history_list.append({"role": "user", "content": prompt})
|
|
182
246
|
logger.debug("Full conversation history:\n%s", str(chat_history_list))
|
|
183
247
|
|
|
184
248
|
generate_config = self._sanitize_generate_config(generate_config)
|
|
@@ -68,7 +68,7 @@ class LlamaCppModel(LLM):
|
|
|
68
68
|
self._llm = None
|
|
69
69
|
|
|
70
70
|
def _can_apply_metal(self):
|
|
71
|
-
return self.quantization in ["q4_0", "q4_1"]
|
|
71
|
+
return self.quantization.lower() in ["q4_0", "q4_1", "q4_k_s", "q4_k_m"]
|
|
72
72
|
|
|
73
73
|
def _can_apply_cublas(self):
|
|
74
74
|
# TODO: figure out the quantizations supported.
|
|
@@ -189,7 +189,7 @@ class LlamaCppModel(LLM):
|
|
|
189
189
|
try:
|
|
190
190
|
self._llm = Llama(
|
|
191
191
|
model_path=model_path,
|
|
192
|
-
verbose=
|
|
192
|
+
verbose=True,
|
|
193
193
|
**self._llamacpp_model_config,
|
|
194
194
|
)
|
|
195
195
|
except AssertionError:
|
|
@@ -512,7 +512,7 @@
|
|
|
512
512
|
"none"
|
|
513
513
|
],
|
|
514
514
|
"model_id": "THUDM/chatglm3-6b",
|
|
515
|
-
"model_revision": "
|
|
515
|
+
"model_revision": "e46a14881eae613281abbd266ee918e93a56018f"
|
|
516
516
|
}
|
|
517
517
|
],
|
|
518
518
|
"prompt_style": {
|
|
@@ -1136,6 +1136,17 @@
|
|
|
1136
1136
|
"model_id": "Qwen/Qwen-14B-Chat",
|
|
1137
1137
|
"model_revision": "fab8385c8f7e7980ef61944729fe134ccbbca263"
|
|
1138
1138
|
},
|
|
1139
|
+
{
|
|
1140
|
+
"model_format": "pytorch",
|
|
1141
|
+
"model_size_in_billions": 72,
|
|
1142
|
+
"quantizations": [
|
|
1143
|
+
"4-bit",
|
|
1144
|
+
"8-bit",
|
|
1145
|
+
"none"
|
|
1146
|
+
],
|
|
1147
|
+
"model_id": "Qwen/Qwen-72B-Chat",
|
|
1148
|
+
"model_revision": "2cd9f76279337941ec1a4abeec6f8eb3c38d0f55"
|
|
1149
|
+
},
|
|
1139
1150
|
{
|
|
1140
1151
|
"model_format": "gptq",
|
|
1141
1152
|
"model_size_in_billions": 7,
|
|
@@ -1153,6 +1164,15 @@
|
|
|
1153
1164
|
"Int8"
|
|
1154
1165
|
],
|
|
1155
1166
|
"model_id": "Qwen/Qwen-14B-Chat-{quantization}"
|
|
1167
|
+
},
|
|
1168
|
+
{
|
|
1169
|
+
"model_format": "gptq",
|
|
1170
|
+
"model_size_in_billions": 72,
|
|
1171
|
+
"quantizations": [
|
|
1172
|
+
"Int4",
|
|
1173
|
+
"Int8"
|
|
1174
|
+
],
|
|
1175
|
+
"model_id": "Qwen/Qwen-72B-Chat-{quantization}"
|
|
1156
1176
|
}
|
|
1157
1177
|
],
|
|
1158
1178
|
"prompt_style": {
|
|
@@ -1164,7 +1184,14 @@
|
|
|
1164
1184
|
],
|
|
1165
1185
|
"intra_message_sep": "\n",
|
|
1166
1186
|
"stop_token_ids": [
|
|
1167
|
-
151643
|
|
1187
|
+
151643,
|
|
1188
|
+
151644,
|
|
1189
|
+
151645
|
|
1190
|
+
],
|
|
1191
|
+
"stop": [
|
|
1192
|
+
"<|endoftext|>",
|
|
1193
|
+
"<|im_start|>",
|
|
1194
|
+
"<|im_end|>"
|
|
1168
1195
|
]
|
|
1169
1196
|
}
|
|
1170
1197
|
},
|
|
@@ -2077,7 +2104,7 @@
|
|
|
2077
2104
|
"model_ability": [
|
|
2078
2105
|
"generate"
|
|
2079
2106
|
],
|
|
2080
|
-
"model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.
|
|
2107
|
+
"model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.",
|
|
2081
2108
|
"model_specs": [
|
|
2082
2109
|
{
|
|
2083
2110
|
"model_format": "ggufv2",
|
|
@@ -2134,7 +2161,7 @@
|
|
|
2134
2161
|
"model_ability": [
|
|
2135
2162
|
"generate"
|
|
2136
2163
|
],
|
|
2137
|
-
"model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.
|
|
2164
|
+
"model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.",
|
|
2138
2165
|
"model_specs": [
|
|
2139
2166
|
{
|
|
2140
2167
|
"model_format": "pytorch",
|
|
@@ -2160,6 +2187,74 @@
|
|
|
2160
2187
|
}
|
|
2161
2188
|
]
|
|
2162
2189
|
},
|
|
2190
|
+
{
|
|
2191
|
+
"version": 1,
|
|
2192
|
+
"context_length": 204800,
|
|
2193
|
+
"model_name": "Yi-chat",
|
|
2194
|
+
"model_lang": [
|
|
2195
|
+
"en",
|
|
2196
|
+
"zh"
|
|
2197
|
+
],
|
|
2198
|
+
"model_ability": [
|
|
2199
|
+
"chat"
|
|
2200
|
+
],
|
|
2201
|
+
"model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.",
|
|
2202
|
+
"model_specs": [
|
|
2203
|
+
{
|
|
2204
|
+
"model_format": "pytorch",
|
|
2205
|
+
"model_size_in_billions": 34,
|
|
2206
|
+
"quantizations": [
|
|
2207
|
+
"4-bit",
|
|
2208
|
+
"8-bit",
|
|
2209
|
+
"none"
|
|
2210
|
+
],
|
|
2211
|
+
"model_id": "01-ai/Yi-34B-Chat",
|
|
2212
|
+
"model_revision": "a99ec35331cbfc9da596af7d4538fe2efecff03c"
|
|
2213
|
+
},
|
|
2214
|
+
{
|
|
2215
|
+
"model_format": "ggufv2",
|
|
2216
|
+
"model_size_in_billions": 34,
|
|
2217
|
+
"quantizations": [
|
|
2218
|
+
"Q2_K",
|
|
2219
|
+
"Q3_K_L",
|
|
2220
|
+
"Q3_K_M",
|
|
2221
|
+
"Q3_K_S",
|
|
2222
|
+
"Q4_0",
|
|
2223
|
+
"Q4_K_M",
|
|
2224
|
+
"Q4_K_S",
|
|
2225
|
+
"Q5_0",
|
|
2226
|
+
"Q5_K_M",
|
|
2227
|
+
"Q5_K_S",
|
|
2228
|
+
"Q6_K",
|
|
2229
|
+
"Q8_0"
|
|
2230
|
+
],
|
|
2231
|
+
"model_id": "TheBloke/Yi-34B-Chat-GGUF",
|
|
2232
|
+
"model_file_name_template": "yi-34b-chat.{quantization}.gguf"
|
|
2233
|
+
}
|
|
2234
|
+
],
|
|
2235
|
+
"prompt_style": {
|
|
2236
|
+
"style_name": "CHATML",
|
|
2237
|
+
"system_prompt": "",
|
|
2238
|
+
"roles": [
|
|
2239
|
+
"<|im_start|>user",
|
|
2240
|
+
"<|im_start|>assistant"
|
|
2241
|
+
],
|
|
2242
|
+
"intra_message_sep": "<|im_end|>",
|
|
2243
|
+
"inter_message_sep": "",
|
|
2244
|
+
"stop_token_ids": [
|
|
2245
|
+
2,
|
|
2246
|
+
6,
|
|
2247
|
+
7,
|
|
2248
|
+
8
|
|
2249
|
+
],
|
|
2250
|
+
"stop": [
|
|
2251
|
+
"<|endoftext|>",
|
|
2252
|
+
"<|im_start|>",
|
|
2253
|
+
"<|im_end|>",
|
|
2254
|
+
"<|im_sep|>"
|
|
2255
|
+
]
|
|
2256
|
+
}
|
|
2257
|
+
},
|
|
2163
2258
|
{
|
|
2164
2259
|
"version": 1,
|
|
2165
2260
|
"context_length": 2048,
|
|
@@ -17,9 +17,13 @@ import os
|
|
|
17
17
|
import platform
|
|
18
18
|
import shutil
|
|
19
19
|
from threading import Lock
|
|
20
|
-
from typing import List, Optional, Tuple, Type, Union
|
|
20
|
+
from typing import Any, Dict, List, Optional, Tuple, Type, Union
|
|
21
21
|
|
|
22
|
-
from pydantic import BaseModel, Field
|
|
22
|
+
from pydantic import BaseModel, Field, Protocol, ValidationError
|
|
23
|
+
from pydantic.error_wrappers import ErrorWrapper
|
|
24
|
+
from pydantic.parse import load_str_bytes
|
|
25
|
+
from pydantic.types import StrBytes
|
|
26
|
+
from pydantic.utils import ROOT_KEY
|
|
23
27
|
from typing_extensions import Annotated, Literal
|
|
24
28
|
|
|
25
29
|
from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
|
|
@@ -36,6 +40,7 @@ from . import LLM
|
|
|
36
40
|
logger = logging.getLogger(__name__)
|
|
37
41
|
|
|
38
42
|
DEFAULT_CONTEXT_LENGTH = 2048
|
|
43
|
+
BUILTIN_LLM_PROMPT_STYLE: Dict[str, "PromptStyleV1"] = {}
|
|
39
44
|
|
|
40
45
|
|
|
41
46
|
class GgmlLLMSpecV1(BaseModel):
|
|
@@ -80,12 +85,52 @@ class LLMFamilyV1(BaseModel):
|
|
|
80
85
|
prompt_style: Optional["PromptStyleV1"]
|
|
81
86
|
|
|
82
87
|
|
|
88
|
+
class CustomLLMFamilyV1(LLMFamilyV1):
|
|
89
|
+
prompt_style: Optional[Union["PromptStyleV1", str]] # type: ignore
|
|
90
|
+
|
|
91
|
+
@classmethod
|
|
92
|
+
def parse_raw(
|
|
93
|
+
cls: Any,
|
|
94
|
+
b: StrBytes,
|
|
95
|
+
*,
|
|
96
|
+
content_type: Optional[str] = None,
|
|
97
|
+
encoding: str = "utf8",
|
|
98
|
+
proto: Protocol = None,
|
|
99
|
+
allow_pickle: bool = False,
|
|
100
|
+
) -> LLMFamilyV1:
|
|
101
|
+
# See source code of BaseModel.parse_raw
|
|
102
|
+
try:
|
|
103
|
+
obj = load_str_bytes(
|
|
104
|
+
b,
|
|
105
|
+
proto=proto,
|
|
106
|
+
content_type=content_type,
|
|
107
|
+
encoding=encoding,
|
|
108
|
+
allow_pickle=allow_pickle,
|
|
109
|
+
json_loads=cls.__config__.json_loads,
|
|
110
|
+
)
|
|
111
|
+
except (ValueError, TypeError, UnicodeDecodeError) as e:
|
|
112
|
+
raise ValidationError([ErrorWrapper(e, loc=ROOT_KEY)], cls)
|
|
113
|
+
llm_spec = cls.parse_obj(obj)
|
|
114
|
+
|
|
115
|
+
# handle prompt style when user choose existing style
|
|
116
|
+
if llm_spec.prompt_style is not None and isinstance(llm_spec.prompt_style, str):
|
|
117
|
+
prompt_style_name = llm_spec.prompt_style
|
|
118
|
+
if prompt_style_name not in BUILTIN_LLM_PROMPT_STYLE:
|
|
119
|
+
raise ValueError(
|
|
120
|
+
f"Xinference does not support the prompt style name: {prompt_style_name}"
|
|
121
|
+
)
|
|
122
|
+
llm_spec.prompt_style = BUILTIN_LLM_PROMPT_STYLE[prompt_style_name]
|
|
123
|
+
|
|
124
|
+
return llm_spec
|
|
125
|
+
|
|
126
|
+
|
|
83
127
|
LLMSpecV1 = Annotated[
|
|
84
128
|
Union[GgmlLLMSpecV1, PytorchLLMSpecV1],
|
|
85
129
|
Field(discriminator="model_format"),
|
|
86
130
|
]
|
|
87
131
|
|
|
88
132
|
LLMFamilyV1.update_forward_refs()
|
|
133
|
+
CustomLLMFamilyV1.update_forward_refs()
|
|
89
134
|
|
|
90
135
|
|
|
91
136
|
LLM_CLASSES: List[Type[LLM]] = []
|
|
@@ -580,7 +625,7 @@ def _is_linux():
|
|
|
580
625
|
def _has_cuda_device():
|
|
581
626
|
# `cuda_count` method already contains the logic for the
|
|
582
627
|
# number of GPUs specified by `CUDA_VISIBLE_DEVICES`.
|
|
583
|
-
from
|
|
628
|
+
from ...utils import cuda_count
|
|
584
629
|
|
|
585
630
|
return cuda_count() > 0
|
|
586
631
|
|
|
@@ -677,11 +722,6 @@ def register_llm(llm_family: LLMFamilyV1, persist: bool):
|
|
|
677
722
|
f" or a digit, and can only contain letters, digits, underscores, or dashes."
|
|
678
723
|
)
|
|
679
724
|
|
|
680
|
-
for spec in llm_family.model_specs:
|
|
681
|
-
model_uri = spec.model_uri
|
|
682
|
-
if model_uri and not is_valid_model_uri(model_uri):
|
|
683
|
-
raise ValueError(f"Invalid model URI {model_uri}.")
|
|
684
|
-
|
|
685
725
|
with UD_LLM_FAMILIES_LOCK:
|
|
686
726
|
for family in BUILTIN_LLM_FAMILIES + UD_LLM_FAMILIES:
|
|
687
727
|
if llm_family.model_name == family.model_name:
|
|
@@ -692,6 +732,12 @@ def register_llm(llm_family: LLMFamilyV1, persist: bool):
|
|
|
692
732
|
UD_LLM_FAMILIES.append(llm_family)
|
|
693
733
|
|
|
694
734
|
if persist:
|
|
735
|
+
# We only validate model URL when persist is True.
|
|
736
|
+
for spec in llm_family.model_specs:
|
|
737
|
+
model_uri = spec.model_uri
|
|
738
|
+
if model_uri and not is_valid_model_uri(model_uri):
|
|
739
|
+
raise ValueError(f"Invalid model URI {model_uri}.")
|
|
740
|
+
|
|
695
741
|
persist_path = os.path.join(
|
|
696
742
|
XINFERENCE_MODEL_DIR, "llm", f"{llm_family.model_name}.json"
|
|
697
743
|
)
|