xinference 0.6.4__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +49 -62
- xinference/client/restful/restful_client.py +10 -1
- xinference/conftest.py +2 -2
- xinference/constants.py +10 -0
- xinference/core/model.py +33 -10
- xinference/core/resource.py +12 -11
- xinference/core/supervisor.py +22 -2
- xinference/core/worker.py +44 -16
- xinference/deploy/cmdline.py +19 -9
- xinference/deploy/local.py +9 -1
- xinference/deploy/supervisor.py +16 -3
- xinference/deploy/utils.py +1 -0
- xinference/deploy/worker.py +1 -1
- xinference/model/embedding/__init__.py +10 -0
- xinference/model/embedding/core.py +3 -0
- xinference/model/embedding/custom.py +5 -4
- xinference/model/embedding/model_spec.json +16 -0
- xinference/model/embedding/model_spec_modelscope.json +16 -0
- xinference/model/llm/__init__.py +22 -2
- xinference/model/llm/core.py +2 -2
- xinference/model/llm/ggml/chatglm.py +79 -15
- xinference/model/llm/ggml/llamacpp.py +2 -2
- xinference/model/llm/llm_family.json +99 -4
- xinference/model/llm/llm_family.py +54 -8
- xinference/model/llm/llm_family_modelscope.json +81 -2
- xinference/model/llm/pytorch/chatglm.py +95 -2
- xinference/model/llm/utils.py +12 -8
- xinference/model/llm/vllm/core.py +26 -5
- xinference/model/utils.py +25 -0
- xinference/types.py +64 -5
- xinference/utils.py +20 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.8126d441.js +3 -0
- xinference/web/ui/build/static/js/main.8126d441.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/06eb9846159adb398d44df0b0debc256a9fd9e8171a7d68f5c4ee4d655acfa45.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0cccfbe5d963b8e31eb679f9d9677392839cedd04aa2956ac6b33cf19599d597.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2f651cf60b1bde50c0601c7110f77dd44819fb6e2501ff748a631724d91445d4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3bda436576ecb05f81f7b6ec475d1cfaf03e2b3066e3a75902fe6e8c4773b43b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/47887a9524ffeecdc2a7839dace146b24f97a5564fc3d431d6179ad2b153cf1f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/48878f5178bad1a47757e011af41c974a7946efa29485506c4d19f25bf5d522d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/59574eb63cfe9ed2e58d2f5a420e1ae54354e243a602e9bc73deae3147ed4f98.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6a60ae66b29c2f3634fd081d369b9e63b4522fe18eb9e43e9979d1ff264b68ad.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/75a5abcbc92da335fdde530f5689194ec79a4b2345b8cba594f8904d3b88e3c6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/84bfe7afede38da1f8ad569d891276fe4d66cfb87bf5c9ff7a113788ba62bb88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/940ed05006583b955894e2b8f65a4a5ebf34f8149d747f59fae5131f17d65482.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9c5f03db9aa88582a9b69b25c7f1acc78ba7fc61f743c9ed7399abb292d5dbde.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a5e2e9f707eb7039bea096ca117d996b8f9cbc2a5613fd8e0c5b0094444ce23c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c02e70e9b9efcf3bd056606308104308d6a6ac559f2bc0b4454c11fb5874457c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e26750d9556e9741912333349e4da454c53dbfddbfc6002ab49518dcf02af745.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e610aefd7000a3f8542a25cb66c64671cc8da18350de4e5b577102ba4bb78d65.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ef42ec014d7bc373b874b2a1ff0dcd785490f125e913698bc049b0bd778e4d66.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fe3eb4d76c79ca98833f686d642224eeeb94cc83ad14300d281623796d087f0a.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +1077 -405
- xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/globals/globals.json +163 -3
- xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/globals/package.json +1 -1
- xinference/web/ui/node_modules/@eslint/eslintrc/node_modules/ignore/package.json +64 -0
- xinference/web/ui/node_modules/@eslint/eslintrc/package.json +18 -37
- xinference/web/ui/node_modules/@eslint/js/package.json +1 -1
- xinference/web/ui/node_modules/@eslint-community/regexpp/package.json +9 -4
- xinference/web/ui/node_modules/@humanwhocodes/config-array/package.json +14 -14
- xinference/web/ui/node_modules/@rushstack/eslint-patch/package.json +6 -4
- xinference/web/ui/node_modules/@types/semver/package.json +15 -15
- xinference/web/ui/node_modules/@ungap/structured-clone/cjs/package.json +1 -0
- xinference/web/ui/node_modules/@ungap/structured-clone/package.json +53 -0
- xinference/web/ui/node_modules/ansi-colors/package.json +129 -0
- xinference/web/ui/node_modules/array-includes/package.json +8 -8
- xinference/web/ui/node_modules/array.prototype.findlastindex/package.json +120 -0
- xinference/web/ui/node_modules/array.prototype.flat/package.json +8 -8
- xinference/web/ui/node_modules/array.prototype.flatmap/package.json +8 -8
- xinference/web/ui/node_modules/arraybuffer.prototype.slice/package.json +103 -0
- xinference/web/ui/node_modules/ast-types-flow/package.json +2 -2
- xinference/web/ui/node_modules/astral-regex/package.json +33 -0
- xinference/web/ui/node_modules/asynciterator.prototype/package.json +72 -0
- xinference/web/ui/node_modules/axe-core/locales/_template.json +0 -12
- xinference/web/ui/node_modules/axe-core/package.json +1 -2
- xinference/web/ui/node_modules/axe-core/sri-history.json +0 -8
- xinference/web/ui/node_modules/call-bind/package.json +33 -23
- xinference/web/ui/node_modules/define-data-property/package.json +113 -0
- xinference/web/ui/node_modules/define-data-property/tsconfig.json +59 -0
- xinference/web/ui/node_modules/define-properties/package.json +5 -4
- xinference/web/ui/node_modules/enquirer/package.json +112 -0
- xinference/web/ui/node_modules/es-abstract/helpers/caseFolding.json +1430 -0
- xinference/web/ui/node_modules/es-abstract/package.json +29 -23
- xinference/web/ui/node_modules/es-iterator-helpers/index.json +17 -0
- xinference/web/ui/node_modules/es-iterator-helpers/package.json +185 -0
- xinference/web/ui/node_modules/eslint/conf/{rule-type-list.json → category-list.json} +9 -6
- xinference/web/ui/node_modules/eslint/node_modules/@babel/code-frame/package.json +25 -0
- xinference/web/ui/node_modules/eslint/node_modules/eslint-visitor-keys/lib/visitor-keys.json +289 -0
- xinference/web/ui/node_modules/eslint/node_modules/eslint-visitor-keys/package.json +39 -0
- xinference/web/ui/node_modules/eslint/node_modules/glob-parent/package.json +48 -0
- xinference/web/ui/node_modules/eslint/node_modules/ignore/package.json +64 -0
- xinference/web/ui/node_modules/eslint/package.json +53 -82
- xinference/web/ui/node_modules/eslint-config-prettier/package.json +13 -0
- xinference/web/ui/node_modules/eslint-import-resolver-node/package.json +3 -3
- xinference/web/ui/node_modules/eslint-plugin-import/package.json +22 -17
- xinference/web/ui/node_modules/eslint-plugin-jsx-a11y/package.json +25 -24
- xinference/web/ui/node_modules/eslint-plugin-simple-import-sort/package.json +23 -0
- xinference/web/ui/node_modules/eslint-plugin-testing-library/package.json +1 -1
- xinference/web/ui/node_modules/eslint-scope/package.json +19 -34
- xinference/web/ui/node_modules/eslint-utils/node_modules/eslint-visitor-keys/lib/visitor-keys.json +284 -0
- xinference/web/ui/node_modules/eslint-utils/node_modules/eslint-visitor-keys/package.json +40 -0
- xinference/web/ui/node_modules/eslint-utils/package.json +65 -0
- xinference/web/ui/node_modules/eslint-visitor-keys/package.json +15 -15
- xinference/web/ui/node_modules/espree/node_modules/acorn/package.json +35 -0
- xinference/web/ui/node_modules/espree/node_modules/eslint-visitor-keys/lib/visitor-keys.json +284 -0
- xinference/web/ui/node_modules/espree/node_modules/eslint-visitor-keys/package.json +40 -0
- xinference/web/ui/node_modules/espree/package.json +27 -51
- xinference/web/ui/node_modules/function-bind/package.json +38 -14
- xinference/web/ui/node_modules/function.prototype.name/package.json +32 -13
- xinference/web/ui/node_modules/functional-red-black-tree/package.json +40 -0
- xinference/web/ui/node_modules/get-intrinsic/package.json +11 -11
- xinference/web/ui/node_modules/hasown/package.json +91 -0
- xinference/web/ui/node_modules/hasown/tsconfig.json +49 -0
- xinference/web/ui/node_modules/is-async-function/package.json +86 -0
- xinference/web/ui/node_modules/is-core-module/core.json +3 -3
- xinference/web/ui/node_modules/is-core-module/package.json +7 -7
- xinference/web/ui/node_modules/is-finalizationregistry/package.json +67 -0
- xinference/web/ui/node_modules/is-generator-function/package.json +87 -0
- xinference/web/ui/node_modules/is-typed-array/package.json +8 -10
- xinference/web/ui/node_modules/iterator.prototype/package.json +73 -0
- xinference/web/ui/node_modules/jsx-ast-utils/package.json +5 -5
- xinference/web/ui/node_modules/language-tags/package.json +48 -8
- xinference/web/ui/node_modules/lodash.truncate/package.json +17 -0
- xinference/web/ui/node_modules/object-inspect/package.json +8 -6
- xinference/web/ui/node_modules/object.entries/package.json +7 -7
- xinference/web/ui/node_modules/object.fromentries/package.json +7 -7
- xinference/web/ui/node_modules/object.groupby/package.json +83 -0
- xinference/web/ui/node_modules/object.values/package.json +7 -7
- xinference/web/ui/node_modules/prettier/package.json +21 -0
- xinference/web/ui/node_modules/progress/package.json +26 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/@eslint/eslintrc/package.json +82 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/@humanwhocodes/config-array/package.json +61 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/@humanwhocodes/object-schema/package.json +33 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/ansi-styles/package.json +56 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/chalk/package.json +68 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/color-convert/package.json +48 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/color-name/package.json +28 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/escape-string-regexp/package.json +38 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint/conf/replacements.json +22 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint/conf/rule-type-list.json +28 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint/package.json +179 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/eslint-scope/package.json +63 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/espree/package.json +88 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/globals/globals.json +1974 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/globals/package.json +55 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/has-flag/package.json +46 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/supports-color/package.json +53 -0
- xinference/web/ui/node_modules/react-scripts/node_modules/type-fest/package.json +58 -0
- xinference/web/ui/node_modules/reflect.getprototypeof/package.json +99 -0
- xinference/web/ui/node_modules/regexp.prototype.flags/package.json +8 -7
- xinference/web/ui/node_modules/regexpp/package.json +91 -0
- xinference/web/ui/node_modules/resolve/lib/core.json +4 -1
- xinference/web/ui/node_modules/resolve/package.json +9 -8
- xinference/web/ui/node_modules/resolve/test/resolver/multirepo/package.json +1 -1
- xinference/web/ui/node_modules/safe-array-concat/package.json +5 -5
- xinference/web/ui/node_modules/set-function-length/package.json +84 -0
- xinference/web/ui/node_modules/set-function-name/package.json +80 -0
- xinference/web/ui/node_modules/slice-ansi/node_modules/ansi-styles/package.json +56 -0
- xinference/web/ui/node_modules/slice-ansi/node_modules/color-convert/package.json +48 -0
- xinference/web/ui/node_modules/slice-ansi/node_modules/color-name/package.json +28 -0
- xinference/web/ui/node_modules/slice-ansi/package.json +52 -0
- xinference/web/ui/node_modules/string.prototype.trim/package.json +7 -7
- xinference/web/ui/node_modules/string.prototype.trimend/package.json +7 -7
- xinference/web/ui/node_modules/string.prototype.trimstart/package.json +7 -7
- xinference/web/ui/node_modules/table/dist/src/schemas/config.json +95 -0
- xinference/web/ui/node_modules/table/dist/src/schemas/shared.json +139 -0
- xinference/web/ui/node_modules/table/dist/src/schemas/streamConfig.json +25 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/data.json +13 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/applicator.json +53 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/core.json +57 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/format.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2019-09/schema.json +39 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/applicator.json +48 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/core.json +51 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/format-annotation.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/unevaluated.json +15 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-2020-12/schema.json +55 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-draft-06.json +137 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-draft-07.json +151 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/dist/refs/json-schema-secure.json +88 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/data.json +13 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/applicator.json +53 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/core.json +57 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/format.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2019-09/schema.json +39 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/applicator.json +48 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/content.json +17 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/core.json +51 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/format-annotation.json +14 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/meta-data.json +37 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/unevaluated.json +15 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/meta/validation.json +90 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-2020-12/schema.json +55 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-draft-06.json +137 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-draft-07.json +151 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/lib/refs/json-schema-secure.json +88 -0
- xinference/web/ui/node_modules/table/node_modules/ajv/package.json +126 -0
- xinference/web/ui/node_modules/table/node_modules/json-schema-traverse/package.json +43 -0
- xinference/web/ui/node_modules/table/package.json +77 -0
- xinference/web/ui/node_modules/typed-array-buffer/package.json +73 -0
- xinference/web/ui/node_modules/typed-array-byte-length/package.json +98 -0
- xinference/web/ui/node_modules/v8-compile-cache/package.json +34 -0
- xinference/web/ui/node_modules/which-builtin-type/package.json +93 -0
- xinference/web/ui/node_modules/which-typed-array/package.json +4 -5
- xinference/web/ui/package-lock.json +1085 -406
- xinference/web/ui/package.json +10 -2
- {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/METADATA +53 -36
- {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/RECORD +232 -124
- {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/WHEEL +1 -1
- xinference/web/ui/build/static/js/main.8ae3b6d9.js +0 -3
- xinference/web/ui/build/static/js/main.8ae3b6d9.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/06363becf51869c421a8b3e34b4e3f50aa0aac3d590446044f9412e379f4ebbe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2849edddeb99a8ecdda577e810eead74b8f8a291cdfbd987839d604666ed79d0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2c774712d327cdf0b192aaa22785ec380e9427c587350c33289828d99e9c4abc.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/34c578e50d3040519ca8dc28bf0f7fec8674c2d6c0fcc3e98401c0a3f9f013cf.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5933910e7c33febbabc0297ef7ba80f5e53ed96aa125b6a44ff2910aec29ced1.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5e18a8354ea03d22a967fd8cb2171aa798edcb3da5d66ab1fd3b9663affd0abe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/717cd7c186ace4812d1e602bdd299d8dc507f072670cc43974d53aac2574df5d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/82dd896a6674286c48c1ab9f9147dd6e542dccd99848d5b3133a38efba8bd7ee.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a178cfde289ffd15fd54b1c80fd9d231ae0f9644db33acb02084e69b32bfee37.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/adaec65f73accce3171b51b0fbcbfd8d0cd83f81a2e1b28eb34148644875499a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ae8f44c77c2e6f79680fe32fb00174183cd867093ebbda967b8985c33cc10fa2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b10bd04b4d6e28bfcaaaab37b0a4c1986e87a5b7e62e5ce4d56019880ef26990.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/cfc5da1cedee985a556e04865affccb72d0f624cbfb73da348bbe8693e8a4983.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/eebd0123c4b4396737e56b9181406a9fd76b107dd32971d23b0de99f51dd38d6.json +0 -1
- xinference/web/ui/node_modules/@nicolo-ribaudo/eslint-scope-5-internals/node_modules/eslint-scope/package.json +0 -48
- xinference/web/ui/node_modules/@typescript-eslint/utils/node_modules/eslint-scope/package.json +0 -48
- xinference/web/ui/node_modules/@typescript-eslint/utils/node_modules/estraverse/package.json +0 -40
- xinference/web/ui/node_modules/eslint/node_modules/argparse/package.json +0 -31
- xinference/web/ui/node_modules/eslint/node_modules/js-yaml/package.json +0 -66
- xinference/web/ui/node_modules/eslint-plugin-jsx-a11y/node_modules/semver/package.json +0 -38
- xinference/web/ui/node_modules/function-bind/.jscs.json +0 -176
- xinference/web/ui/node_modules/resolve/test/resolver/malformed_package_json/package.json +0 -1
- xinference/web/ui/node_modules/webpack/node_modules/eslint-scope/package.json +0 -48
- xinference/web/ui/node_modules/webpack/node_modules/estraverse/package.json +0 -40
- /xinference/web/ui/build/static/js/{main.8ae3b6d9.js.LICENSE.txt → main.8126d441.js.LICENSE.txt} +0 -0
- /xinference/web/ui/node_modules/{@nicolo-ribaudo/eslint-scope-5-internals → eslint-scope}/node_modules/estraverse/package.json +0 -0
- /xinference/web/ui/node_modules/{@eslint/eslintrc → react-scripts}/node_modules/argparse/package.json +0 -0
- /xinference/web/ui/node_modules/{eslint → react-scripts/node_modules/eslint}/lib/cli-engine/formatters/formatters-meta.json +0 -0
- /xinference/web/ui/node_modules/{eslint-config-react-app → react-scripts/node_modules/eslint-config-react-app}/package.json +0 -0
- /xinference/web/ui/node_modules/{eslint-plugin-flowtype → react-scripts/node_modules/eslint-plugin-flowtype}/dist/configs/recommended.json +0 -0
- /xinference/web/ui/node_modules/{eslint-plugin-flowtype → react-scripts/node_modules/eslint-plugin-flowtype}/package.json +0 -0
- /xinference/web/ui/node_modules/{@eslint/eslintrc → react-scripts}/node_modules/js-yaml/package.json +0 -0
- {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/LICENSE +0 -0
- {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.6.4.dist-info → xinference-0.7.0.dist-info}/top_level.txt +0 -0
|
@@ -957,7 +957,7 @@
|
|
|
957
957
|
"model_ability": [
|
|
958
958
|
"generate"
|
|
959
959
|
],
|
|
960
|
-
"model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.
|
|
960
|
+
"model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.",
|
|
961
961
|
"model_specs": [
|
|
962
962
|
{
|
|
963
963
|
"model_format": "pytorch",
|
|
@@ -1024,6 +1024,55 @@
|
|
|
1024
1024
|
}
|
|
1025
1025
|
]
|
|
1026
1026
|
},
|
|
1027
|
+
{
|
|
1028
|
+
"version": 1,
|
|
1029
|
+
"context_length": 204800,
|
|
1030
|
+
"model_name": "Yi-chat",
|
|
1031
|
+
"model_lang": [
|
|
1032
|
+
"en",
|
|
1033
|
+
"zh"
|
|
1034
|
+
],
|
|
1035
|
+
"model_ability": [
|
|
1036
|
+
"chat"
|
|
1037
|
+
],
|
|
1038
|
+
"model_description": "The Yi series models are large language models trained from scratch by developers at 01.AI.",
|
|
1039
|
+
"model_specs": [
|
|
1040
|
+
{
|
|
1041
|
+
"model_format": "pytorch",
|
|
1042
|
+
"model_size_in_billions": 34,
|
|
1043
|
+
"quantizations": [
|
|
1044
|
+
"4-bit",
|
|
1045
|
+
"8-bit",
|
|
1046
|
+
"none"
|
|
1047
|
+
],
|
|
1048
|
+
"model_hub": "modelscope",
|
|
1049
|
+
"model_id": "01ai/Yi-34B-Chat",
|
|
1050
|
+
"model_revision": "master"
|
|
1051
|
+
}
|
|
1052
|
+
],
|
|
1053
|
+
"prompt_style": {
|
|
1054
|
+
"style_name": "CHATML",
|
|
1055
|
+
"system_prompt": "",
|
|
1056
|
+
"roles": [
|
|
1057
|
+
"<|im_start|>user",
|
|
1058
|
+
"<|im_start|>assistant"
|
|
1059
|
+
],
|
|
1060
|
+
"intra_message_sep": "<|im_end|>",
|
|
1061
|
+
"inter_message_sep": "",
|
|
1062
|
+
"stop_token_ids": [
|
|
1063
|
+
2,
|
|
1064
|
+
6,
|
|
1065
|
+
7,
|
|
1066
|
+
8
|
|
1067
|
+
],
|
|
1068
|
+
"stop": [
|
|
1069
|
+
"<|endoftext|>",
|
|
1070
|
+
"<|im_start|>",
|
|
1071
|
+
"<|im_end|>",
|
|
1072
|
+
"<|im_sep|>"
|
|
1073
|
+
]
|
|
1074
|
+
}
|
|
1075
|
+
},
|
|
1027
1076
|
{
|
|
1028
1077
|
"version": 1,
|
|
1029
1078
|
"context_length": 2048,
|
|
@@ -1329,6 +1378,18 @@
|
|
|
1329
1378
|
"model_id": "qwen/Qwen-7B-Chat",
|
|
1330
1379
|
"model_revision": "v1.1.7"
|
|
1331
1380
|
},
|
|
1381
|
+
{
|
|
1382
|
+
"model_format": "pytorch",
|
|
1383
|
+
"model_size_in_billions": 72,
|
|
1384
|
+
"quantizations": [
|
|
1385
|
+
"4-bit",
|
|
1386
|
+
"8-bit",
|
|
1387
|
+
"none"
|
|
1388
|
+
],
|
|
1389
|
+
"model_hub": "modelscope",
|
|
1390
|
+
"model_id": "qwen/Qwen-72B-Chat",
|
|
1391
|
+
"model_revision": "master"
|
|
1392
|
+
},
|
|
1332
1393
|
{
|
|
1333
1394
|
"model_format": "pytorch",
|
|
1334
1395
|
"model_size_in_billions": 14,
|
|
@@ -1360,6 +1421,17 @@
|
|
|
1360
1421
|
"model_id": "qwen/Qwen-14B-Chat-{quantization}",
|
|
1361
1422
|
"model_hub": "modelscope",
|
|
1362
1423
|
"model_revision": "v1.0.7"
|
|
1424
|
+
},
|
|
1425
|
+
{
|
|
1426
|
+
"model_format": "gptq",
|
|
1427
|
+
"model_size_in_billions": 72,
|
|
1428
|
+
"quantizations": [
|
|
1429
|
+
"Int4",
|
|
1430
|
+
"Int8"
|
|
1431
|
+
],
|
|
1432
|
+
"model_id": "qwen/Qwen-72B-Chat-{quantization}",
|
|
1433
|
+
"model_hub": "modelscope",
|
|
1434
|
+
"model_revision": "master"
|
|
1363
1435
|
}
|
|
1364
1436
|
],
|
|
1365
1437
|
"prompt_style": {
|
|
@@ -1371,7 +1443,14 @@
|
|
|
1371
1443
|
],
|
|
1372
1444
|
"intra_message_sep": "\n",
|
|
1373
1445
|
"stop_token_ids": [
|
|
1374
|
-
151643
|
|
1446
|
+
151643,
|
|
1447
|
+
151644,
|
|
1448
|
+
151645
|
|
1449
|
+
],
|
|
1450
|
+
"stop": [
|
|
1451
|
+
"<|endoftext|>",
|
|
1452
|
+
"<|im_start|>",
|
|
1453
|
+
"<|im_end|>"
|
|
1375
1454
|
]
|
|
1376
1455
|
}
|
|
1377
1456
|
}
|
|
@@ -11,9 +11,17 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
import json
|
|
15
|
+
import time
|
|
16
|
+
import uuid
|
|
17
|
+
from typing import Iterator, List, Optional, Union
|
|
14
18
|
|
|
15
|
-
from
|
|
16
|
-
|
|
19
|
+
from ....types import (
|
|
20
|
+
ChatCompletion,
|
|
21
|
+
ChatCompletionChunk,
|
|
22
|
+
ChatCompletionMessage,
|
|
23
|
+
PytorchGenerateConfig,
|
|
24
|
+
)
|
|
17
25
|
from ..llm_family import LLMFamilyV1, LLMSpecV1
|
|
18
26
|
from .core import PytorchChatModel, PytorchModelConfig
|
|
19
27
|
|
|
@@ -71,3 +79,88 @@ class ChatglmPytorchChatModel(PytorchChatModel):
|
|
|
71
79
|
if "chat" not in llm_family.model_ability:
|
|
72
80
|
return False
|
|
73
81
|
return True
|
|
82
|
+
|
|
83
|
+
@staticmethod
|
|
84
|
+
def _handle_tools(generate_config) -> Optional[dict]:
|
|
85
|
+
"""Convert openai tools to ChatGLM tools."""
|
|
86
|
+
if generate_config is None:
|
|
87
|
+
return None
|
|
88
|
+
tools = generate_config.pop("tools", None)
|
|
89
|
+
if tools is None:
|
|
90
|
+
return None
|
|
91
|
+
chatglm_tools = []
|
|
92
|
+
for elem in tools:
|
|
93
|
+
if elem.get("type") != "function" or "function" not in elem:
|
|
94
|
+
raise ValueError("ChatGLM tools only support function type.")
|
|
95
|
+
chatglm_tools.append(elem["function"])
|
|
96
|
+
return {
|
|
97
|
+
"role": "system",
|
|
98
|
+
"content": f"Answer the following questions as best as you can. You have access to the following tools:",
|
|
99
|
+
"tools": chatglm_tools,
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
@staticmethod
|
|
103
|
+
def _tool_calls_completion(msg, model_name) -> ChatCompletion:
|
|
104
|
+
_id = str(uuid.uuid4())
|
|
105
|
+
return {
|
|
106
|
+
"id": "chat" + f"cmpl-{_id}",
|
|
107
|
+
"model": model_name,
|
|
108
|
+
"object": "chat.completion",
|
|
109
|
+
"created": int(time.time()),
|
|
110
|
+
"choices": [
|
|
111
|
+
{
|
|
112
|
+
"index": 0,
|
|
113
|
+
"message": {
|
|
114
|
+
"role": "assistant",
|
|
115
|
+
"content": None,
|
|
116
|
+
"tool_calls": [
|
|
117
|
+
{
|
|
118
|
+
"id": f"call_{_id}",
|
|
119
|
+
"type": "function",
|
|
120
|
+
"function": {
|
|
121
|
+
"name": msg["name"],
|
|
122
|
+
"arguments": json.dumps(msg["parameters"]),
|
|
123
|
+
},
|
|
124
|
+
}
|
|
125
|
+
],
|
|
126
|
+
},
|
|
127
|
+
"finish_reason": "tool_calls",
|
|
128
|
+
}
|
|
129
|
+
],
|
|
130
|
+
"usage": {
|
|
131
|
+
"prompt_tokens": -1,
|
|
132
|
+
"completion_tokens": -1,
|
|
133
|
+
"total_tokens": -1,
|
|
134
|
+
},
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
def chat(
|
|
138
|
+
self,
|
|
139
|
+
prompt: str,
|
|
140
|
+
system_prompt: Optional[str] = None,
|
|
141
|
+
chat_history: Optional[List[ChatCompletionMessage]] = None,
|
|
142
|
+
generate_config: Optional[PytorchGenerateConfig] = None,
|
|
143
|
+
) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
|
|
144
|
+
tools = self._handle_tools(generate_config)
|
|
145
|
+
if tools:
|
|
146
|
+
# Tool calls only works for non stream, so we call chat directly.
|
|
147
|
+
kwargs = {}
|
|
148
|
+
generate_config = generate_config or {}
|
|
149
|
+
temperature = generate_config.get("temperature")
|
|
150
|
+
if temperature is not None:
|
|
151
|
+
kwargs["temperature"] = float(temperature)
|
|
152
|
+
top_p = generate_config.get("top_p")
|
|
153
|
+
if top_p is not None:
|
|
154
|
+
kwargs["top_p"] = float(top_p)
|
|
155
|
+
max_length = generate_config.get("max_tokens")
|
|
156
|
+
if max_length is not None:
|
|
157
|
+
kwargs["max_length"] = int(max_length)
|
|
158
|
+
msg = self._model.chat(self._tokenizer, prompt, [tools], **kwargs)
|
|
159
|
+
return self._tool_calls_completion(msg[0], self.model_uid)
|
|
160
|
+
else:
|
|
161
|
+
return super().chat(
|
|
162
|
+
prompt=prompt,
|
|
163
|
+
system_prompt=system_prompt,
|
|
164
|
+
chat_history=chat_history,
|
|
165
|
+
generate_config=generate_config,
|
|
166
|
+
)
|
xinference/model/llm/utils.py
CHANGED
|
@@ -122,19 +122,20 @@ class ChatModelMixin:
|
|
|
122
122
|
ret += role + ":"
|
|
123
123
|
return ret
|
|
124
124
|
elif prompt_style.style_name == "CHATGLM3":
|
|
125
|
-
|
|
126
|
-
f"<|system
|
|
125
|
+
prompts = (
|
|
126
|
+
[f"<|system|>\n{prompt_style.system_prompt}"]
|
|
127
127
|
if prompt_style.system_prompt
|
|
128
|
-
else
|
|
128
|
+
else []
|
|
129
129
|
)
|
|
130
|
+
|
|
130
131
|
for i, message in enumerate(chat_history):
|
|
131
132
|
role = message["role"]
|
|
132
133
|
content = message["content"]
|
|
133
134
|
if content:
|
|
134
|
-
|
|
135
|
+
prompts.append(f"<|{role}|>\n{content}")
|
|
135
136
|
else:
|
|
136
|
-
|
|
137
|
-
return
|
|
137
|
+
prompts.append(f"<|{role}|>")
|
|
138
|
+
return "\n".join(prompts)
|
|
138
139
|
elif prompt_style.style_name == "XVERSE":
|
|
139
140
|
ret = (
|
|
140
141
|
f"<|system|> \n {prompt_style.system_prompt}"
|
|
@@ -184,11 +185,14 @@ class ChatModelMixin:
|
|
|
184
185
|
ret += "<s>"
|
|
185
186
|
role = message["role"]
|
|
186
187
|
content = message["content"]
|
|
187
|
-
ret += role + ":" + content + seps[i % 2]
|
|
188
|
+
ret += role + ":" + str(content) + seps[i % 2]
|
|
188
189
|
if len(ret) == 0:
|
|
189
190
|
ret += "<s>"
|
|
190
191
|
ret += (
|
|
191
|
-
chat_history[-2]["role"]
|
|
192
|
+
chat_history[-2]["role"]
|
|
193
|
+
+ ":"
|
|
194
|
+
+ str(chat_history[-2]["content"])
|
|
195
|
+
+ seps[0]
|
|
192
196
|
)
|
|
193
197
|
ret += chat_history[-1]["role"] + ":"
|
|
194
198
|
return ret
|
|
@@ -17,6 +17,7 @@ import time
|
|
|
17
17
|
import uuid
|
|
18
18
|
from typing import TYPE_CHECKING, AsyncGenerator, Dict, List, Optional, TypedDict, Union
|
|
19
19
|
|
|
20
|
+
from ....constants import XINFERENCE_DISABLE_VLLM
|
|
20
21
|
from ....types import (
|
|
21
22
|
ChatCompletion,
|
|
22
23
|
ChatCompletionChunk,
|
|
@@ -44,6 +45,7 @@ class VLLMModelConfig(TypedDict, total=False):
|
|
|
44
45
|
gpu_memory_utilization: float
|
|
45
46
|
max_num_batched_tokens: int
|
|
46
47
|
max_num_seqs: int
|
|
48
|
+
quantization: Optional[str]
|
|
47
49
|
|
|
48
50
|
|
|
49
51
|
class VLLMGenerateConfig(TypedDict, total=False):
|
|
@@ -54,6 +56,7 @@ class VLLMGenerateConfig(TypedDict, total=False):
|
|
|
54
56
|
temperature: float
|
|
55
57
|
top_p: float
|
|
56
58
|
max_tokens: int
|
|
59
|
+
stop_token_ids: Optional[List[int]]
|
|
57
60
|
stop: Optional[Union[str, List[str]]]
|
|
58
61
|
stream: bool # non-sampling param, should not be passed to the engine.
|
|
59
62
|
|
|
@@ -65,7 +68,7 @@ try:
|
|
|
65
68
|
except ImportError:
|
|
66
69
|
VLLM_INSTALLED = False
|
|
67
70
|
|
|
68
|
-
VLLM_SUPPORTED_MODELS = ["llama-2", "baichuan", "internlm-16k"]
|
|
71
|
+
VLLM_SUPPORTED_MODELS = ["llama-2", "baichuan", "internlm-16k", "mistral-v0.1"]
|
|
69
72
|
VLLM_SUPPORTED_CHAT_MODELS = [
|
|
70
73
|
"llama-2-chat",
|
|
71
74
|
"vicuna-v1.3",
|
|
@@ -74,6 +77,10 @@ VLLM_SUPPORTED_CHAT_MODELS = [
|
|
|
74
77
|
"internlm-chat-7b",
|
|
75
78
|
"internlm-chat-8k",
|
|
76
79
|
"internlm-chat-20b",
|
|
80
|
+
"qwen-chat",
|
|
81
|
+
"Yi",
|
|
82
|
+
"mistral-instruct-v0.1",
|
|
83
|
+
"chatglm3",
|
|
77
84
|
]
|
|
78
85
|
|
|
79
86
|
|
|
@@ -127,6 +134,7 @@ class VLLMModel(LLM):
|
|
|
127
134
|
model_config.setdefault("swap_space", 4)
|
|
128
135
|
model_config.setdefault("gpu_memory_utilization", 0.90)
|
|
129
136
|
model_config.setdefault("max_num_seqs", 256)
|
|
137
|
+
model_config.setdefault("quantization", None)
|
|
130
138
|
|
|
131
139
|
return model_config
|
|
132
140
|
|
|
@@ -150,6 +158,9 @@ class VLLMModel(LLM):
|
|
|
150
158
|
sanitized.setdefault("top_p", generate_config.get("top_p", 1.0))
|
|
151
159
|
sanitized.setdefault("max_tokens", generate_config.get("max_tokens", 16))
|
|
152
160
|
sanitized.setdefault("stop", generate_config.get("stop", None))
|
|
161
|
+
sanitized.setdefault(
|
|
162
|
+
"stop_token_ids", generate_config.get("stop_token_ids", None)
|
|
163
|
+
)
|
|
153
164
|
sanitized.setdefault("stream", generate_config.get("stream", None))
|
|
154
165
|
|
|
155
166
|
return sanitized
|
|
@@ -158,6 +169,8 @@ class VLLMModel(LLM):
|
|
|
158
169
|
def match(
|
|
159
170
|
cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
|
|
160
171
|
) -> bool:
|
|
172
|
+
if XINFERENCE_DISABLE_VLLM:
|
|
173
|
+
return False
|
|
161
174
|
if not cls._has_cuda_device():
|
|
162
175
|
return False
|
|
163
176
|
if not cls._is_linux():
|
|
@@ -287,6 +300,8 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
|
|
|
287
300
|
def match(
|
|
288
301
|
cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
|
|
289
302
|
) -> bool:
|
|
303
|
+
if XINFERENCE_DISABLE_VLLM:
|
|
304
|
+
return False
|
|
290
305
|
if quantization != "none":
|
|
291
306
|
return False
|
|
292
307
|
if llm_spec.model_format != "pytorch":
|
|
@@ -303,10 +318,16 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
|
|
|
303
318
|
) -> Dict:
|
|
304
319
|
if not generate_config:
|
|
305
320
|
generate_config = {}
|
|
306
|
-
if self.model_family.prompt_style
|
|
307
|
-
|
|
308
|
-
"stop"
|
|
309
|
-
)
|
|
321
|
+
if self.model_family.prompt_style:
|
|
322
|
+
if (
|
|
323
|
+
not generate_config.get("stop")
|
|
324
|
+
) and self.model_family.prompt_style.stop:
|
|
325
|
+
generate_config["stop"] = self.model_family.prompt_style.stop.copy()
|
|
326
|
+
if self.model_family.prompt_style.stop_token_ids:
|
|
327
|
+
generate_config.setdefault(
|
|
328
|
+
"stop_token_ids",
|
|
329
|
+
self.model_family.prompt_style.stop_token_ids.copy(),
|
|
330
|
+
)
|
|
310
331
|
return generate_config
|
|
311
332
|
|
|
312
333
|
async def async_chat(
|
xinference/model/utils.py
CHANGED
|
@@ -211,3 +211,28 @@ def copy_from_src_to_dst(
|
|
|
211
211
|
)
|
|
212
212
|
if attempt + 1 == max_attempt:
|
|
213
213
|
raise
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def patch_trust_remote_code():
    """Make transformers' ``resolve_trust_remote_code`` always answer ``True``.

    sentence-transformers calls transformers without ``trust_remote_code=True``,
    so some embedding models fail to load, e.g. jina-embeddings-v2-base-en.
    To work around that, the code object of ``resolve_trust_remote_code`` is
    swapped for a stub that logs its arguments and returns ``True``.

    If the function cannot be imported, an error is logged and nothing is
    patched. Calling this again after a successful patch is a no-op.

    NOTE(review): presumably this makes transformers accept remote code for
    every model loaded afterwards in this process -- confirm that is intended.

    :return: None
    """
    try:
        from transformers.dynamic_module_utils import resolve_trust_remote_code
    except ImportError:
        logger.error("Patch transformers trust_remote_code failed.")
        return

    def _patched_resolve_trust_remote_code(*args, **kwargs):
        logger.info("Patched resolve_trust_remote_code: %s %s", args, kwargs)
        return True

    stub_code = _patched_resolve_trust_remote_code.__code__
    # Same code object already installed: an earlier call did the patching.
    if resolve_trust_remote_code.__code__ == stub_code:
        return
    resolve_trust_remote_code.__code__ = stub_code
|
xinference/types.py
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from typing import Any, Callable, Dict, Iterable, List, Optional, Union
|
|
15
|
+
from typing import Any, Callable, Dict, ForwardRef, Iterable, List, Optional, Union
|
|
16
16
|
|
|
17
17
|
from pydantic import (
|
|
18
18
|
BaseModel,
|
|
@@ -121,8 +121,9 @@ class Completion(TypedDict):
|
|
|
121
121
|
|
|
122
122
|
class ChatCompletionMessage(TypedDict):
|
|
123
123
|
role: str
|
|
124
|
-
content: str
|
|
124
|
+
content: Optional[str]
|
|
125
125
|
user: NotRequired[str]
|
|
126
|
+
tool_calls: NotRequired[List]
|
|
126
127
|
|
|
127
128
|
|
|
128
129
|
class ChatCompletionChoice(TypedDict):
|
|
@@ -288,13 +289,33 @@ def get_pydantic_model_from_method(
|
|
|
288
289
|
model.__fields__.pop(key)
|
|
289
290
|
if exclude_fields is not None:
|
|
290
291
|
for key in exclude_fields:
|
|
291
|
-
model.__fields__.pop(key)
|
|
292
|
+
model.__fields__.pop(key, None)
|
|
292
293
|
if include_fields is not None:
|
|
293
294
|
dummy_model = create_model("DummyModel", **include_fields)
|
|
294
295
|
model.__fields__.update(dummy_model.__fields__)
|
|
295
296
|
return model
|
|
296
297
|
|
|
297
298
|
|
|
299
|
+
def fix_forward_ref(model):
    """Swap every ``ForwardRef``-annotated field of *model* for ``Optional[Any]``.

    pydantic on Python 3.8 generates fields whose annotation is a
    ``ForwardRef``. Each such field is removed from ``model.__fields__`` and
    re-added as ``Optional[Any]`` with a default of ``None``.

    The model is modified in place and also returned; a model without any
    ``ForwardRef`` field is returned untouched.
    """
    unresolved = [
        name
        for name, field in model.__fields__.items()
        if isinstance(field.annotation, ForwardRef)
    ]
    if not unresolved:
        return model

    for name in unresolved:
        model.__fields__.pop(name, None)

    # A throwaway model is the source of the replacement field objects.
    placeholders = {name: (Optional[Any], None) for name in unresolved}
    dummy_model = create_model("DummyModel", **placeholders)
    model.__fields__.update(dummy_model.__fields__)
    return model
|
|
317
|
+
|
|
318
|
+
|
|
298
319
|
class ModelAndPrompt(BaseModel):
|
|
299
320
|
model: str
|
|
300
321
|
prompt: str
|
|
@@ -318,7 +339,9 @@ try:
|
|
|
318
339
|
from llama_cpp import Llama
|
|
319
340
|
|
|
320
341
|
CreateCompletionLlamaCpp = get_pydantic_model_from_method(
|
|
321
|
-
Llama.create_completion,
|
|
342
|
+
Llama.create_completion,
|
|
343
|
+
exclude_fields=["model", "prompt", "grammar"],
|
|
344
|
+
include_fields={"grammar": (Optional[Any], None)},
|
|
322
345
|
)
|
|
323
346
|
except ImportError:
|
|
324
347
|
CreateCompletionLlamaCpp = create_model("CreateCompletionLlamaCpp")
|
|
@@ -330,7 +353,7 @@ try:
|
|
|
330
353
|
CreateCompletionCTransformers = get_pydantic_model_from_method(
|
|
331
354
|
LLM.generate,
|
|
332
355
|
exclude_fields=["tokens"],
|
|
333
|
-
include_fields={"max_tokens": (int, max_tokens_field)},
|
|
356
|
+
include_fields={"max_tokens": (Optional[int], max_tokens_field)},
|
|
334
357
|
)
|
|
335
358
|
except ImportError:
|
|
336
359
|
CreateCompletionCTransformers = create_model("CreateCompletionCTransformers")
|
|
@@ -370,6 +393,7 @@ try:
|
|
|
370
393
|
CreateCompletionOpenAI = create_model_from_typeddict(
|
|
371
394
|
CompletionCreateParamsNonStreaming,
|
|
372
395
|
)
|
|
396
|
+
CreateCompletionOpenAI = fix_forward_ref(CreateCompletionOpenAI)
|
|
373
397
|
except ImportError:
|
|
374
398
|
# TODO(codingl2k1): Remove it if openai < 1 is dropped.
|
|
375
399
|
CreateCompletionOpenAI = _CreateCompletionOpenAIFallback
|
|
@@ -383,3 +407,38 @@ class CreateCompletion(
|
|
|
383
407
|
CreateCompletionOpenAI,
|
|
384
408
|
):
|
|
385
409
|
pass
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
# Base pydantic model for chat-completion requests; it declares only the
# ``model`` field and is combined with the per-backend parameter models in
# CreateChatCompletion.
class CreateChatModel(BaseModel):
    # Presumably the name/UID of the model the request targets -- confirm
    # against the REST handler that consumes this model.
    model: str
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
# Chat currently delegates to generate, so each backend's chat request model
# is an alias of its completion request model.
CreateChatCompletionTorch = CreateCompletionTorch
CreateChatCompletionLlamaCpp: BaseModel = CreateCompletionLlamaCpp
CreateChatCompletionCTransformers: BaseModel = CreateCompletionCTransformers


# This type is for OpenAI API compatibility. Annotation-only declaration: the
# value is assigned further down.
CreateChatCompletionOpenAI: BaseModel


# Only openai > 1 is supported.
# NOTE(review): this import is not wrapped in try/except ImportError, unlike
# the CreateCompletionOpenAI setup earlier in this module -- confirm that
# importing this module without a matching openai package is meant to fail.
# It also rebinds the name CompletionCreateParamsNonStreaming to the chat
# variant from here on.
from openai.types.chat.completion_create_params import (
    CompletionCreateParamsNonStreaming,
)

# Build a pydantic model from the OpenAI TypedDict, then replace any
# ForwardRef-annotated fields with Optional[Any] via fix_forward_ref.
CreateChatCompletionOpenAI = create_model_from_typeddict(
    CompletionCreateParamsNonStreaming,
)
CreateChatCompletionOpenAI = fix_forward_ref(CreateChatCompletionOpenAI)
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
# Request model for chat completion. It adds no fields of its own: everything
# is inherited from CreateChatModel (the ``model`` field) and from the
# per-backend parameter models (torch, llama.cpp, ctransformers, OpenAI)
# through multiple inheritance.
class CreateChatCompletion(
    CreateChatModel,
    CreateChatCompletionTorch,
    CreateChatCompletionLlamaCpp,
    CreateChatCompletionCTransformers,
    CreateChatCompletionOpenAI,
):
    pass
|
xinference/utils.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Copyright 2022-2023 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import torch
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def cuda_count():
    """Return the number of CUDA devices reported by torch.

    Per the original note, a CPU-only torch install still provides this call
    and reports 0.
    """
    n_devices = torch.cuda.device_count()
    return n_devices
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"files": {
|
|
3
|
-
"main.js": "./static/js/main.
|
|
3
|
+
"main.js": "./static/js/main.8126d441.js",
|
|
4
4
|
"static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
|
|
5
5
|
"index.html": "./index.html",
|
|
6
|
-
"main.
|
|
6
|
+
"main.8126d441.js.map": "./static/js/main.8126d441.js.map"
|
|
7
7
|
},
|
|
8
8
|
"entrypoints": [
|
|
9
|
-
"static/js/main.
|
|
9
|
+
"static/js/main.8126d441.js"
|
|
10
10
|
]
|
|
11
11
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.
|
|
1
|
+
<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.8126d441.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
|