xinference 0.8.5__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/__init__.py +6 -0
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +136 -74
- xinference/core/model.py +3 -2
- xinference/core/resource.py +4 -2
- xinference/core/supervisor.py +37 -12
- xinference/core/worker.py +13 -13
- xinference/deploy/worker.py +7 -7
- xinference/device_utils.py +100 -0
- xinference/model/audio/whisper.py +20 -8
- xinference/model/image/core.py +5 -1
- xinference/model/image/stable_diffusion/core.py +3 -5
- xinference/model/llm/llm_family.json +93 -3
- xinference/model/llm/llm_family_modelscope.json +46 -10
- xinference/model/llm/pytorch/compression.py +3 -1
- xinference/model/llm/pytorch/core.py +33 -14
- xinference/model/llm/pytorch/qwen_vl.py +5 -3
- xinference/model/llm/pytorch/spec_decoding_utils.py +3 -1
- xinference/model/llm/pytorch/spec_model.py +20 -17
- xinference/model/llm/pytorch/utils.py +3 -2
- xinference/model/llm/pytorch/yi_vl.py +9 -3
- xinference/model/llm/utils.py +9 -0
- xinference/model/utils.py +6 -16
- xinference/thirdparty/llava/mm_utils.py +1 -1
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.87d39ffb.js +3 -0
- xinference/web/ui/build/static/js/main.87d39ffb.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{8d95a492a6b08c192e212189567b59b98282ca6f3ed52dd9bd053a6be7ff4e65.json → 027745bdb9bb9fe375f2eef7377e9f7ba82510ed90c05f9e5b34ba314bb93498.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/03d5e7de9b1728d71cd8c483a35f4373eb66932022026142e03005d9de1a2a44.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0738899eefad7f90261125823d87ea9f0d53667b1479a0c1f398aff14f2bbd2a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/09a21ab449599121eadfdd12d6dba40c5a1c5b35dc90be17108eeff80daf79b8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/09d043a8289da33d05768f5face63cc47e9ce9646d67f5589865db511574e8f9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0b901c8f077905bf153a1390b207277c31e893f83f9ced5d29a0a0c85a07b5eb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0da1ff7c9602609cfa8b82a67140114f3037130fe79583bbf23b69cef81823a6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0f87caef05125425c0ca418a32ce8a53e9becffd464411b2ff94e3c7a37c7b5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{cb50b4d54ce9c249d580f7f1fc2df4beab4f13c4d16f486c89239809570de8fb.json → 0fc333c35ad49c0db7f18f77521984a36067cfce4206df50ce16a2fdc88be1c6.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/13fdd92dd5e2fe11640cf6ec0368e3c098d3f0c43e11703ccf157e8c41410d5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1464f950e5600fe2e5890186a1ae3750cdabbadd8525070cef6273450760bcf6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/17c53dc44a324629efef425833ab48cc05243d089f4eb3c731da63568fcff35e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1946a52cb34e675da423de15b703437b4c841e4f5aed6ddd932a091c145e17b0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1e0da5ce4d8ccdbe33705a47bdc0d5d1d7e4e48c8e5c1d0b8c473e9f491930ac.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1e3ec3302ef2253fad5e86bf429baf6b69e57e769f38ec1aaada87f3114b6a1f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1f18804827897c337874b289873df611b44869f86ced1d4351fbe0b45ed17518.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/21b64f7fdb74c2c35479811c3f5683ee161d3b64a4fade0cd1fc0044332c79b2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/22873d21eab4b5a04b183cc3d2cbfafa431e0ce99649aeb900cffcfc33ecbca4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{7a5fecf96af770954035948b07dfadeceb33536b4763a2f9e548121ae555b75f.json → 228b78cd0950f1c967cb33b210b11497052a9998f0b1d1f31ecee0fd8b99fb40.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2328fc2360224ac3eef5c5261fc2a75cdce7fe58c210af8d3e532ec609464f5f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/25fd89450dcf73a205a5f8c82dbc604bf872d574b748427c26ba79b8711ec502.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{b6e95546cdd247e50d9c916dd3bef5216f96c32a267e54b51b23cc22631be0c4.json → 288d6573b09f0e2b0277e1f301bcb57a194c01f3983e3c1e03c8b2ea72653194.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2c04900a020b813e2e99b15748c3c3ef5a1627c33d5668e45bc6a798f0d28dae.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2cd0fd50d8f213cdd94daffe7d0f997991072b11936ead7c48be2af3fdbd1fda.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2e057c12322da07a537a576a3917d512ac6cb291be5aa29ce23d2679a4fd16b1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{abd0662d21fbe39e483fbc4d37faa9e24d93889b5b40d42e7e20773c66ee3894.json → 327c21db6124a5209f41996d0d3359b462d5baff3974296f5acfddfc532434e6.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/33294bb0080595100e22c960929a6f3086f5ea632a4bcd9216069f95347a63a9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3361869e45eddd7d48ff7995e1a12ac454c9eb92d4c87f49ce80c1e9482c1e91.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/375d2f6a31e01c5a82fd0e18f42198f9fbb7117491f3e92d70cf36055f159a6e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3808217efee76345ba19af3ffdba045888e0908b0dc7fc38e54655cdc3504c60.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/39db740a99454006312dec9e4845f850c51e043458ca94eba8bb256521fabcd4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3e737bcdbcbc407ccd65b90e199ef0c3214b261e8e41dbf14d921384a717d9ee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3e87c7d4f1e5c93603eed3e861945e44dd1e664061a89aaef1aa6435f4f4d7d0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3ecf70018028c8cf6cdc2d24d2b88f2707bd5c23a336fd3d82bd6a1af0db7ded.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/43314d1e9fed30ed6cf5cab3f5033da067e58be29e36448e3c0a8127403a9388.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/442467c09440131f7c25ddcdeb356d2a87783ad4e32d89f410a16cb6e7d83b22.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/487524f2b3494bfcfbd8c12918092a52ff2ce3eefe9bf0a557389ec1c74d3e67.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4de4b0282a3d1ae02bec0bfe7d6658f039141406e8bfdbe86bf1dc5b115a2524.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4ec72faab00e4cbffefdf43b987ae3d15beb60e282b9ca86dad37f4d0fc3f2bd.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/51403f1b70af0f4577d4594cbfcf9d9759997e5c49097e7a07219000d1be4ff1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/566a0829bfeab41e40bbdca55ad056067e172508af4b333403d4fd334a8c8742.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5689d9aee417a0834fed66720cf3b48c34cc26364cd9ab7e3d809997bb9faec9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5bedd27475bda204eafccdc636a56ba5765353391049a0b947234faaab3d021a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{94daea23b698bbb26bbbab69bbca8ad521c79fcbd6b495c84c740e4006a2ced2.json → 61014d40df4486972fb6d5909dff45f24bec48573af99ca8bcd79835385c7da0.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/6139c80ec46d77469e9a89924de41d93050f19e9ed541e358f09cbf20ccd7c86.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63ead4dee6a4f4ae47aed6918a2ab3e172739ecdffaa85e063f9a2c9165ff93c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6bf8c25e0f2051d994990bf73fdcb3e08963e11589842895593f199eab13ba9a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6d42f63cd673d1c60e534a174cd851e9aefd2d5d45a4aa6128d794457b64de53.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7044e626a91582077b8cbad09bcf0cc118e92e11ebfa2ebc17622baf98aa56bf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/71ec408030881aad69854815b8681185d0174cc0cf67f20fb2c3cd13c74fce07.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7280614d5bddce524182024cc288953eb89d46c1dbe470ed5eb5ecab5de28385.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7293ad8ba7b5fc6ed791aed44a766a396516c782f5d0e564b9ef0997645ec1dd.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/79be1e0dd6b1c3cbb3f08b349454f4791edaac451fa4d1f10eaf433e57ecf70f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7c0571c03aa8c90eb9452ba23133aed9b3b2a049910d4d8276588b438a4caaba.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/81ed094096cd9dc36f3ddebe61818766f43d3338c171f2269958f47265e60f24.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/86355e55059ddb15f71c0c651a2e622cee6f3141da3a018775ca00c74dd37dc2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{d1af6db565bae36218251c01a817c080ef764172dd0694e38d27b525a3ffd1d2.json → 8710fe63bcc6b95502287c36a7bd881b74ee2926982e120e1e8070a2c8a668fc.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8779104679d8f85017b551d1d108ea1ac701e184ad7bcc8a367526353a6c5175.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/881c573e6aa9954e7bd5e3bc5a6ef62c0db9b93815030080f677f6834e431434.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8d24c51a0905b919436aac2bad441eb1651c46c4e7aa17c99eb63c7760ce6615.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/93bf5cc8cbe3f0b10f291800e1896f03506a8933cd490e07ee0ff94498f93bce.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9401db0316ac34790aa5b972e9e1c50de58ef313b50840a37e08f0936a7197b6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/98afeafdfc285a744d116fb5f189a16ad2b977adbd4fc0757f7ade1b29b8ad31.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9a4e624d1ce66832fb213ead02745d84661d9bfcc21b62b4c37c680c3c3f5cbb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9a7b26f85224d33d431354ac8d7849388b7525fc014801824e705561ab2ac7a7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9dcb04ac6cb341233a2e98f2fb417d059c2ea6a56d2ff6f30bb27dae9bd6c756.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9ebede8a4bccef016eef418a668b3212962779ea6d2531edb23d3dca81163496.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a0efc8db4289116bdb312014df91fc781f66fe171d63027bbd5aae25cd54c325.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a212defc32ca338ba673722c72815d30974275f1a82b67405a898f826fc8d782.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a2685acab57fd6d874bf6069a587e3770b55c72cacc69cb167bb3dbd16a551c5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3b7d6233e50dc3832421793a5752c1db99f3a214c9896c97bbd395d007f09b1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3d0c810a255108613be0a175d53d673c4b6843c2a87dd0f157b4d3731af2092.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a6633774053545c60721e345820e21cf3b089df219f7ee3fcceaf27af3a9e84b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a6bb917e4869d7d6ddf50b75932cad64e94b9e33db2aa40c4a489a045819bb36.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a816031fafa564dc4af27383612e51ac29af51354f3bba750a70ac0c03b6bd30.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/af6349a1e9761cb98af4b76904440fb7f8d54288ee6ba37116f670226796e689.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/afce63eaad32a9968590e4500e123c81b385ea3b6046b2af1ec7ea8a49ba657b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/aff24b61e5d55147d0a36ef1896b9bd35c995c357f44eeee1209f23db2118636.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b01676c5ca97e3409a1b7bac91b6a797be3719a8b7ec72ad06c9beb296c67bb7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b0c5e967d1af90d545f1d34c9893a841296b0d1dd50de65eeaf19054995d2a94.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b561d429cea6143e1bf7e6e7e690f9e6ce1dce4adfcacb57d7c5578195d1d190.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b70187bb5aecf3bd7afbe8469b49e2a0a94a0c62727530d0dc2f84ee4577bcfc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bd21f6039e23775fb4631bb56f2eef1d132c03a60cc6fe4530569ecfeef456a3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/be09b5508d0ca5ac0300144a682d4cde2d5b4b96ecb9fc690c1d5e7ee7411b3e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/be8c9e83f0dd4af2e9bcf6844dd8c4445fe634b6c71da6debfa7acabb7a875c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c0d1317dfae0c5ddf5a3ba9aef6bd2204f2daf9754f3270c4cab5c4632f78000.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c1e3afdaae5649274f7d34910015b88bc02ba6a00b935e6597f9c670fedc05c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c29ec02243dd370616a489f7de67e4e479d2f7f12504ba7f77304cffe60befbc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c2a4d298149e6c66713eed4f56f4bbbfa4022a193891029b84e429baf59cc246.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c3d2fc03602f7604a65e631e8595ce4cfabd8887343489b34a65e3a73c5546b3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c41078c8692e0635fb52d01bc5157528c7308ae5140c134aa07c1fcda4f7a5e5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c4b5f1e444ddd41f8cd30e618c185b16235e313ea34d944959628001a91e38a5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c7ed9bc3125498e475f3170b8126c93368e9cbfd6a5e73ee63437d90e39b69e5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c996b3dc884130f034a37551845201bc5661f2dcd282aed737329f8d4dd387b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c9fef6e7f0a532c2d1cc25dffd496ea8355dd14ce0105a169b391bfeb9277e4f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cc8f5a2bbe4da9852ccdf64150115271baa2d282d74d7fab854dc5ca641dd519.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ce45adea497443bfc1b6baabd0aa3d85888dd10b1b7fd009edf608e390b2a844.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ce4b82a3855d18ba09e922f6e2cc1b8c20f0e5261b27303c52d4e1ba5a09a078.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cf16614311e144768bdc064da754efad77597cac5efe6852be996b9eaaed695a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d074bfe592a195f339acbcd8743f8b64cfd09b45ca1013598269227fc092f2db.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d2a95455cc492d4fbf2839a21eeaef00e7fad2aac93825d2443a38127ad02007.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d4e6e712ebbbf0f30745093e2b6d235c13fb43d37b586a0109e87c19eafdf35d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d88a02264125b07881eecff752d9c8d718edcf2d87e5431b13e54809dc7bf4f2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d907925fb28912f2e98cccfe758bce680aa4f894cc9d2079d73d7e2d85481d47.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/daf050ec3a864b9831dcb32cee1fb2739dafb68b369e9108469ca1e6fd166867.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/de9645d55ce8744856862eae4a3d3a1266e9df9d575aa3e2f62a75a5d97444f7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e04199bdfb87226d6aeae387ef677eae06483f9f135b5a3f1360ec1a243c86cd.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e97d1f31f54263bb90b7c3e71be0e19973d76d945fb5d526bf93fb449708bbf0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e9f87f5c370a731ee42098ae6ff38ca6ad256396158c03f84c5e770c6b76a43d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f2ad375dbc8779f54eb9c377fd951cd442f3766d5eb70d40613b952b811643da.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f35df9d5a0e0341bcc0f39a462e46988c330cdb905cc690b08de4a9cf5771f28.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f433e33369483f75d31fe937b8a597ef2d7cdfed339931643ff966a33dbc6158.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f4c164c89cfbcb1d81bc0b60e3e7c8715e07c7a23118acf4e3e16715beac09c5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f545d937cfb9d974f4aaaa9dc1c12adfed0d4f608de7d0a7247452d9bbbb55a4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{3cc5ec39e76e1768ffb070a8ae18ee61b68f3ed3593178134d3edda5ed378c6c.json → f8d2c4372093d0ff3a2f78260e004e6c4e2b8ab4f5be9713ebc16c50e9c2ab4b.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fa41c6c8ffa1047b7f2f52b6532ea4d9f5754e5bbc11d1340bae7f75ae5ed84e.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +45 -45
- xinference/web/ui/node_modules/@babel/runtime/package.json +64 -37
- xinference/web/ui/node_modules/@mui/private-theming/package.json +7 -6
- xinference/web/ui/node_modules/@mui/styled-engine/package.json +6 -5
- xinference/web/ui/node_modules/@mui/system/node_modules/clsx/package.json +22 -10
- xinference/web/ui/node_modules/@mui/system/package.json +13 -12
- xinference/web/ui/node_modules/@mui/types/package.json +3 -2
- xinference/web/ui/node_modules/@mui/utils/ClassNameGenerator/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/HTMLElementType/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/capitalize/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/chainPropTypes/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/clamp/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/composeClasses/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/createChainedFunction/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/debounce/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/deepmerge/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/deprecatedPropType/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/elementAcceptingRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/elementTypeAcceptingRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/exactProp/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/formatMuiErrorMessage/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/generateUtilityClass/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/generateUtilityClasses/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/getDisplayName/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/getScrollbarSize/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/getValidReactChildren/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/integerPropType/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/isMuiElement/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/ownerDocument/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/ownerWindow/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/package.json +8 -7
- xinference/web/ui/node_modules/@mui/utils/ponyfillGlobal/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/refType/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/requirePropFactory/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/resolveProps/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/scrollLeft/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/setRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/unsupportedProp/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useControlled/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useEnhancedEffect/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useEventCallback/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useForkRef/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useId/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useIsFocusVisible/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useLazyRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useOnMount/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/usePreviousProps/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useTimeout/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/visuallyHidden/package.json +6 -0
- xinference/web/ui/node_modules/@types/prop-types/package.json +2 -2
- xinference/web/ui/node_modules/csstype/package.json +3 -3
- xinference/web/ui/package-lock.json +47 -45
- xinference/web/ui/package.json +2 -0
- {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/METADATA +4 -1
- {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/RECORD +206 -150
- xinference/web/ui/build/static/js/main.9715fe74.js +0 -3
- xinference/web/ui/build/static/js/main.9715fe74.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/02f54fcadc6fe9b1be8451435f9a097c82b3d665bae2d2df03ba0b36cebeca0c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/051391807ffd255b652b63dc2ff12e5ce80113e1b712c81b6c1986a11cd85d3c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/05710e94caaf5a2da790d67aa572bf6bc19bae0237668c8d97f4da62ab52fe5c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/08a3470840a55d359a967d0763cd1ae5c1f15f3ca31118e29b37b4fbbdb248c7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0e68b0e268559a563dde6fa4c654922d98c74f5140854437f3e5a01bf23ba41f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/10dc7661741dfc88df5538bbf835870ae47b1a18f36c2f7b9650651e7304ce72.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/18b3d28a2692ab4f2d05254221f53b3c13259435bd98cb90ff5d7250d93f11e0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1b612ab687e3a4e14db156bd92a75b8a3e96ff04bfbf8f3351c7e801bc61cf90.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1bb839ffeb39ed12367c179fbf3b784e20f6f98fb87c597717bc11ebe3201c59.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1d12e0b0e4e87372b18fef72fcd6e264fa40b0cffdbeddcef004becc4e32cf27.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1d5b806c08ffb55539ee26b5fa9746a373f602354d6e55a0d4379d7e2f903868.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1ecb7ce2bc103bec0d1a9f0cb07132e5ec7731d974998c465798c0e15cafb924.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/20cfc57ae2b8ba23571f386a5f2bf194bd02e8a3913113881bb9804ee822ad58.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/222a437b6773707bbead06ba5db1ed8869e44d9f051a7f6b1a745aeb654679d0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/249709fb20c1254cecc9ba6666e6bcf37b21e47e7a020d5a7f0513680b49473f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/257ad4979294ee3a7ef400feab3b29ebcd259c47b9410a3011a66f4b33a97904.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2779bdc8ed8372f4015d07a45e4b09756bd953250944c3e3efe6b65bf8eaecd8.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2bee7b8bd3d52976a45d6068e1333df88b943e0e679403c809e45382e3818037.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/31c71a85b2d0bbdcdc027e7950a9d1f778288f98888d8595fad5e4fdda5bb31f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/371214b6649e323e7281885b1412c5a515ccc2274f4e1a42921e45777694009c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3eced030d2b3da645f4cbe3dc4ff49744312316e4306ce170991e802e9bfcf8e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/452ae8e7cff99efc35d134014d32610d13217b031b817d3663bb689b3778ebd2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/48e3f5d01ccedde86348a29392bc9b6e301a51b3f7e2476ae0a95a31eb6bf18d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4a311b5bd98f16588caf60b8de4c8d91d038198e6bfd496f880f60d4c1ea076d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4ccd71d3f73490a18a5a4de4ad82d63cf019caff6f8d881fd1434755a033bfc5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4ee8343d4254c95e685d49601f1649e8222d7423c2a822e1d01c8fc4cd3582a2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/523a712748fbb7abf0c5423df62e65c4659b7e450182c890e847cd78ace7cac5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5777049bbacf7322891761a5b959e62eec22b0f96a70dbf97e58d530c6258a55.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5905957ce95f7dee64f0739359e990b8b518cff088c6958487641a3d21737d1e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5ac79df79d2dff64da97b02d5c90b36bc0d44a4a4229f6efed2993f93ee1d7a1.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/64d3ef38fd394979b09061e67fc47f04b7295401677825c64c72ea2f44cf910c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/68a5d8e66687770e05aebcae9a8047189dc577d6150fbf0e62d2b2c11c4335c5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/69d5f27e40d5e7040c8e8e303ccaee019b54092345acc3838d6eb943c474ce39.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/6a07b8d00482c07a4b7c73f6bebb79fad54bbea35ba358681ede6ff299a09f6b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/70b0307a43b9f8c876bf2a1f7dea4cc56cc0ea33743e863c0b3983bc48109f2d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/710c39a26816820c2cfd00b19cd0112851d64b8ac6b201b18727b43b5a69d92c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/722f4c03da79795d1fb738323f3eb6794ab18711e20f141c0b678c5d4cb3dce9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8237bdfff695d71496ee2ff83ba0392d753f8e1dff0d002a1b2c9e249f03083c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8342e3800fed63e8e805850810aee4fd9b0558aca10c8b6b770fe5bff1bed266.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/847a71f54f7a059efc71f70c297b6b4182a633130715c80f817eb533de108c12.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8731a959d9b7278ba7a1c044bb4253bfff63079da69b21ff80221ae5e037dbbe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/87b4e94b045df561c8f4bd253c84447c96e0dededbcb6646c0cd180119960990.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/894a334446bac96d74385c23c9d6f2d117ee29511897700430cd0e578cb32527.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8c5ba88b620eeedbb99db18a610138353a5a657170fdb8bd6969585f81b34f74.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9004bc083cf179941e64130390a27afcf7593b21dd64a87d03e1e6489032aa13.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9b84631723b52988d3748d80b994e43d1944ca55098a5d908c9061eeb4c84f90.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9ee1f84bde10cd78ca23a4681c7372195748bacd5e41c3b1df8e39d960294d81.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a127f13651a7398f5fc378456a52cfa0c6260f6109611d3fb012aa7c7a4ee17a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a5e2e9f707eb7039bea096ca117d996b8f9cbc2a5613fd8e0c5b0094444ce23c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a6131b7fb1b30fd63d798845dab508a22b21c616b361e6214bf711915c5fef3d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a7b8ccb199158c897ed7d7835942eb37569221be4101eecfa7a7ce62e0c76f1f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/acaf6dbddd57b63a874e9062803850ce4911c8dc63a362eacdc4d2d0f48f8c16.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ade3cd82af85d2d28138bd942228b01e27559a5a4072ee049dcb47612ee9a506.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/aea82c71088dc9afd6f4ba71434286af5c6ab5e9b87088b0e84f351146d06489.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/afbd547078b20bb14596f20e165cb40413147e3de501b126c97bb417f8b9a53e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/afc597494ebc62d22e4d3ac2a839bd5f30d5e47ce1d0576e0f62407ef6c78652.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/affbbac58777e26ba8b6dcda8cea72c76975bd0207f0a510bfde5983d3081f74.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b0aa2ece62a1f5fa015f9e6b59d20b79be727fb5a3f4fc747200163097f320ad.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b10391d3ae53452c8ec97d230f064e03a8377aab2be4b1b3e30cb3e054b2341a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b4bab0c147360f3e335251166d3d25602e2e0e160fb3734ac019a5b42499179c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b57dcff363e045fd11ea2bcbc58cce470f1bc22d3ac14c8acbf63a36637050ff.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b8de42f7f5dc1df9e802898a0ebb35adb57d269d7af21e6c66a0fde0ff226a08.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/be696cadea3bf577c0defc8f8d100232369763dde754b5ba9399f45f42aed215.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c5a30d7c6cb484268e9126dd6d078229a1dcaa07fb064077b87d15224c61a3aa.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c67e2077e70c4751e9e1dd98f378d979abd9c8a39fbf6c69f02fb4e5b04eef87.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c81c1cad35be60a4fad090374edf50a884540e306d464b803120554df61e817e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d1054fd4d09666318a00ca0aea5ff42a2fdb17b3a42b5f77b93433d7408eb2f4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d1ec524ce17a48816fc55fe665572c1ef4982a4bd1ecb01badd0fdffc465242c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d4d61a537c45c4506a6b7e3478206b71dc96e0c6c6410cef9b5d2f47018bc578.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e2e563b606cac9d51661cf68ba1729eb5df7861af521adccedc1b163439b657d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e3890e804dfaeb19b79f52e1b20198b587da61a9a3a1f55417d2cc5ce4ad7dae.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e8c3fadbd12f0e2c7b70d84acbcabd0a5c96b05380a7b3167b00e8739d829d24.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ebb5ae1a82b33b94e2b0cfec599bec7500aff0a1701f91736a6f3a4f77617560.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f30f0396f4e54f0355112d5eba4b16c3a56f47e456eb117a9dc7fa6218ce21fe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f318ac15e999a553c00832029245730fe25c2916860b0ff55eee1f0ab926d84e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f55f4e657cc2d797b7e73a64ce40352f9a5dde31763a3e89c14d3b983307f30e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f8a68931d652a5e3be4e1ae06210a223de1239f3884ad858cecf9fe8d11c2ffe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f95a8bd358eeb55fa2f49f1224cc2f4f36006359856744ff09ae4bb295f59ec1.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fb6fbd693a53d3c542d9adf83e0bb458918d39bf101eaf6d6662a2fbc35c1adc.json +0 -1
- /xinference/web/ui/build/static/js/{main.9715fe74.js.LICENSE.txt → main.87d39ffb.js.LICENSE.txt} +0 -0
- {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/LICENSE +0 -0
- {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/WHEEL +0 -0
- {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/top_level.txt +0 -0
xinference/__init__.py
CHANGED
xinference/_version.py
CHANGED
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2024-02-
|
|
11
|
+
"date": "2024-02-22T15:40:53+0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.
|
|
14
|
+
"full-revisionid": "c653c975847f9f6a81382033a9c8f5bd81bf70f2",
|
|
15
|
+
"version": "0.9.0"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
xinference/api/restful_api.py
CHANGED
|
@@ -51,6 +51,7 @@ from uvicorn import Config, Server
|
|
|
51
51
|
from xoscar.utils import get_next_port
|
|
52
52
|
|
|
53
53
|
from .._compat import BaseModel, Field
|
|
54
|
+
from .._version import get_versions
|
|
54
55
|
from ..constants import XINFERENCE_DEFAULT_ENDPOINT_PORT
|
|
55
56
|
from ..core.event import Event, EventCollectorActor, EventType
|
|
56
57
|
from ..core.supervisor import SupervisorActor
|
|
@@ -221,6 +222,9 @@ class RESTfulAPI:
|
|
|
221
222
|
self._router.add_api_route(
|
|
222
223
|
"/v1/cluster/info", self.get_cluster_device_info, methods=["GET"]
|
|
223
224
|
)
|
|
225
|
+
self._router.add_api_route(
|
|
226
|
+
"/v1/cluster/version", self.get_cluster_version, methods=["GET"]
|
|
227
|
+
)
|
|
224
228
|
self._router.add_api_route(
|
|
225
229
|
"/v1/cluster/devices", self._get_devices_count, methods=["GET"]
|
|
226
230
|
)
|
|
@@ -231,9 +235,11 @@ class RESTfulAPI:
|
|
|
231
235
|
"/v1/ui/{model_uid}",
|
|
232
236
|
self.build_gradio_interface,
|
|
233
237
|
methods=["POST"],
|
|
234
|
-
dependencies=
|
|
235
|
-
|
|
236
|
-
|
|
238
|
+
dependencies=(
|
|
239
|
+
[Security(self._auth_service, scopes=["models:read"])]
|
|
240
|
+
if self.is_authenticated()
|
|
241
|
+
else None
|
|
242
|
+
),
|
|
237
243
|
)
|
|
238
244
|
self._router.add_api_route(
|
|
239
245
|
"/token", self.login_for_access_token, methods=["POST"]
|
|
@@ -246,142 +252,176 @@ class RESTfulAPI:
|
|
|
246
252
|
"/v1/models/instances",
|
|
247
253
|
self.get_instance_info,
|
|
248
254
|
methods=["GET"],
|
|
249
|
-
dependencies=
|
|
250
|
-
|
|
251
|
-
|
|
255
|
+
dependencies=(
|
|
256
|
+
[Security(self._auth_service, scopes=["models:list"])]
|
|
257
|
+
if self.is_authenticated()
|
|
258
|
+
else None
|
|
259
|
+
),
|
|
252
260
|
)
|
|
253
261
|
self._router.add_api_route(
|
|
254
262
|
"/v1/models/{model_type}/{model_name}/versions",
|
|
255
263
|
self.get_model_versions,
|
|
256
264
|
methods=["GET"],
|
|
257
|
-
dependencies=
|
|
258
|
-
|
|
259
|
-
|
|
265
|
+
dependencies=(
|
|
266
|
+
[Security(self._auth_service, scopes=["models:list"])]
|
|
267
|
+
if self.is_authenticated()
|
|
268
|
+
else None
|
|
269
|
+
),
|
|
260
270
|
)
|
|
261
271
|
self._router.add_api_route(
|
|
262
272
|
"/v1/models",
|
|
263
273
|
self.list_models,
|
|
264
274
|
methods=["GET"],
|
|
265
|
-
dependencies=
|
|
266
|
-
|
|
267
|
-
|
|
275
|
+
dependencies=(
|
|
276
|
+
[Security(self._auth_service, scopes=["models:list"])]
|
|
277
|
+
if self.is_authenticated()
|
|
278
|
+
else None
|
|
279
|
+
),
|
|
268
280
|
)
|
|
269
281
|
|
|
270
282
|
self._router.add_api_route(
|
|
271
283
|
"/v1/models/{model_uid}",
|
|
272
284
|
self.describe_model,
|
|
273
285
|
methods=["GET"],
|
|
274
|
-
dependencies=
|
|
275
|
-
|
|
276
|
-
|
|
286
|
+
dependencies=(
|
|
287
|
+
[Security(self._auth_service, scopes=["models:list"])]
|
|
288
|
+
if self.is_authenticated()
|
|
289
|
+
else None
|
|
290
|
+
),
|
|
277
291
|
)
|
|
278
292
|
self._router.add_api_route(
|
|
279
293
|
"/v1/models/{model_uid}/events",
|
|
280
294
|
self.get_model_events,
|
|
281
295
|
methods=["GET"],
|
|
282
|
-
dependencies=
|
|
283
|
-
|
|
284
|
-
|
|
296
|
+
dependencies=(
|
|
297
|
+
[Security(self._auth_service, scopes=["models:read"])]
|
|
298
|
+
if self.is_authenticated()
|
|
299
|
+
else None
|
|
300
|
+
),
|
|
285
301
|
)
|
|
286
302
|
self._router.add_api_route(
|
|
287
303
|
"/v1/models/instance",
|
|
288
304
|
self.launch_model_by_version,
|
|
289
305
|
methods=["POST"],
|
|
290
|
-
dependencies=
|
|
291
|
-
|
|
292
|
-
|
|
306
|
+
dependencies=(
|
|
307
|
+
[Security(self._auth_service, scopes=["models:start"])]
|
|
308
|
+
if self.is_authenticated()
|
|
309
|
+
else None
|
|
310
|
+
),
|
|
293
311
|
)
|
|
294
312
|
self._router.add_api_route(
|
|
295
313
|
"/v1/models",
|
|
296
314
|
self.launch_model,
|
|
297
315
|
methods=["POST"],
|
|
298
|
-
dependencies=
|
|
299
|
-
|
|
300
|
-
|
|
316
|
+
dependencies=(
|
|
317
|
+
[Security(self._auth_service, scopes=["models:start"])]
|
|
318
|
+
if self.is_authenticated()
|
|
319
|
+
else None
|
|
320
|
+
),
|
|
301
321
|
)
|
|
302
322
|
self._router.add_api_route(
|
|
303
323
|
"/experimental/speculative_llms",
|
|
304
324
|
self.launch_speculative_llm,
|
|
305
325
|
methods=["POST"],
|
|
306
|
-
dependencies=
|
|
307
|
-
|
|
308
|
-
|
|
326
|
+
dependencies=(
|
|
327
|
+
[Security(self._auth_service, scopes=["models:start"])]
|
|
328
|
+
if self.is_authenticated()
|
|
329
|
+
else None
|
|
330
|
+
),
|
|
309
331
|
)
|
|
310
332
|
self._router.add_api_route(
|
|
311
333
|
"/v1/models/{model_uid}",
|
|
312
334
|
self.terminate_model,
|
|
313
335
|
methods=["DELETE"],
|
|
314
|
-
dependencies=
|
|
315
|
-
|
|
316
|
-
|
|
336
|
+
dependencies=(
|
|
337
|
+
[Security(self._auth_service, scopes=["models:stop"])]
|
|
338
|
+
if self.is_authenticated()
|
|
339
|
+
else None
|
|
340
|
+
),
|
|
317
341
|
)
|
|
318
342
|
self._router.add_api_route(
|
|
319
343
|
"/v1/completions",
|
|
320
344
|
self.create_completion,
|
|
321
345
|
methods=["POST"],
|
|
322
346
|
response_model=Completion,
|
|
323
|
-
dependencies=
|
|
324
|
-
|
|
325
|
-
|
|
347
|
+
dependencies=(
|
|
348
|
+
[Security(self._auth_service, scopes=["models:read"])]
|
|
349
|
+
if self.is_authenticated()
|
|
350
|
+
else None
|
|
351
|
+
),
|
|
326
352
|
)
|
|
327
353
|
self._router.add_api_route(
|
|
328
354
|
"/v1/embeddings",
|
|
329
355
|
self.create_embedding,
|
|
330
356
|
methods=["POST"],
|
|
331
|
-
dependencies=
|
|
332
|
-
|
|
333
|
-
|
|
357
|
+
dependencies=(
|
|
358
|
+
[Security(self._auth_service, scopes=["models:read"])]
|
|
359
|
+
if self.is_authenticated()
|
|
360
|
+
else None
|
|
361
|
+
),
|
|
334
362
|
)
|
|
335
363
|
self._router.add_api_route(
|
|
336
364
|
"/v1/rerank",
|
|
337
365
|
self.rerank,
|
|
338
366
|
methods=["POST"],
|
|
339
|
-
dependencies=
|
|
340
|
-
|
|
341
|
-
|
|
367
|
+
dependencies=(
|
|
368
|
+
[Security(self._auth_service, scopes=["models:read"])]
|
|
369
|
+
if self.is_authenticated()
|
|
370
|
+
else None
|
|
371
|
+
),
|
|
342
372
|
)
|
|
343
373
|
self._router.add_api_route(
|
|
344
374
|
"/v1/audio/transcriptions",
|
|
345
375
|
self.create_transcriptions,
|
|
346
376
|
methods=["POST"],
|
|
347
|
-
dependencies=
|
|
348
|
-
|
|
349
|
-
|
|
377
|
+
dependencies=(
|
|
378
|
+
[Security(self._auth_service, scopes=["models:read"])]
|
|
379
|
+
if self.is_authenticated()
|
|
380
|
+
else None
|
|
381
|
+
),
|
|
350
382
|
)
|
|
351
383
|
self._router.add_api_route(
|
|
352
384
|
"/v1/audio/translations",
|
|
353
385
|
self.create_translations,
|
|
354
386
|
methods=["POST"],
|
|
355
|
-
dependencies=
|
|
356
|
-
|
|
357
|
-
|
|
387
|
+
dependencies=(
|
|
388
|
+
[Security(self._auth_service, scopes=["models:read"])]
|
|
389
|
+
if self.is_authenticated()
|
|
390
|
+
else None
|
|
391
|
+
),
|
|
358
392
|
)
|
|
359
393
|
self._router.add_api_route(
|
|
360
394
|
"/v1/images/generations",
|
|
361
395
|
self.create_images,
|
|
362
396
|
methods=["POST"],
|
|
363
397
|
response_model=ImageList,
|
|
364
|
-
dependencies=
|
|
365
|
-
|
|
366
|
-
|
|
398
|
+
dependencies=(
|
|
399
|
+
[Security(self._auth_service, scopes=["models:read"])]
|
|
400
|
+
if self.is_authenticated()
|
|
401
|
+
else None
|
|
402
|
+
),
|
|
367
403
|
)
|
|
368
404
|
self._router.add_api_route(
|
|
369
405
|
"/v1/images/variations",
|
|
370
406
|
self.create_variations,
|
|
371
407
|
methods=["POST"],
|
|
372
408
|
response_model=ImageList,
|
|
373
|
-
dependencies=
|
|
374
|
-
|
|
375
|
-
|
|
409
|
+
dependencies=(
|
|
410
|
+
[Security(self._auth_service, scopes=["models:read"])]
|
|
411
|
+
if self.is_authenticated()
|
|
412
|
+
else None
|
|
413
|
+
),
|
|
376
414
|
)
|
|
377
415
|
self._router.add_api_route(
|
|
378
416
|
"/v1/chat/completions",
|
|
379
417
|
self.create_chat_completion,
|
|
380
418
|
methods=["POST"],
|
|
381
419
|
response_model=ChatCompletion,
|
|
382
|
-
dependencies=
|
|
383
|
-
|
|
384
|
-
|
|
420
|
+
dependencies=(
|
|
421
|
+
[Security(self._auth_service, scopes=["models:read"])]
|
|
422
|
+
if self.is_authenticated()
|
|
423
|
+
else None
|
|
424
|
+
),
|
|
385
425
|
)
|
|
386
426
|
|
|
387
427
|
# for custom models
|
|
@@ -389,33 +429,41 @@ class RESTfulAPI:
|
|
|
389
429
|
"/v1/model_registrations/{model_type}",
|
|
390
430
|
self.register_model,
|
|
391
431
|
methods=["POST"],
|
|
392
|
-
dependencies=
|
|
393
|
-
|
|
394
|
-
|
|
432
|
+
dependencies=(
|
|
433
|
+
[Security(self._auth_service, scopes=["models:register"])]
|
|
434
|
+
if self.is_authenticated()
|
|
435
|
+
else None
|
|
436
|
+
),
|
|
395
437
|
)
|
|
396
438
|
self._router.add_api_route(
|
|
397
439
|
"/v1/model_registrations/{model_type}/{model_name}",
|
|
398
440
|
self.unregister_model,
|
|
399
441
|
methods=["DELETE"],
|
|
400
|
-
dependencies=
|
|
401
|
-
|
|
402
|
-
|
|
442
|
+
dependencies=(
|
|
443
|
+
[Security(self._auth_service, scopes=["models:unregister"])]
|
|
444
|
+
if self.is_authenticated()
|
|
445
|
+
else None
|
|
446
|
+
),
|
|
403
447
|
)
|
|
404
448
|
self._router.add_api_route(
|
|
405
449
|
"/v1/model_registrations/{model_type}",
|
|
406
450
|
self.list_model_registrations,
|
|
407
451
|
methods=["GET"],
|
|
408
|
-
dependencies=
|
|
409
|
-
|
|
410
|
-
|
|
452
|
+
dependencies=(
|
|
453
|
+
[Security(self._auth_service, scopes=["models:list"])]
|
|
454
|
+
if self.is_authenticated()
|
|
455
|
+
else None
|
|
456
|
+
),
|
|
411
457
|
)
|
|
412
458
|
self._router.add_api_route(
|
|
413
459
|
"/v1/model_registrations/{model_type}/{model_name}",
|
|
414
460
|
self.get_model_registrations,
|
|
415
461
|
methods=["GET"],
|
|
416
|
-
dependencies=
|
|
417
|
-
|
|
418
|
-
|
|
462
|
+
dependencies=(
|
|
463
|
+
[Security(self._auth_service, scopes=["models:list"])]
|
|
464
|
+
if self.is_authenticated()
|
|
465
|
+
else None
|
|
466
|
+
),
|
|
419
467
|
)
|
|
420
468
|
|
|
421
469
|
# Clear the global Registry for the MetricsMiddleware, or
|
|
@@ -1094,10 +1142,12 @@ class RESTfulAPI:
|
|
|
1094
1142
|
if body.logit_bias is not None:
|
|
1095
1143
|
raise HTTPException(status_code=501, detail="Not implemented")
|
|
1096
1144
|
|
|
1145
|
+
messages = body.messages and list(body.messages) or None
|
|
1146
|
+
|
|
1097
1147
|
if (
|
|
1098
|
-
not
|
|
1099
|
-
or
|
|
1100
|
-
or not
|
|
1148
|
+
not messages
|
|
1149
|
+
or messages[-1].get("role") not in ["user", "system", "tool"]
|
|
1150
|
+
or not messages[-1].get("content")
|
|
1101
1151
|
):
|
|
1102
1152
|
raise HTTPException(
|
|
1103
1153
|
status_code=400, detail="Invalid input. Please specify the prompt."
|
|
@@ -1105,7 +1155,7 @@ class RESTfulAPI:
|
|
|
1105
1155
|
|
|
1106
1156
|
system_messages = []
|
|
1107
1157
|
non_system_messages = []
|
|
1108
|
-
for msg in
|
|
1158
|
+
for msg in messages:
|
|
1109
1159
|
assert (
|
|
1110
1160
|
msg.get("content") != SPECIAL_TOOL_PROMPT
|
|
1111
1161
|
), f"Invalid message content {SPECIAL_TOOL_PROMPT}"
|
|
@@ -1118,13 +1168,13 @@ class RESTfulAPI:
|
|
|
1118
1168
|
raise HTTPException(
|
|
1119
1169
|
status_code=400, detail="Multiple system messages are not supported."
|
|
1120
1170
|
)
|
|
1121
|
-
if len(system_messages) == 1 and
|
|
1171
|
+
if len(system_messages) == 1 and messages[0]["role"] != "system":
|
|
1122
1172
|
raise HTTPException(
|
|
1123
1173
|
status_code=400, detail="System message should be the first one."
|
|
1124
1174
|
)
|
|
1125
1175
|
assert non_system_messages
|
|
1126
1176
|
|
|
1127
|
-
has_tool_message =
|
|
1177
|
+
has_tool_message = messages[-1].get("role") == "tool"
|
|
1128
1178
|
if has_tool_message:
|
|
1129
1179
|
prompt = SPECIAL_TOOL_PROMPT
|
|
1130
1180
|
system_prompt = system_messages[0]["content"] if system_messages else None
|
|
@@ -1298,9 +1348,21 @@ class RESTfulAPI:
|
|
|
1298
1348
|
logger.error(e, exc_info=True)
|
|
1299
1349
|
raise HTTPException(status_code=500, detail=str(e))
|
|
1300
1350
|
|
|
1301
|
-
async def get_cluster_device_info(
|
|
1351
|
+
async def get_cluster_device_info(
|
|
1352
|
+
self, detailed: bool = Query(False)
|
|
1353
|
+
) -> JSONResponse:
|
|
1354
|
+
try:
|
|
1355
|
+
data = await (await self._get_supervisor_ref()).get_cluster_device_info(
|
|
1356
|
+
detailed=detailed
|
|
1357
|
+
)
|
|
1358
|
+
return JSONResponse(content=data)
|
|
1359
|
+
except Exception as e:
|
|
1360
|
+
logger.error(e, exc_info=True)
|
|
1361
|
+
raise HTTPException(status_code=500, detail=str(e))
|
|
1362
|
+
|
|
1363
|
+
async def get_cluster_version(self) -> JSONResponse:
|
|
1302
1364
|
try:
|
|
1303
|
-
data =
|
|
1365
|
+
data = get_versions()
|
|
1304
1366
|
return JSONResponse(content=data)
|
|
1305
1367
|
except Exception as e:
|
|
1306
1368
|
logger.error(e, exc_info=True)
|
xinference/core/model.py
CHANGED
|
@@ -44,6 +44,7 @@ import logging
|
|
|
44
44
|
|
|
45
45
|
logger = logging.getLogger(__name__)
|
|
46
46
|
|
|
47
|
+
from ..device_utils import empty_cache
|
|
47
48
|
from .utils import json_dumps, log_async
|
|
48
49
|
|
|
49
50
|
try:
|
|
@@ -130,7 +131,7 @@ class ModelActor(xo.StatelessActor):
|
|
|
130
131
|
try:
|
|
131
132
|
import gc
|
|
132
133
|
|
|
133
|
-
import torch
|
|
134
|
+
import torch # noqa: F401
|
|
134
135
|
except ImportError:
|
|
135
136
|
error_message = "Failed to import module 'torch'"
|
|
136
137
|
installation_guide = [
|
|
@@ -141,7 +142,7 @@ class ModelActor(xo.StatelessActor):
|
|
|
141
142
|
|
|
142
143
|
del self._model
|
|
143
144
|
gc.collect()
|
|
144
|
-
|
|
145
|
+
empty_cache()
|
|
145
146
|
|
|
146
147
|
def __init__(
|
|
147
148
|
self,
|
xinference/core/resource.py
CHANGED
|
@@ -22,8 +22,9 @@ from .utils import get_nvidia_gpu_info
|
|
|
22
22
|
|
|
23
23
|
@dataclass
|
|
24
24
|
class ResourceStatus:
|
|
25
|
-
|
|
25
|
+
usage: float
|
|
26
26
|
total: float
|
|
27
|
+
memory_used: float
|
|
27
28
|
memory_available: float
|
|
28
29
|
memory_total: float
|
|
29
30
|
|
|
@@ -39,8 +40,9 @@ def gather_node_info() -> Dict[str, Union[ResourceStatus, GPUStatus]]:
|
|
|
39
40
|
node_resource = dict()
|
|
40
41
|
mem_info = psutil.virtual_memory()
|
|
41
42
|
node_resource["cpu"] = ResourceStatus(
|
|
42
|
-
|
|
43
|
+
usage=psutil.cpu_percent() / 100.0,
|
|
43
44
|
total=psutil.cpu_count(),
|
|
45
|
+
memory_used=mem_info.used,
|
|
44
46
|
memory_available=mem_info.available,
|
|
45
47
|
memory_total=mem_info.total,
|
|
46
48
|
)
|
xinference/core/supervisor.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
import asyncio
|
|
16
16
|
import itertools
|
|
17
17
|
import time
|
|
18
|
+
import typing
|
|
18
19
|
from dataclasses import dataclass
|
|
19
20
|
from logging import getLogger
|
|
20
21
|
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Union
|
|
@@ -179,12 +180,26 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
179
180
|
model_version_infos, self.address
|
|
180
181
|
)
|
|
181
182
|
|
|
182
|
-
|
|
183
|
+
@typing.no_type_check
|
|
184
|
+
async def get_cluster_device_info(self, detailed: bool = False) -> List:
|
|
185
|
+
import psutil
|
|
186
|
+
|
|
183
187
|
supervisor_device_info = {
|
|
184
188
|
"ip_address": self.address.split(":")[0],
|
|
185
189
|
"gpu_count": 0,
|
|
186
190
|
"gpu_vram_total": 0,
|
|
187
191
|
}
|
|
192
|
+
if detailed:
|
|
193
|
+
supervisor_device_info["gpu_vram_total"] = 0
|
|
194
|
+
supervisor_device_info["gpu_vram_available"] = 0
|
|
195
|
+
supervisor_device_info["cpu_available"] = psutil.cpu_count() * (
|
|
196
|
+
1 - psutil.cpu_percent() / 100.0
|
|
197
|
+
)
|
|
198
|
+
supervisor_device_info["cpu_count"] = psutil.cpu_count()
|
|
199
|
+
mem_info = psutil.virtual_memory()
|
|
200
|
+
supervisor_device_info["mem_used"] = mem_info.used
|
|
201
|
+
supervisor_device_info["mem_available"] = mem_info.available
|
|
202
|
+
supervisor_device_info["mem_total"] = mem_info.total
|
|
188
203
|
res = [{"node_type": "Supervisor", **supervisor_device_info}]
|
|
189
204
|
for worker_addr, worker_status in self._worker_status.items():
|
|
190
205
|
vram_total: float = sum(
|
|
@@ -193,14 +208,24 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
193
208
|
total = (
|
|
194
209
|
vram_total if vram_total == 0 else f"{int(vram_total / 1024 / 1024)}MiB"
|
|
195
210
|
)
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
211
|
+
info = {
|
|
212
|
+
"node_type": "Worker",
|
|
213
|
+
"ip_address": worker_addr.split(":")[0],
|
|
214
|
+
"gpu_count": len(worker_status.status) - 1,
|
|
215
|
+
"gpu_vram_total": total,
|
|
216
|
+
}
|
|
217
|
+
if detailed:
|
|
218
|
+
cpu_info = worker_status.status["cpu"]
|
|
219
|
+
info["cpu_available"] = cpu_info.total * (1 - cpu_info.usage)
|
|
220
|
+
info["cpu_count"] = cpu_info.total
|
|
221
|
+
info["mem_used"] = cpu_info.memory_used
|
|
222
|
+
info["mem_available"] = cpu_info.memory_available
|
|
223
|
+
info["mem_total"] = cpu_info.memory_total
|
|
224
|
+
info["gpu_vram_total"] = vram_total
|
|
225
|
+
info["gpu_vram_available"] = sum(
|
|
226
|
+
[v.mem_free for k, v in worker_status.status.items() if k != "cpu"]
|
|
227
|
+
)
|
|
228
|
+
res.append(info)
|
|
204
229
|
return res
|
|
205
230
|
|
|
206
231
|
@staticmethod
|
|
@@ -227,11 +252,11 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
227
252
|
}
|
|
228
253
|
|
|
229
254
|
async def get_devices_count(self) -> int:
|
|
230
|
-
from ..
|
|
255
|
+
from ..device_utils import gpu_count
|
|
231
256
|
|
|
232
257
|
if self.is_local_deployment():
|
|
233
|
-
return
|
|
234
|
-
# distributed deployment, choose a worker and return its
|
|
258
|
+
return gpu_count()
|
|
259
|
+
# distributed deployment, choose a worker and return its device_count.
|
|
235
260
|
# Assume that each worker has the same count of cards.
|
|
236
261
|
worker_ref = await self._choose_worker()
|
|
237
262
|
return await worker_ref.get_devices_count()
|
xinference/core/worker.py
CHANGED
|
@@ -30,8 +30,8 @@ from xoscar import MainActorPoolType
|
|
|
30
30
|
from ..constants import XINFERENCE_CACHE_DIR
|
|
31
31
|
from ..core import ModelActor
|
|
32
32
|
from ..core.status_guard import LaunchStatus
|
|
33
|
+
from ..device_utils import gpu_count
|
|
33
34
|
from ..model.core import ModelDescription, create_model_instance
|
|
34
|
-
from ..utils import cuda_count
|
|
35
35
|
from .event import Event, EventCollectorActor, EventType
|
|
36
36
|
from .metrics import launch_metrics_export_server, record_metrics
|
|
37
37
|
from .resource import gather_node_info
|
|
@@ -54,13 +54,13 @@ class WorkerActor(xo.StatelessActor):
|
|
|
54
54
|
self,
|
|
55
55
|
supervisor_address: str,
|
|
56
56
|
main_pool: MainActorPoolType,
|
|
57
|
-
|
|
57
|
+
gpu_devices: List[int],
|
|
58
58
|
metrics_exporter_host: Optional[str] = None,
|
|
59
59
|
metrics_exporter_port: Optional[int] = None,
|
|
60
60
|
):
|
|
61
61
|
super().__init__()
|
|
62
62
|
# static attrs.
|
|
63
|
-
self.
|
|
63
|
+
self._total_gpu_devices = gpu_devices
|
|
64
64
|
self._supervisor_address = supervisor_address
|
|
65
65
|
self._supervisor_ref = None
|
|
66
66
|
self._main_pool = main_pool
|
|
@@ -244,9 +244,9 @@ class WorkerActor(xo.StatelessActor):
|
|
|
244
244
|
|
|
245
245
|
@staticmethod
|
|
246
246
|
def get_devices_count():
|
|
247
|
-
from ..
|
|
247
|
+
from ..device_utils import gpu_count
|
|
248
248
|
|
|
249
|
-
return
|
|
249
|
+
return gpu_count()
|
|
250
250
|
|
|
251
251
|
@log_sync(logger=logger)
|
|
252
252
|
def get_model_count(self) -> int:
|
|
@@ -263,7 +263,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
263
263
|
we assume that embedding model only takes 1 GPU slot.
|
|
264
264
|
"""
|
|
265
265
|
candidates = []
|
|
266
|
-
for _dev in self.
|
|
266
|
+
for _dev in self._total_gpu_devices:
|
|
267
267
|
if _dev not in self._gpu_to_model_uid:
|
|
268
268
|
candidates.append(_dev)
|
|
269
269
|
else:
|
|
@@ -291,11 +291,11 @@ class WorkerActor(xo.StatelessActor):
|
|
|
291
291
|
return device
|
|
292
292
|
|
|
293
293
|
def allocate_devices(self, model_uid: str, n_gpu: int) -> List[int]:
|
|
294
|
-
if n_gpu > len(self.
|
|
294
|
+
if n_gpu > len(self._total_gpu_devices) - len(self._gpu_to_model_uid):
|
|
295
295
|
raise RuntimeError("No available slot found for the model")
|
|
296
296
|
|
|
297
297
|
devices: List[int] = [
|
|
298
|
-
dev for dev in self.
|
|
298
|
+
dev for dev in self._total_gpu_devices if dev not in self._gpu_to_model_uid
|
|
299
299
|
][:n_gpu]
|
|
300
300
|
for dev in devices:
|
|
301
301
|
self._gpu_to_model_uid[int(dev)] = model_uid
|
|
@@ -324,7 +324,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
324
324
|
) -> Tuple[str, List[str]]:
|
|
325
325
|
env = {}
|
|
326
326
|
devices = []
|
|
327
|
-
if isinstance(n_gpu, int) or (n_gpu == "auto" and
|
|
327
|
+
if isinstance(n_gpu, int) or (n_gpu == "auto" and gpu_count() > 0):
|
|
328
328
|
# Currently, n_gpu=auto means using 1 GPU
|
|
329
329
|
gpu_cnt = n_gpu if isinstance(n_gpu, int) else 1
|
|
330
330
|
devices = (
|
|
@@ -396,10 +396,10 @@ class WorkerActor(xo.StatelessActor):
|
|
|
396
396
|
n_gpu: Optional[Union[int, str]] = "auto",
|
|
397
397
|
):
|
|
398
398
|
if n_gpu is not None:
|
|
399
|
-
if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu >
|
|
399
|
+
if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu > gpu_count()):
|
|
400
400
|
raise ValueError(
|
|
401
401
|
f"The parameter `n_gpu` must be greater than 0 and "
|
|
402
|
-
f"not greater than the number of GPUs: {
|
|
402
|
+
f"not greater than the number of GPUs: {gpu_count()} on the machine."
|
|
403
403
|
)
|
|
404
404
|
if isinstance(n_gpu, str) and n_gpu != "auto":
|
|
405
405
|
raise ValueError("Currently `n_gpu` only supports `auto`.")
|
|
@@ -504,10 +504,10 @@ class WorkerActor(xo.StatelessActor):
|
|
|
504
504
|
launch_args.pop("kwargs")
|
|
505
505
|
launch_args.update(kwargs)
|
|
506
506
|
if n_gpu is not None:
|
|
507
|
-
if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu >
|
|
507
|
+
if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu > gpu_count()):
|
|
508
508
|
raise ValueError(
|
|
509
509
|
f"The parameter `n_gpu` must be greater than 0 and "
|
|
510
|
-
f"not greater than the number of GPUs: {
|
|
510
|
+
f"not greater than the number of GPUs: {gpu_count()} on the machine."
|
|
511
511
|
)
|
|
512
512
|
if isinstance(n_gpu, str) and n_gpu != "auto":
|
|
513
513
|
raise ValueError("Currently `n_gpu` only supports `auto`.")
|
xinference/deploy/worker.py
CHANGED
|
@@ -21,7 +21,7 @@ import xoscar as xo
|
|
|
21
21
|
from xoscar import MainActorPoolType
|
|
22
22
|
|
|
23
23
|
from ..core.worker import WorkerActor
|
|
24
|
-
from ..
|
|
24
|
+
from ..device_utils import gpu_count
|
|
25
25
|
|
|
26
26
|
logger = logging.getLogger(__name__)
|
|
27
27
|
|
|
@@ -33,12 +33,12 @@ async def start_worker_components(
|
|
|
33
33
|
metrics_exporter_host: Optional[str],
|
|
34
34
|
metrics_exporter_port: Optional[int],
|
|
35
35
|
):
|
|
36
|
-
|
|
37
|
-
cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
|
|
38
|
-
if cuda_visible_devices:
|
|
39
|
-
|
|
36
|
+
gpu_device_indices = []
|
|
37
|
+
cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", None)
|
|
38
|
+
if cuda_visible_devices is not None and cuda_visible_devices != "-1":
|
|
39
|
+
gpu_device_indices.extend([int(i) for i in cuda_visible_devices.split(",")])
|
|
40
40
|
else:
|
|
41
|
-
|
|
41
|
+
gpu_device_indices = list(range(gpu_count()))
|
|
42
42
|
|
|
43
43
|
await xo.create_actor(
|
|
44
44
|
WorkerActor,
|
|
@@ -46,7 +46,7 @@ async def start_worker_components(
|
|
|
46
46
|
uid=WorkerActor.uid(),
|
|
47
47
|
supervisor_address=supervisor_address,
|
|
48
48
|
main_pool=main_pool,
|
|
49
|
-
|
|
49
|
+
gpu_devices=gpu_device_indices,
|
|
50
50
|
metrics_exporter_host=metrics_exporter_host,
|
|
51
51
|
metrics_exporter_port=metrics_exporter_port,
|
|
52
52
|
)
|