xinference 0.8.4__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/__init__.py +6 -0
- xinference/_compat.py +52 -0
- xinference/_version.py +3 -3
- xinference/api/oauth2/auth_service.py +2 -3
- xinference/api/oauth2/types.py +1 -1
- xinference/api/restful_api.py +176 -108
- xinference/client/restful/restful_client.py +10 -6
- xinference/core/model.py +3 -2
- xinference/core/resource.py +4 -2
- xinference/core/status_guard.py +2 -1
- xinference/core/supervisor.py +37 -12
- xinference/core/utils.py +2 -1
- xinference/core/worker.py +13 -13
- xinference/deploy/worker.py +7 -7
- xinference/device_utils.py +100 -0
- xinference/fields.py +1 -1
- xinference/model/audio/core.py +1 -2
- xinference/model/audio/whisper.py +20 -8
- xinference/model/core.py +9 -0
- xinference/model/embedding/core.py +5 -136
- xinference/model/image/__init__.py +13 -1
- xinference/model/image/core.py +22 -43
- xinference/model/image/model_spec_modelscope.json +94 -0
- xinference/model/image/stable_diffusion/core.py +3 -5
- xinference/model/llm/ggml/llamacpp.py +1 -1
- xinference/model/llm/llm_family.json +333 -3
- xinference/model/llm/llm_family.py +11 -5
- xinference/model/llm/llm_family_modelscope.json +311 -1
- xinference/model/llm/pytorch/compression.py +3 -1
- xinference/model/llm/pytorch/core.py +34 -15
- xinference/model/llm/pytorch/qwen_vl.py +5 -3
- xinference/model/llm/pytorch/spec_decoding_utils.py +3 -1
- xinference/model/llm/pytorch/spec_model.py +20 -17
- xinference/model/llm/pytorch/utils.py +4 -3
- xinference/model/llm/pytorch/yi_vl.py +9 -5
- xinference/model/llm/utils.py +10 -1
- xinference/model/llm/vllm/core.py +4 -0
- xinference/model/rerank/core.py +5 -136
- xinference/model/utils.py +143 -18
- xinference/thirdparty/llava/mm_utils.py +1 -1
- xinference/types.py +3 -3
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.87d39ffb.js +3 -0
- xinference/web/ui/build/static/js/main.87d39ffb.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{8d95a492a6b08c192e212189567b59b98282ca6f3ed52dd9bd053a6be7ff4e65.json → 027745bdb9bb9fe375f2eef7377e9f7ba82510ed90c05f9e5b34ba314bb93498.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/03d5e7de9b1728d71cd8c483a35f4373eb66932022026142e03005d9de1a2a44.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0738899eefad7f90261125823d87ea9f0d53667b1479a0c1f398aff14f2bbd2a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/09a21ab449599121eadfdd12d6dba40c5a1c5b35dc90be17108eeff80daf79b8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/09d043a8289da33d05768f5face63cc47e9ce9646d67f5589865db511574e8f9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0b901c8f077905bf153a1390b207277c31e893f83f9ced5d29a0a0c85a07b5eb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0da1ff7c9602609cfa8b82a67140114f3037130fe79583bbf23b69cef81823a6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0f87caef05125425c0ca418a32ce8a53e9becffd464411b2ff94e3c7a37c7b5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{cb50b4d54ce9c249d580f7f1fc2df4beab4f13c4d16f486c89239809570de8fb.json → 0fc333c35ad49c0db7f18f77521984a36067cfce4206df50ce16a2fdc88be1c6.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/13fdd92dd5e2fe11640cf6ec0368e3c098d3f0c43e11703ccf157e8c41410d5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1464f950e5600fe2e5890186a1ae3750cdabbadd8525070cef6273450760bcf6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/17c53dc44a324629efef425833ab48cc05243d089f4eb3c731da63568fcff35e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1946a52cb34e675da423de15b703437b4c841e4f5aed6ddd932a091c145e17b0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1e0da5ce4d8ccdbe33705a47bdc0d5d1d7e4e48c8e5c1d0b8c473e9f491930ac.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1e3ec3302ef2253fad5e86bf429baf6b69e57e769f38ec1aaada87f3114b6a1f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1f18804827897c337874b289873df611b44869f86ced1d4351fbe0b45ed17518.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/21b64f7fdb74c2c35479811c3f5683ee161d3b64a4fade0cd1fc0044332c79b2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/22873d21eab4b5a04b183cc3d2cbfafa431e0ce99649aeb900cffcfc33ecbca4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{7a5fecf96af770954035948b07dfadeceb33536b4763a2f9e548121ae555b75f.json → 228b78cd0950f1c967cb33b210b11497052a9998f0b1d1f31ecee0fd8b99fb40.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2328fc2360224ac3eef5c5261fc2a75cdce7fe58c210af8d3e532ec609464f5f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/25fd89450dcf73a205a5f8c82dbc604bf872d574b748427c26ba79b8711ec502.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{b6e95546cdd247e50d9c916dd3bef5216f96c32a267e54b51b23cc22631be0c4.json → 288d6573b09f0e2b0277e1f301bcb57a194c01f3983e3c1e03c8b2ea72653194.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2c04900a020b813e2e99b15748c3c3ef5a1627c33d5668e45bc6a798f0d28dae.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2cd0fd50d8f213cdd94daffe7d0f997991072b11936ead7c48be2af3fdbd1fda.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2e057c12322da07a537a576a3917d512ac6cb291be5aa29ce23d2679a4fd16b1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{abd0662d21fbe39e483fbc4d37faa9e24d93889b5b40d42e7e20773c66ee3894.json → 327c21db6124a5209f41996d0d3359b462d5baff3974296f5acfddfc532434e6.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/33294bb0080595100e22c960929a6f3086f5ea632a4bcd9216069f95347a63a9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3361869e45eddd7d48ff7995e1a12ac454c9eb92d4c87f49ce80c1e9482c1e91.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/375d2f6a31e01c5a82fd0e18f42198f9fbb7117491f3e92d70cf36055f159a6e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3808217efee76345ba19af3ffdba045888e0908b0dc7fc38e54655cdc3504c60.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/39db740a99454006312dec9e4845f850c51e043458ca94eba8bb256521fabcd4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3e737bcdbcbc407ccd65b90e199ef0c3214b261e8e41dbf14d921384a717d9ee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3e87c7d4f1e5c93603eed3e861945e44dd1e664061a89aaef1aa6435f4f4d7d0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3ecf70018028c8cf6cdc2d24d2b88f2707bd5c23a336fd3d82bd6a1af0db7ded.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/43314d1e9fed30ed6cf5cab3f5033da067e58be29e36448e3c0a8127403a9388.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/442467c09440131f7c25ddcdeb356d2a87783ad4e32d89f410a16cb6e7d83b22.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/487524f2b3494bfcfbd8c12918092a52ff2ce3eefe9bf0a557389ec1c74d3e67.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4de4b0282a3d1ae02bec0bfe7d6658f039141406e8bfdbe86bf1dc5b115a2524.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4ec72faab00e4cbffefdf43b987ae3d15beb60e282b9ca86dad37f4d0fc3f2bd.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/51403f1b70af0f4577d4594cbfcf9d9759997e5c49097e7a07219000d1be4ff1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5282ee05e064b3a80bc991e9003ddef6a4958471d8f4fc65589dc64553365cdd.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/566a0829bfeab41e40bbdca55ad056067e172508af4b333403d4fd334a8c8742.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5689d9aee417a0834fed66720cf3b48c34cc26364cd9ab7e3d809997bb9faec9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5bedd27475bda204eafccdc636a56ba5765353391049a0b947234faaab3d021a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{94daea23b698bbb26bbbab69bbca8ad521c79fcbd6b495c84c740e4006a2ced2.json → 61014d40df4486972fb6d5909dff45f24bec48573af99ca8bcd79835385c7da0.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/6139c80ec46d77469e9a89924de41d93050f19e9ed541e358f09cbf20ccd7c86.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63ead4dee6a4f4ae47aed6918a2ab3e172739ecdffaa85e063f9a2c9165ff93c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6bf8c25e0f2051d994990bf73fdcb3e08963e11589842895593f199eab13ba9a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6d42f63cd673d1c60e534a174cd851e9aefd2d5d45a4aa6128d794457b64de53.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7044e626a91582077b8cbad09bcf0cc118e92e11ebfa2ebc17622baf98aa56bf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/71ec408030881aad69854815b8681185d0174cc0cf67f20fb2c3cd13c74fce07.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7280614d5bddce524182024cc288953eb89d46c1dbe470ed5eb5ecab5de28385.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7293ad8ba7b5fc6ed791aed44a766a396516c782f5d0e564b9ef0997645ec1dd.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{65ca3ba225b8c8dac907210545b51f2fcdb2591f0feeb7195f1c037f2bc956a0.json → 77d4d795f078408fa2dd49da26d1ba1543d51b63cc253e736f4bef2e6014e888.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/79be1e0dd6b1c3cbb3f08b349454f4791edaac451fa4d1f10eaf433e57ecf70f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7c0571c03aa8c90eb9452ba23133aed9b3b2a049910d4d8276588b438a4caaba.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/81ed094096cd9dc36f3ddebe61818766f43d3338c171f2269958f47265e60f24.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/83beb31daa7169fb0057453d4f86411f1effd3e3f7af97472cbd22accbfc65bb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/86355e55059ddb15f71c0c651a2e622cee6f3141da3a018775ca00c74dd37dc2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{d1af6db565bae36218251c01a817c080ef764172dd0694e38d27b525a3ffd1d2.json → 8710fe63bcc6b95502287c36a7bd881b74ee2926982e120e1e8070a2c8a668fc.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8779104679d8f85017b551d1d108ea1ac701e184ad7bcc8a367526353a6c5175.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/881c573e6aa9954e7bd5e3bc5a6ef62c0db9b93815030080f677f6834e431434.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8d24c51a0905b919436aac2bad441eb1651c46c4e7aa17c99eb63c7760ce6615.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/93bf5cc8cbe3f0b10f291800e1896f03506a8933cd490e07ee0ff94498f93bce.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9401db0316ac34790aa5b972e9e1c50de58ef313b50840a37e08f0936a7197b6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/98afeafdfc285a744d116fb5f189a16ad2b977adbd4fc0757f7ade1b29b8ad31.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9a4e624d1ce66832fb213ead02745d84661d9bfcc21b62b4c37c680c3c3f5cbb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9a7b26f85224d33d431354ac8d7849388b7525fc014801824e705561ab2ac7a7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9dcb04ac6cb341233a2e98f2fb417d059c2ea6a56d2ff6f30bb27dae9bd6c756.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9ebede8a4bccef016eef418a668b3212962779ea6d2531edb23d3dca81163496.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a0efc8db4289116bdb312014df91fc781f66fe171d63027bbd5aae25cd54c325.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a212defc32ca338ba673722c72815d30974275f1a82b67405a898f826fc8d782.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a2685acab57fd6d874bf6069a587e3770b55c72cacc69cb167bb3dbd16a551c5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3b7d6233e50dc3832421793a5752c1db99f3a214c9896c97bbd395d007f09b1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3d0c810a255108613be0a175d53d673c4b6843c2a87dd0f157b4d3731af2092.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a6633774053545c60721e345820e21cf3b089df219f7ee3fcceaf27af3a9e84b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a6bb917e4869d7d6ddf50b75932cad64e94b9e33db2aa40c4a489a045819bb36.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a816031fafa564dc4af27383612e51ac29af51354f3bba750a70ac0c03b6bd30.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/af6349a1e9761cb98af4b76904440fb7f8d54288ee6ba37116f670226796e689.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/afce63eaad32a9968590e4500e123c81b385ea3b6046b2af1ec7ea8a49ba657b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/aff24b61e5d55147d0a36ef1896b9bd35c995c357f44eeee1209f23db2118636.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b01676c5ca97e3409a1b7bac91b6a797be3719a8b7ec72ad06c9beb296c67bb7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b0c5e967d1af90d545f1d34c9893a841296b0d1dd50de65eeaf19054995d2a94.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b561d429cea6143e1bf7e6e7e690f9e6ce1dce4adfcacb57d7c5578195d1d190.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b70187bb5aecf3bd7afbe8469b49e2a0a94a0c62727530d0dc2f84ee4577bcfc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bd21f6039e23775fb4631bb56f2eef1d132c03a60cc6fe4530569ecfeef456a3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/be09b5508d0ca5ac0300144a682d4cde2d5b4b96ecb9fc690c1d5e7ee7411b3e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/be8c9e83f0dd4af2e9bcf6844dd8c4445fe634b6c71da6debfa7acabb7a875c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c0d1317dfae0c5ddf5a3ba9aef6bd2204f2daf9754f3270c4cab5c4632f78000.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c1e3afdaae5649274f7d34910015b88bc02ba6a00b935e6597f9c670fedc05c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c29ec02243dd370616a489f7de67e4e479d2f7f12504ba7f77304cffe60befbc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c2a4d298149e6c66713eed4f56f4bbbfa4022a193891029b84e429baf59cc246.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c3d2fc03602f7604a65e631e8595ce4cfabd8887343489b34a65e3a73c5546b3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c41078c8692e0635fb52d01bc5157528c7308ae5140c134aa07c1fcda4f7a5e5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c4b5f1e444ddd41f8cd30e618c185b16235e313ea34d944959628001a91e38a5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c7ed9bc3125498e475f3170b8126c93368e9cbfd6a5e73ee63437d90e39b69e5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c996b3dc884130f034a37551845201bc5661f2dcd282aed737329f8d4dd387b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c9fef6e7f0a532c2d1cc25dffd496ea8355dd14ce0105a169b391bfeb9277e4f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cc8f5a2bbe4da9852ccdf64150115271baa2d282d74d7fab854dc5ca641dd519.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ce45adea497443bfc1b6baabd0aa3d85888dd10b1b7fd009edf608e390b2a844.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ce4b82a3855d18ba09e922f6e2cc1b8c20f0e5261b27303c52d4e1ba5a09a078.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cf16614311e144768bdc064da754efad77597cac5efe6852be996b9eaaed695a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d074bfe592a195f339acbcd8743f8b64cfd09b45ca1013598269227fc092f2db.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d2a95455cc492d4fbf2839a21eeaef00e7fad2aac93825d2443a38127ad02007.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d4e6e712ebbbf0f30745093e2b6d235c13fb43d37b586a0109e87c19eafdf35d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d88a02264125b07881eecff752d9c8d718edcf2d87e5431b13e54809dc7bf4f2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d907925fb28912f2e98cccfe758bce680aa4f894cc9d2079d73d7e2d85481d47.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/daf050ec3a864b9831dcb32cee1fb2739dafb68b369e9108469ca1e6fd166867.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/de9645d55ce8744856862eae4a3d3a1266e9df9d575aa3e2f62a75a5d97444f7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e04199bdfb87226d6aeae387ef677eae06483f9f135b5a3f1360ec1a243c86cd.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e97d1f31f54263bb90b7c3e71be0e19973d76d945fb5d526bf93fb449708bbf0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e9f87f5c370a731ee42098ae6ff38ca6ad256396158c03f84c5e770c6b76a43d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f2ad375dbc8779f54eb9c377fd951cd442f3766d5eb70d40613b952b811643da.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f35df9d5a0e0341bcc0f39a462e46988c330cdb905cc690b08de4a9cf5771f28.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f433e33369483f75d31fe937b8a597ef2d7cdfed339931643ff966a33dbc6158.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f4c164c89cfbcb1d81bc0b60e3e7c8715e07c7a23118acf4e3e16715beac09c5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f545d937cfb9d974f4aaaa9dc1c12adfed0d4f608de7d0a7247452d9bbbb55a4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{3cc5ec39e76e1768ffb070a8ae18ee61b68f3ed3593178134d3edda5ed378c6c.json → f8d2c4372093d0ff3a2f78260e004e6c4e2b8ab4f5be9713ebc16c50e9c2ab4b.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fa41c6c8ffa1047b7f2f52b6532ea4d9f5754e5bbc11d1340bae7f75ae5ed84e.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +45 -45
- xinference/web/ui/node_modules/@babel/runtime/package.json +64 -37
- xinference/web/ui/node_modules/@mui/private-theming/package.json +7 -6
- xinference/web/ui/node_modules/@mui/styled-engine/package.json +6 -5
- xinference/web/ui/node_modules/@mui/system/node_modules/clsx/package.json +22 -10
- xinference/web/ui/node_modules/@mui/system/package.json +13 -12
- xinference/web/ui/node_modules/@mui/types/package.json +3 -2
- xinference/web/ui/node_modules/@mui/utils/ClassNameGenerator/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/HTMLElementType/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/capitalize/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/chainPropTypes/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/clamp/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/composeClasses/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/createChainedFunction/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/debounce/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/deepmerge/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/deprecatedPropType/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/elementAcceptingRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/elementTypeAcceptingRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/exactProp/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/formatMuiErrorMessage/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/generateUtilityClass/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/generateUtilityClasses/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/getDisplayName/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/getScrollbarSize/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/getValidReactChildren/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/integerPropType/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/isMuiElement/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/ownerDocument/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/ownerWindow/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/package.json +8 -7
- xinference/web/ui/node_modules/@mui/utils/ponyfillGlobal/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/refType/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/requirePropFactory/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/resolveProps/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/scrollLeft/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/setRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/unsupportedProp/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useControlled/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useEnhancedEffect/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useEventCallback/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useForkRef/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useId/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useIsFocusVisible/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useLazyRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useOnMount/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/usePreviousProps/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useTimeout/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/visuallyHidden/package.json +6 -0
- xinference/web/ui/node_modules/@types/prop-types/package.json +2 -2
- xinference/web/ui/node_modules/csstype/package.json +3 -3
- xinference/web/ui/package-lock.json +47 -45
- xinference/web/ui/package.json +2 -0
- {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/METADATA +6 -3
- {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/RECORD +227 -167
- xinference/web/ui/build/static/js/main.476e35cc.js +0 -3
- xinference/web/ui/build/static/js/main.476e35cc.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/02f54fcadc6fe9b1be8451435f9a097c82b3d665bae2d2df03ba0b36cebeca0c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/051391807ffd255b652b63dc2ff12e5ce80113e1b712c81b6c1986a11cd85d3c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/05710e94caaf5a2da790d67aa572bf6bc19bae0237668c8d97f4da62ab52fe5c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/08a3470840a55d359a967d0763cd1ae5c1f15f3ca31118e29b37b4fbbdb248c7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0e68b0e268559a563dde6fa4c654922d98c74f5140854437f3e5a01bf23ba41f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/10dc7661741dfc88df5538bbf835870ae47b1a18f36c2f7b9650651e7304ce72.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/18b3d28a2692ab4f2d05254221f53b3c13259435bd98cb90ff5d7250d93f11e0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1b612ab687e3a4e14db156bd92a75b8a3e96ff04bfbf8f3351c7e801bc61cf90.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1bb839ffeb39ed12367c179fbf3b784e20f6f98fb87c597717bc11ebe3201c59.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1d12e0b0e4e87372b18fef72fcd6e264fa40b0cffdbeddcef004becc4e32cf27.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1d5b806c08ffb55539ee26b5fa9746a373f602354d6e55a0d4379d7e2f903868.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1ecb7ce2bc103bec0d1a9f0cb07132e5ec7731d974998c465798c0e15cafb924.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/20cfc57ae2b8ba23571f386a5f2bf194bd02e8a3913113881bb9804ee822ad58.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/222a437b6773707bbead06ba5db1ed8869e44d9f051a7f6b1a745aeb654679d0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/249709fb20c1254cecc9ba6666e6bcf37b21e47e7a020d5a7f0513680b49473f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/257ad4979294ee3a7ef400feab3b29ebcd259c47b9410a3011a66f4b33a97904.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2779bdc8ed8372f4015d07a45e4b09756bd953250944c3e3efe6b65bf8eaecd8.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2bee7b8bd3d52976a45d6068e1333df88b943e0e679403c809e45382e3818037.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/31c71a85b2d0bbdcdc027e7950a9d1f778288f98888d8595fad5e4fdda5bb31f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/371214b6649e323e7281885b1412c5a515ccc2274f4e1a42921e45777694009c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3eced030d2b3da645f4cbe3dc4ff49744312316e4306ce170991e802e9bfcf8e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/452ae8e7cff99efc35d134014d32610d13217b031b817d3663bb689b3778ebd2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/48e3f5d01ccedde86348a29392bc9b6e301a51b3f7e2476ae0a95a31eb6bf18d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4a311b5bd98f16588caf60b8de4c8d91d038198e6bfd496f880f60d4c1ea076d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4ccd71d3f73490a18a5a4de4ad82d63cf019caff6f8d881fd1434755a033bfc5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4ee8343d4254c95e685d49601f1649e8222d7423c2a822e1d01c8fc4cd3582a2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/523a712748fbb7abf0c5423df62e65c4659b7e450182c890e847cd78ace7cac5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5777049bbacf7322891761a5b959e62eec22b0f96a70dbf97e58d530c6258a55.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5905957ce95f7dee64f0739359e990b8b518cff088c6958487641a3d21737d1e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5ac79df79d2dff64da97b02d5c90b36bc0d44a4a4229f6efed2993f93ee1d7a1.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/64d3ef38fd394979b09061e67fc47f04b7295401677825c64c72ea2f44cf910c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/68a5d8e66687770e05aebcae9a8047189dc577d6150fbf0e62d2b2c11c4335c5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/69d5f27e40d5e7040c8e8e303ccaee019b54092345acc3838d6eb943c474ce39.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/6a07b8d00482c07a4b7c73f6bebb79fad54bbea35ba358681ede6ff299a09f6b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/70b0307a43b9f8c876bf2a1f7dea4cc56cc0ea33743e863c0b3983bc48109f2d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/710c39a26816820c2cfd00b19cd0112851d64b8ac6b201b18727b43b5a69d92c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/722f4c03da79795d1fb738323f3eb6794ab18711e20f141c0b678c5d4cb3dce9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8237bdfff695d71496ee2ff83ba0392d753f8e1dff0d002a1b2c9e249f03083c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8342e3800fed63e8e805850810aee4fd9b0558aca10c8b6b770fe5bff1bed266.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/847a71f54f7a059efc71f70c297b6b4182a633130715c80f817eb533de108c12.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8731a959d9b7278ba7a1c044bb4253bfff63079da69b21ff80221ae5e037dbbe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/87b4e94b045df561c8f4bd253c84447c96e0dededbcb6646c0cd180119960990.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/894a334446bac96d74385c23c9d6f2d117ee29511897700430cd0e578cb32527.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8c5ba88b620eeedbb99db18a610138353a5a657170fdb8bd6969585f81b34f74.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9004bc083cf179941e64130390a27afcf7593b21dd64a87d03e1e6489032aa13.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9b84631723b52988d3748d80b994e43d1944ca55098a5d908c9061eeb4c84f90.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9ee1f84bde10cd78ca23a4681c7372195748bacd5e41c3b1df8e39d960294d81.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a127f13651a7398f5fc378456a52cfa0c6260f6109611d3fb012aa7c7a4ee17a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a5e2e9f707eb7039bea096ca117d996b8f9cbc2a5613fd8e0c5b0094444ce23c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a6131b7fb1b30fd63d798845dab508a22b21c616b361e6214bf711915c5fef3d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a7b8ccb199158c897ed7d7835942eb37569221be4101eecfa7a7ce62e0c76f1f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/acaf6dbddd57b63a874e9062803850ce4911c8dc63a362eacdc4d2d0f48f8c16.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ade3cd82af85d2d28138bd942228b01e27559a5a4072ee049dcb47612ee9a506.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/aea82c71088dc9afd6f4ba71434286af5c6ab5e9b87088b0e84f351146d06489.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/afbd547078b20bb14596f20e165cb40413147e3de501b126c97bb417f8b9a53e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/afc597494ebc62d22e4d3ac2a839bd5f30d5e47ce1d0576e0f62407ef6c78652.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/affbbac58777e26ba8b6dcda8cea72c76975bd0207f0a510bfde5983d3081f74.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b0aa2ece62a1f5fa015f9e6b59d20b79be727fb5a3f4fc747200163097f320ad.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b10391d3ae53452c8ec97d230f064e03a8377aab2be4b1b3e30cb3e054b2341a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b4bab0c147360f3e335251166d3d25602e2e0e160fb3734ac019a5b42499179c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b57dcff363e045fd11ea2bcbc58cce470f1bc22d3ac14c8acbf63a36637050ff.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b80db1012318b97c329c4e3e72454f7512fb107e57c444b437dbe4ba1a3faa5a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b8de42f7f5dc1df9e802898a0ebb35adb57d269d7af21e6c66a0fde0ff226a08.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/be696cadea3bf577c0defc8f8d100232369763dde754b5ba9399f45f42aed215.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c5a30d7c6cb484268e9126dd6d078229a1dcaa07fb064077b87d15224c61a3aa.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c67e2077e70c4751e9e1dd98f378d979abd9c8a39fbf6c69f02fb4e5b04eef87.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c81c1cad35be60a4fad090374edf50a884540e306d464b803120554df61e817e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d1054fd4d09666318a00ca0aea5ff42a2fdb17b3a42b5f77b93433d7408eb2f4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d1ec524ce17a48816fc55fe665572c1ef4982a4bd1ecb01badd0fdffc465242c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d4d61a537c45c4506a6b7e3478206b71dc96e0c6c6410cef9b5d2f47018bc578.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e2e563b606cac9d51661cf68ba1729eb5df7861af521adccedc1b163439b657d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e3890e804dfaeb19b79f52e1b20198b587da61a9a3a1f55417d2cc5ce4ad7dae.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e8c3fadbd12f0e2c7b70d84acbcabd0a5c96b05380a7b3167b00e8739d829d24.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ebb5ae1a82b33b94e2b0cfec599bec7500aff0a1701f91736a6f3a4f77617560.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f30f0396f4e54f0355112d5eba4b16c3a56f47e456eb117a9dc7fa6218ce21fe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f318ac15e999a553c00832029245730fe25c2916860b0ff55eee1f0ab926d84e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f55f4e657cc2d797b7e73a64ce40352f9a5dde31763a3e89c14d3b983307f30e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f8a68931d652a5e3be4e1ae06210a223de1239f3884ad858cecf9fe8d11c2ffe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f95a8bd358eeb55fa2f49f1224cc2f4f36006359856744ff09ae4bb295f59ec1.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fb6fbd693a53d3c542d9adf83e0bb458918d39bf101eaf6d6662a2fbc35c1adc.json +0 -1
- /xinference/web/ui/build/static/js/{main.476e35cc.js.LICENSE.txt → main.87d39ffb.js.LICENSE.txt} +0 -0
- {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/LICENSE +0 -0
- {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/WHEEL +0 -0
- {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/top_level.txt +0 -0
|
@@ -665,12 +665,16 @@ class Client:
|
|
|
665
665
|
def _check_cluster_authenticated(self):
    """Query the cluster's auth endpoint and record the result.

    Sends a GET to ``{base_url}/v1/cluster/auth`` and stores the outcome in
    ``self._cluster_authed``.

    Raises:
        RuntimeError: if the endpoint answers with any status other than
            200 or 404; the message carries the ``detail`` field of the
            JSON response body.
    """
    url = f"{self.base_url}/v1/cluster/auth"
    response = requests.get(url)
    status = response.status_code

    if status == 404:
        # compatible with old version of xinference
        self._cluster_authed = False
        return

    if status != 200:
        raise RuntimeError(
            f"Failed to get cluster information, detail: {response.json()['detail']}"
        )

    # The "auth" field of the payload is coerced to a plain bool.
    payload = response.json()
    self._cluster_authed = bool(payload["auth"])
|
|
674
678
|
|
|
675
679
|
def login(self, username: str, password: str):
|
|
676
680
|
if not self._cluster_authed:
|
xinference/core/model.py
CHANGED
|
@@ -44,6 +44,7 @@ import logging
|
|
|
44
44
|
|
|
45
45
|
logger = logging.getLogger(__name__)
|
|
46
46
|
|
|
47
|
+
from ..device_utils import empty_cache
|
|
47
48
|
from .utils import json_dumps, log_async
|
|
48
49
|
|
|
49
50
|
try:
|
|
@@ -130,7 +131,7 @@ class ModelActor(xo.StatelessActor):
|
|
|
130
131
|
try:
|
|
131
132
|
import gc
|
|
132
133
|
|
|
133
|
-
import torch
|
|
134
|
+
import torch # noqa: F401
|
|
134
135
|
except ImportError:
|
|
135
136
|
error_message = "Failed to import module 'torch'"
|
|
136
137
|
installation_guide = [
|
|
@@ -141,7 +142,7 @@ class ModelActor(xo.StatelessActor):
|
|
|
141
142
|
|
|
142
143
|
del self._model
|
|
143
144
|
gc.collect()
|
|
144
|
-
|
|
145
|
+
empty_cache()
|
|
145
146
|
|
|
146
147
|
def __init__(
|
|
147
148
|
self,
|
xinference/core/resource.py
CHANGED
|
@@ -22,8 +22,9 @@ from .utils import get_nvidia_gpu_info
|
|
|
22
22
|
|
|
23
23
|
@dataclass
class ResourceStatus:
    """CPU and host-memory figures reported for one node.

    ``gather_node_info`` in this module fills an instance from ``psutil``
    and stores it under the ``"cpu"`` key of the node resource mapping.
    """

    # CPU utilisation as a fraction: filled from psutil.cpu_percent() / 100.0.
    usage: float
    # CPU count: filled from psutil.cpu_count().
    total: float
    # Filled from psutil.virtual_memory().used.
    memory_used: float
    # Filled from psutil.virtual_memory().available.
    memory_available: float
    # Filled from psutil.virtual_memory().total.
    memory_total: float
|
|
29
30
|
|
|
@@ -39,8 +40,9 @@ def gather_node_info() -> Dict[str, Union[ResourceStatus, GPUStatus]]:
|
|
|
39
40
|
node_resource = dict()
|
|
40
41
|
mem_info = psutil.virtual_memory()
|
|
41
42
|
node_resource["cpu"] = ResourceStatus(
|
|
42
|
-
|
|
43
|
+
usage=psutil.cpu_percent() / 100.0,
|
|
43
44
|
total=psutil.cpu_count(),
|
|
45
|
+
memory_used=mem_info.used,
|
|
44
46
|
memory_available=mem_info.available,
|
|
45
47
|
memory_total=mem_info.total,
|
|
46
48
|
)
|
xinference/core/status_guard.py
CHANGED
xinference/core/supervisor.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
import asyncio
|
|
16
16
|
import itertools
|
|
17
17
|
import time
|
|
18
|
+
import typing
|
|
18
19
|
from dataclasses import dataclass
|
|
19
20
|
from logging import getLogger
|
|
20
21
|
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Union
|
|
@@ -179,12 +180,26 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
179
180
|
model_version_infos, self.address
|
|
180
181
|
)
|
|
181
182
|
|
|
182
|
-
|
|
183
|
+
@typing.no_type_check
|
|
184
|
+
async def get_cluster_device_info(self, detailed: bool = False) -> List:
|
|
185
|
+
import psutil
|
|
186
|
+
|
|
183
187
|
supervisor_device_info = {
|
|
184
188
|
"ip_address": self.address.split(":")[0],
|
|
185
189
|
"gpu_count": 0,
|
|
186
190
|
"gpu_vram_total": 0,
|
|
187
191
|
}
|
|
192
|
+
if detailed:
|
|
193
|
+
supervisor_device_info["gpu_vram_total"] = 0
|
|
194
|
+
supervisor_device_info["gpu_vram_available"] = 0
|
|
195
|
+
supervisor_device_info["cpu_available"] = psutil.cpu_count() * (
|
|
196
|
+
1 - psutil.cpu_percent() / 100.0
|
|
197
|
+
)
|
|
198
|
+
supervisor_device_info["cpu_count"] = psutil.cpu_count()
|
|
199
|
+
mem_info = psutil.virtual_memory()
|
|
200
|
+
supervisor_device_info["mem_used"] = mem_info.used
|
|
201
|
+
supervisor_device_info["mem_available"] = mem_info.available
|
|
202
|
+
supervisor_device_info["mem_total"] = mem_info.total
|
|
188
203
|
res = [{"node_type": "Supervisor", **supervisor_device_info}]
|
|
189
204
|
for worker_addr, worker_status in self._worker_status.items():
|
|
190
205
|
vram_total: float = sum(
|
|
@@ -193,14 +208,24 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
193
208
|
total = (
|
|
194
209
|
vram_total if vram_total == 0 else f"{int(vram_total / 1024 / 1024)}MiB"
|
|
195
210
|
)
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
211
|
+
info = {
|
|
212
|
+
"node_type": "Worker",
|
|
213
|
+
"ip_address": worker_addr.split(":")[0],
|
|
214
|
+
"gpu_count": len(worker_status.status) - 1,
|
|
215
|
+
"gpu_vram_total": total,
|
|
216
|
+
}
|
|
217
|
+
if detailed:
|
|
218
|
+
cpu_info = worker_status.status["cpu"]
|
|
219
|
+
info["cpu_available"] = cpu_info.total * (1 - cpu_info.usage)
|
|
220
|
+
info["cpu_count"] = cpu_info.total
|
|
221
|
+
info["mem_used"] = cpu_info.memory_used
|
|
222
|
+
info["mem_available"] = cpu_info.memory_available
|
|
223
|
+
info["mem_total"] = cpu_info.memory_total
|
|
224
|
+
info["gpu_vram_total"] = vram_total
|
|
225
|
+
info["gpu_vram_available"] = sum(
|
|
226
|
+
[v.mem_free for k, v in worker_status.status.items() if k != "cpu"]
|
|
227
|
+
)
|
|
228
|
+
res.append(info)
|
|
204
229
|
return res
|
|
205
230
|
|
|
206
231
|
@staticmethod
|
|
@@ -227,11 +252,11 @@ class SupervisorActor(xo.StatelessActor):
|
|
|
227
252
|
}
|
|
228
253
|
|
|
229
254
|
async def get_devices_count(self) -> int:
    """Return the device count reported for this deployment.

    For a local deployment the count comes from ``gpu_count()`` on this
    machine; otherwise one worker is chosen and the result of its
    ``get_devices_count()`` is returned.
    """
    from ..device_utils import gpu_count

    if not self.is_local_deployment():
        # distributed deployment, choose a worker and return its device_count.
        # Assume that each worker has the same count of cards.
        chosen_worker = await self._choose_worker()
        return await chosen_worker.get_devices_count()

    return gpu_count()
|
xinference/core/utils.py
CHANGED
|
@@ -19,9 +19,10 @@ import string
|
|
|
19
19
|
from typing import Dict, Generator, List, Tuple, Union
|
|
20
20
|
|
|
21
21
|
import orjson
|
|
22
|
-
from pydantic import BaseModel
|
|
23
22
|
from pynvml import nvmlDeviceGetCount, nvmlInit, nvmlShutdown
|
|
24
23
|
|
|
24
|
+
from .._compat import BaseModel
|
|
25
|
+
|
|
25
26
|
logger = logging.getLogger(__name__)
|
|
26
27
|
|
|
27
28
|
|
xinference/core/worker.py
CHANGED
|
@@ -30,8 +30,8 @@ from xoscar import MainActorPoolType
|
|
|
30
30
|
from ..constants import XINFERENCE_CACHE_DIR
|
|
31
31
|
from ..core import ModelActor
|
|
32
32
|
from ..core.status_guard import LaunchStatus
|
|
33
|
+
from ..device_utils import gpu_count
|
|
33
34
|
from ..model.core import ModelDescription, create_model_instance
|
|
34
|
-
from ..utils import cuda_count
|
|
35
35
|
from .event import Event, EventCollectorActor, EventType
|
|
36
36
|
from .metrics import launch_metrics_export_server, record_metrics
|
|
37
37
|
from .resource import gather_node_info
|
|
@@ -54,13 +54,13 @@ class WorkerActor(xo.StatelessActor):
|
|
|
54
54
|
self,
|
|
55
55
|
supervisor_address: str,
|
|
56
56
|
main_pool: MainActorPoolType,
|
|
57
|
-
|
|
57
|
+
gpu_devices: List[int],
|
|
58
58
|
metrics_exporter_host: Optional[str] = None,
|
|
59
59
|
metrics_exporter_port: Optional[int] = None,
|
|
60
60
|
):
|
|
61
61
|
super().__init__()
|
|
62
62
|
# static attrs.
|
|
63
|
-
self.
|
|
63
|
+
self._total_gpu_devices = gpu_devices
|
|
64
64
|
self._supervisor_address = supervisor_address
|
|
65
65
|
self._supervisor_ref = None
|
|
66
66
|
self._main_pool = main_pool
|
|
@@ -244,9 +244,9 @@ class WorkerActor(xo.StatelessActor):
|
|
|
244
244
|
|
|
245
245
|
@staticmethod
def get_devices_count():
    """Return the value of ``gpu_count()`` from ``device_utils``."""
    from ..device_utils import gpu_count

    device_total = gpu_count()
    return device_total
|
|
250
250
|
|
|
251
251
|
@log_sync(logger=logger)
|
|
252
252
|
def get_model_count(self) -> int:
|
|
@@ -263,7 +263,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
263
263
|
we assume that embedding model only takes 1 GPU slot.
|
|
264
264
|
"""
|
|
265
265
|
candidates = []
|
|
266
|
-
for _dev in self.
|
|
266
|
+
for _dev in self._total_gpu_devices:
|
|
267
267
|
if _dev not in self._gpu_to_model_uid:
|
|
268
268
|
candidates.append(_dev)
|
|
269
269
|
else:
|
|
@@ -291,11 +291,11 @@ class WorkerActor(xo.StatelessActor):
|
|
|
291
291
|
return device
|
|
292
292
|
|
|
293
293
|
def allocate_devices(self, model_uid: str, n_gpu: int) -> List[int]:
|
|
294
|
-
if n_gpu > len(self.
|
|
294
|
+
if n_gpu > len(self._total_gpu_devices) - len(self._gpu_to_model_uid):
|
|
295
295
|
raise RuntimeError("No available slot found for the model")
|
|
296
296
|
|
|
297
297
|
devices: List[int] = [
|
|
298
|
-
dev for dev in self.
|
|
298
|
+
dev for dev in self._total_gpu_devices if dev not in self._gpu_to_model_uid
|
|
299
299
|
][:n_gpu]
|
|
300
300
|
for dev in devices:
|
|
301
301
|
self._gpu_to_model_uid[int(dev)] = model_uid
|
|
@@ -324,7 +324,7 @@ class WorkerActor(xo.StatelessActor):
|
|
|
324
324
|
) -> Tuple[str, List[str]]:
|
|
325
325
|
env = {}
|
|
326
326
|
devices = []
|
|
327
|
-
if isinstance(n_gpu, int) or (n_gpu == "auto" and
|
|
327
|
+
if isinstance(n_gpu, int) or (n_gpu == "auto" and gpu_count() > 0):
|
|
328
328
|
# Currently, n_gpu=auto means using 1 GPU
|
|
329
329
|
gpu_cnt = n_gpu if isinstance(n_gpu, int) else 1
|
|
330
330
|
devices = (
|
|
@@ -396,10 +396,10 @@ class WorkerActor(xo.StatelessActor):
|
|
|
396
396
|
n_gpu: Optional[Union[int, str]] = "auto",
|
|
397
397
|
):
|
|
398
398
|
if n_gpu is not None:
|
|
399
|
-
if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu >
|
|
399
|
+
if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu > gpu_count()):
|
|
400
400
|
raise ValueError(
|
|
401
401
|
f"The parameter `n_gpu` must be greater than 0 and "
|
|
402
|
-
f"not greater than the number of GPUs: {
|
|
402
|
+
f"not greater than the number of GPUs: {gpu_count()} on the machine."
|
|
403
403
|
)
|
|
404
404
|
if isinstance(n_gpu, str) and n_gpu != "auto":
|
|
405
405
|
raise ValueError("Currently `n_gpu` only supports `auto`.")
|
|
@@ -504,10 +504,10 @@ class WorkerActor(xo.StatelessActor):
|
|
|
504
504
|
launch_args.pop("kwargs")
|
|
505
505
|
launch_args.update(kwargs)
|
|
506
506
|
if n_gpu is not None:
|
|
507
|
-
if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu >
|
|
507
|
+
if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu > gpu_count()):
|
|
508
508
|
raise ValueError(
|
|
509
509
|
f"The parameter `n_gpu` must be greater than 0 and "
|
|
510
|
-
f"not greater than the number of GPUs: {
|
|
510
|
+
f"not greater than the number of GPUs: {gpu_count()} on the machine."
|
|
511
511
|
)
|
|
512
512
|
if isinstance(n_gpu, str) and n_gpu != "auto":
|
|
513
513
|
raise ValueError("Currently `n_gpu` only supports `auto`.")
|
xinference/deploy/worker.py
CHANGED
|
@@ -21,7 +21,7 @@ import xoscar as xo
|
|
|
21
21
|
from xoscar import MainActorPoolType
|
|
22
22
|
|
|
23
23
|
from ..core.worker import WorkerActor
|
|
24
|
-
from ..
|
|
24
|
+
from ..device_utils import gpu_count
|
|
25
25
|
|
|
26
26
|
logger = logging.getLogger(__name__)
|
|
27
27
|
|
|
@@ -33,12 +33,12 @@ async def start_worker_components(
|
|
|
33
33
|
metrics_exporter_host: Optional[str],
|
|
34
34
|
metrics_exporter_port: Optional[int],
|
|
35
35
|
):
|
|
36
|
-
|
|
37
|
-
cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
|
|
38
|
-
if cuda_visible_devices:
|
|
39
|
-
|
|
36
|
+
gpu_device_indices = []
|
|
37
|
+
cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", None)
|
|
38
|
+
if cuda_visible_devices is not None and cuda_visible_devices != "-1":
|
|
39
|
+
gpu_device_indices.extend([int(i) for i in cuda_visible_devices.split(",")])
|
|
40
40
|
else:
|
|
41
|
-
|
|
41
|
+
gpu_device_indices = list(range(gpu_count()))
|
|
42
42
|
|
|
43
43
|
await xo.create_actor(
|
|
44
44
|
WorkerActor,
|
|
@@ -46,7 +46,7 @@ async def start_worker_components(
|
|
|
46
46
|
uid=WorkerActor.uid(),
|
|
47
47
|
supervisor_address=supervisor_address,
|
|
48
48
|
main_pool=main_pool,
|
|
49
|
-
|
|
49
|
+
gpu_devices=gpu_device_indices,
|
|
50
50
|
metrics_exporter_host=metrics_exporter_host,
|
|
51
51
|
metrics_exporter_port=metrics_exporter_port,
|
|
52
52
|
)
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# Copyright 2022-2023 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
|
|
17
|
+
import torch
|
|
18
|
+
from typing_extensions import Literal, Union
|
|
19
|
+
|
|
20
|
+
DeviceType = Literal["cuda", "mps", "xpu", "cpu"]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def is_xpu_available() -> bool:
    """Report whether the ``torch.xpu`` backend is usable.

    Returns ``False`` when this ``torch`` build has no ``xpu`` attribute;
    otherwise returns whatever ``torch.xpu.is_available()`` reports.
    """
    if not hasattr(torch, "xpu"):
        return False
    return torch.xpu.is_available()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_available_device() -> DeviceType:
    """Pick the device to run on, preferring CUDA, then MPS, then XPU.

    Returns:
        ``"cuda"``, ``"mps"`` or ``"xpu"`` for the first backend (probed in
        that order) that reports itself available, or ``"cpu"`` when none
        does.
    """
    # Probes are wrapped in lambdas so a later backend is only queried when
    # every earlier one has reported itself unavailable.
    ordered_probes = (
        ("cuda", lambda: torch.cuda.is_available()),
        ("mps", lambda: torch.backends.mps.is_available()),
        ("xpu", lambda: is_xpu_available()),
    )
    for device_name, probe in ordered_probes:
        if probe():
            return device_name
    return "cpu"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def is_device_available(device: str) -> bool:
    """Tell whether the named device can be used on this machine.

    Args:
        device: one of ``"cuda"``, ``"mps"``, ``"xpu"`` or ``"cpu"``.

    Returns:
        ``True`` for ``"cpu"``; the backend's own availability answer for
        ``"cuda"``, ``"mps"`` and ``"xpu"``; ``False`` for any other name.
    """
    if device == "cpu":
        return True
    if device == "cuda":
        return torch.cuda.is_available()
    if device == "mps":
        return torch.backends.mps.is_available()
    if device == "xpu":
        return is_xpu_available()

    # Unrecognised device names are reported as unavailable.
    return False
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def move_model_to_available_device(model):
    """Move ``model`` onto the device chosen by ``get_available_device()``.

    When the chosen device is ``"cpu"`` the model is returned untouched;
    otherwise the result of ``model.to(<device>)`` is returned.
    """
    target = get_available_device()
    return model if target == "cpu" else model.to(target)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def get_device_preferred_dtype(device: str) -> Union[torch.dtype, None]:
    """Map a device name to the torch dtype this module prefers for it.

    Args:
        device: device name such as ``"cpu"``, ``"cuda"``, ``"mps"`` or
            ``"xpu"``.

    Returns:
        ``torch.float32`` for ``"cpu"``, ``torch.float16`` for ``"cuda"``
        and ``"mps"``, ``torch.bfloat16`` for ``"xpu"``, and ``None`` for
        any other name.
    """
    if device == "cpu":
        return torch.float32
    if device in ("cuda", "mps"):
        return torch.float16
    if device == "xpu":
        return torch.bfloat16
    return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def is_hf_accelerate_supported(device: str) -> bool:
    """Return ``True`` only for the ``"cuda"`` and ``"xpu"`` device names."""
    return device in ("cuda", "xpu")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def empty_cache():
    """Call ``empty_cache()`` on every backend that reports itself available.

    CUDA, MPS and XPU are checked independently and in that order, so more
    than one backend may be asked to empty its cache in a single call.
    """
    # Both the probe and the release are deferred behind lambdas so that a
    # backend's attributes are only touched once its turn comes.
    backends = (
        (lambda: torch.cuda.is_available(), lambda: torch.cuda.empty_cache()),
        (lambda: torch.backends.mps.is_available(), lambda: torch.mps.empty_cache()),
        (lambda: is_xpu_available(), lambda: torch.xpu.empty_cache()),
    )
    for backend_is_available, release_cache in backends:
        if backend_is_available():
            release_cache()
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def gpu_count():
    """Count the accelerator devices usable by this process.

    Returns:
        * CUDA available: ``torch.cuda.device_count()`` when
          ``CUDA_VISIBLE_DEVICES`` is unset; otherwise the smaller of that
          count and the number of comma-separated entries in the variable
          (an empty value counts as zero entries).
        * MPS available: ``1``.
        * XPU available: ``torch.xpu.device_count()``.
        * otherwise: ``0``.
    """
    if torch.cuda.is_available():
        visible_env = os.getenv("CUDA_VISIBLE_DEVICES", None)
        if visible_env is None:
            return torch.cuda.device_count()

        listed = len(visible_env.split(",")) if visible_env else 0
        return min(torch.cuda.device_count(), listed)

    if torch.backends.mps.is_available():
        return 1

    if is_xpu_available():
        return torch.xpu.device_count()

    return 0
|
xinference/fields.py
CHANGED
xinference/model/audio/core.py
CHANGED
|
@@ -16,8 +16,7 @@ import os
|
|
|
16
16
|
from collections import defaultdict
|
|
17
17
|
from typing import Dict, List, Optional, Tuple
|
|
18
18
|
|
|
19
|
-
from
|
|
20
|
-
|
|
19
|
+
from ..._compat import BaseModel
|
|
21
20
|
from ...constants import XINFERENCE_CACHE_DIR
|
|
22
21
|
from ..core import ModelDescription
|
|
23
22
|
from ..utils import valid_model_revision
|
|
@@ -14,6 +14,12 @@
|
|
|
14
14
|
import logging
|
|
15
15
|
from typing import TYPE_CHECKING, Dict, Optional
|
|
16
16
|
|
|
17
|
+
from xinference.device_utils import (
|
|
18
|
+
get_available_device,
|
|
19
|
+
get_device_preferred_dtype,
|
|
20
|
+
is_device_available,
|
|
21
|
+
)
|
|
22
|
+
|
|
17
23
|
if TYPE_CHECKING:
|
|
18
24
|
from .core import AudioModelFamilyV1
|
|
19
25
|
|
|
@@ -37,11 +43,15 @@ class WhisperModel:
|
|
|
37
43
|
self._kwargs = kwargs
|
|
38
44
|
|
|
39
45
|
def load(self):
|
|
40
|
-
import torch
|
|
41
46
|
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
|
42
47
|
|
|
43
|
-
|
|
44
|
-
|
|
48
|
+
if self._device is None:
|
|
49
|
+
self._device = get_available_device()
|
|
50
|
+
else:
|
|
51
|
+
if not is_device_available(self._device):
|
|
52
|
+
raise ValueError(f"Device {self._device} is not available!")
|
|
53
|
+
|
|
54
|
+
torch_dtype = get_device_preferred_dtype(self._device)
|
|
45
55
|
|
|
46
56
|
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
|
47
57
|
self._model_path,
|
|
@@ -49,7 +59,7 @@ class WhisperModel:
|
|
|
49
59
|
low_cpu_mem_usage=True,
|
|
50
60
|
use_safetensors=True,
|
|
51
61
|
)
|
|
52
|
-
model.to(
|
|
62
|
+
model.to(self._device)
|
|
53
63
|
|
|
54
64
|
processor = AutoProcessor.from_pretrained(self._model_path)
|
|
55
65
|
|
|
@@ -63,7 +73,7 @@ class WhisperModel:
|
|
|
63
73
|
batch_size=16,
|
|
64
74
|
return_timestamps=False,
|
|
65
75
|
torch_dtype=torch_dtype,
|
|
66
|
-
device=
|
|
76
|
+
device=self._device,
|
|
67
77
|
)
|
|
68
78
|
|
|
69
79
|
def _call_model(
|
|
@@ -99,9 +109,11 @@ class WhisperModel:
|
|
|
99
109
|
)
|
|
100
110
|
return self._call_model(
|
|
101
111
|
audio=audio,
|
|
102
|
-
generate_kwargs=
|
|
103
|
-
|
|
104
|
-
|
|
112
|
+
generate_kwargs=(
|
|
113
|
+
{"language": language, "task": "transcribe"}
|
|
114
|
+
if language is not None
|
|
115
|
+
else {"task": "transcribe"}
|
|
116
|
+
),
|
|
105
117
|
response_format=response_format,
|
|
106
118
|
)
|
|
107
119
|
|
xinference/model/core.py
CHANGED
|
@@ -15,6 +15,8 @@
|
|
|
15
15
|
from abc import ABC, abstractmethod
|
|
16
16
|
from typing import Any, List, Optional, Tuple
|
|
17
17
|
|
|
18
|
+
from .._compat import BaseModel
|
|
19
|
+
|
|
18
20
|
|
|
19
21
|
class ModelDescription(ABC):
|
|
20
22
|
def __init__(
|
|
@@ -94,3 +96,10 @@ def create_model_instance(
|
|
|
94
96
|
)
|
|
95
97
|
else:
|
|
96
98
|
raise ValueError(f"Unsupported model type: {model_type}.")
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class CacheableModelSpec(BaseModel):
    """Fields naming a model and the hub/repository entry it comes from.

    NOTE(review): presumably the shared shape consumed by model-cache
    helpers; confirm against the callers, which are not visible here.
    """

    # Name of the model.
    model_name: str
    # Identifier of the model; presumably the repository id on the hub
    # given by ``model_hub`` (TODO confirm).
    model_id: str
    # Optional revision string; may be None.
    model_revision: Optional[str]
    # Hub name; defaults to "huggingface".
    model_hub: str = "huggingface"
|