xinference 0.8.5__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/__init__.py +6 -0
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +136 -74
- xinference/core/model.py +3 -2
- xinference/core/resource.py +4 -2
- xinference/core/supervisor.py +37 -12
- xinference/core/worker.py +13 -13
- xinference/deploy/worker.py +7 -7
- xinference/device_utils.py +100 -0
- xinference/model/audio/whisper.py +20 -8
- xinference/model/image/core.py +5 -1
- xinference/model/image/stable_diffusion/core.py +3 -5
- xinference/model/llm/llm_family.json +93 -3
- xinference/model/llm/llm_family_modelscope.json +46 -10
- xinference/model/llm/pytorch/compression.py +3 -1
- xinference/model/llm/pytorch/core.py +33 -14
- xinference/model/llm/pytorch/qwen_vl.py +5 -3
- xinference/model/llm/pytorch/spec_decoding_utils.py +3 -1
- xinference/model/llm/pytorch/spec_model.py +20 -17
- xinference/model/llm/pytorch/utils.py +3 -2
- xinference/model/llm/pytorch/yi_vl.py +9 -3
- xinference/model/llm/utils.py +9 -0
- xinference/model/utils.py +6 -16
- xinference/thirdparty/llava/mm_utils.py +1 -1
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.87d39ffb.js +3 -0
- xinference/web/ui/build/static/js/main.87d39ffb.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{8d95a492a6b08c192e212189567b59b98282ca6f3ed52dd9bd053a6be7ff4e65.json → 027745bdb9bb9fe375f2eef7377e9f7ba82510ed90c05f9e5b34ba314bb93498.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/03d5e7de9b1728d71cd8c483a35f4373eb66932022026142e03005d9de1a2a44.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0738899eefad7f90261125823d87ea9f0d53667b1479a0c1f398aff14f2bbd2a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/09a21ab449599121eadfdd12d6dba40c5a1c5b35dc90be17108eeff80daf79b8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/09d043a8289da33d05768f5face63cc47e9ce9646d67f5589865db511574e8f9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0b901c8f077905bf153a1390b207277c31e893f83f9ced5d29a0a0c85a07b5eb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0da1ff7c9602609cfa8b82a67140114f3037130fe79583bbf23b69cef81823a6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0f87caef05125425c0ca418a32ce8a53e9becffd464411b2ff94e3c7a37c7b5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{cb50b4d54ce9c249d580f7f1fc2df4beab4f13c4d16f486c89239809570de8fb.json → 0fc333c35ad49c0db7f18f77521984a36067cfce4206df50ce16a2fdc88be1c6.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/13fdd92dd5e2fe11640cf6ec0368e3c098d3f0c43e11703ccf157e8c41410d5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1464f950e5600fe2e5890186a1ae3750cdabbadd8525070cef6273450760bcf6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/17c53dc44a324629efef425833ab48cc05243d089f4eb3c731da63568fcff35e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1946a52cb34e675da423de15b703437b4c841e4f5aed6ddd932a091c145e17b0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1e0da5ce4d8ccdbe33705a47bdc0d5d1d7e4e48c8e5c1d0b8c473e9f491930ac.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1e3ec3302ef2253fad5e86bf429baf6b69e57e769f38ec1aaada87f3114b6a1f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1f18804827897c337874b289873df611b44869f86ced1d4351fbe0b45ed17518.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/21b64f7fdb74c2c35479811c3f5683ee161d3b64a4fade0cd1fc0044332c79b2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/22873d21eab4b5a04b183cc3d2cbfafa431e0ce99649aeb900cffcfc33ecbca4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{7a5fecf96af770954035948b07dfadeceb33536b4763a2f9e548121ae555b75f.json → 228b78cd0950f1c967cb33b210b11497052a9998f0b1d1f31ecee0fd8b99fb40.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2328fc2360224ac3eef5c5261fc2a75cdce7fe58c210af8d3e532ec609464f5f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/25fd89450dcf73a205a5f8c82dbc604bf872d574b748427c26ba79b8711ec502.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{b6e95546cdd247e50d9c916dd3bef5216f96c32a267e54b51b23cc22631be0c4.json → 288d6573b09f0e2b0277e1f301bcb57a194c01f3983e3c1e03c8b2ea72653194.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2c04900a020b813e2e99b15748c3c3ef5a1627c33d5668e45bc6a798f0d28dae.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2cd0fd50d8f213cdd94daffe7d0f997991072b11936ead7c48be2af3fdbd1fda.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2e057c12322da07a537a576a3917d512ac6cb291be5aa29ce23d2679a4fd16b1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{abd0662d21fbe39e483fbc4d37faa9e24d93889b5b40d42e7e20773c66ee3894.json → 327c21db6124a5209f41996d0d3359b462d5baff3974296f5acfddfc532434e6.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/33294bb0080595100e22c960929a6f3086f5ea632a4bcd9216069f95347a63a9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3361869e45eddd7d48ff7995e1a12ac454c9eb92d4c87f49ce80c1e9482c1e91.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/375d2f6a31e01c5a82fd0e18f42198f9fbb7117491f3e92d70cf36055f159a6e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3808217efee76345ba19af3ffdba045888e0908b0dc7fc38e54655cdc3504c60.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/39db740a99454006312dec9e4845f850c51e043458ca94eba8bb256521fabcd4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3e737bcdbcbc407ccd65b90e199ef0c3214b261e8e41dbf14d921384a717d9ee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3e87c7d4f1e5c93603eed3e861945e44dd1e664061a89aaef1aa6435f4f4d7d0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3ecf70018028c8cf6cdc2d24d2b88f2707bd5c23a336fd3d82bd6a1af0db7ded.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/43314d1e9fed30ed6cf5cab3f5033da067e58be29e36448e3c0a8127403a9388.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/442467c09440131f7c25ddcdeb356d2a87783ad4e32d89f410a16cb6e7d83b22.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/487524f2b3494bfcfbd8c12918092a52ff2ce3eefe9bf0a557389ec1c74d3e67.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4de4b0282a3d1ae02bec0bfe7d6658f039141406e8bfdbe86bf1dc5b115a2524.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4ec72faab00e4cbffefdf43b987ae3d15beb60e282b9ca86dad37f4d0fc3f2bd.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/51403f1b70af0f4577d4594cbfcf9d9759997e5c49097e7a07219000d1be4ff1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/566a0829bfeab41e40bbdca55ad056067e172508af4b333403d4fd334a8c8742.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5689d9aee417a0834fed66720cf3b48c34cc26364cd9ab7e3d809997bb9faec9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5bedd27475bda204eafccdc636a56ba5765353391049a0b947234faaab3d021a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{94daea23b698bbb26bbbab69bbca8ad521c79fcbd6b495c84c740e4006a2ced2.json → 61014d40df4486972fb6d5909dff45f24bec48573af99ca8bcd79835385c7da0.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/6139c80ec46d77469e9a89924de41d93050f19e9ed541e358f09cbf20ccd7c86.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63ead4dee6a4f4ae47aed6918a2ab3e172739ecdffaa85e063f9a2c9165ff93c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6bf8c25e0f2051d994990bf73fdcb3e08963e11589842895593f199eab13ba9a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6d42f63cd673d1c60e534a174cd851e9aefd2d5d45a4aa6128d794457b64de53.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7044e626a91582077b8cbad09bcf0cc118e92e11ebfa2ebc17622baf98aa56bf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/71ec408030881aad69854815b8681185d0174cc0cf67f20fb2c3cd13c74fce07.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7280614d5bddce524182024cc288953eb89d46c1dbe470ed5eb5ecab5de28385.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7293ad8ba7b5fc6ed791aed44a766a396516c782f5d0e564b9ef0997645ec1dd.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/79be1e0dd6b1c3cbb3f08b349454f4791edaac451fa4d1f10eaf433e57ecf70f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7c0571c03aa8c90eb9452ba23133aed9b3b2a049910d4d8276588b438a4caaba.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/81ed094096cd9dc36f3ddebe61818766f43d3338c171f2269958f47265e60f24.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/86355e55059ddb15f71c0c651a2e622cee6f3141da3a018775ca00c74dd37dc2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{d1af6db565bae36218251c01a817c080ef764172dd0694e38d27b525a3ffd1d2.json → 8710fe63bcc6b95502287c36a7bd881b74ee2926982e120e1e8070a2c8a668fc.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8779104679d8f85017b551d1d108ea1ac701e184ad7bcc8a367526353a6c5175.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/881c573e6aa9954e7bd5e3bc5a6ef62c0db9b93815030080f677f6834e431434.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8d24c51a0905b919436aac2bad441eb1651c46c4e7aa17c99eb63c7760ce6615.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/93bf5cc8cbe3f0b10f291800e1896f03506a8933cd490e07ee0ff94498f93bce.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9401db0316ac34790aa5b972e9e1c50de58ef313b50840a37e08f0936a7197b6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/98afeafdfc285a744d116fb5f189a16ad2b977adbd4fc0757f7ade1b29b8ad31.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9a4e624d1ce66832fb213ead02745d84661d9bfcc21b62b4c37c680c3c3f5cbb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9a7b26f85224d33d431354ac8d7849388b7525fc014801824e705561ab2ac7a7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9dcb04ac6cb341233a2e98f2fb417d059c2ea6a56d2ff6f30bb27dae9bd6c756.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9ebede8a4bccef016eef418a668b3212962779ea6d2531edb23d3dca81163496.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a0efc8db4289116bdb312014df91fc781f66fe171d63027bbd5aae25cd54c325.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a212defc32ca338ba673722c72815d30974275f1a82b67405a898f826fc8d782.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a2685acab57fd6d874bf6069a587e3770b55c72cacc69cb167bb3dbd16a551c5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3b7d6233e50dc3832421793a5752c1db99f3a214c9896c97bbd395d007f09b1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3d0c810a255108613be0a175d53d673c4b6843c2a87dd0f157b4d3731af2092.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a6633774053545c60721e345820e21cf3b089df219f7ee3fcceaf27af3a9e84b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a6bb917e4869d7d6ddf50b75932cad64e94b9e33db2aa40c4a489a045819bb36.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a816031fafa564dc4af27383612e51ac29af51354f3bba750a70ac0c03b6bd30.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/af6349a1e9761cb98af4b76904440fb7f8d54288ee6ba37116f670226796e689.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/afce63eaad32a9968590e4500e123c81b385ea3b6046b2af1ec7ea8a49ba657b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/aff24b61e5d55147d0a36ef1896b9bd35c995c357f44eeee1209f23db2118636.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b01676c5ca97e3409a1b7bac91b6a797be3719a8b7ec72ad06c9beb296c67bb7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b0c5e967d1af90d545f1d34c9893a841296b0d1dd50de65eeaf19054995d2a94.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b561d429cea6143e1bf7e6e7e690f9e6ce1dce4adfcacb57d7c5578195d1d190.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b70187bb5aecf3bd7afbe8469b49e2a0a94a0c62727530d0dc2f84ee4577bcfc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bd21f6039e23775fb4631bb56f2eef1d132c03a60cc6fe4530569ecfeef456a3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/be09b5508d0ca5ac0300144a682d4cde2d5b4b96ecb9fc690c1d5e7ee7411b3e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/be8c9e83f0dd4af2e9bcf6844dd8c4445fe634b6c71da6debfa7acabb7a875c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c0d1317dfae0c5ddf5a3ba9aef6bd2204f2daf9754f3270c4cab5c4632f78000.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c1e3afdaae5649274f7d34910015b88bc02ba6a00b935e6597f9c670fedc05c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c29ec02243dd370616a489f7de67e4e479d2f7f12504ba7f77304cffe60befbc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c2a4d298149e6c66713eed4f56f4bbbfa4022a193891029b84e429baf59cc246.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c3d2fc03602f7604a65e631e8595ce4cfabd8887343489b34a65e3a73c5546b3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c41078c8692e0635fb52d01bc5157528c7308ae5140c134aa07c1fcda4f7a5e5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c4b5f1e444ddd41f8cd30e618c185b16235e313ea34d944959628001a91e38a5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c7ed9bc3125498e475f3170b8126c93368e9cbfd6a5e73ee63437d90e39b69e5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c996b3dc884130f034a37551845201bc5661f2dcd282aed737329f8d4dd387b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c9fef6e7f0a532c2d1cc25dffd496ea8355dd14ce0105a169b391bfeb9277e4f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cc8f5a2bbe4da9852ccdf64150115271baa2d282d74d7fab854dc5ca641dd519.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ce45adea497443bfc1b6baabd0aa3d85888dd10b1b7fd009edf608e390b2a844.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ce4b82a3855d18ba09e922f6e2cc1b8c20f0e5261b27303c52d4e1ba5a09a078.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cf16614311e144768bdc064da754efad77597cac5efe6852be996b9eaaed695a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d074bfe592a195f339acbcd8743f8b64cfd09b45ca1013598269227fc092f2db.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d2a95455cc492d4fbf2839a21eeaef00e7fad2aac93825d2443a38127ad02007.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d4e6e712ebbbf0f30745093e2b6d235c13fb43d37b586a0109e87c19eafdf35d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d88a02264125b07881eecff752d9c8d718edcf2d87e5431b13e54809dc7bf4f2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d907925fb28912f2e98cccfe758bce680aa4f894cc9d2079d73d7e2d85481d47.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/daf050ec3a864b9831dcb32cee1fb2739dafb68b369e9108469ca1e6fd166867.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/de9645d55ce8744856862eae4a3d3a1266e9df9d575aa3e2f62a75a5d97444f7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e04199bdfb87226d6aeae387ef677eae06483f9f135b5a3f1360ec1a243c86cd.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e97d1f31f54263bb90b7c3e71be0e19973d76d945fb5d526bf93fb449708bbf0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e9f87f5c370a731ee42098ae6ff38ca6ad256396158c03f84c5e770c6b76a43d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f2ad375dbc8779f54eb9c377fd951cd442f3766d5eb70d40613b952b811643da.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f35df9d5a0e0341bcc0f39a462e46988c330cdb905cc690b08de4a9cf5771f28.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f433e33369483f75d31fe937b8a597ef2d7cdfed339931643ff966a33dbc6158.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f4c164c89cfbcb1d81bc0b60e3e7c8715e07c7a23118acf4e3e16715beac09c5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f545d937cfb9d974f4aaaa9dc1c12adfed0d4f608de7d0a7247452d9bbbb55a4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{3cc5ec39e76e1768ffb070a8ae18ee61b68f3ed3593178134d3edda5ed378c6c.json → f8d2c4372093d0ff3a2f78260e004e6c4e2b8ab4f5be9713ebc16c50e9c2ab4b.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fa41c6c8ffa1047b7f2f52b6532ea4d9f5754e5bbc11d1340bae7f75ae5ed84e.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +45 -45
- xinference/web/ui/node_modules/@babel/runtime/package.json +64 -37
- xinference/web/ui/node_modules/@mui/private-theming/package.json +7 -6
- xinference/web/ui/node_modules/@mui/styled-engine/package.json +6 -5
- xinference/web/ui/node_modules/@mui/system/node_modules/clsx/package.json +22 -10
- xinference/web/ui/node_modules/@mui/system/package.json +13 -12
- xinference/web/ui/node_modules/@mui/types/package.json +3 -2
- xinference/web/ui/node_modules/@mui/utils/ClassNameGenerator/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/HTMLElementType/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/capitalize/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/chainPropTypes/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/clamp/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/composeClasses/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/createChainedFunction/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/debounce/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/deepmerge/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/deprecatedPropType/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/elementAcceptingRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/elementTypeAcceptingRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/exactProp/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/formatMuiErrorMessage/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/generateUtilityClass/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/generateUtilityClasses/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/getDisplayName/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/getScrollbarSize/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/getValidReactChildren/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/integerPropType/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/isMuiElement/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/ownerDocument/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/ownerWindow/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/package.json +8 -7
- xinference/web/ui/node_modules/@mui/utils/ponyfillGlobal/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/refType/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/requirePropFactory/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/resolveProps/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/scrollLeft/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/setRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/unsupportedProp/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useControlled/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useEnhancedEffect/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useEventCallback/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useForkRef/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useId/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useIsFocusVisible/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useLazyRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useOnMount/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/usePreviousProps/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useTimeout/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/visuallyHidden/package.json +6 -0
- xinference/web/ui/node_modules/@types/prop-types/package.json +2 -2
- xinference/web/ui/node_modules/csstype/package.json +3 -3
- xinference/web/ui/package-lock.json +47 -45
- xinference/web/ui/package.json +2 -0
- {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/METADATA +4 -1
- {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/RECORD +206 -150
- xinference/web/ui/build/static/js/main.9715fe74.js +0 -3
- xinference/web/ui/build/static/js/main.9715fe74.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/02f54fcadc6fe9b1be8451435f9a097c82b3d665bae2d2df03ba0b36cebeca0c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/051391807ffd255b652b63dc2ff12e5ce80113e1b712c81b6c1986a11cd85d3c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/05710e94caaf5a2da790d67aa572bf6bc19bae0237668c8d97f4da62ab52fe5c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/08a3470840a55d359a967d0763cd1ae5c1f15f3ca31118e29b37b4fbbdb248c7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0e68b0e268559a563dde6fa4c654922d98c74f5140854437f3e5a01bf23ba41f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/10dc7661741dfc88df5538bbf835870ae47b1a18f36c2f7b9650651e7304ce72.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/18b3d28a2692ab4f2d05254221f53b3c13259435bd98cb90ff5d7250d93f11e0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1b612ab687e3a4e14db156bd92a75b8a3e96ff04bfbf8f3351c7e801bc61cf90.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1bb839ffeb39ed12367c179fbf3b784e20f6f98fb87c597717bc11ebe3201c59.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1d12e0b0e4e87372b18fef72fcd6e264fa40b0cffdbeddcef004becc4e32cf27.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1d5b806c08ffb55539ee26b5fa9746a373f602354d6e55a0d4379d7e2f903868.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1ecb7ce2bc103bec0d1a9f0cb07132e5ec7731d974998c465798c0e15cafb924.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/20cfc57ae2b8ba23571f386a5f2bf194bd02e8a3913113881bb9804ee822ad58.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/222a437b6773707bbead06ba5db1ed8869e44d9f051a7f6b1a745aeb654679d0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/249709fb20c1254cecc9ba6666e6bcf37b21e47e7a020d5a7f0513680b49473f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/257ad4979294ee3a7ef400feab3b29ebcd259c47b9410a3011a66f4b33a97904.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2779bdc8ed8372f4015d07a45e4b09756bd953250944c3e3efe6b65bf8eaecd8.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2bee7b8bd3d52976a45d6068e1333df88b943e0e679403c809e45382e3818037.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/31c71a85b2d0bbdcdc027e7950a9d1f778288f98888d8595fad5e4fdda5bb31f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/371214b6649e323e7281885b1412c5a515ccc2274f4e1a42921e45777694009c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3eced030d2b3da645f4cbe3dc4ff49744312316e4306ce170991e802e9bfcf8e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/452ae8e7cff99efc35d134014d32610d13217b031b817d3663bb689b3778ebd2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/48e3f5d01ccedde86348a29392bc9b6e301a51b3f7e2476ae0a95a31eb6bf18d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4a311b5bd98f16588caf60b8de4c8d91d038198e6bfd496f880f60d4c1ea076d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4ccd71d3f73490a18a5a4de4ad82d63cf019caff6f8d881fd1434755a033bfc5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4ee8343d4254c95e685d49601f1649e8222d7423c2a822e1d01c8fc4cd3582a2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/523a712748fbb7abf0c5423df62e65c4659b7e450182c890e847cd78ace7cac5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5777049bbacf7322891761a5b959e62eec22b0f96a70dbf97e58d530c6258a55.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5905957ce95f7dee64f0739359e990b8b518cff088c6958487641a3d21737d1e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5ac79df79d2dff64da97b02d5c90b36bc0d44a4a4229f6efed2993f93ee1d7a1.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/64d3ef38fd394979b09061e67fc47f04b7295401677825c64c72ea2f44cf910c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/68a5d8e66687770e05aebcae9a8047189dc577d6150fbf0e62d2b2c11c4335c5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/69d5f27e40d5e7040c8e8e303ccaee019b54092345acc3838d6eb943c474ce39.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/6a07b8d00482c07a4b7c73f6bebb79fad54bbea35ba358681ede6ff299a09f6b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/70b0307a43b9f8c876bf2a1f7dea4cc56cc0ea33743e863c0b3983bc48109f2d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/710c39a26816820c2cfd00b19cd0112851d64b8ac6b201b18727b43b5a69d92c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/722f4c03da79795d1fb738323f3eb6794ab18711e20f141c0b678c5d4cb3dce9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8237bdfff695d71496ee2ff83ba0392d753f8e1dff0d002a1b2c9e249f03083c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8342e3800fed63e8e805850810aee4fd9b0558aca10c8b6b770fe5bff1bed266.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/847a71f54f7a059efc71f70c297b6b4182a633130715c80f817eb533de108c12.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8731a959d9b7278ba7a1c044bb4253bfff63079da69b21ff80221ae5e037dbbe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/87b4e94b045df561c8f4bd253c84447c96e0dededbcb6646c0cd180119960990.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/894a334446bac96d74385c23c9d6f2d117ee29511897700430cd0e578cb32527.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8c5ba88b620eeedbb99db18a610138353a5a657170fdb8bd6969585f81b34f74.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9004bc083cf179941e64130390a27afcf7593b21dd64a87d03e1e6489032aa13.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9b84631723b52988d3748d80b994e43d1944ca55098a5d908c9061eeb4c84f90.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9ee1f84bde10cd78ca23a4681c7372195748bacd5e41c3b1df8e39d960294d81.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a127f13651a7398f5fc378456a52cfa0c6260f6109611d3fb012aa7c7a4ee17a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a5e2e9f707eb7039bea096ca117d996b8f9cbc2a5613fd8e0c5b0094444ce23c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a6131b7fb1b30fd63d798845dab508a22b21c616b361e6214bf711915c5fef3d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a7b8ccb199158c897ed7d7835942eb37569221be4101eecfa7a7ce62e0c76f1f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/acaf6dbddd57b63a874e9062803850ce4911c8dc63a362eacdc4d2d0f48f8c16.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ade3cd82af85d2d28138bd942228b01e27559a5a4072ee049dcb47612ee9a506.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/aea82c71088dc9afd6f4ba71434286af5c6ab5e9b87088b0e84f351146d06489.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/afbd547078b20bb14596f20e165cb40413147e3de501b126c97bb417f8b9a53e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/afc597494ebc62d22e4d3ac2a839bd5f30d5e47ce1d0576e0f62407ef6c78652.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/affbbac58777e26ba8b6dcda8cea72c76975bd0207f0a510bfde5983d3081f74.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b0aa2ece62a1f5fa015f9e6b59d20b79be727fb5a3f4fc747200163097f320ad.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b10391d3ae53452c8ec97d230f064e03a8377aab2be4b1b3e30cb3e054b2341a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b4bab0c147360f3e335251166d3d25602e2e0e160fb3734ac019a5b42499179c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b57dcff363e045fd11ea2bcbc58cce470f1bc22d3ac14c8acbf63a36637050ff.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b8de42f7f5dc1df9e802898a0ebb35adb57d269d7af21e6c66a0fde0ff226a08.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/be696cadea3bf577c0defc8f8d100232369763dde754b5ba9399f45f42aed215.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c5a30d7c6cb484268e9126dd6d078229a1dcaa07fb064077b87d15224c61a3aa.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c67e2077e70c4751e9e1dd98f378d979abd9c8a39fbf6c69f02fb4e5b04eef87.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c81c1cad35be60a4fad090374edf50a884540e306d464b803120554df61e817e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d1054fd4d09666318a00ca0aea5ff42a2fdb17b3a42b5f77b93433d7408eb2f4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d1ec524ce17a48816fc55fe665572c1ef4982a4bd1ecb01badd0fdffc465242c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d4d61a537c45c4506a6b7e3478206b71dc96e0c6c6410cef9b5d2f47018bc578.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e2e563b606cac9d51661cf68ba1729eb5df7861af521adccedc1b163439b657d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e3890e804dfaeb19b79f52e1b20198b587da61a9a3a1f55417d2cc5ce4ad7dae.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e8c3fadbd12f0e2c7b70d84acbcabd0a5c96b05380a7b3167b00e8739d829d24.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ebb5ae1a82b33b94e2b0cfec599bec7500aff0a1701f91736a6f3a4f77617560.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f30f0396f4e54f0355112d5eba4b16c3a56f47e456eb117a9dc7fa6218ce21fe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f318ac15e999a553c00832029245730fe25c2916860b0ff55eee1f0ab926d84e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f55f4e657cc2d797b7e73a64ce40352f9a5dde31763a3e89c14d3b983307f30e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f8a68931d652a5e3be4e1ae06210a223de1239f3884ad858cecf9fe8d11c2ffe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f95a8bd358eeb55fa2f49f1224cc2f4f36006359856744ff09ae4bb295f59ec1.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fb6fbd693a53d3c542d9adf83e0bb458918d39bf101eaf6d6662a2fbc35c1adc.json +0 -1
- /xinference/web/ui/build/static/js/{main.9715fe74.js.LICENSE.txt → main.87d39ffb.js.LICENSE.txt} +0 -0
- {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/LICENSE +0 -0
- {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/WHEEL +0 -0
- {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# Copyright 2022-2023 XProbe Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
|
|
17
|
+
import torch
|
|
18
|
+
from typing_extensions import Literal, Union
|
|
19
|
+
|
|
20
|
+
DeviceType = Literal["cuda", "mps", "xpu", "cpu"]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def is_xpu_available() -> bool:
|
|
24
|
+
return hasattr(torch, "xpu") and torch.xpu.is_available()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_available_device() -> DeviceType:
|
|
28
|
+
if torch.cuda.is_available():
|
|
29
|
+
return "cuda"
|
|
30
|
+
elif torch.backends.mps.is_available():
|
|
31
|
+
return "mps"
|
|
32
|
+
elif is_xpu_available():
|
|
33
|
+
return "xpu"
|
|
34
|
+
return "cpu"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def is_device_available(device: str) -> bool:
|
|
38
|
+
if device == "cuda":
|
|
39
|
+
return torch.cuda.is_available()
|
|
40
|
+
elif device == "mps":
|
|
41
|
+
return torch.backends.mps.is_available()
|
|
42
|
+
elif device == "xpu":
|
|
43
|
+
return is_xpu_available()
|
|
44
|
+
elif device == "cpu":
|
|
45
|
+
return True
|
|
46
|
+
|
|
47
|
+
return False
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def move_model_to_available_device(model):
|
|
51
|
+
device = get_available_device()
|
|
52
|
+
|
|
53
|
+
if device == "cpu":
|
|
54
|
+
return model
|
|
55
|
+
|
|
56
|
+
return model.to(device)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def get_device_preferred_dtype(device: str) -> Union[torch.dtype, None]:
|
|
60
|
+
if device == "cpu":
|
|
61
|
+
return torch.float32
|
|
62
|
+
elif device == "cuda" or device == "mps":
|
|
63
|
+
return torch.float16
|
|
64
|
+
elif device == "xpu":
|
|
65
|
+
return torch.bfloat16
|
|
66
|
+
|
|
67
|
+
return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def is_hf_accelerate_supported(device: str) -> bool:
|
|
71
|
+
return device == "cuda" or device == "xpu"
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def empty_cache():
|
|
75
|
+
if torch.cuda.is_available():
|
|
76
|
+
torch.cuda.empty_cache()
|
|
77
|
+
if torch.backends.mps.is_available():
|
|
78
|
+
torch.mps.empty_cache()
|
|
79
|
+
if is_xpu_available():
|
|
80
|
+
torch.xpu.empty_cache()
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def gpu_count():
|
|
84
|
+
if torch.cuda.is_available():
|
|
85
|
+
cuda_visible_devices_env = os.getenv("CUDA_VISIBLE_DEVICES", None)
|
|
86
|
+
|
|
87
|
+
if cuda_visible_devices_env is None:
|
|
88
|
+
return torch.cuda.device_count()
|
|
89
|
+
|
|
90
|
+
cuda_visible_devices = (
|
|
91
|
+
cuda_visible_devices_env.split(",") if cuda_visible_devices_env else []
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
return min(torch.cuda.device_count(), len(cuda_visible_devices))
|
|
95
|
+
elif torch.backends.mps.is_available():
|
|
96
|
+
return 1
|
|
97
|
+
elif is_xpu_available():
|
|
98
|
+
return torch.xpu.device_count()
|
|
99
|
+
else:
|
|
100
|
+
return 0
|
|
@@ -14,6 +14,12 @@
|
|
|
14
14
|
import logging
|
|
15
15
|
from typing import TYPE_CHECKING, Dict, Optional
|
|
16
16
|
|
|
17
|
+
from xinference.device_utils import (
|
|
18
|
+
get_available_device,
|
|
19
|
+
get_device_preferred_dtype,
|
|
20
|
+
is_device_available,
|
|
21
|
+
)
|
|
22
|
+
|
|
17
23
|
if TYPE_CHECKING:
|
|
18
24
|
from .core import AudioModelFamilyV1
|
|
19
25
|
|
|
@@ -37,11 +43,15 @@ class WhisperModel:
|
|
|
37
43
|
self._kwargs = kwargs
|
|
38
44
|
|
|
39
45
|
def load(self):
|
|
40
|
-
import torch
|
|
41
46
|
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
|
42
47
|
|
|
43
|
-
|
|
44
|
-
|
|
48
|
+
if self._device is None:
|
|
49
|
+
self._device = get_available_device()
|
|
50
|
+
else:
|
|
51
|
+
if not is_device_available(self._device):
|
|
52
|
+
raise ValueError(f"Device {self._device} is not available!")
|
|
53
|
+
|
|
54
|
+
torch_dtype = get_device_preferred_dtype(self._device)
|
|
45
55
|
|
|
46
56
|
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
|
47
57
|
self._model_path,
|
|
@@ -49,7 +59,7 @@ class WhisperModel:
|
|
|
49
59
|
low_cpu_mem_usage=True,
|
|
50
60
|
use_safetensors=True,
|
|
51
61
|
)
|
|
52
|
-
model.to(
|
|
62
|
+
model.to(self._device)
|
|
53
63
|
|
|
54
64
|
processor = AutoProcessor.from_pretrained(self._model_path)
|
|
55
65
|
|
|
@@ -63,7 +73,7 @@ class WhisperModel:
|
|
|
63
73
|
batch_size=16,
|
|
64
74
|
return_timestamps=False,
|
|
65
75
|
torch_dtype=torch_dtype,
|
|
66
|
-
device=
|
|
76
|
+
device=self._device,
|
|
67
77
|
)
|
|
68
78
|
|
|
69
79
|
def _call_model(
|
|
@@ -99,9 +109,11 @@ class WhisperModel:
|
|
|
99
109
|
)
|
|
100
110
|
return self._call_model(
|
|
101
111
|
audio=audio,
|
|
102
|
-
generate_kwargs=
|
|
103
|
-
|
|
104
|
-
|
|
112
|
+
generate_kwargs=(
|
|
113
|
+
{"language": language, "task": "transcribe"}
|
|
114
|
+
if language is not None
|
|
115
|
+
else {"task": "transcribe"}
|
|
116
|
+
),
|
|
105
117
|
response_format=response_format,
|
|
106
118
|
)
|
|
107
119
|
|
xinference/model/image/core.py
CHANGED
|
@@ -56,6 +56,10 @@ class ImageModelDescription(ModelDescription):
|
|
|
56
56
|
self._model_spec = model_spec
|
|
57
57
|
|
|
58
58
|
def to_dict(self):
|
|
59
|
+
if self._model_spec.controlnet is not None:
|
|
60
|
+
controlnet = [cn.dict() for cn in self._model_spec.controlnet]
|
|
61
|
+
else:
|
|
62
|
+
controlnet = self._model_spec.controlnet
|
|
59
63
|
return {
|
|
60
64
|
"model_type": "image",
|
|
61
65
|
"address": self.address,
|
|
@@ -63,7 +67,7 @@ class ImageModelDescription(ModelDescription):
|
|
|
63
67
|
"model_name": self._model_spec.model_name,
|
|
64
68
|
"model_family": self._model_spec.model_family,
|
|
65
69
|
"model_revision": self._model_spec.model_revision,
|
|
66
|
-
"controlnet":
|
|
70
|
+
"controlnet": controlnet,
|
|
67
71
|
}
|
|
68
72
|
|
|
69
73
|
def to_version_info(self):
|
|
@@ -24,6 +24,7 @@ from io import BytesIO
|
|
|
24
24
|
from typing import List, Optional, Union
|
|
25
25
|
|
|
26
26
|
from ....constants import XINFERENCE_IMAGE_DIR
|
|
27
|
+
from ....device_utils import move_model_to_available_device
|
|
27
28
|
from ....types import Image, ImageList
|
|
28
29
|
|
|
29
30
|
logger = logging.getLogger(__name__)
|
|
@@ -40,7 +41,7 @@ class DiffusionModel:
|
|
|
40
41
|
self._kwargs = kwargs
|
|
41
42
|
|
|
42
43
|
def load(self):
|
|
43
|
-
import torch
|
|
44
|
+
# import torch
|
|
44
45
|
from diffusers import AutoPipelineForText2Image
|
|
45
46
|
|
|
46
47
|
controlnet = self._kwargs.get("controlnet")
|
|
@@ -57,10 +58,7 @@ class DiffusionModel:
|
|
|
57
58
|
# torch_dtype=torch.float16,
|
|
58
59
|
# use_safetensors=True,
|
|
59
60
|
)
|
|
60
|
-
|
|
61
|
-
self._model = self._model.to("cuda")
|
|
62
|
-
elif torch.backends.mps.is_available():
|
|
63
|
-
self._model = self._model.to("mps")
|
|
61
|
+
self._model = move_model_to_available_device(self._model)
|
|
64
62
|
# Recommended if your computer has < 64 GB of RAM
|
|
65
63
|
self._model.enable_attention_slicing()
|
|
66
64
|
|
|
@@ -1514,10 +1514,33 @@
|
|
|
1514
1514
|
],
|
|
1515
1515
|
"model_id": "Qwen/Qwen1.5-72B-Chat-AWQ"
|
|
1516
1516
|
},
|
|
1517
|
+
{
|
|
1518
|
+
"model_format": "ggufv2",
|
|
1519
|
+
"model_size_in_billions": "0_5",
|
|
1520
|
+
"quantizations": [
|
|
1521
|
+
"q2_k",
|
|
1522
|
+
"q3_k_m",
|
|
1523
|
+
"q4_0",
|
|
1524
|
+
"q4_k_m",
|
|
1525
|
+
"q5_0",
|
|
1526
|
+
"q5_k_m",
|
|
1527
|
+
"q6_k",
|
|
1528
|
+
"q8_0"
|
|
1529
|
+
],
|
|
1530
|
+
"model_id": "Qwen/Qwen1.5-0.5B-Chat-GGUF",
|
|
1531
|
+
"model_file_name_template": "qwen1_5-0_5b-chat-{quantization}.gguf"
|
|
1532
|
+
},
|
|
1517
1533
|
{
|
|
1518
1534
|
"model_format": "ggufv2",
|
|
1519
1535
|
"model_size_in_billions": "1_8",
|
|
1520
1536
|
"quantizations": [
|
|
1537
|
+
"q2_k",
|
|
1538
|
+
"q3_k_m",
|
|
1539
|
+
"q4_0",
|
|
1540
|
+
"q4_k_m",
|
|
1541
|
+
"q5_0",
|
|
1542
|
+
"q5_k_m",
|
|
1543
|
+
"q6_k",
|
|
1521
1544
|
"q8_0"
|
|
1522
1545
|
],
|
|
1523
1546
|
"model_id": "Qwen/Qwen1.5-1.8B-Chat-GGUF",
|
|
@@ -1527,6 +1550,13 @@
|
|
|
1527
1550
|
"model_format": "ggufv2",
|
|
1528
1551
|
"model_size_in_billions": 4,
|
|
1529
1552
|
"quantizations": [
|
|
1553
|
+
"q2_k",
|
|
1554
|
+
"q3_k_m",
|
|
1555
|
+
"q4_0",
|
|
1556
|
+
"q4_k_m",
|
|
1557
|
+
"q5_0",
|
|
1558
|
+
"q5_k_m",
|
|
1559
|
+
"q6_k",
|
|
1530
1560
|
"q8_0"
|
|
1531
1561
|
],
|
|
1532
1562
|
"model_id": "Qwen/Qwen1.5-4B-Chat-GGUF",
|
|
@@ -1536,7 +1566,14 @@
|
|
|
1536
1566
|
"model_format": "ggufv2",
|
|
1537
1567
|
"model_size_in_billions": 7,
|
|
1538
1568
|
"quantizations": [
|
|
1539
|
-
"
|
|
1569
|
+
"q2_k",
|
|
1570
|
+
"q3_k_m",
|
|
1571
|
+
"q4_0",
|
|
1572
|
+
"q4_k_m",
|
|
1573
|
+
"q5_0",
|
|
1574
|
+
"q5_k_m",
|
|
1575
|
+
"q6_k",
|
|
1576
|
+
"q8_0"
|
|
1540
1577
|
],
|
|
1541
1578
|
"model_id": "Qwen/Qwen1.5-7B-Chat-GGUF",
|
|
1542
1579
|
"model_file_name_template": "qwen1_5-7b-chat-{quantization}.gguf"
|
|
@@ -1545,7 +1582,14 @@
|
|
|
1545
1582
|
"model_format": "ggufv2",
|
|
1546
1583
|
"model_size_in_billions": 14,
|
|
1547
1584
|
"quantizations": [
|
|
1548
|
-
"
|
|
1585
|
+
"q2_k",
|
|
1586
|
+
"q3_k_m",
|
|
1587
|
+
"q4_0",
|
|
1588
|
+
"q4_k_m",
|
|
1589
|
+
"q5_0",
|
|
1590
|
+
"q5_k_m",
|
|
1591
|
+
"q6_k",
|
|
1592
|
+
"q8_0"
|
|
1549
1593
|
],
|
|
1550
1594
|
"model_id": "Qwen/Qwen1.5-14B-Chat-GGUF",
|
|
1551
1595
|
"model_file_name_template": "qwen1_5-14b-chat-{quantization}.gguf"
|
|
@@ -1554,7 +1598,8 @@
|
|
|
1554
1598
|
"model_format": "ggufv2",
|
|
1555
1599
|
"model_size_in_billions": 72,
|
|
1556
1600
|
"quantizations": [
|
|
1557
|
-
"q2_k"
|
|
1601
|
+
"q2_k",
|
|
1602
|
+
"q3_k_m"
|
|
1558
1603
|
],
|
|
1559
1604
|
"model_id": "Qwen/Qwen1.5-72B-Chat-GGUF",
|
|
1560
1605
|
"model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf"
|
|
@@ -3708,5 +3753,50 @@
|
|
|
3708
3753
|
"<|im_sep|>"
|
|
3709
3754
|
]
|
|
3710
3755
|
}
|
|
3756
|
+
},
|
|
3757
|
+
{
|
|
3758
|
+
"version": 1,
|
|
3759
|
+
"context_length": 8192,
|
|
3760
|
+
"model_name": "gemma-it",
|
|
3761
|
+
"model_lang": [
|
|
3762
|
+
"en"
|
|
3763
|
+
],
|
|
3764
|
+
"model_ability": [
|
|
3765
|
+
"chat"
|
|
3766
|
+
],
|
|
3767
|
+
"model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
|
|
3768
|
+
"model_specs": [
|
|
3769
|
+
{
|
|
3770
|
+
"model_format": "pytorch",
|
|
3771
|
+
"model_size_in_billions": 2,
|
|
3772
|
+
"quantizations": [
|
|
3773
|
+
"none",
|
|
3774
|
+
"4-bit",
|
|
3775
|
+
"8-bit"
|
|
3776
|
+
],
|
|
3777
|
+
"model_id": "google/gemma-2b-it"
|
|
3778
|
+
},
|
|
3779
|
+
{
|
|
3780
|
+
"model_format": "pytorch",
|
|
3781
|
+
"model_size_in_billions": 7,
|
|
3782
|
+
"quantizations": [
|
|
3783
|
+
"none",
|
|
3784
|
+
"4-bit",
|
|
3785
|
+
"8-bit"
|
|
3786
|
+
],
|
|
3787
|
+
"model_id": "google/gemma-7b-it"
|
|
3788
|
+
}
|
|
3789
|
+
],
|
|
3790
|
+
"prompt_style": {
|
|
3791
|
+
"style_name": "gemma",
|
|
3792
|
+
"roles": [
|
|
3793
|
+
"user",
|
|
3794
|
+
"model"
|
|
3795
|
+
],
|
|
3796
|
+
"stop": [
|
|
3797
|
+
"<end_of_turn>",
|
|
3798
|
+
"<start_of_turn>"
|
|
3799
|
+
]
|
|
3800
|
+
}
|
|
3711
3801
|
}
|
|
3712
3802
|
]
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
"Q4_K_M"
|
|
30
30
|
],
|
|
31
31
|
"model_id": "Xorbits/Llama-2-13b-Chat-GGUF",
|
|
32
|
-
"model_file_name_template": "llama-2-
|
|
32
|
+
"model_file_name_template": "llama-2-13b-chat.{quantization}.gguf",
|
|
33
33
|
"model_hub": "modelscope",
|
|
34
34
|
"model_revision": "v0.0.1"
|
|
35
35
|
},
|
|
@@ -1821,61 +1821,97 @@
|
|
|
1821
1821
|
"model_format": "ggufv2",
|
|
1822
1822
|
"model_size_in_billions": "0_5",
|
|
1823
1823
|
"quantizations": [
|
|
1824
|
+
"q2_k",
|
|
1825
|
+
"q3_k_m",
|
|
1826
|
+
"q4_0",
|
|
1827
|
+
"q4_k_m",
|
|
1828
|
+
"q5_0",
|
|
1829
|
+
"q5_k_m",
|
|
1830
|
+
"q6_k",
|
|
1824
1831
|
"q8_0"
|
|
1825
1832
|
],
|
|
1826
1833
|
"model_id": "qwen/Qwen1.5-0.5B-Chat-GGUF",
|
|
1827
1834
|
"model_hub": "modelscope",
|
|
1828
|
-
"model_file_name_template": "
|
|
1835
|
+
"model_file_name_template": "qwen1_5-0_5b-chat-{quantization}.gguf"
|
|
1829
1836
|
},
|
|
1830
1837
|
{
|
|
1831
1838
|
"model_format": "ggufv2",
|
|
1832
1839
|
"model_size_in_billions": "1_8",
|
|
1833
1840
|
"quantizations": [
|
|
1841
|
+
"q2_k",
|
|
1842
|
+
"q3_k_m",
|
|
1843
|
+
"q4_0",
|
|
1844
|
+
"q4_k_m",
|
|
1845
|
+
"q5_0",
|
|
1846
|
+
"q5_k_m",
|
|
1847
|
+
"q6_k",
|
|
1834
1848
|
"q8_0"
|
|
1835
1849
|
],
|
|
1836
1850
|
"model_id": "qwen/Qwen1.5-1.8B-Chat-GGUF",
|
|
1837
1851
|
"model_hub": "modelscope",
|
|
1838
|
-
"model_file_name_template": "
|
|
1852
|
+
"model_file_name_template": "qwen1_5-1_8b-chat-{quantization}.gguf"
|
|
1839
1853
|
},
|
|
1840
1854
|
{
|
|
1841
1855
|
"model_format": "ggufv2",
|
|
1842
1856
|
"model_size_in_billions": 4,
|
|
1843
1857
|
"quantizations": [
|
|
1858
|
+
"q2_k",
|
|
1859
|
+
"q3_k_m",
|
|
1860
|
+
"q4_0",
|
|
1861
|
+
"q4_k_m",
|
|
1862
|
+
"q5_0",
|
|
1863
|
+
"q5_k_m",
|
|
1864
|
+
"q6_k",
|
|
1844
1865
|
"q8_0"
|
|
1845
1866
|
],
|
|
1846
1867
|
"model_id": "qwen/Qwen1.5-4B-Chat-GGUF",
|
|
1847
1868
|
"model_hub": "modelscope",
|
|
1848
|
-
"model_file_name_template": "
|
|
1869
|
+
"model_file_name_template": "qwen1_5-4b-chat-{quantization}.gguf"
|
|
1849
1870
|
},
|
|
1850
1871
|
{
|
|
1851
1872
|
"model_format": "ggufv2",
|
|
1852
1873
|
"model_size_in_billions": 7,
|
|
1853
1874
|
"quantizations": [
|
|
1854
|
-
"
|
|
1875
|
+
"q2_k",
|
|
1876
|
+
"q3_k_m",
|
|
1877
|
+
"q4_0",
|
|
1878
|
+
"q4_k_m",
|
|
1879
|
+
"q5_0",
|
|
1880
|
+
"q5_k_m",
|
|
1881
|
+
"q6_k",
|
|
1882
|
+
"q8_0"
|
|
1855
1883
|
],
|
|
1856
1884
|
"model_id": "qwen/Qwen1.5-7B-Chat-GGUF",
|
|
1857
1885
|
"model_hub": "modelscope",
|
|
1858
|
-
"model_file_name_template": "
|
|
1886
|
+
"model_file_name_template": "qwen1_5-7b-chat-{quantization}.gguf"
|
|
1859
1887
|
},
|
|
1860
1888
|
{
|
|
1861
1889
|
"model_format": "ggufv2",
|
|
1862
1890
|
"model_size_in_billions": 14,
|
|
1863
1891
|
"quantizations": [
|
|
1864
|
-
"
|
|
1892
|
+
"q2_k",
|
|
1893
|
+
"q3_k_m",
|
|
1894
|
+
"q4_0",
|
|
1895
|
+
"q4_k_m",
|
|
1896
|
+
"q5_0",
|
|
1897
|
+
"q5_k_m",
|
|
1898
|
+
"q6_k",
|
|
1899
|
+
"q8_0"
|
|
1865
1900
|
],
|
|
1866
1901
|
"model_id": "qwen/Qwen1.5-14B-Chat-GGUF",
|
|
1867
1902
|
"model_hub": "modelscope",
|
|
1868
|
-
"model_file_name_template": "
|
|
1903
|
+
"model_file_name_template": "qwen1_5-14b-chat-{quantization}.gguf"
|
|
1869
1904
|
},
|
|
1870
1905
|
{
|
|
1871
1906
|
"model_format": "ggufv2",
|
|
1872
1907
|
"model_size_in_billions": 72,
|
|
1873
1908
|
"quantizations": [
|
|
1874
|
-
"q2_k"
|
|
1909
|
+
"q2_k",
|
|
1910
|
+
"q3_k_m"
|
|
1875
1911
|
],
|
|
1876
1912
|
"model_id": "qwen/Qwen1.5-72B-Chat-GGUF",
|
|
1877
1913
|
"model_hub": "modelscope",
|
|
1878
|
-
"model_file_name_template": "
|
|
1914
|
+
"model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf"
|
|
1879
1915
|
}
|
|
1880
1916
|
],
|
|
1881
1917
|
"prompt_style": {
|
|
@@ -25,6 +25,8 @@ from torch.nn import functional as F
|
|
|
25
25
|
from tqdm import tqdm
|
|
26
26
|
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
|
|
27
27
|
|
|
28
|
+
from ....device_utils import empty_cache
|
|
29
|
+
|
|
28
30
|
|
|
29
31
|
@dataclasses.dataclass
|
|
30
32
|
class CompressionConfig:
|
|
@@ -153,7 +155,7 @@ def load_compress_model(
|
|
|
153
155
|
tmp_state_dict[name] = None
|
|
154
156
|
tensor = None
|
|
155
157
|
gc.collect()
|
|
156
|
-
|
|
158
|
+
empty_cache()
|
|
157
159
|
|
|
158
160
|
for name in model.state_dict():
|
|
159
161
|
if name not in linear_weights:
|
|
@@ -12,10 +12,16 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import json
|
|
15
16
|
import logging
|
|
16
17
|
import os
|
|
17
18
|
from typing import Iterable, Iterator, List, Optional, Union
|
|
18
19
|
|
|
20
|
+
from ....device_utils import (
|
|
21
|
+
get_device_preferred_dtype,
|
|
22
|
+
gpu_count,
|
|
23
|
+
is_hf_accelerate_supported,
|
|
24
|
+
)
|
|
19
25
|
from ....types import (
|
|
20
26
|
ChatCompletion,
|
|
21
27
|
ChatCompletionChunk,
|
|
@@ -115,23 +121,18 @@ class PytorchModel(LLM):
|
|
|
115
121
|
)
|
|
116
122
|
from .compression import load_compress_model
|
|
117
123
|
|
|
118
|
-
cuda_visible_devices_env = os.getenv("CUDA_VISIBLE_DEVICES", None)
|
|
119
|
-
cuda_visible_devices = (
|
|
120
|
-
cuda_visible_devices_env.split(",") if cuda_visible_devices_env else []
|
|
121
|
-
)
|
|
122
|
-
|
|
123
124
|
quantization = self.quantization
|
|
124
|
-
num_gpus =
|
|
125
|
+
num_gpus = gpu_count()
|
|
125
126
|
device = self._pytorch_model_config.get("device", "auto")
|
|
126
127
|
self._pytorch_model_config["device"] = select_device(device)
|
|
127
128
|
self._device = self._pytorch_model_config["device"]
|
|
128
129
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
kwargs
|
|
130
|
+
kwargs = {}
|
|
131
|
+
|
|
132
|
+
dtype = get_device_preferred_dtype(self._device)
|
|
133
|
+
|
|
134
|
+
if dtype is not None:
|
|
135
|
+
kwargs["torch_dtype"] = dtype
|
|
135
136
|
else:
|
|
136
137
|
raise ValueError(f"Device {self._device} is not supported in temporary")
|
|
137
138
|
|
|
@@ -142,9 +143,25 @@ class PytorchModel(LLM):
|
|
|
142
143
|
"trust_remote_code"
|
|
143
144
|
)
|
|
144
145
|
model_format = self.model_spec.model_format
|
|
146
|
+
|
|
147
|
+
is_device_map_auto = False
|
|
148
|
+
|
|
149
|
+
# This is required for Intel GPU to actually work with accelerate device_map until
|
|
150
|
+
# https://github.com/intel/intel-extension-for-pytorch/issues/522
|
|
151
|
+
# is resolved
|
|
152
|
+
max_memory_env = os.getenv("ACCELERATE_MAX_MEMORY", None)
|
|
153
|
+
|
|
154
|
+
if max_memory_env is not None:
|
|
155
|
+
max_memory_raw = json.loads(max_memory_env)
|
|
156
|
+
max_memory = {
|
|
157
|
+
int(k) if k.isdigit() else k: max_memory_raw[k] for k in max_memory_raw
|
|
158
|
+
}
|
|
159
|
+
kwargs["max_memory"] = max_memory
|
|
160
|
+
|
|
145
161
|
if quantization != "none" and model_format == "pytorch":
|
|
146
162
|
if self._device == "cuda" and self._is_linux():
|
|
147
163
|
kwargs["device_map"] = "auto"
|
|
164
|
+
is_device_map_auto = True
|
|
148
165
|
if quantization == "4-bit":
|
|
149
166
|
kwargs["load_in_4bit"] = True
|
|
150
167
|
kwargs["bnb_4bit_compute_dtype"] = torch.float16
|
|
@@ -178,11 +195,13 @@ class PytorchModel(LLM):
|
|
|
178
195
|
logger.debug(f"Model Memory: {self._model.get_memory_footprint()}")
|
|
179
196
|
return
|
|
180
197
|
|
|
181
|
-
if num_gpus > 0 and self._device
|
|
198
|
+
if num_gpus > 0 and is_hf_accelerate_supported(self._device):
|
|
182
199
|
kwargs.update({"device_map": "auto"})
|
|
200
|
+
is_device_map_auto = True
|
|
201
|
+
|
|
183
202
|
self._model, self._tokenizer = self._load_model(**kwargs)
|
|
184
203
|
|
|
185
|
-
if
|
|
204
|
+
if not is_device_map_auto:
|
|
186
205
|
self._model.to(self._device)
|
|
187
206
|
logger.debug(f"Model Memory: {self._model.get_memory_footprint()}")
|
|
188
207
|
|
|
@@ -95,9 +95,11 @@ class QwenVLChatModel(PytorchChatModel):
|
|
|
95
95
|
if not isinstance(content, str):
|
|
96
96
|
# TODO(codingl2k1): Optimize _ensure_url
|
|
97
97
|
content = [
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
98
|
+
(
|
|
99
|
+
{"image": _ensure_url(c["image_url"]["url"]), "type": "image"}
|
|
100
|
+
if c.get("type") == "image_url"
|
|
101
|
+
else c
|
|
102
|
+
)
|
|
101
103
|
for c in content
|
|
102
104
|
]
|
|
103
105
|
content = sorted(content, key=operator.itemgetter("type"))
|
|
@@ -17,6 +17,8 @@ import time
|
|
|
17
17
|
import uuid
|
|
18
18
|
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple
|
|
19
19
|
|
|
20
|
+
from ....device_utils import empty_cache
|
|
21
|
+
|
|
20
22
|
try:
|
|
21
23
|
import torch
|
|
22
24
|
from torch.nn import functional as F
|
|
@@ -526,4 +528,4 @@ def speculative_generate_stream(
|
|
|
526
528
|
del kv_cache
|
|
527
529
|
del draft_kv_cache
|
|
528
530
|
gc.collect()
|
|
529
|
-
|
|
531
|
+
empty_cache()
|