xinference 0.8.4__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/__init__.py +6 -0
- xinference/_compat.py +52 -0
- xinference/_version.py +3 -3
- xinference/api/oauth2/auth_service.py +2 -3
- xinference/api/oauth2/types.py +1 -1
- xinference/api/restful_api.py +176 -108
- xinference/client/restful/restful_client.py +10 -6
- xinference/core/model.py +3 -2
- xinference/core/resource.py +4 -2
- xinference/core/status_guard.py +2 -1
- xinference/core/supervisor.py +37 -12
- xinference/core/utils.py +2 -1
- xinference/core/worker.py +13 -13
- xinference/deploy/worker.py +7 -7
- xinference/device_utils.py +100 -0
- xinference/fields.py +1 -1
- xinference/model/audio/core.py +1 -2
- xinference/model/audio/whisper.py +20 -8
- xinference/model/core.py +9 -0
- xinference/model/embedding/core.py +5 -136
- xinference/model/image/__init__.py +13 -1
- xinference/model/image/core.py +22 -43
- xinference/model/image/model_spec_modelscope.json +94 -0
- xinference/model/image/stable_diffusion/core.py +3 -5
- xinference/model/llm/ggml/llamacpp.py +1 -1
- xinference/model/llm/llm_family.json +333 -3
- xinference/model/llm/llm_family.py +11 -5
- xinference/model/llm/llm_family_modelscope.json +311 -1
- xinference/model/llm/pytorch/compression.py +3 -1
- xinference/model/llm/pytorch/core.py +34 -15
- xinference/model/llm/pytorch/qwen_vl.py +5 -3
- xinference/model/llm/pytorch/spec_decoding_utils.py +3 -1
- xinference/model/llm/pytorch/spec_model.py +20 -17
- xinference/model/llm/pytorch/utils.py +4 -3
- xinference/model/llm/pytorch/yi_vl.py +9 -5
- xinference/model/llm/utils.py +10 -1
- xinference/model/llm/vllm/core.py +4 -0
- xinference/model/rerank/core.py +5 -136
- xinference/model/utils.py +143 -18
- xinference/thirdparty/llava/mm_utils.py +1 -1
- xinference/types.py +3 -3
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.87d39ffb.js +3 -0
- xinference/web/ui/build/static/js/main.87d39ffb.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{8d95a492a6b08c192e212189567b59b98282ca6f3ed52dd9bd053a6be7ff4e65.json → 027745bdb9bb9fe375f2eef7377e9f7ba82510ed90c05f9e5b34ba314bb93498.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/03d5e7de9b1728d71cd8c483a35f4373eb66932022026142e03005d9de1a2a44.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0738899eefad7f90261125823d87ea9f0d53667b1479a0c1f398aff14f2bbd2a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/09a21ab449599121eadfdd12d6dba40c5a1c5b35dc90be17108eeff80daf79b8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/09d043a8289da33d05768f5face63cc47e9ce9646d67f5589865db511574e8f9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0b901c8f077905bf153a1390b207277c31e893f83f9ced5d29a0a0c85a07b5eb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0da1ff7c9602609cfa8b82a67140114f3037130fe79583bbf23b69cef81823a6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0f87caef05125425c0ca418a32ce8a53e9becffd464411b2ff94e3c7a37c7b5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{cb50b4d54ce9c249d580f7f1fc2df4beab4f13c4d16f486c89239809570de8fb.json → 0fc333c35ad49c0db7f18f77521984a36067cfce4206df50ce16a2fdc88be1c6.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/13fdd92dd5e2fe11640cf6ec0368e3c098d3f0c43e11703ccf157e8c41410d5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1464f950e5600fe2e5890186a1ae3750cdabbadd8525070cef6273450760bcf6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/17c53dc44a324629efef425833ab48cc05243d089f4eb3c731da63568fcff35e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1946a52cb34e675da423de15b703437b4c841e4f5aed6ddd932a091c145e17b0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1e0da5ce4d8ccdbe33705a47bdc0d5d1d7e4e48c8e5c1d0b8c473e9f491930ac.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1e3ec3302ef2253fad5e86bf429baf6b69e57e769f38ec1aaada87f3114b6a1f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1f18804827897c337874b289873df611b44869f86ced1d4351fbe0b45ed17518.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/21b64f7fdb74c2c35479811c3f5683ee161d3b64a4fade0cd1fc0044332c79b2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/22873d21eab4b5a04b183cc3d2cbfafa431e0ce99649aeb900cffcfc33ecbca4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{7a5fecf96af770954035948b07dfadeceb33536b4763a2f9e548121ae555b75f.json → 228b78cd0950f1c967cb33b210b11497052a9998f0b1d1f31ecee0fd8b99fb40.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2328fc2360224ac3eef5c5261fc2a75cdce7fe58c210af8d3e532ec609464f5f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/25fd89450dcf73a205a5f8c82dbc604bf872d574b748427c26ba79b8711ec502.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{b6e95546cdd247e50d9c916dd3bef5216f96c32a267e54b51b23cc22631be0c4.json → 288d6573b09f0e2b0277e1f301bcb57a194c01f3983e3c1e03c8b2ea72653194.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2c04900a020b813e2e99b15748c3c3ef5a1627c33d5668e45bc6a798f0d28dae.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2cd0fd50d8f213cdd94daffe7d0f997991072b11936ead7c48be2af3fdbd1fda.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2e057c12322da07a537a576a3917d512ac6cb291be5aa29ce23d2679a4fd16b1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{abd0662d21fbe39e483fbc4d37faa9e24d93889b5b40d42e7e20773c66ee3894.json → 327c21db6124a5209f41996d0d3359b462d5baff3974296f5acfddfc532434e6.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/33294bb0080595100e22c960929a6f3086f5ea632a4bcd9216069f95347a63a9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3361869e45eddd7d48ff7995e1a12ac454c9eb92d4c87f49ce80c1e9482c1e91.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/375d2f6a31e01c5a82fd0e18f42198f9fbb7117491f3e92d70cf36055f159a6e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3808217efee76345ba19af3ffdba045888e0908b0dc7fc38e54655cdc3504c60.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/39db740a99454006312dec9e4845f850c51e043458ca94eba8bb256521fabcd4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3e737bcdbcbc407ccd65b90e199ef0c3214b261e8e41dbf14d921384a717d9ee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3e87c7d4f1e5c93603eed3e861945e44dd1e664061a89aaef1aa6435f4f4d7d0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3ecf70018028c8cf6cdc2d24d2b88f2707bd5c23a336fd3d82bd6a1af0db7ded.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/43314d1e9fed30ed6cf5cab3f5033da067e58be29e36448e3c0a8127403a9388.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/442467c09440131f7c25ddcdeb356d2a87783ad4e32d89f410a16cb6e7d83b22.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/487524f2b3494bfcfbd8c12918092a52ff2ce3eefe9bf0a557389ec1c74d3e67.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4de4b0282a3d1ae02bec0bfe7d6658f039141406e8bfdbe86bf1dc5b115a2524.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4ec72faab00e4cbffefdf43b987ae3d15beb60e282b9ca86dad37f4d0fc3f2bd.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/51403f1b70af0f4577d4594cbfcf9d9759997e5c49097e7a07219000d1be4ff1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5282ee05e064b3a80bc991e9003ddef6a4958471d8f4fc65589dc64553365cdd.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/566a0829bfeab41e40bbdca55ad056067e172508af4b333403d4fd334a8c8742.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5689d9aee417a0834fed66720cf3b48c34cc26364cd9ab7e3d809997bb9faec9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5bedd27475bda204eafccdc636a56ba5765353391049a0b947234faaab3d021a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{94daea23b698bbb26bbbab69bbca8ad521c79fcbd6b495c84c740e4006a2ced2.json → 61014d40df4486972fb6d5909dff45f24bec48573af99ca8bcd79835385c7da0.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/6139c80ec46d77469e9a89924de41d93050f19e9ed541e358f09cbf20ccd7c86.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63ead4dee6a4f4ae47aed6918a2ab3e172739ecdffaa85e063f9a2c9165ff93c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6bf8c25e0f2051d994990bf73fdcb3e08963e11589842895593f199eab13ba9a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6d42f63cd673d1c60e534a174cd851e9aefd2d5d45a4aa6128d794457b64de53.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7044e626a91582077b8cbad09bcf0cc118e92e11ebfa2ebc17622baf98aa56bf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/71ec408030881aad69854815b8681185d0174cc0cf67f20fb2c3cd13c74fce07.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7280614d5bddce524182024cc288953eb89d46c1dbe470ed5eb5ecab5de28385.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7293ad8ba7b5fc6ed791aed44a766a396516c782f5d0e564b9ef0997645ec1dd.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{65ca3ba225b8c8dac907210545b51f2fcdb2591f0feeb7195f1c037f2bc956a0.json → 77d4d795f078408fa2dd49da26d1ba1543d51b63cc253e736f4bef2e6014e888.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/79be1e0dd6b1c3cbb3f08b349454f4791edaac451fa4d1f10eaf433e57ecf70f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/7c0571c03aa8c90eb9452ba23133aed9b3b2a049910d4d8276588b438a4caaba.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/81ed094096cd9dc36f3ddebe61818766f43d3338c171f2269958f47265e60f24.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/83beb31daa7169fb0057453d4f86411f1effd3e3f7af97472cbd22accbfc65bb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/86355e55059ddb15f71c0c651a2e622cee6f3141da3a018775ca00c74dd37dc2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{d1af6db565bae36218251c01a817c080ef764172dd0694e38d27b525a3ffd1d2.json → 8710fe63bcc6b95502287c36a7bd881b74ee2926982e120e1e8070a2c8a668fc.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8779104679d8f85017b551d1d108ea1ac701e184ad7bcc8a367526353a6c5175.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/881c573e6aa9954e7bd5e3bc5a6ef62c0db9b93815030080f677f6834e431434.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8d24c51a0905b919436aac2bad441eb1651c46c4e7aa17c99eb63c7760ce6615.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/93bf5cc8cbe3f0b10f291800e1896f03506a8933cd490e07ee0ff94498f93bce.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9401db0316ac34790aa5b972e9e1c50de58ef313b50840a37e08f0936a7197b6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/98afeafdfc285a744d116fb5f189a16ad2b977adbd4fc0757f7ade1b29b8ad31.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9a4e624d1ce66832fb213ead02745d84661d9bfcc21b62b4c37c680c3c3f5cbb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9a7b26f85224d33d431354ac8d7849388b7525fc014801824e705561ab2ac7a7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9dcb04ac6cb341233a2e98f2fb417d059c2ea6a56d2ff6f30bb27dae9bd6c756.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9ebede8a4bccef016eef418a668b3212962779ea6d2531edb23d3dca81163496.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a0efc8db4289116bdb312014df91fc781f66fe171d63027bbd5aae25cd54c325.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a212defc32ca338ba673722c72815d30974275f1a82b67405a898f826fc8d782.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a2685acab57fd6d874bf6069a587e3770b55c72cacc69cb167bb3dbd16a551c5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3b7d6233e50dc3832421793a5752c1db99f3a214c9896c97bbd395d007f09b1.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3d0c810a255108613be0a175d53d673c4b6843c2a87dd0f157b4d3731af2092.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a6633774053545c60721e345820e21cf3b089df219f7ee3fcceaf27af3a9e84b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a6bb917e4869d7d6ddf50b75932cad64e94b9e33db2aa40c4a489a045819bb36.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a816031fafa564dc4af27383612e51ac29af51354f3bba750a70ac0c03b6bd30.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/af6349a1e9761cb98af4b76904440fb7f8d54288ee6ba37116f670226796e689.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/afce63eaad32a9968590e4500e123c81b385ea3b6046b2af1ec7ea8a49ba657b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/aff24b61e5d55147d0a36ef1896b9bd35c995c357f44eeee1209f23db2118636.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b01676c5ca97e3409a1b7bac91b6a797be3719a8b7ec72ad06c9beb296c67bb7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b0c5e967d1af90d545f1d34c9893a841296b0d1dd50de65eeaf19054995d2a94.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b561d429cea6143e1bf7e6e7e690f9e6ce1dce4adfcacb57d7c5578195d1d190.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b70187bb5aecf3bd7afbe8469b49e2a0a94a0c62727530d0dc2f84ee4577bcfc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bd21f6039e23775fb4631bb56f2eef1d132c03a60cc6fe4530569ecfeef456a3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/be09b5508d0ca5ac0300144a682d4cde2d5b4b96ecb9fc690c1d5e7ee7411b3e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/be8c9e83f0dd4af2e9bcf6844dd8c4445fe634b6c71da6debfa7acabb7a875c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c0d1317dfae0c5ddf5a3ba9aef6bd2204f2daf9754f3270c4cab5c4632f78000.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c1e3afdaae5649274f7d34910015b88bc02ba6a00b935e6597f9c670fedc05c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c29ec02243dd370616a489f7de67e4e479d2f7f12504ba7f77304cffe60befbc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c2a4d298149e6c66713eed4f56f4bbbfa4022a193891029b84e429baf59cc246.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c3d2fc03602f7604a65e631e8595ce4cfabd8887343489b34a65e3a73c5546b3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c41078c8692e0635fb52d01bc5157528c7308ae5140c134aa07c1fcda4f7a5e5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c4b5f1e444ddd41f8cd30e618c185b16235e313ea34d944959628001a91e38a5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c7ed9bc3125498e475f3170b8126c93368e9cbfd6a5e73ee63437d90e39b69e5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c996b3dc884130f034a37551845201bc5661f2dcd282aed737329f8d4dd387b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c9fef6e7f0a532c2d1cc25dffd496ea8355dd14ce0105a169b391bfeb9277e4f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cc8f5a2bbe4da9852ccdf64150115271baa2d282d74d7fab854dc5ca641dd519.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ce45adea497443bfc1b6baabd0aa3d85888dd10b1b7fd009edf608e390b2a844.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ce4b82a3855d18ba09e922f6e2cc1b8c20f0e5261b27303c52d4e1ba5a09a078.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cf16614311e144768bdc064da754efad77597cac5efe6852be996b9eaaed695a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d074bfe592a195f339acbcd8743f8b64cfd09b45ca1013598269227fc092f2db.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d2a95455cc492d4fbf2839a21eeaef00e7fad2aac93825d2443a38127ad02007.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d4e6e712ebbbf0f30745093e2b6d235c13fb43d37b586a0109e87c19eafdf35d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d88a02264125b07881eecff752d9c8d718edcf2d87e5431b13e54809dc7bf4f2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d907925fb28912f2e98cccfe758bce680aa4f894cc9d2079d73d7e2d85481d47.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/daf050ec3a864b9831dcb32cee1fb2739dafb68b369e9108469ca1e6fd166867.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/de9645d55ce8744856862eae4a3d3a1266e9df9d575aa3e2f62a75a5d97444f7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e04199bdfb87226d6aeae387ef677eae06483f9f135b5a3f1360ec1a243c86cd.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e97d1f31f54263bb90b7c3e71be0e19973d76d945fb5d526bf93fb449708bbf0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e9f87f5c370a731ee42098ae6ff38ca6ad256396158c03f84c5e770c6b76a43d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f2ad375dbc8779f54eb9c377fd951cd442f3766d5eb70d40613b952b811643da.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f35df9d5a0e0341bcc0f39a462e46988c330cdb905cc690b08de4a9cf5771f28.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f433e33369483f75d31fe937b8a597ef2d7cdfed339931643ff966a33dbc6158.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f4c164c89cfbcb1d81bc0b60e3e7c8715e07c7a23118acf4e3e16715beac09c5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f545d937cfb9d974f4aaaa9dc1c12adfed0d4f608de7d0a7247452d9bbbb55a4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/{3cc5ec39e76e1768ffb070a8ae18ee61b68f3ed3593178134d3edda5ed378c6c.json → f8d2c4372093d0ff3a2f78260e004e6c4e2b8ab4f5be9713ebc16c50e9c2ab4b.json} +1 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fa41c6c8ffa1047b7f2f52b6532ea4d9f5754e5bbc11d1340bae7f75ae5ed84e.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +45 -45
- xinference/web/ui/node_modules/@babel/runtime/package.json +64 -37
- xinference/web/ui/node_modules/@mui/private-theming/package.json +7 -6
- xinference/web/ui/node_modules/@mui/styled-engine/package.json +6 -5
- xinference/web/ui/node_modules/@mui/system/node_modules/clsx/package.json +22 -10
- xinference/web/ui/node_modules/@mui/system/package.json +13 -12
- xinference/web/ui/node_modules/@mui/types/package.json +3 -2
- xinference/web/ui/node_modules/@mui/utils/ClassNameGenerator/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/HTMLElementType/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/capitalize/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/chainPropTypes/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/clamp/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/composeClasses/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/createChainedFunction/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/debounce/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/deepmerge/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/deprecatedPropType/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/elementAcceptingRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/elementTypeAcceptingRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/exactProp/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/formatMuiErrorMessage/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/generateUtilityClass/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/generateUtilityClasses/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/getDisplayName/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/getScrollbarSize/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/getValidReactChildren/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/integerPropType/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/isMuiElement/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/ownerDocument/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/ownerWindow/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/package.json +8 -7
- xinference/web/ui/node_modules/@mui/utils/ponyfillGlobal/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/refType/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/requirePropFactory/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/resolveProps/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/scrollLeft/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/setRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/unsupportedProp/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useControlled/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useEnhancedEffect/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useEventCallback/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useForkRef/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useId/package.json +2 -2
- xinference/web/ui/node_modules/@mui/utils/useIsFocusVisible/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useLazyRef/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useOnMount/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/usePreviousProps/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/useTimeout/package.json +6 -0
- xinference/web/ui/node_modules/@mui/utils/visuallyHidden/package.json +6 -0
- xinference/web/ui/node_modules/@types/prop-types/package.json +2 -2
- xinference/web/ui/node_modules/csstype/package.json +3 -3
- xinference/web/ui/package-lock.json +47 -45
- xinference/web/ui/package.json +2 -0
- {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/METADATA +6 -3
- {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/RECORD +227 -167
- xinference/web/ui/build/static/js/main.476e35cc.js +0 -3
- xinference/web/ui/build/static/js/main.476e35cc.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/02f54fcadc6fe9b1be8451435f9a097c82b3d665bae2d2df03ba0b36cebeca0c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/051391807ffd255b652b63dc2ff12e5ce80113e1b712c81b6c1986a11cd85d3c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/05710e94caaf5a2da790d67aa572bf6bc19bae0237668c8d97f4da62ab52fe5c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/08a3470840a55d359a967d0763cd1ae5c1f15f3ca31118e29b37b4fbbdb248c7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0e68b0e268559a563dde6fa4c654922d98c74f5140854437f3e5a01bf23ba41f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/10dc7661741dfc88df5538bbf835870ae47b1a18f36c2f7b9650651e7304ce72.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/18b3d28a2692ab4f2d05254221f53b3c13259435bd98cb90ff5d7250d93f11e0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1b612ab687e3a4e14db156bd92a75b8a3e96ff04bfbf8f3351c7e801bc61cf90.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1bb839ffeb39ed12367c179fbf3b784e20f6f98fb87c597717bc11ebe3201c59.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1d12e0b0e4e87372b18fef72fcd6e264fa40b0cffdbeddcef004becc4e32cf27.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1d5b806c08ffb55539ee26b5fa9746a373f602354d6e55a0d4379d7e2f903868.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1ecb7ce2bc103bec0d1a9f0cb07132e5ec7731d974998c465798c0e15cafb924.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/20cfc57ae2b8ba23571f386a5f2bf194bd02e8a3913113881bb9804ee822ad58.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/222a437b6773707bbead06ba5db1ed8869e44d9f051a7f6b1a745aeb654679d0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/249709fb20c1254cecc9ba6666e6bcf37b21e47e7a020d5a7f0513680b49473f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/257ad4979294ee3a7ef400feab3b29ebcd259c47b9410a3011a66f4b33a97904.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2779bdc8ed8372f4015d07a45e4b09756bd953250944c3e3efe6b65bf8eaecd8.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2bee7b8bd3d52976a45d6068e1333df88b943e0e679403c809e45382e3818037.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/31c71a85b2d0bbdcdc027e7950a9d1f778288f98888d8595fad5e4fdda5bb31f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/371214b6649e323e7281885b1412c5a515ccc2274f4e1a42921e45777694009c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3eced030d2b3da645f4cbe3dc4ff49744312316e4306ce170991e802e9bfcf8e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/452ae8e7cff99efc35d134014d32610d13217b031b817d3663bb689b3778ebd2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/48e3f5d01ccedde86348a29392bc9b6e301a51b3f7e2476ae0a95a31eb6bf18d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4a311b5bd98f16588caf60b8de4c8d91d038198e6bfd496f880f60d4c1ea076d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4ccd71d3f73490a18a5a4de4ad82d63cf019caff6f8d881fd1434755a033bfc5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4ee8343d4254c95e685d49601f1649e8222d7423c2a822e1d01c8fc4cd3582a2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/523a712748fbb7abf0c5423df62e65c4659b7e450182c890e847cd78ace7cac5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5777049bbacf7322891761a5b959e62eec22b0f96a70dbf97e58d530c6258a55.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5905957ce95f7dee64f0739359e990b8b518cff088c6958487641a3d21737d1e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5ac79df79d2dff64da97b02d5c90b36bc0d44a4a4229f6efed2993f93ee1d7a1.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/64d3ef38fd394979b09061e67fc47f04b7295401677825c64c72ea2f44cf910c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/68a5d8e66687770e05aebcae9a8047189dc577d6150fbf0e62d2b2c11c4335c5.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/69d5f27e40d5e7040c8e8e303ccaee019b54092345acc3838d6eb943c474ce39.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/6a07b8d00482c07a4b7c73f6bebb79fad54bbea35ba358681ede6ff299a09f6b.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/70b0307a43b9f8c876bf2a1f7dea4cc56cc0ea33743e863c0b3983bc48109f2d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/710c39a26816820c2cfd00b19cd0112851d64b8ac6b201b18727b43b5a69d92c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/722f4c03da79795d1fb738323f3eb6794ab18711e20f141c0b678c5d4cb3dce9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8237bdfff695d71496ee2ff83ba0392d753f8e1dff0d002a1b2c9e249f03083c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8342e3800fed63e8e805850810aee4fd9b0558aca10c8b6b770fe5bff1bed266.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/847a71f54f7a059efc71f70c297b6b4182a633130715c80f817eb533de108c12.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8731a959d9b7278ba7a1c044bb4253bfff63079da69b21ff80221ae5e037dbbe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/87b4e94b045df561c8f4bd253c84447c96e0dededbcb6646c0cd180119960990.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/894a334446bac96d74385c23c9d6f2d117ee29511897700430cd0e578cb32527.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8c5ba88b620eeedbb99db18a610138353a5a657170fdb8bd6969585f81b34f74.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9004bc083cf179941e64130390a27afcf7593b21dd64a87d03e1e6489032aa13.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9b84631723b52988d3748d80b994e43d1944ca55098a5d908c9061eeb4c84f90.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9ee1f84bde10cd78ca23a4681c7372195748bacd5e41c3b1df8e39d960294d81.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a127f13651a7398f5fc378456a52cfa0c6260f6109611d3fb012aa7c7a4ee17a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a5e2e9f707eb7039bea096ca117d996b8f9cbc2a5613fd8e0c5b0094444ce23c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a6131b7fb1b30fd63d798845dab508a22b21c616b361e6214bf711915c5fef3d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a7b8ccb199158c897ed7d7835942eb37569221be4101eecfa7a7ce62e0c76f1f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/acaf6dbddd57b63a874e9062803850ce4911c8dc63a362eacdc4d2d0f48f8c16.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ade3cd82af85d2d28138bd942228b01e27559a5a4072ee049dcb47612ee9a506.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/aea82c71088dc9afd6f4ba71434286af5c6ab5e9b87088b0e84f351146d06489.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/afbd547078b20bb14596f20e165cb40413147e3de501b126c97bb417f8b9a53e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/afc597494ebc62d22e4d3ac2a839bd5f30d5e47ce1d0576e0f62407ef6c78652.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/affbbac58777e26ba8b6dcda8cea72c76975bd0207f0a510bfde5983d3081f74.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b0aa2ece62a1f5fa015f9e6b59d20b79be727fb5a3f4fc747200163097f320ad.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b10391d3ae53452c8ec97d230f064e03a8377aab2be4b1b3e30cb3e054b2341a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b4bab0c147360f3e335251166d3d25602e2e0e160fb3734ac019a5b42499179c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b57dcff363e045fd11ea2bcbc58cce470f1bc22d3ac14c8acbf63a36637050ff.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b80db1012318b97c329c4e3e72454f7512fb107e57c444b437dbe4ba1a3faa5a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b8de42f7f5dc1df9e802898a0ebb35adb57d269d7af21e6c66a0fde0ff226a08.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/be696cadea3bf577c0defc8f8d100232369763dde754b5ba9399f45f42aed215.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c5a30d7c6cb484268e9126dd6d078229a1dcaa07fb064077b87d15224c61a3aa.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c67e2077e70c4751e9e1dd98f378d979abd9c8a39fbf6c69f02fb4e5b04eef87.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c81c1cad35be60a4fad090374edf50a884540e306d464b803120554df61e817e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d1054fd4d09666318a00ca0aea5ff42a2fdb17b3a42b5f77b93433d7408eb2f4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d1ec524ce17a48816fc55fe665572c1ef4982a4bd1ecb01badd0fdffc465242c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d4d61a537c45c4506a6b7e3478206b71dc96e0c6c6410cef9b5d2f47018bc578.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e2e563b606cac9d51661cf68ba1729eb5df7861af521adccedc1b163439b657d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e3890e804dfaeb19b79f52e1b20198b587da61a9a3a1f55417d2cc5ce4ad7dae.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e8c3fadbd12f0e2c7b70d84acbcabd0a5c96b05380a7b3167b00e8739d829d24.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ebb5ae1a82b33b94e2b0cfec599bec7500aff0a1701f91736a6f3a4f77617560.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f30f0396f4e54f0355112d5eba4b16c3a56f47e456eb117a9dc7fa6218ce21fe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f318ac15e999a553c00832029245730fe25c2916860b0ff55eee1f0ab926d84e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f55f4e657cc2d797b7e73a64ce40352f9a5dde31763a3e89c14d3b983307f30e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f8a68931d652a5e3be4e1ae06210a223de1239f3884ad858cecf9fe8d11c2ffe.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f95a8bd358eeb55fa2f49f1224cc2f4f36006359856744ff09ae4bb295f59ec1.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fb6fbd693a53d3c542d9adf83e0bb458918d39bf101eaf6d6662a2fbc35c1adc.json +0 -1
- /xinference/web/ui/build/static/js/{main.476e35cc.js.LICENSE.txt → main.87d39ffb.js.LICENSE.txt} +0 -0
- {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/LICENSE +0 -0
- {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/WHEEL +0 -0
- {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/top_level.txt +0 -0
|
@@ -25,6 +25,8 @@ from torch.nn import functional as F
|
|
|
25
25
|
from tqdm import tqdm
|
|
26
26
|
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
|
|
27
27
|
|
|
28
|
+
from ....device_utils import empty_cache
|
|
29
|
+
|
|
28
30
|
|
|
29
31
|
@dataclasses.dataclass
|
|
30
32
|
class CompressionConfig:
|
|
@@ -153,7 +155,7 @@ def load_compress_model(
|
|
|
153
155
|
tmp_state_dict[name] = None
|
|
154
156
|
tensor = None
|
|
155
157
|
gc.collect()
|
|
156
|
-
|
|
158
|
+
empty_cache()
|
|
157
159
|
|
|
158
160
|
for name in model.state_dict():
|
|
159
161
|
if name not in linear_weights:
|
|
@@ -12,10 +12,16 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import json
|
|
15
16
|
import logging
|
|
16
17
|
import os
|
|
17
18
|
from typing import Iterable, Iterator, List, Optional, Union
|
|
18
19
|
|
|
20
|
+
from ....device_utils import (
|
|
21
|
+
get_device_preferred_dtype,
|
|
22
|
+
gpu_count,
|
|
23
|
+
is_hf_accelerate_supported,
|
|
24
|
+
)
|
|
19
25
|
from ....types import (
|
|
20
26
|
ChatCompletion,
|
|
21
27
|
ChatCompletionChunk,
|
|
@@ -115,23 +121,18 @@ class PytorchModel(LLM):
|
|
|
115
121
|
)
|
|
116
122
|
from .compression import load_compress_model
|
|
117
123
|
|
|
118
|
-
cuda_visible_devices_env = os.getenv("CUDA_VISIBLE_DEVICES", None)
|
|
119
|
-
cuda_visible_devices = (
|
|
120
|
-
cuda_visible_devices_env.split(",") if cuda_visible_devices_env else []
|
|
121
|
-
)
|
|
122
|
-
|
|
123
124
|
quantization = self.quantization
|
|
124
|
-
num_gpus =
|
|
125
|
+
num_gpus = gpu_count()
|
|
125
126
|
device = self._pytorch_model_config.get("device", "auto")
|
|
126
127
|
self._pytorch_model_config["device"] = select_device(device)
|
|
127
128
|
self._device = self._pytorch_model_config["device"]
|
|
128
129
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
kwargs
|
|
130
|
+
kwargs = {}
|
|
131
|
+
|
|
132
|
+
dtype = get_device_preferred_dtype(self._device)
|
|
133
|
+
|
|
134
|
+
if dtype is not None:
|
|
135
|
+
kwargs["torch_dtype"] = dtype
|
|
135
136
|
else:
|
|
136
137
|
raise ValueError(f"Device {self._device} is not supported in temporary")
|
|
137
138
|
|
|
@@ -142,9 +143,25 @@ class PytorchModel(LLM):
|
|
|
142
143
|
"trust_remote_code"
|
|
143
144
|
)
|
|
144
145
|
model_format = self.model_spec.model_format
|
|
146
|
+
|
|
147
|
+
is_device_map_auto = False
|
|
148
|
+
|
|
149
|
+
# This is required for Intel GPU to actually work with accelerate device_map until
|
|
150
|
+
# https://github.com/intel/intel-extension-for-pytorch/issues/522
|
|
151
|
+
# is resolved
|
|
152
|
+
max_memory_env = os.getenv("ACCELERATE_MAX_MEMORY", None)
|
|
153
|
+
|
|
154
|
+
if max_memory_env is not None:
|
|
155
|
+
max_memory_raw = json.loads(max_memory_env)
|
|
156
|
+
max_memory = {
|
|
157
|
+
int(k) if k.isdigit() else k: max_memory_raw[k] for k in max_memory_raw
|
|
158
|
+
}
|
|
159
|
+
kwargs["max_memory"] = max_memory
|
|
160
|
+
|
|
145
161
|
if quantization != "none" and model_format == "pytorch":
|
|
146
162
|
if self._device == "cuda" and self._is_linux():
|
|
147
163
|
kwargs["device_map"] = "auto"
|
|
164
|
+
is_device_map_auto = True
|
|
148
165
|
if quantization == "4-bit":
|
|
149
166
|
kwargs["load_in_4bit"] = True
|
|
150
167
|
kwargs["bnb_4bit_compute_dtype"] = torch.float16
|
|
@@ -178,11 +195,13 @@ class PytorchModel(LLM):
|
|
|
178
195
|
logger.debug(f"Model Memory: {self._model.get_memory_footprint()}")
|
|
179
196
|
return
|
|
180
197
|
|
|
181
|
-
if num_gpus > 0 and self._device
|
|
198
|
+
if num_gpus > 0 and is_hf_accelerate_supported(self._device):
|
|
182
199
|
kwargs.update({"device_map": "auto"})
|
|
200
|
+
is_device_map_auto = True
|
|
201
|
+
|
|
183
202
|
self._model, self._tokenizer = self._load_model(**kwargs)
|
|
184
203
|
|
|
185
|
-
if
|
|
204
|
+
if not is_device_map_auto:
|
|
186
205
|
self._model.to(self._device)
|
|
187
206
|
logger.debug(f"Model Memory: {self._model.get_memory_footprint()}")
|
|
188
207
|
|
|
@@ -448,7 +467,7 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
|
|
|
448
467
|
generate_config = self._sanitize_generate_config(generate_config)
|
|
449
468
|
# TODO(codingl2k1): qwen hacky to set stop for function call.
|
|
450
469
|
model_family = self.model_family.model_family or self.model_family.model_name
|
|
451
|
-
if tools and "qwen-chat"
|
|
470
|
+
if tools and model_family in ["qwen-chat", "qwen1.5-chat"]:
|
|
452
471
|
stop = generate_config.get("stop")
|
|
453
472
|
if isinstance(stop, str):
|
|
454
473
|
generate_config["stop"] = [stop, "Observation:"]
|
|
@@ -95,9 +95,11 @@ class QwenVLChatModel(PytorchChatModel):
|
|
|
95
95
|
if not isinstance(content, str):
|
|
96
96
|
# TODO(codingl2k1): Optimize _ensure_url
|
|
97
97
|
content = [
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
98
|
+
(
|
|
99
|
+
{"image": _ensure_url(c["image_url"]["url"]), "type": "image"}
|
|
100
|
+
if c.get("type") == "image_url"
|
|
101
|
+
else c
|
|
102
|
+
)
|
|
101
103
|
for c in content
|
|
102
104
|
]
|
|
103
105
|
content = sorted(content, key=operator.itemgetter("type"))
|
|
@@ -17,6 +17,8 @@ import time
|
|
|
17
17
|
import uuid
|
|
18
18
|
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple
|
|
19
19
|
|
|
20
|
+
from ....device_utils import empty_cache
|
|
21
|
+
|
|
20
22
|
try:
|
|
21
23
|
import torch
|
|
22
24
|
from torch.nn import functional as F
|
|
@@ -526,4 +528,4 @@ def speculative_generate_stream(
|
|
|
526
528
|
del kv_cache
|
|
527
529
|
del draft_kv_cache
|
|
528
530
|
gc.collect()
|
|
529
|
-
|
|
531
|
+
empty_cache()
|
|
@@ -13,9 +13,13 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import logging
|
|
16
|
-
import os
|
|
17
16
|
from typing import Iterator, List, Optional, Union
|
|
18
17
|
|
|
18
|
+
from ....device_utils import (
|
|
19
|
+
get_device_preferred_dtype,
|
|
20
|
+
gpu_count,
|
|
21
|
+
is_hf_accelerate_supported,
|
|
22
|
+
)
|
|
19
23
|
from ....types import Completion, CompletionChunk, Embedding
|
|
20
24
|
from ...utils import select_device
|
|
21
25
|
from .. import LLMFamilyV1, LLMSpecV1
|
|
@@ -73,30 +77,26 @@ class SpeculativeModel(PytorchChatModel):
|
|
|
73
77
|
|
|
74
78
|
def load(self):
|
|
75
79
|
try:
|
|
76
|
-
import torch
|
|
80
|
+
import torch # noqa: F401
|
|
77
81
|
except ImportError:
|
|
78
82
|
raise ImportError(
|
|
79
83
|
f"Failed to import module 'torch'. Please make sure 'torch' is installed.\n\n"
|
|
80
84
|
)
|
|
81
85
|
|
|
82
|
-
|
|
83
|
-
cuda_visible_devices = (
|
|
84
|
-
cuda_visible_devices_env.split(",") if cuda_visible_devices_env else []
|
|
85
|
-
)
|
|
86
|
-
|
|
87
|
-
num_gpus = len(cuda_visible_devices) if cuda_visible_devices_env != "-1" else 0
|
|
86
|
+
num_gpus = gpu_count()
|
|
88
87
|
device = self._pytorch_model_config.get("device", "auto")
|
|
89
88
|
self._pytorch_model_config["device"] = select_device(device)
|
|
90
89
|
self._device = self._pytorch_model_config["device"]
|
|
91
90
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
kwargs
|
|
91
|
+
kwargs = {}
|
|
92
|
+
|
|
93
|
+
dtype = get_device_preferred_dtype(self._device)
|
|
94
|
+
|
|
95
|
+
if dtype is not None:
|
|
96
|
+
kwargs["torch_dtype"] = dtype
|
|
98
97
|
else:
|
|
99
98
|
raise ValueError(f"Device {self._device} is not supported in temporary")
|
|
99
|
+
|
|
100
100
|
kwargs["trust_remote_code"] = self._pytorch_model_config.get(
|
|
101
101
|
"trust_remote_code"
|
|
102
102
|
)
|
|
@@ -106,15 +106,18 @@ class SpeculativeModel(PytorchChatModel):
|
|
|
106
106
|
"Quantization is not supported by speculative decoding yet"
|
|
107
107
|
)
|
|
108
108
|
|
|
109
|
-
|
|
109
|
+
is_device_map_auto = False
|
|
110
|
+
|
|
111
|
+
if num_gpus > 0 and is_hf_accelerate_supported(self._device):
|
|
110
112
|
kwargs.update({"device_map": "auto"})
|
|
113
|
+
is_device_map_auto = True
|
|
111
114
|
|
|
112
115
|
self._model, self._tokenizer = self._load_model(
|
|
113
116
|
model_path=self.model_path,
|
|
114
117
|
revision=self.model_spec.model_revision,
|
|
115
118
|
**kwargs,
|
|
116
119
|
)
|
|
117
|
-
if
|
|
120
|
+
if not is_device_map_auto:
|
|
118
121
|
self._model.to(self._device)
|
|
119
122
|
logger.debug(
|
|
120
123
|
f"Model {self.model_uid} memory footprint: {self._model.get_memory_footprint()}"
|
|
@@ -125,7 +128,7 @@ class SpeculativeModel(PytorchChatModel):
|
|
|
125
128
|
revision=self._draft_model_spec.model_revision,
|
|
126
129
|
**kwargs,
|
|
127
130
|
)
|
|
128
|
-
if
|
|
131
|
+
if not is_device_map_auto:
|
|
129
132
|
self._model.to(self._device)
|
|
130
133
|
logger.debug(
|
|
131
134
|
f"Draft model {self.model_uid} memory footprint: {self._model.get_memory_footprint()}"
|
|
@@ -29,6 +29,7 @@ from transformers.generation.logits_process import (
|
|
|
29
29
|
TopPLogitsWarper,
|
|
30
30
|
)
|
|
31
31
|
|
|
32
|
+
from ....device_utils import empty_cache
|
|
32
33
|
from ....types import (
|
|
33
34
|
CompletionChoice,
|
|
34
35
|
CompletionChunk,
|
|
@@ -122,7 +123,7 @@ def generate_stream(
|
|
|
122
123
|
temperature, repetition_penalty, top_p, top_k
|
|
123
124
|
)
|
|
124
125
|
|
|
125
|
-
if "
|
|
126
|
+
if ".modeling_qwen." in str(type(model)).lower():
|
|
126
127
|
# TODO: hacky
|
|
127
128
|
input_ids = tokenizer(prompt, allowed_special="all").input_ids
|
|
128
129
|
else:
|
|
@@ -335,7 +336,7 @@ def generate_stream(
|
|
|
335
336
|
# clean
|
|
336
337
|
del past_key_values, out
|
|
337
338
|
gc.collect()
|
|
338
|
-
|
|
339
|
+
empty_cache()
|
|
339
340
|
|
|
340
341
|
|
|
341
342
|
@torch.inference_mode()
|
|
@@ -489,4 +490,4 @@ def generate_stream_falcon(
|
|
|
489
490
|
|
|
490
491
|
# clean
|
|
491
492
|
gc.collect()
|
|
492
|
-
|
|
493
|
+
empty_cache()
|
|
@@ -57,16 +57,18 @@ class YiVLChatModel(PytorchChatModel):
|
|
|
57
57
|
from ....thirdparty.llava.mm_utils import load_pretrained_model
|
|
58
58
|
from ....thirdparty.llava.model.constants import key_info
|
|
59
59
|
|
|
60
|
-
|
|
61
|
-
|
|
60
|
+
self._device = self._pytorch_model_config.get("device", "auto")
|
|
61
|
+
self._device = select_device(self._device)
|
|
62
62
|
|
|
63
63
|
key_info["model_path"] = self.model_path
|
|
64
|
+
# Default device_map is auto, it can loads model to multiple cards.
|
|
65
|
+
# If the device_map is set to cuda, then only 1 card can be used.
|
|
64
66
|
(
|
|
65
67
|
self._tokenizer,
|
|
66
68
|
self._model,
|
|
67
69
|
self._image_processor,
|
|
68
70
|
_,
|
|
69
|
-
) = load_pretrained_model(self.model_path, device_map=
|
|
71
|
+
) = load_pretrained_model(self.model_path, device_map=self._device)
|
|
70
72
|
|
|
71
73
|
@staticmethod
|
|
72
74
|
def _message_content_to_yi(content) -> Union[str, tuple]:
|
|
@@ -187,7 +189,7 @@ class YiVLChatModel(PytorchChatModel):
|
|
|
187
189
|
prompt, self._tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
|
|
188
190
|
)
|
|
189
191
|
.unsqueeze(0)
|
|
190
|
-
.
|
|
192
|
+
.to(self._device)
|
|
191
193
|
)
|
|
192
194
|
|
|
193
195
|
images = state.get_images(return_pil=True)
|
|
@@ -210,7 +212,9 @@ class YiVLChatModel(PytorchChatModel):
|
|
|
210
212
|
max_new_tokens = generate_config.get("max_tokens", 512)
|
|
211
213
|
generate_kwargs = {
|
|
212
214
|
"input_ids": input_ids,
|
|
213
|
-
"images": image_tensor.unsqueeze(0)
|
|
215
|
+
"images": image_tensor.unsqueeze(0)
|
|
216
|
+
.to(dtype=torch.bfloat16)
|
|
217
|
+
.to(self._device),
|
|
214
218
|
"streamer": streamer,
|
|
215
219
|
"do_sample": True,
|
|
216
220
|
"top_p": float(top_p),
|
xinference/model/llm/utils.py
CHANGED
|
@@ -402,6 +402,15 @@ Begin!"""
|
|
|
402
402
|
else:
|
|
403
403
|
ret += role + ": </s>"
|
|
404
404
|
return ret
|
|
405
|
+
elif prompt_style.style_name == "gemma":
|
|
406
|
+
ret = ""
|
|
407
|
+
for message in chat_history:
|
|
408
|
+
content = message["content"]
|
|
409
|
+
role = get_role(message["role"])
|
|
410
|
+
ret += "<start_of_turn>" + role + "\n"
|
|
411
|
+
if content:
|
|
412
|
+
ret += content + "<end_of_turn>\n"
|
|
413
|
+
return ret
|
|
405
414
|
else:
|
|
406
415
|
raise ValueError(f"Invalid prompt style: {prompt_style.style_name}")
|
|
407
416
|
|
|
@@ -556,7 +565,7 @@ Begin!"""
|
|
|
556
565
|
content, func, args = cls._eval_gorilla_openfunctions_arguments(c, tools)
|
|
557
566
|
elif "chatglm3" == family:
|
|
558
567
|
content, func, args = cls._eval_chatglm3_arguments(c, tools)
|
|
559
|
-
elif "qwen-chat"
|
|
568
|
+
elif family in ["qwen-chat", "qwen1.5-chat"]:
|
|
560
569
|
content, func, args = cls._eval_qwen_chat_arguments(c, tools)
|
|
561
570
|
else:
|
|
562
571
|
raise Exception(
|
|
@@ -56,6 +56,7 @@ class VLLMModelConfig(TypedDict, total=False):
|
|
|
56
56
|
max_num_batched_tokens: int
|
|
57
57
|
max_num_seqs: int
|
|
58
58
|
quantization: Optional[str]
|
|
59
|
+
max_model_len: Optional[int]
|
|
59
60
|
|
|
60
61
|
|
|
61
62
|
class VLLMGenerateConfig(TypedDict, total=False):
|
|
@@ -98,6 +99,8 @@ VLLM_SUPPORTED_CHAT_MODELS = [
|
|
|
98
99
|
"mixtral-instruct-v0.1",
|
|
99
100
|
"chatglm3",
|
|
100
101
|
]
|
|
102
|
+
if VLLM_INSTALLED and vllm.__version__ >= "0.3.0":
|
|
103
|
+
VLLM_SUPPORTED_CHAT_MODELS.append("qwen1.5-chat")
|
|
101
104
|
|
|
102
105
|
|
|
103
106
|
class VLLMModel(LLM):
|
|
@@ -151,6 +154,7 @@ class VLLMModel(LLM):
|
|
|
151
154
|
model_config.setdefault("gpu_memory_utilization", 0.90)
|
|
152
155
|
model_config.setdefault("max_num_seqs", 256)
|
|
153
156
|
model_config.setdefault("quantization", None)
|
|
157
|
+
model_config.setdefault("max_model_len", 4096)
|
|
154
158
|
|
|
155
159
|
return model_config
|
|
156
160
|
|
xinference/model/rerank/core.py
CHANGED
|
@@ -14,28 +14,22 @@
|
|
|
14
14
|
|
|
15
15
|
import logging
|
|
16
16
|
import os
|
|
17
|
-
import shutil
|
|
18
17
|
import uuid
|
|
19
18
|
from collections import defaultdict
|
|
20
19
|
from typing import Dict, List, Optional, Tuple
|
|
21
20
|
|
|
22
21
|
import numpy as np
|
|
23
|
-
from pydantic import BaseModel
|
|
24
22
|
|
|
25
23
|
from ...constants import XINFERENCE_CACHE_DIR
|
|
26
24
|
from ...types import Document, DocumentObj, Rerank
|
|
27
|
-
from ..core import ModelDescription
|
|
28
|
-
from ..utils import is_model_cached
|
|
25
|
+
from ..core import CacheableModelSpec, ModelDescription
|
|
26
|
+
from ..utils import is_model_cached
|
|
29
27
|
|
|
30
28
|
logger = logging.getLogger(__name__)
|
|
31
29
|
|
|
32
30
|
# Used for check whether the model is cached.
|
|
33
31
|
# Init when registering all the builtin models.
|
|
34
32
|
MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
|
|
35
|
-
|
|
36
|
-
SUPPORTED_SCHEMES = ["s3"]
|
|
37
|
-
|
|
38
|
-
|
|
39
33
|
RERANK_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
|
|
40
34
|
|
|
41
35
|
|
|
@@ -45,7 +39,7 @@ def get_rerank_model_descriptions():
|
|
|
45
39
|
return copy.deepcopy(RERANK_MODEL_DESCRIPTIONS)
|
|
46
40
|
|
|
47
41
|
|
|
48
|
-
class RerankModelSpec(
|
|
42
|
+
class RerankModelSpec(CacheableModelSpec):
|
|
49
43
|
model_name: str
|
|
50
44
|
language: List[str]
|
|
51
45
|
model_id: str
|
|
@@ -180,135 +174,10 @@ def get_cache_status(
|
|
|
180
174
|
return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
|
|
181
175
|
|
|
182
176
|
|
|
183
|
-
def cache_from_uri(
|
|
184
|
-
model_spec: RerankModelSpec,
|
|
185
|
-
self_hosted_storage: bool = False,
|
|
186
|
-
) -> str:
|
|
187
|
-
from fsspec import AbstractFileSystem, filesystem
|
|
188
|
-
|
|
189
|
-
from ..utils import copy_from_src_to_dst, parse_uri
|
|
190
|
-
|
|
191
|
-
cache_dir = get_cache_dir(model_spec)
|
|
192
|
-
if os.path.exists(cache_dir):
|
|
193
|
-
logger.info(f"Rerank cache {cache_dir} exists")
|
|
194
|
-
return cache_dir
|
|
195
|
-
|
|
196
|
-
assert model_spec.model_uri is not None
|
|
197
|
-
src_scheme, src_root = parse_uri(model_spec.model_uri)
|
|
198
|
-
if src_root.endswith("/"):
|
|
199
|
-
# remove trailing path separator.
|
|
200
|
-
src_root = src_root[:-1]
|
|
201
|
-
|
|
202
|
-
if src_scheme == "file":
|
|
203
|
-
if not os.path.isabs(src_root):
|
|
204
|
-
raise ValueError(
|
|
205
|
-
f"Model URI cannot be a relative path: {model_spec.model_uri}"
|
|
206
|
-
)
|
|
207
|
-
os.makedirs(XINFERENCE_CACHE_DIR, exist_ok=True)
|
|
208
|
-
os.symlink(src_root, cache_dir, target_is_directory=True)
|
|
209
|
-
return cache_dir
|
|
210
|
-
elif src_scheme in SUPPORTED_SCHEMES:
|
|
211
|
-
# use anonymous connection for self-hosted storage.
|
|
212
|
-
src_fs: AbstractFileSystem = filesystem(src_scheme, anon=self_hosted_storage)
|
|
213
|
-
local_fs: AbstractFileSystem = filesystem("file")
|
|
214
|
-
|
|
215
|
-
files_to_download = []
|
|
216
|
-
os.makedirs(cache_dir, exist_ok=True)
|
|
217
|
-
|
|
218
|
-
for path, _, files in src_fs.walk(model_spec.model_uri):
|
|
219
|
-
for file in files:
|
|
220
|
-
src_path = f"{path}/{file}"
|
|
221
|
-
local_path = src_path.replace(src_root, cache_dir)
|
|
222
|
-
files_to_download.append((src_path, local_path))
|
|
223
|
-
|
|
224
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
225
|
-
|
|
226
|
-
failed = False
|
|
227
|
-
with ThreadPoolExecutor(max_workers=min(len(files_to_download), 4)) as executor:
|
|
228
|
-
futures = [
|
|
229
|
-
(
|
|
230
|
-
src_path,
|
|
231
|
-
executor.submit(
|
|
232
|
-
copy_from_src_to_dst, src_fs, src_path, local_fs, local_path
|
|
233
|
-
),
|
|
234
|
-
)
|
|
235
|
-
for src_path, local_path in files_to_download
|
|
236
|
-
]
|
|
237
|
-
for src_path, future in futures:
|
|
238
|
-
if failed:
|
|
239
|
-
future.cancel()
|
|
240
|
-
else:
|
|
241
|
-
try:
|
|
242
|
-
future.result()
|
|
243
|
-
except:
|
|
244
|
-
logger.error(f"Download {src_path} failed", exc_info=True)
|
|
245
|
-
failed = True
|
|
246
|
-
|
|
247
|
-
if failed:
|
|
248
|
-
logger.warning(f"Removing cache directory: {cache_dir}")
|
|
249
|
-
shutil.rmtree(cache_dir, ignore_errors=True)
|
|
250
|
-
raise RuntimeError(
|
|
251
|
-
f"Failed to download rerank model '{model_spec.model_name}' "
|
|
252
|
-
)
|
|
253
|
-
return cache_dir
|
|
254
|
-
else:
|
|
255
|
-
raise ValueError(f"Unsupported URL scheme: {src_scheme}")
|
|
256
|
-
|
|
257
|
-
|
|
258
177
|
def cache(model_spec: RerankModelSpec):
|
|
259
|
-
from
|
|
260
|
-
from modelscope.hub.snapshot_download import snapshot_download as ms_download
|
|
261
|
-
|
|
262
|
-
from ..utils import retry_download, symlink_local_file
|
|
263
|
-
|
|
264
|
-
if (
|
|
265
|
-
hasattr(model_spec, "model_uri")
|
|
266
|
-
and getattr(model_spec, "model_uri", None) is not None
|
|
267
|
-
):
|
|
268
|
-
logger.info(f"Rerank model caching from URI: {model_spec.model_uri}")
|
|
269
|
-
return cache_from_uri(model_spec=model_spec)
|
|
270
|
-
|
|
271
|
-
cache_dir = get_cache_dir(model_spec)
|
|
272
|
-
if not os.path.exists(cache_dir):
|
|
273
|
-
os.makedirs(cache_dir, exist_ok=True)
|
|
274
|
-
meta_path = os.path.join(cache_dir, "__valid_download")
|
|
275
|
-
if valid_model_revision(meta_path, model_spec.model_revision):
|
|
276
|
-
return cache_dir
|
|
277
|
-
|
|
278
|
-
if model_spec.model_hub == "modelscope":
|
|
279
|
-
logger.info(
|
|
280
|
-
f"Download {model_spec.model_name} from modelscope {model_spec.model_id}"
|
|
281
|
-
)
|
|
282
|
-
download_dir = retry_download(
|
|
283
|
-
ms_download,
|
|
284
|
-
model_spec.model_name,
|
|
285
|
-
None,
|
|
286
|
-
model_spec.model_id,
|
|
287
|
-
revision=model_spec.model_revision,
|
|
288
|
-
)
|
|
289
|
-
for subdir, dirs, files in os.walk(download_dir):
|
|
290
|
-
for file in files:
|
|
291
|
-
relpath = os.path.relpath(os.path.join(subdir, file), download_dir)
|
|
292
|
-
symlink_local_file(os.path.join(subdir, file), cache_dir, relpath)
|
|
293
|
-
else:
|
|
294
|
-
logger.info(
|
|
295
|
-
f"Download {model_spec.model_name} from huggingface {model_spec.model_id}"
|
|
296
|
-
)
|
|
297
|
-
retry_download(
|
|
298
|
-
hf_download,
|
|
299
|
-
model_spec.model_name,
|
|
300
|
-
None,
|
|
301
|
-
model_spec.model_id,
|
|
302
|
-
revision=model_spec.model_revision,
|
|
303
|
-
local_dir=cache_dir,
|
|
304
|
-
local_dir_use_symlinks=True,
|
|
305
|
-
)
|
|
306
|
-
with open(meta_path, "w") as f:
|
|
307
|
-
import json
|
|
178
|
+
from ..utils import cache
|
|
308
179
|
|
|
309
|
-
|
|
310
|
-
json.dump(desc.to_dict(), f)
|
|
311
|
-
return cache_dir
|
|
180
|
+
return cache(model_spec, RerankModelDescription)
|
|
312
181
|
|
|
313
182
|
|
|
314
183
|
def create_rerank_model_instance(
|