xinference 0.8.4__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic; see the registry's advisory page for more details.

Files changed (309)
  1. xinference/__init__.py +6 -0
  2. xinference/_compat.py +52 -0
  3. xinference/_version.py +3 -3
  4. xinference/api/oauth2/auth_service.py +2 -3
  5. xinference/api/oauth2/types.py +1 -1
  6. xinference/api/restful_api.py +176 -108
  7. xinference/client/restful/restful_client.py +10 -6
  8. xinference/core/model.py +3 -2
  9. xinference/core/resource.py +4 -2
  10. xinference/core/status_guard.py +2 -1
  11. xinference/core/supervisor.py +37 -12
  12. xinference/core/utils.py +2 -1
  13. xinference/core/worker.py +13 -13
  14. xinference/deploy/worker.py +7 -7
  15. xinference/device_utils.py +100 -0
  16. xinference/fields.py +1 -1
  17. xinference/model/audio/core.py +1 -2
  18. xinference/model/audio/whisper.py +20 -8
  19. xinference/model/core.py +9 -0
  20. xinference/model/embedding/core.py +5 -136
  21. xinference/model/image/__init__.py +13 -1
  22. xinference/model/image/core.py +22 -43
  23. xinference/model/image/model_spec_modelscope.json +94 -0
  24. xinference/model/image/stable_diffusion/core.py +3 -5
  25. xinference/model/llm/ggml/llamacpp.py +1 -1
  26. xinference/model/llm/llm_family.json +333 -3
  27. xinference/model/llm/llm_family.py +11 -5
  28. xinference/model/llm/llm_family_modelscope.json +311 -1
  29. xinference/model/llm/pytorch/compression.py +3 -1
  30. xinference/model/llm/pytorch/core.py +34 -15
  31. xinference/model/llm/pytorch/qwen_vl.py +5 -3
  32. xinference/model/llm/pytorch/spec_decoding_utils.py +3 -1
  33. xinference/model/llm/pytorch/spec_model.py +20 -17
  34. xinference/model/llm/pytorch/utils.py +4 -3
  35. xinference/model/llm/pytorch/yi_vl.py +9 -5
  36. xinference/model/llm/utils.py +10 -1
  37. xinference/model/llm/vllm/core.py +4 -0
  38. xinference/model/rerank/core.py +5 -136
  39. xinference/model/utils.py +143 -18
  40. xinference/thirdparty/llava/mm_utils.py +1 -1
  41. xinference/types.py +3 -3
  42. xinference/web/ui/build/asset-manifest.json +3 -3
  43. xinference/web/ui/build/index.html +1 -1
  44. xinference/web/ui/build/static/js/main.87d39ffb.js +3 -0
  45. xinference/web/ui/build/static/js/main.87d39ffb.js.map +1 -0
  46. xinference/web/ui/node_modules/.cache/babel-loader/{8d95a492a6b08c192e212189567b59b98282ca6f3ed52dd9bd053a6be7ff4e65.json → 027745bdb9bb9fe375f2eef7377e9f7ba82510ed90c05f9e5b34ba314bb93498.json} +1 -1
  47. xinference/web/ui/node_modules/.cache/babel-loader/03d5e7de9b1728d71cd8c483a35f4373eb66932022026142e03005d9de1a2a44.json +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/0738899eefad7f90261125823d87ea9f0d53667b1479a0c1f398aff14f2bbd2a.json +1 -0
  49. xinference/web/ui/node_modules/.cache/babel-loader/09a21ab449599121eadfdd12d6dba40c5a1c5b35dc90be17108eeff80daf79b8.json +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/09d043a8289da33d05768f5face63cc47e9ce9646d67f5589865db511574e8f9.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/0b901c8f077905bf153a1390b207277c31e893f83f9ced5d29a0a0c85a07b5eb.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/0da1ff7c9602609cfa8b82a67140114f3037130fe79583bbf23b69cef81823a6.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +1 -0
  54. xinference/web/ui/node_modules/.cache/babel-loader/0f87caef05125425c0ca418a32ce8a53e9becffd464411b2ff94e3c7a37c7b5b.json +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/{cb50b4d54ce9c249d580f7f1fc2df4beab4f13c4d16f486c89239809570de8fb.json → 0fc333c35ad49c0db7f18f77521984a36067cfce4206df50ce16a2fdc88be1c6.json} +1 -1
  56. xinference/web/ui/node_modules/.cache/babel-loader/13fdd92dd5e2fe11640cf6ec0368e3c098d3f0c43e11703ccf157e8c41410d5b.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/1464f950e5600fe2e5890186a1ae3750cdabbadd8525070cef6273450760bcf6.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/17c53dc44a324629efef425833ab48cc05243d089f4eb3c731da63568fcff35e.json +1 -0
  59. xinference/web/ui/node_modules/.cache/babel-loader/1946a52cb34e675da423de15b703437b4c841e4f5aed6ddd932a091c145e17b0.json +1 -0
  60. xinference/web/ui/node_modules/.cache/babel-loader/1e0da5ce4d8ccdbe33705a47bdc0d5d1d7e4e48c8e5c1d0b8c473e9f491930ac.json +1 -0
  61. xinference/web/ui/node_modules/.cache/babel-loader/1e3ec3302ef2253fad5e86bf429baf6b69e57e769f38ec1aaada87f3114b6a1f.json +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/1f18804827897c337874b289873df611b44869f86ced1d4351fbe0b45ed17518.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/21b64f7fdb74c2c35479811c3f5683ee161d3b64a4fade0cd1fc0044332c79b2.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/22873d21eab4b5a04b183cc3d2cbfafa431e0ce99649aeb900cffcfc33ecbca4.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/{7a5fecf96af770954035948b07dfadeceb33536b4763a2f9e548121ae555b75f.json → 228b78cd0950f1c967cb33b210b11497052a9998f0b1d1f31ecee0fd8b99fb40.json} +1 -1
  66. xinference/web/ui/node_modules/.cache/babel-loader/2328fc2360224ac3eef5c5261fc2a75cdce7fe58c210af8d3e532ec609464f5f.json +1 -0
  67. xinference/web/ui/node_modules/.cache/babel-loader/25fd89450dcf73a205a5f8c82dbc604bf872d574b748427c26ba79b8711ec502.json +1 -0
  68. xinference/web/ui/node_modules/.cache/babel-loader/{b6e95546cdd247e50d9c916dd3bef5216f96c32a267e54b51b23cc22631be0c4.json → 288d6573b09f0e2b0277e1f301bcb57a194c01f3983e3c1e03c8b2ea72653194.json} +1 -1
  69. xinference/web/ui/node_modules/.cache/babel-loader/2c04900a020b813e2e99b15748c3c3ef5a1627c33d5668e45bc6a798f0d28dae.json +1 -0
  70. xinference/web/ui/node_modules/.cache/babel-loader/2cd0fd50d8f213cdd94daffe7d0f997991072b11936ead7c48be2af3fdbd1fda.json +1 -0
  71. xinference/web/ui/node_modules/.cache/babel-loader/2e057c12322da07a537a576a3917d512ac6cb291be5aa29ce23d2679a4fd16b1.json +1 -0
  72. xinference/web/ui/node_modules/.cache/babel-loader/{abd0662d21fbe39e483fbc4d37faa9e24d93889b5b40d42e7e20773c66ee3894.json → 327c21db6124a5209f41996d0d3359b462d5baff3974296f5acfddfc532434e6.json} +1 -1
  73. xinference/web/ui/node_modules/.cache/babel-loader/33294bb0080595100e22c960929a6f3086f5ea632a4bcd9216069f95347a63a9.json +1 -0
  74. xinference/web/ui/node_modules/.cache/babel-loader/3361869e45eddd7d48ff7995e1a12ac454c9eb92d4c87f49ce80c1e9482c1e91.json +1 -0
  75. xinference/web/ui/node_modules/.cache/babel-loader/375d2f6a31e01c5a82fd0e18f42198f9fbb7117491f3e92d70cf36055f159a6e.json +1 -0
  76. xinference/web/ui/node_modules/.cache/babel-loader/3808217efee76345ba19af3ffdba045888e0908b0dc7fc38e54655cdc3504c60.json +1 -0
  77. xinference/web/ui/node_modules/.cache/babel-loader/39db740a99454006312dec9e4845f850c51e043458ca94eba8bb256521fabcd4.json +1 -0
  78. xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +1 -0
  79. xinference/web/ui/node_modules/.cache/babel-loader/3e737bcdbcbc407ccd65b90e199ef0c3214b261e8e41dbf14d921384a717d9ee.json +1 -0
  80. xinference/web/ui/node_modules/.cache/babel-loader/3e87c7d4f1e5c93603eed3e861945e44dd1e664061a89aaef1aa6435f4f4d7d0.json +1 -0
  81. xinference/web/ui/node_modules/.cache/babel-loader/3ecf70018028c8cf6cdc2d24d2b88f2707bd5c23a336fd3d82bd6a1af0db7ded.json +1 -0
  82. xinference/web/ui/node_modules/.cache/babel-loader/43314d1e9fed30ed6cf5cab3f5033da067e58be29e36448e3c0a8127403a9388.json +1 -0
  83. xinference/web/ui/node_modules/.cache/babel-loader/442467c09440131f7c25ddcdeb356d2a87783ad4e32d89f410a16cb6e7d83b22.json +1 -0
  84. xinference/web/ui/node_modules/.cache/babel-loader/487524f2b3494bfcfbd8c12918092a52ff2ce3eefe9bf0a557389ec1c74d3e67.json +1 -0
  85. xinference/web/ui/node_modules/.cache/babel-loader/4de4b0282a3d1ae02bec0bfe7d6658f039141406e8bfdbe86bf1dc5b115a2524.json +1 -0
  86. xinference/web/ui/node_modules/.cache/babel-loader/4ec72faab00e4cbffefdf43b987ae3d15beb60e282b9ca86dad37f4d0fc3f2bd.json +1 -0
  87. xinference/web/ui/node_modules/.cache/babel-loader/51403f1b70af0f4577d4594cbfcf9d9759997e5c49097e7a07219000d1be4ff1.json +1 -0
  88. xinference/web/ui/node_modules/.cache/babel-loader/5282ee05e064b3a80bc991e9003ddef6a4958471d8f4fc65589dc64553365cdd.json +1 -0
  89. xinference/web/ui/node_modules/.cache/babel-loader/566a0829bfeab41e40bbdca55ad056067e172508af4b333403d4fd334a8c8742.json +1 -0
  90. xinference/web/ui/node_modules/.cache/babel-loader/5689d9aee417a0834fed66720cf3b48c34cc26364cd9ab7e3d809997bb9faec9.json +1 -0
  91. xinference/web/ui/node_modules/.cache/babel-loader/5bedd27475bda204eafccdc636a56ba5765353391049a0b947234faaab3d021a.json +1 -0
  92. xinference/web/ui/node_modules/.cache/babel-loader/{94daea23b698bbb26bbbab69bbca8ad521c79fcbd6b495c84c740e4006a2ced2.json → 61014d40df4486972fb6d5909dff45f24bec48573af99ca8bcd79835385c7da0.json} +1 -1
  93. xinference/web/ui/node_modules/.cache/babel-loader/6139c80ec46d77469e9a89924de41d93050f19e9ed541e358f09cbf20ccd7c86.json +1 -0
  94. xinference/web/ui/node_modules/.cache/babel-loader/63ead4dee6a4f4ae47aed6918a2ab3e172739ecdffaa85e063f9a2c9165ff93c.json +1 -0
  95. xinference/web/ui/node_modules/.cache/babel-loader/6bf8c25e0f2051d994990bf73fdcb3e08963e11589842895593f199eab13ba9a.json +1 -0
  96. xinference/web/ui/node_modules/.cache/babel-loader/6d42f63cd673d1c60e534a174cd851e9aefd2d5d45a4aa6128d794457b64de53.json +1 -0
  97. xinference/web/ui/node_modules/.cache/babel-loader/7044e626a91582077b8cbad09bcf0cc118e92e11ebfa2ebc17622baf98aa56bf.json +1 -0
  98. xinference/web/ui/node_modules/.cache/babel-loader/71ec408030881aad69854815b8681185d0174cc0cf67f20fb2c3cd13c74fce07.json +1 -0
  99. xinference/web/ui/node_modules/.cache/babel-loader/7280614d5bddce524182024cc288953eb89d46c1dbe470ed5eb5ecab5de28385.json +1 -0
  100. xinference/web/ui/node_modules/.cache/babel-loader/7293ad8ba7b5fc6ed791aed44a766a396516c782f5d0e564b9ef0997645ec1dd.json +1 -0
  101. xinference/web/ui/node_modules/.cache/babel-loader/{65ca3ba225b8c8dac907210545b51f2fcdb2591f0feeb7195f1c037f2bc956a0.json → 77d4d795f078408fa2dd49da26d1ba1543d51b63cc253e736f4bef2e6014e888.json} +1 -1
  102. xinference/web/ui/node_modules/.cache/babel-loader/79be1e0dd6b1c3cbb3f08b349454f4791edaac451fa4d1f10eaf433e57ecf70f.json +1 -0
  103. xinference/web/ui/node_modules/.cache/babel-loader/7c0571c03aa8c90eb9452ba23133aed9b3b2a049910d4d8276588b438a4caaba.json +1 -0
  104. xinference/web/ui/node_modules/.cache/babel-loader/81ed094096cd9dc36f3ddebe61818766f43d3338c171f2269958f47265e60f24.json +1 -0
  105. xinference/web/ui/node_modules/.cache/babel-loader/83beb31daa7169fb0057453d4f86411f1effd3e3f7af97472cbd22accbfc65bb.json +1 -0
  106. xinference/web/ui/node_modules/.cache/babel-loader/86355e55059ddb15f71c0c651a2e622cee6f3141da3a018775ca00c74dd37dc2.json +1 -0
  107. xinference/web/ui/node_modules/.cache/babel-loader/{d1af6db565bae36218251c01a817c080ef764172dd0694e38d27b525a3ffd1d2.json → 8710fe63bcc6b95502287c36a7bd881b74ee2926982e120e1e8070a2c8a668fc.json} +1 -1
  108. xinference/web/ui/node_modules/.cache/babel-loader/8779104679d8f85017b551d1d108ea1ac701e184ad7bcc8a367526353a6c5175.json +1 -0
  109. xinference/web/ui/node_modules/.cache/babel-loader/881c573e6aa9954e7bd5e3bc5a6ef62c0db9b93815030080f677f6834e431434.json +1 -0
  110. xinference/web/ui/node_modules/.cache/babel-loader/8d24c51a0905b919436aac2bad441eb1651c46c4e7aa17c99eb63c7760ce6615.json +1 -0
  111. xinference/web/ui/node_modules/.cache/babel-loader/93bf5cc8cbe3f0b10f291800e1896f03506a8933cd490e07ee0ff94498f93bce.json +1 -0
  112. xinference/web/ui/node_modules/.cache/babel-loader/9401db0316ac34790aa5b972e9e1c50de58ef313b50840a37e08f0936a7197b6.json +1 -0
  113. xinference/web/ui/node_modules/.cache/babel-loader/98afeafdfc285a744d116fb5f189a16ad2b977adbd4fc0757f7ade1b29b8ad31.json +1 -0
  114. xinference/web/ui/node_modules/.cache/babel-loader/9a4e624d1ce66832fb213ead02745d84661d9bfcc21b62b4c37c680c3c3f5cbb.json +1 -0
  115. xinference/web/ui/node_modules/.cache/babel-loader/9a7b26f85224d33d431354ac8d7849388b7525fc014801824e705561ab2ac7a7.json +1 -0
  116. xinference/web/ui/node_modules/.cache/babel-loader/9dcb04ac6cb341233a2e98f2fb417d059c2ea6a56d2ff6f30bb27dae9bd6c756.json +1 -0
  117. xinference/web/ui/node_modules/.cache/babel-loader/9ebede8a4bccef016eef418a668b3212962779ea6d2531edb23d3dca81163496.json +1 -0
  118. xinference/web/ui/node_modules/.cache/babel-loader/a0efc8db4289116bdb312014df91fc781f66fe171d63027bbd5aae25cd54c325.json +1 -0
  119. xinference/web/ui/node_modules/.cache/babel-loader/a212defc32ca338ba673722c72815d30974275f1a82b67405a898f826fc8d782.json +1 -0
  120. xinference/web/ui/node_modules/.cache/babel-loader/a2685acab57fd6d874bf6069a587e3770b55c72cacc69cb167bb3dbd16a551c5.json +1 -0
  121. xinference/web/ui/node_modules/.cache/babel-loader/a3b7d6233e50dc3832421793a5752c1db99f3a214c9896c97bbd395d007f09b1.json +1 -0
  122. xinference/web/ui/node_modules/.cache/babel-loader/a3d0c810a255108613be0a175d53d673c4b6843c2a87dd0f157b4d3731af2092.json +1 -0
  123. xinference/web/ui/node_modules/.cache/babel-loader/a6633774053545c60721e345820e21cf3b089df219f7ee3fcceaf27af3a9e84b.json +1 -0
  124. xinference/web/ui/node_modules/.cache/babel-loader/a6bb917e4869d7d6ddf50b75932cad64e94b9e33db2aa40c4a489a045819bb36.json +1 -0
  125. xinference/web/ui/node_modules/.cache/babel-loader/a816031fafa564dc4af27383612e51ac29af51354f3bba750a70ac0c03b6bd30.json +1 -0
  126. xinference/web/ui/node_modules/.cache/babel-loader/af6349a1e9761cb98af4b76904440fb7f8d54288ee6ba37116f670226796e689.json +1 -0
  127. xinference/web/ui/node_modules/.cache/babel-loader/afce63eaad32a9968590e4500e123c81b385ea3b6046b2af1ec7ea8a49ba657b.json +1 -0
  128. xinference/web/ui/node_modules/.cache/babel-loader/aff24b61e5d55147d0a36ef1896b9bd35c995c357f44eeee1209f23db2118636.json +1 -0
  129. xinference/web/ui/node_modules/.cache/babel-loader/b01676c5ca97e3409a1b7bac91b6a797be3719a8b7ec72ad06c9beb296c67bb7.json +1 -0
  130. xinference/web/ui/node_modules/.cache/babel-loader/b0c5e967d1af90d545f1d34c9893a841296b0d1dd50de65eeaf19054995d2a94.json +1 -0
  131. xinference/web/ui/node_modules/.cache/babel-loader/b561d429cea6143e1bf7e6e7e690f9e6ce1dce4adfcacb57d7c5578195d1d190.json +1 -0
  132. xinference/web/ui/node_modules/.cache/babel-loader/b70187bb5aecf3bd7afbe8469b49e2a0a94a0c62727530d0dc2f84ee4577bcfc.json +1 -0
  133. xinference/web/ui/node_modules/.cache/babel-loader/bd21f6039e23775fb4631bb56f2eef1d132c03a60cc6fe4530569ecfeef456a3.json +1 -0
  134. xinference/web/ui/node_modules/.cache/babel-loader/be09b5508d0ca5ac0300144a682d4cde2d5b4b96ecb9fc690c1d5e7ee7411b3e.json +1 -0
  135. xinference/web/ui/node_modules/.cache/babel-loader/be8c9e83f0dd4af2e9bcf6844dd8c4445fe634b6c71da6debfa7acabb7a875c0.json +1 -0
  136. xinference/web/ui/node_modules/.cache/babel-loader/c0d1317dfae0c5ddf5a3ba9aef6bd2204f2daf9754f3270c4cab5c4632f78000.json +1 -0
  137. xinference/web/ui/node_modules/.cache/babel-loader/c1e3afdaae5649274f7d34910015b88bc02ba6a00b935e6597f9c670fedc05c0.json +1 -0
  138. xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +1 -0
  139. xinference/web/ui/node_modules/.cache/babel-loader/c29ec02243dd370616a489f7de67e4e479d2f7f12504ba7f77304cffe60befbc.json +1 -0
  140. xinference/web/ui/node_modules/.cache/babel-loader/c2a4d298149e6c66713eed4f56f4bbbfa4022a193891029b84e429baf59cc246.json +1 -0
  141. xinference/web/ui/node_modules/.cache/babel-loader/c3d2fc03602f7604a65e631e8595ce4cfabd8887343489b34a65e3a73c5546b3.json +1 -0
  142. xinference/web/ui/node_modules/.cache/babel-loader/c41078c8692e0635fb52d01bc5157528c7308ae5140c134aa07c1fcda4f7a5e5.json +1 -0
  143. xinference/web/ui/node_modules/.cache/babel-loader/c4b5f1e444ddd41f8cd30e618c185b16235e313ea34d944959628001a91e38a5.json +1 -0
  144. xinference/web/ui/node_modules/.cache/babel-loader/c7ed9bc3125498e475f3170b8126c93368e9cbfd6a5e73ee63437d90e39b69e5.json +1 -0
  145. xinference/web/ui/node_modules/.cache/babel-loader/c996b3dc884130f034a37551845201bc5661f2dcd282aed737329f8d4dd387b5.json +1 -0
  146. xinference/web/ui/node_modules/.cache/babel-loader/c9fef6e7f0a532c2d1cc25dffd496ea8355dd14ce0105a169b391bfeb9277e4f.json +1 -0
  147. xinference/web/ui/node_modules/.cache/babel-loader/cc8f5a2bbe4da9852ccdf64150115271baa2d282d74d7fab854dc5ca641dd519.json +1 -0
  148. xinference/web/ui/node_modules/.cache/babel-loader/ce45adea497443bfc1b6baabd0aa3d85888dd10b1b7fd009edf608e390b2a844.json +1 -0
  149. xinference/web/ui/node_modules/.cache/babel-loader/ce4b82a3855d18ba09e922f6e2cc1b8c20f0e5261b27303c52d4e1ba5a09a078.json +1 -0
  150. xinference/web/ui/node_modules/.cache/babel-loader/cf16614311e144768bdc064da754efad77597cac5efe6852be996b9eaaed695a.json +1 -0
  151. xinference/web/ui/node_modules/.cache/babel-loader/d074bfe592a195f339acbcd8743f8b64cfd09b45ca1013598269227fc092f2db.json +1 -0
  152. xinference/web/ui/node_modules/.cache/babel-loader/d2a95455cc492d4fbf2839a21eeaef00e7fad2aac93825d2443a38127ad02007.json +1 -0
  153. xinference/web/ui/node_modules/.cache/babel-loader/d4e6e712ebbbf0f30745093e2b6d235c13fb43d37b586a0109e87c19eafdf35d.json +1 -0
  154. xinference/web/ui/node_modules/.cache/babel-loader/d88a02264125b07881eecff752d9c8d718edcf2d87e5431b13e54809dc7bf4f2.json +1 -0
  155. xinference/web/ui/node_modules/.cache/babel-loader/d907925fb28912f2e98cccfe758bce680aa4f894cc9d2079d73d7e2d85481d47.json +1 -0
  156. xinference/web/ui/node_modules/.cache/babel-loader/daf050ec3a864b9831dcb32cee1fb2739dafb68b369e9108469ca1e6fd166867.json +1 -0
  157. xinference/web/ui/node_modules/.cache/babel-loader/de9645d55ce8744856862eae4a3d3a1266e9df9d575aa3e2f62a75a5d97444f7.json +1 -0
  158. xinference/web/ui/node_modules/.cache/babel-loader/e04199bdfb87226d6aeae387ef677eae06483f9f135b5a3f1360ec1a243c86cd.json +1 -0
  159. xinference/web/ui/node_modules/.cache/babel-loader/e97d1f31f54263bb90b7c3e71be0e19973d76d945fb5d526bf93fb449708bbf0.json +1 -0
  160. xinference/web/ui/node_modules/.cache/babel-loader/e9f87f5c370a731ee42098ae6ff38ca6ad256396158c03f84c5e770c6b76a43d.json +1 -0
  161. xinference/web/ui/node_modules/.cache/babel-loader/f2ad375dbc8779f54eb9c377fd951cd442f3766d5eb70d40613b952b811643da.json +1 -0
  162. xinference/web/ui/node_modules/.cache/babel-loader/f35df9d5a0e0341bcc0f39a462e46988c330cdb905cc690b08de4a9cf5771f28.json +1 -0
  163. xinference/web/ui/node_modules/.cache/babel-loader/f433e33369483f75d31fe937b8a597ef2d7cdfed339931643ff966a33dbc6158.json +1 -0
  164. xinference/web/ui/node_modules/.cache/babel-loader/f4c164c89cfbcb1d81bc0b60e3e7c8715e07c7a23118acf4e3e16715beac09c5.json +1 -0
  165. xinference/web/ui/node_modules/.cache/babel-loader/f545d937cfb9d974f4aaaa9dc1c12adfed0d4f608de7d0a7247452d9bbbb55a4.json +1 -0
  166. xinference/web/ui/node_modules/.cache/babel-loader/{3cc5ec39e76e1768ffb070a8ae18ee61b68f3ed3593178134d3edda5ed378c6c.json → f8d2c4372093d0ff3a2f78260e004e6c4e2b8ab4f5be9713ebc16c50e9c2ab4b.json} +1 -1
  167. xinference/web/ui/node_modules/.cache/babel-loader/fa41c6c8ffa1047b7f2f52b6532ea4d9f5754e5bbc11d1340bae7f75ae5ed84e.json +1 -0
  168. xinference/web/ui/node_modules/.package-lock.json +45 -45
  169. xinference/web/ui/node_modules/@babel/runtime/package.json +64 -37
  170. xinference/web/ui/node_modules/@mui/private-theming/package.json +7 -6
  171. xinference/web/ui/node_modules/@mui/styled-engine/package.json +6 -5
  172. xinference/web/ui/node_modules/@mui/system/node_modules/clsx/package.json +22 -10
  173. xinference/web/ui/node_modules/@mui/system/package.json +13 -12
  174. xinference/web/ui/node_modules/@mui/types/package.json +3 -2
  175. xinference/web/ui/node_modules/@mui/utils/ClassNameGenerator/package.json +2 -2
  176. xinference/web/ui/node_modules/@mui/utils/HTMLElementType/package.json +2 -2
  177. xinference/web/ui/node_modules/@mui/utils/capitalize/package.json +2 -2
  178. xinference/web/ui/node_modules/@mui/utils/chainPropTypes/package.json +2 -2
  179. xinference/web/ui/node_modules/@mui/utils/clamp/package.json +6 -0
  180. xinference/web/ui/node_modules/@mui/utils/composeClasses/package.json +2 -2
  181. xinference/web/ui/node_modules/@mui/utils/createChainedFunction/package.json +6 -0
  182. xinference/web/ui/node_modules/@mui/utils/debounce/package.json +2 -2
  183. xinference/web/ui/node_modules/@mui/utils/deepmerge/package.json +6 -0
  184. xinference/web/ui/node_modules/@mui/utils/deprecatedPropType/package.json +6 -0
  185. xinference/web/ui/node_modules/@mui/utils/elementAcceptingRef/package.json +6 -0
  186. xinference/web/ui/node_modules/@mui/utils/elementTypeAcceptingRef/package.json +6 -0
  187. xinference/web/ui/node_modules/@mui/utils/exactProp/package.json +2 -2
  188. xinference/web/ui/node_modules/@mui/utils/formatMuiErrorMessage/package.json +6 -0
  189. xinference/web/ui/node_modules/@mui/utils/generateUtilityClass/package.json +2 -2
  190. xinference/web/ui/node_modules/@mui/utils/generateUtilityClasses/package.json +2 -2
  191. xinference/web/ui/node_modules/@mui/utils/getDisplayName/package.json +6 -0
  192. xinference/web/ui/node_modules/@mui/utils/getScrollbarSize/package.json +6 -0
  193. xinference/web/ui/node_modules/@mui/utils/getValidReactChildren/package.json +6 -0
  194. xinference/web/ui/node_modules/@mui/utils/integerPropType/package.json +6 -0
  195. xinference/web/ui/node_modules/@mui/utils/isMuiElement/package.json +6 -0
  196. xinference/web/ui/node_modules/@mui/utils/ownerDocument/package.json +2 -2
  197. xinference/web/ui/node_modules/@mui/utils/ownerWindow/package.json +2 -2
  198. xinference/web/ui/node_modules/@mui/utils/package.json +8 -7
  199. xinference/web/ui/node_modules/@mui/utils/ponyfillGlobal/package.json +2 -2
  200. xinference/web/ui/node_modules/@mui/utils/refType/package.json +6 -0
  201. xinference/web/ui/node_modules/@mui/utils/requirePropFactory/package.json +6 -0
  202. xinference/web/ui/node_modules/@mui/utils/resolveProps/package.json +6 -0
  203. xinference/web/ui/node_modules/@mui/utils/scrollLeft/package.json +6 -0
  204. xinference/web/ui/node_modules/@mui/utils/setRef/package.json +6 -0
  205. xinference/web/ui/node_modules/@mui/utils/unsupportedProp/package.json +6 -0
  206. xinference/web/ui/node_modules/@mui/utils/useControlled/package.json +2 -2
  207. xinference/web/ui/node_modules/@mui/utils/useEnhancedEffect/package.json +2 -2
  208. xinference/web/ui/node_modules/@mui/utils/useEventCallback/package.json +2 -2
  209. xinference/web/ui/node_modules/@mui/utils/useForkRef/package.json +2 -2
  210. xinference/web/ui/node_modules/@mui/utils/useId/package.json +2 -2
  211. xinference/web/ui/node_modules/@mui/utils/useIsFocusVisible/package.json +6 -0
  212. xinference/web/ui/node_modules/@mui/utils/useLazyRef/package.json +6 -0
  213. xinference/web/ui/node_modules/@mui/utils/useOnMount/package.json +6 -0
  214. xinference/web/ui/node_modules/@mui/utils/usePreviousProps/package.json +6 -0
  215. xinference/web/ui/node_modules/@mui/utils/useTimeout/package.json +6 -0
  216. xinference/web/ui/node_modules/@mui/utils/visuallyHidden/package.json +6 -0
  217. xinference/web/ui/node_modules/@types/prop-types/package.json +2 -2
  218. xinference/web/ui/node_modules/csstype/package.json +3 -3
  219. xinference/web/ui/package-lock.json +47 -45
  220. xinference/web/ui/package.json +2 -0
  221. {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/METADATA +6 -3
  222. {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/RECORD +227 -167
  223. xinference/web/ui/build/static/js/main.476e35cc.js +0 -3
  224. xinference/web/ui/build/static/js/main.476e35cc.js.map +0 -1
  225. xinference/web/ui/node_modules/.cache/babel-loader/02f54fcadc6fe9b1be8451435f9a097c82b3d665bae2d2df03ba0b36cebeca0c.json +0 -1
  226. xinference/web/ui/node_modules/.cache/babel-loader/051391807ffd255b652b63dc2ff12e5ce80113e1b712c81b6c1986a11cd85d3c.json +0 -1
  227. xinference/web/ui/node_modules/.cache/babel-loader/05710e94caaf5a2da790d67aa572bf6bc19bae0237668c8d97f4da62ab52fe5c.json +0 -1
  228. xinference/web/ui/node_modules/.cache/babel-loader/08a3470840a55d359a967d0763cd1ae5c1f15f3ca31118e29b37b4fbbdb248c7.json +0 -1
  229. xinference/web/ui/node_modules/.cache/babel-loader/0e68b0e268559a563dde6fa4c654922d98c74f5140854437f3e5a01bf23ba41f.json +0 -1
  230. xinference/web/ui/node_modules/.cache/babel-loader/10dc7661741dfc88df5538bbf835870ae47b1a18f36c2f7b9650651e7304ce72.json +0 -1
  231. xinference/web/ui/node_modules/.cache/babel-loader/18b3d28a2692ab4f2d05254221f53b3c13259435bd98cb90ff5d7250d93f11e0.json +0 -1
  232. xinference/web/ui/node_modules/.cache/babel-loader/1b612ab687e3a4e14db156bd92a75b8a3e96ff04bfbf8f3351c7e801bc61cf90.json +0 -1
  233. xinference/web/ui/node_modules/.cache/babel-loader/1bb839ffeb39ed12367c179fbf3b784e20f6f98fb87c597717bc11ebe3201c59.json +0 -1
  234. xinference/web/ui/node_modules/.cache/babel-loader/1d12e0b0e4e87372b18fef72fcd6e264fa40b0cffdbeddcef004becc4e32cf27.json +0 -1
  235. xinference/web/ui/node_modules/.cache/babel-loader/1d5b806c08ffb55539ee26b5fa9746a373f602354d6e55a0d4379d7e2f903868.json +0 -1
  236. xinference/web/ui/node_modules/.cache/babel-loader/1ecb7ce2bc103bec0d1a9f0cb07132e5ec7731d974998c465798c0e15cafb924.json +0 -1
  237. xinference/web/ui/node_modules/.cache/babel-loader/20cfc57ae2b8ba23571f386a5f2bf194bd02e8a3913113881bb9804ee822ad58.json +0 -1
  238. xinference/web/ui/node_modules/.cache/babel-loader/222a437b6773707bbead06ba5db1ed8869e44d9f051a7f6b1a745aeb654679d0.json +0 -1
  239. xinference/web/ui/node_modules/.cache/babel-loader/249709fb20c1254cecc9ba6666e6bcf37b21e47e7a020d5a7f0513680b49473f.json +0 -1
  240. xinference/web/ui/node_modules/.cache/babel-loader/257ad4979294ee3a7ef400feab3b29ebcd259c47b9410a3011a66f4b33a97904.json +0 -1
  241. xinference/web/ui/node_modules/.cache/babel-loader/2779bdc8ed8372f4015d07a45e4b09756bd953250944c3e3efe6b65bf8eaecd8.json +0 -1
  242. xinference/web/ui/node_modules/.cache/babel-loader/2bee7b8bd3d52976a45d6068e1333df88b943e0e679403c809e45382e3818037.json +0 -1
  243. xinference/web/ui/node_modules/.cache/babel-loader/31c71a85b2d0bbdcdc027e7950a9d1f778288f98888d8595fad5e4fdda5bb31f.json +0 -1
  244. xinference/web/ui/node_modules/.cache/babel-loader/371214b6649e323e7281885b1412c5a515ccc2274f4e1a42921e45777694009c.json +0 -1
  245. xinference/web/ui/node_modules/.cache/babel-loader/3eced030d2b3da645f4cbe3dc4ff49744312316e4306ce170991e802e9bfcf8e.json +0 -1
  246. xinference/web/ui/node_modules/.cache/babel-loader/452ae8e7cff99efc35d134014d32610d13217b031b817d3663bb689b3778ebd2.json +0 -1
  247. xinference/web/ui/node_modules/.cache/babel-loader/48e3f5d01ccedde86348a29392bc9b6e301a51b3f7e2476ae0a95a31eb6bf18d.json +0 -1
  248. xinference/web/ui/node_modules/.cache/babel-loader/4a311b5bd98f16588caf60b8de4c8d91d038198e6bfd496f880f60d4c1ea076d.json +0 -1
  249. xinference/web/ui/node_modules/.cache/babel-loader/4ccd71d3f73490a18a5a4de4ad82d63cf019caff6f8d881fd1434755a033bfc5.json +0 -1
  250. xinference/web/ui/node_modules/.cache/babel-loader/4ee8343d4254c95e685d49601f1649e8222d7423c2a822e1d01c8fc4cd3582a2.json +0 -1
  251. xinference/web/ui/node_modules/.cache/babel-loader/523a712748fbb7abf0c5423df62e65c4659b7e450182c890e847cd78ace7cac5.json +0 -1
  252. xinference/web/ui/node_modules/.cache/babel-loader/5777049bbacf7322891761a5b959e62eec22b0f96a70dbf97e58d530c6258a55.json +0 -1
  253. xinference/web/ui/node_modules/.cache/babel-loader/5905957ce95f7dee64f0739359e990b8b518cff088c6958487641a3d21737d1e.json +0 -1
  254. xinference/web/ui/node_modules/.cache/babel-loader/5ac79df79d2dff64da97b02d5c90b36bc0d44a4a4229f6efed2993f93ee1d7a1.json +0 -1
  255. xinference/web/ui/node_modules/.cache/babel-loader/64d3ef38fd394979b09061e67fc47f04b7295401677825c64c72ea2f44cf910c.json +0 -1
  256. xinference/web/ui/node_modules/.cache/babel-loader/68a5d8e66687770e05aebcae9a8047189dc577d6150fbf0e62d2b2c11c4335c5.json +0 -1
  257. xinference/web/ui/node_modules/.cache/babel-loader/69d5f27e40d5e7040c8e8e303ccaee019b54092345acc3838d6eb943c474ce39.json +0 -1
  258. xinference/web/ui/node_modules/.cache/babel-loader/6a07b8d00482c07a4b7c73f6bebb79fad54bbea35ba358681ede6ff299a09f6b.json +0 -1
  259. xinference/web/ui/node_modules/.cache/babel-loader/70b0307a43b9f8c876bf2a1f7dea4cc56cc0ea33743e863c0b3983bc48109f2d.json +0 -1
  260. xinference/web/ui/node_modules/.cache/babel-loader/710c39a26816820c2cfd00b19cd0112851d64b8ac6b201b18727b43b5a69d92c.json +0 -1
  261. xinference/web/ui/node_modules/.cache/babel-loader/722f4c03da79795d1fb738323f3eb6794ab18711e20f141c0b678c5d4cb3dce9.json +0 -1
  262. xinference/web/ui/node_modules/.cache/babel-loader/8237bdfff695d71496ee2ff83ba0392d753f8e1dff0d002a1b2c9e249f03083c.json +0 -1
  263. xinference/web/ui/node_modules/.cache/babel-loader/8342e3800fed63e8e805850810aee4fd9b0558aca10c8b6b770fe5bff1bed266.json +0 -1
  264. xinference/web/ui/node_modules/.cache/babel-loader/847a71f54f7a059efc71f70c297b6b4182a633130715c80f817eb533de108c12.json +0 -1
  265. xinference/web/ui/node_modules/.cache/babel-loader/8731a959d9b7278ba7a1c044bb4253bfff63079da69b21ff80221ae5e037dbbe.json +0 -1
  266. xinference/web/ui/node_modules/.cache/babel-loader/87b4e94b045df561c8f4bd253c84447c96e0dededbcb6646c0cd180119960990.json +0 -1
  267. xinference/web/ui/node_modules/.cache/babel-loader/894a334446bac96d74385c23c9d6f2d117ee29511897700430cd0e578cb32527.json +0 -1
  268. xinference/web/ui/node_modules/.cache/babel-loader/8c5ba88b620eeedbb99db18a610138353a5a657170fdb8bd6969585f81b34f74.json +0 -1
  269. xinference/web/ui/node_modules/.cache/babel-loader/9004bc083cf179941e64130390a27afcf7593b21dd64a87d03e1e6489032aa13.json +0 -1
  270. xinference/web/ui/node_modules/.cache/babel-loader/9b84631723b52988d3748d80b994e43d1944ca55098a5d908c9061eeb4c84f90.json +0 -1
  271. xinference/web/ui/node_modules/.cache/babel-loader/9ee1f84bde10cd78ca23a4681c7372195748bacd5e41c3b1df8e39d960294d81.json +0 -1
  272. xinference/web/ui/node_modules/.cache/babel-loader/a127f13651a7398f5fc378456a52cfa0c6260f6109611d3fb012aa7c7a4ee17a.json +0 -1
  273. xinference/web/ui/node_modules/.cache/babel-loader/a5e2e9f707eb7039bea096ca117d996b8f9cbc2a5613fd8e0c5b0094444ce23c.json +0 -1
  274. xinference/web/ui/node_modules/.cache/babel-loader/a6131b7fb1b30fd63d798845dab508a22b21c616b361e6214bf711915c5fef3d.json +0 -1
  275. xinference/web/ui/node_modules/.cache/babel-loader/a7b8ccb199158c897ed7d7835942eb37569221be4101eecfa7a7ce62e0c76f1f.json +0 -1
  276. xinference/web/ui/node_modules/.cache/babel-loader/acaf6dbddd57b63a874e9062803850ce4911c8dc63a362eacdc4d2d0f48f8c16.json +0 -1
  277. xinference/web/ui/node_modules/.cache/babel-loader/ade3cd82af85d2d28138bd942228b01e27559a5a4072ee049dcb47612ee9a506.json +0 -1
  278. xinference/web/ui/node_modules/.cache/babel-loader/aea82c71088dc9afd6f4ba71434286af5c6ab5e9b87088b0e84f351146d06489.json +0 -1
  279. xinference/web/ui/node_modules/.cache/babel-loader/afbd547078b20bb14596f20e165cb40413147e3de501b126c97bb417f8b9a53e.json +0 -1
  280. xinference/web/ui/node_modules/.cache/babel-loader/afc597494ebc62d22e4d3ac2a839bd5f30d5e47ce1d0576e0f62407ef6c78652.json +0 -1
  281. xinference/web/ui/node_modules/.cache/babel-loader/affbbac58777e26ba8b6dcda8cea72c76975bd0207f0a510bfde5983d3081f74.json +0 -1
  282. xinference/web/ui/node_modules/.cache/babel-loader/b0aa2ece62a1f5fa015f9e6b59d20b79be727fb5a3f4fc747200163097f320ad.json +0 -1
  283. xinference/web/ui/node_modules/.cache/babel-loader/b10391d3ae53452c8ec97d230f064e03a8377aab2be4b1b3e30cb3e054b2341a.json +0 -1
  284. xinference/web/ui/node_modules/.cache/babel-loader/b4bab0c147360f3e335251166d3d25602e2e0e160fb3734ac019a5b42499179c.json +0 -1
  285. xinference/web/ui/node_modules/.cache/babel-loader/b57dcff363e045fd11ea2bcbc58cce470f1bc22d3ac14c8acbf63a36637050ff.json +0 -1
  286. xinference/web/ui/node_modules/.cache/babel-loader/b80db1012318b97c329c4e3e72454f7512fb107e57c444b437dbe4ba1a3faa5a.json +0 -1
  287. xinference/web/ui/node_modules/.cache/babel-loader/b8de42f7f5dc1df9e802898a0ebb35adb57d269d7af21e6c66a0fde0ff226a08.json +0 -1
  288. xinference/web/ui/node_modules/.cache/babel-loader/be696cadea3bf577c0defc8f8d100232369763dde754b5ba9399f45f42aed215.json +0 -1
  289. xinference/web/ui/node_modules/.cache/babel-loader/c5a30d7c6cb484268e9126dd6d078229a1dcaa07fb064077b87d15224c61a3aa.json +0 -1
  290. xinference/web/ui/node_modules/.cache/babel-loader/c67e2077e70c4751e9e1dd98f378d979abd9c8a39fbf6c69f02fb4e5b04eef87.json +0 -1
  291. xinference/web/ui/node_modules/.cache/babel-loader/c81c1cad35be60a4fad090374edf50a884540e306d464b803120554df61e817e.json +0 -1
  292. xinference/web/ui/node_modules/.cache/babel-loader/d1054fd4d09666318a00ca0aea5ff42a2fdb17b3a42b5f77b93433d7408eb2f4.json +0 -1
  293. xinference/web/ui/node_modules/.cache/babel-loader/d1ec524ce17a48816fc55fe665572c1ef4982a4bd1ecb01badd0fdffc465242c.json +0 -1
  294. xinference/web/ui/node_modules/.cache/babel-loader/d4d61a537c45c4506a6b7e3478206b71dc96e0c6c6410cef9b5d2f47018bc578.json +0 -1
  295. xinference/web/ui/node_modules/.cache/babel-loader/e2e563b606cac9d51661cf68ba1729eb5df7861af521adccedc1b163439b657d.json +0 -1
  296. xinference/web/ui/node_modules/.cache/babel-loader/e3890e804dfaeb19b79f52e1b20198b587da61a9a3a1f55417d2cc5ce4ad7dae.json +0 -1
  297. xinference/web/ui/node_modules/.cache/babel-loader/e8c3fadbd12f0e2c7b70d84acbcabd0a5c96b05380a7b3167b00e8739d829d24.json +0 -1
  298. xinference/web/ui/node_modules/.cache/babel-loader/ebb5ae1a82b33b94e2b0cfec599bec7500aff0a1701f91736a6f3a4f77617560.json +0 -1
  299. xinference/web/ui/node_modules/.cache/babel-loader/f30f0396f4e54f0355112d5eba4b16c3a56f47e456eb117a9dc7fa6218ce21fe.json +0 -1
  300. xinference/web/ui/node_modules/.cache/babel-loader/f318ac15e999a553c00832029245730fe25c2916860b0ff55eee1f0ab926d84e.json +0 -1
  301. xinference/web/ui/node_modules/.cache/babel-loader/f55f4e657cc2d797b7e73a64ce40352f9a5dde31763a3e89c14d3b983307f30e.json +0 -1
  302. xinference/web/ui/node_modules/.cache/babel-loader/f8a68931d652a5e3be4e1ae06210a223de1239f3884ad858cecf9fe8d11c2ffe.json +0 -1
  303. xinference/web/ui/node_modules/.cache/babel-loader/f95a8bd358eeb55fa2f49f1224cc2f4f36006359856744ff09ae4bb295f59ec1.json +0 -1
  304. xinference/web/ui/node_modules/.cache/babel-loader/fb6fbd693a53d3c542d9adf83e0bb458918d39bf101eaf6d6662a2fbc35c1adc.json +0 -1
  305. /xinference/web/ui/build/static/js/{main.476e35cc.js.LICENSE.txt → main.87d39ffb.js.LICENSE.txt} +0 -0
  306. {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/LICENSE +0 -0
  307. {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/WHEEL +0 -0
  308. {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/entry_points.txt +0 -0
  309. {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/top_level.txt +0 -0
@@ -25,6 +25,8 @@ from torch.nn import functional as F
25
25
  from tqdm import tqdm
26
26
  from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
27
27
 
28
+ from ....device_utils import empty_cache
29
+
28
30
 
29
31
  @dataclasses.dataclass
30
32
  class CompressionConfig:
@@ -153,7 +155,7 @@ def load_compress_model(
153
155
  tmp_state_dict[name] = None
154
156
  tensor = None
155
157
  gc.collect()
156
- torch.cuda.empty_cache()
158
+ empty_cache()
157
159
 
158
160
  for name in model.state_dict():
159
161
  if name not in linear_weights:
@@ -12,10 +12,16 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import json
15
16
  import logging
16
17
  import os
17
18
  from typing import Iterable, Iterator, List, Optional, Union
18
19
 
20
+ from ....device_utils import (
21
+ get_device_preferred_dtype,
22
+ gpu_count,
23
+ is_hf_accelerate_supported,
24
+ )
19
25
  from ....types import (
20
26
  ChatCompletion,
21
27
  ChatCompletionChunk,
@@ -115,23 +121,18 @@ class PytorchModel(LLM):
115
121
  )
116
122
  from .compression import load_compress_model
117
123
 
118
- cuda_visible_devices_env = os.getenv("CUDA_VISIBLE_DEVICES", None)
119
- cuda_visible_devices = (
120
- cuda_visible_devices_env.split(",") if cuda_visible_devices_env else []
121
- )
122
-
123
124
  quantization = self.quantization
124
- num_gpus = len(cuda_visible_devices) if cuda_visible_devices_env != "-1" else 0
125
+ num_gpus = gpu_count()
125
126
  device = self._pytorch_model_config.get("device", "auto")
126
127
  self._pytorch_model_config["device"] = select_device(device)
127
128
  self._device = self._pytorch_model_config["device"]
128
129
 
129
- if self._device == "cpu":
130
- kwargs = {"torch_dtype": torch.float32}
131
- elif self._device == "cuda":
132
- kwargs = {"torch_dtype": torch.float16}
133
- elif self._device == "mps":
134
- kwargs = {"torch_dtype": torch.float16}
130
+ kwargs = {}
131
+
132
+ dtype = get_device_preferred_dtype(self._device)
133
+
134
+ if dtype is not None:
135
+ kwargs["torch_dtype"] = dtype
135
136
  else:
136
137
  raise ValueError(f"Device {self._device} is not supported in temporary")
137
138
 
@@ -142,9 +143,25 @@ class PytorchModel(LLM):
142
143
  "trust_remote_code"
143
144
  )
144
145
  model_format = self.model_spec.model_format
146
+
147
+ is_device_map_auto = False
148
+
149
+ # This is required for Intel GPU to actually work with accelerate device_map until
150
+ # https://github.com/intel/intel-extension-for-pytorch/issues/522
151
+ # is resolved
152
+ max_memory_env = os.getenv("ACCELERATE_MAX_MEMORY", None)
153
+
154
+ if max_memory_env is not None:
155
+ max_memory_raw = json.loads(max_memory_env)
156
+ max_memory = {
157
+ int(k) if k.isdigit() else k: max_memory_raw[k] for k in max_memory_raw
158
+ }
159
+ kwargs["max_memory"] = max_memory
160
+
145
161
  if quantization != "none" and model_format == "pytorch":
146
162
  if self._device == "cuda" and self._is_linux():
147
163
  kwargs["device_map"] = "auto"
164
+ is_device_map_auto = True
148
165
  if quantization == "4-bit":
149
166
  kwargs["load_in_4bit"] = True
150
167
  kwargs["bnb_4bit_compute_dtype"] = torch.float16
@@ -178,11 +195,13 @@ class PytorchModel(LLM):
178
195
  logger.debug(f"Model Memory: {self._model.get_memory_footprint()}")
179
196
  return
180
197
 
181
- if num_gpus > 0 and self._device == "cuda":
198
+ if num_gpus > 0 and is_hf_accelerate_supported(self._device):
182
199
  kwargs.update({"device_map": "auto"})
200
+ is_device_map_auto = True
201
+
183
202
  self._model, self._tokenizer = self._load_model(**kwargs)
184
203
 
185
- if self._device == "mps":
204
+ if not is_device_map_auto:
186
205
  self._model.to(self._device)
187
206
  logger.debug(f"Model Memory: {self._model.get_memory_footprint()}")
188
207
 
@@ -448,7 +467,7 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
448
467
  generate_config = self._sanitize_generate_config(generate_config)
449
468
  # TODO(codingl2k1): qwen hacky to set stop for function call.
450
469
  model_family = self.model_family.model_family or self.model_family.model_name
451
- if tools and "qwen-chat" == model_family:
470
+ if tools and model_family in ["qwen-chat", "qwen1.5-chat"]:
452
471
  stop = generate_config.get("stop")
453
472
  if isinstance(stop, str):
454
473
  generate_config["stop"] = [stop, "Observation:"]
@@ -95,9 +95,11 @@ class QwenVLChatModel(PytorchChatModel):
95
95
  if not isinstance(content, str):
96
96
  # TODO(codingl2k1): Optimize _ensure_url
97
97
  content = [
98
- {"image": _ensure_url(c["image_url"]["url"]), "type": "image"}
99
- if c.get("type") == "image_url"
100
- else c
98
+ (
99
+ {"image": _ensure_url(c["image_url"]["url"]), "type": "image"}
100
+ if c.get("type") == "image_url"
101
+ else c
102
+ )
101
103
  for c in content
102
104
  ]
103
105
  content = sorted(content, key=operator.itemgetter("type"))
@@ -17,6 +17,8 @@ import time
17
17
  import uuid
18
18
  from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple
19
19
 
20
+ from ....device_utils import empty_cache
21
+
20
22
  try:
21
23
  import torch
22
24
  from torch.nn import functional as F
@@ -526,4 +528,4 @@ def speculative_generate_stream(
526
528
  del kv_cache
527
529
  del draft_kv_cache
528
530
  gc.collect()
529
- torch.cuda.empty_cache()
531
+ empty_cache()
@@ -13,9 +13,13 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import logging
16
- import os
17
16
  from typing import Iterator, List, Optional, Union
18
17
 
18
+ from ....device_utils import (
19
+ get_device_preferred_dtype,
20
+ gpu_count,
21
+ is_hf_accelerate_supported,
22
+ )
19
23
  from ....types import Completion, CompletionChunk, Embedding
20
24
  from ...utils import select_device
21
25
  from .. import LLMFamilyV1, LLMSpecV1
@@ -73,30 +77,26 @@ class SpeculativeModel(PytorchChatModel):
73
77
 
74
78
  def load(self):
75
79
  try:
76
- import torch
80
+ import torch # noqa: F401
77
81
  except ImportError:
78
82
  raise ImportError(
79
83
  f"Failed to import module 'torch'. Please make sure 'torch' is installed.\n\n"
80
84
  )
81
85
 
82
- cuda_visible_devices_env = os.getenv("CUDA_VISIBLE_DEVICES", None)
83
- cuda_visible_devices = (
84
- cuda_visible_devices_env.split(",") if cuda_visible_devices_env else []
85
- )
86
-
87
- num_gpus = len(cuda_visible_devices) if cuda_visible_devices_env != "-1" else 0
86
+ num_gpus = gpu_count()
88
87
  device = self._pytorch_model_config.get("device", "auto")
89
88
  self._pytorch_model_config["device"] = select_device(device)
90
89
  self._device = self._pytorch_model_config["device"]
91
90
 
92
- if self._device == "cpu":
93
- kwargs = {"torch_dtype": torch.float32}
94
- elif self._device == "cuda":
95
- kwargs = {"torch_dtype": torch.float16}
96
- elif self._device == "mps":
97
- kwargs = {"torch_dtype": torch.float16}
91
+ kwargs = {}
92
+
93
+ dtype = get_device_preferred_dtype(self._device)
94
+
95
+ if dtype is not None:
96
+ kwargs["torch_dtype"] = dtype
98
97
  else:
99
98
  raise ValueError(f"Device {self._device} is not supported in temporary")
99
+
100
100
  kwargs["trust_remote_code"] = self._pytorch_model_config.get(
101
101
  "trust_remote_code"
102
102
  )
@@ -106,15 +106,18 @@ class SpeculativeModel(PytorchChatModel):
106
106
  "Quantization is not supported by speculative decoding yet"
107
107
  )
108
108
 
109
- if num_gpus > 0 and self._device == "cuda":
109
+ is_device_map_auto = False
110
+
111
+ if num_gpus > 0 and is_hf_accelerate_supported(self._device):
110
112
  kwargs.update({"device_map": "auto"})
113
+ is_device_map_auto = True
111
114
 
112
115
  self._model, self._tokenizer = self._load_model(
113
116
  model_path=self.model_path,
114
117
  revision=self.model_spec.model_revision,
115
118
  **kwargs,
116
119
  )
117
- if self._device == "mps":
120
+ if not is_device_map_auto:
118
121
  self._model.to(self._device)
119
122
  logger.debug(
120
123
  f"Model {self.model_uid} memory footprint: {self._model.get_memory_footprint()}"
@@ -125,7 +128,7 @@ class SpeculativeModel(PytorchChatModel):
125
128
  revision=self._draft_model_spec.model_revision,
126
129
  **kwargs,
127
130
  )
128
- if self._device == "mps":
131
+ if not is_device_map_auto:
129
132
  self._model.to(self._device)
130
133
  logger.debug(
131
134
  f"Draft model {self.model_uid} memory footprint: {self._model.get_memory_footprint()}"
@@ -29,6 +29,7 @@ from transformers.generation.logits_process import (
29
29
  TopPLogitsWarper,
30
30
  )
31
31
 
32
+ from ....device_utils import empty_cache
32
33
  from ....types import (
33
34
  CompletionChoice,
34
35
  CompletionChunk,
@@ -122,7 +123,7 @@ def generate_stream(
122
123
  temperature, repetition_penalty, top_p, top_k
123
124
  )
124
125
 
125
- if "qwen" in str(type(model)).lower():
126
+ if ".modeling_qwen." in str(type(model)).lower():
126
127
  # TODO: hacky
127
128
  input_ids = tokenizer(prompt, allowed_special="all").input_ids
128
129
  else:
@@ -335,7 +336,7 @@ def generate_stream(
335
336
  # clean
336
337
  del past_key_values, out
337
338
  gc.collect()
338
- torch.cuda.empty_cache()
339
+ empty_cache()
339
340
 
340
341
 
341
342
  @torch.inference_mode()
@@ -489,4 +490,4 @@ def generate_stream_falcon(
489
490
 
490
491
  # clean
491
492
  gc.collect()
492
- torch.cuda.empty_cache()
493
+ empty_cache()
@@ -57,16 +57,18 @@ class YiVLChatModel(PytorchChatModel):
57
57
  from ....thirdparty.llava.mm_utils import load_pretrained_model
58
58
  from ....thirdparty.llava.model.constants import key_info
59
59
 
60
- device = self._pytorch_model_config.get("device", "auto")
61
- device = select_device(device)
60
+ self._device = self._pytorch_model_config.get("device", "auto")
61
+ self._device = select_device(self._device)
62
62
 
63
63
  key_info["model_path"] = self.model_path
64
+ # Default device_map is auto, it can loads model to multiple cards.
65
+ # If the device_map is set to cuda, then only 1 card can be used.
64
66
  (
65
67
  self._tokenizer,
66
68
  self._model,
67
69
  self._image_processor,
68
70
  _,
69
- ) = load_pretrained_model(self.model_path, device_map=device)
71
+ ) = load_pretrained_model(self.model_path, device_map=self._device)
70
72
 
71
73
  @staticmethod
72
74
  def _message_content_to_yi(content) -> Union[str, tuple]:
@@ -187,7 +189,7 @@ class YiVLChatModel(PytorchChatModel):
187
189
  prompt, self._tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
188
190
  )
189
191
  .unsqueeze(0)
190
- .cuda()
192
+ .to(self._device)
191
193
  )
192
194
 
193
195
  images = state.get_images(return_pil=True)
@@ -210,7 +212,9 @@ class YiVLChatModel(PytorchChatModel):
210
212
  max_new_tokens = generate_config.get("max_tokens", 512)
211
213
  generate_kwargs = {
212
214
  "input_ids": input_ids,
213
- "images": image_tensor.unsqueeze(0).to(dtype=torch.bfloat16).cuda(),
215
+ "images": image_tensor.unsqueeze(0)
216
+ .to(dtype=torch.bfloat16)
217
+ .to(self._device),
214
218
  "streamer": streamer,
215
219
  "do_sample": True,
216
220
  "top_p": float(top_p),
@@ -402,6 +402,15 @@ Begin!"""
402
402
  else:
403
403
  ret += role + ": </s>"
404
404
  return ret
405
+ elif prompt_style.style_name == "gemma":
406
+ ret = ""
407
+ for message in chat_history:
408
+ content = message["content"]
409
+ role = get_role(message["role"])
410
+ ret += "<start_of_turn>" + role + "\n"
411
+ if content:
412
+ ret += content + "<end_of_turn>\n"
413
+ return ret
405
414
  else:
406
415
  raise ValueError(f"Invalid prompt style: {prompt_style.style_name}")
407
416
 
@@ -556,7 +565,7 @@ Begin!"""
556
565
  content, func, args = cls._eval_gorilla_openfunctions_arguments(c, tools)
557
566
  elif "chatglm3" == family:
558
567
  content, func, args = cls._eval_chatglm3_arguments(c, tools)
559
- elif "qwen-chat" == family:
568
+ elif family in ["qwen-chat", "qwen1.5-chat"]:
560
569
  content, func, args = cls._eval_qwen_chat_arguments(c, tools)
561
570
  else:
562
571
  raise Exception(
@@ -56,6 +56,7 @@ class VLLMModelConfig(TypedDict, total=False):
56
56
  max_num_batched_tokens: int
57
57
  max_num_seqs: int
58
58
  quantization: Optional[str]
59
+ max_model_len: Optional[int]
59
60
 
60
61
 
61
62
  class VLLMGenerateConfig(TypedDict, total=False):
@@ -98,6 +99,8 @@ VLLM_SUPPORTED_CHAT_MODELS = [
98
99
  "mixtral-instruct-v0.1",
99
100
  "chatglm3",
100
101
  ]
102
+ if VLLM_INSTALLED and vllm.__version__ >= "0.3.0":
103
+ VLLM_SUPPORTED_CHAT_MODELS.append("qwen1.5-chat")
101
104
 
102
105
 
103
106
  class VLLMModel(LLM):
@@ -151,6 +154,7 @@ class VLLMModel(LLM):
151
154
  model_config.setdefault("gpu_memory_utilization", 0.90)
152
155
  model_config.setdefault("max_num_seqs", 256)
153
156
  model_config.setdefault("quantization", None)
157
+ model_config.setdefault("max_model_len", 4096)
154
158
 
155
159
  return model_config
156
160
 
@@ -14,28 +14,22 @@
14
14
 
15
15
  import logging
16
16
  import os
17
- import shutil
18
17
  import uuid
19
18
  from collections import defaultdict
20
19
  from typing import Dict, List, Optional, Tuple
21
20
 
22
21
  import numpy as np
23
- from pydantic import BaseModel
24
22
 
25
23
  from ...constants import XINFERENCE_CACHE_DIR
26
24
  from ...types import Document, DocumentObj, Rerank
27
- from ..core import ModelDescription
28
- from ..utils import is_model_cached, valid_model_revision
25
+ from ..core import CacheableModelSpec, ModelDescription
26
+ from ..utils import is_model_cached
29
27
 
30
28
  logger = logging.getLogger(__name__)
31
29
 
32
30
  # Used for check whether the model is cached.
33
31
  # Init when registering all the builtin models.
34
32
  MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
35
-
36
- SUPPORTED_SCHEMES = ["s3"]
37
-
38
-
39
33
  RERANK_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
40
34
 
41
35
 
@@ -45,7 +39,7 @@ def get_rerank_model_descriptions():
45
39
  return copy.deepcopy(RERANK_MODEL_DESCRIPTIONS)
46
40
 
47
41
 
48
- class RerankModelSpec(BaseModel):
42
+ class RerankModelSpec(CacheableModelSpec):
49
43
  model_name: str
50
44
  language: List[str]
51
45
  model_id: str
@@ -180,135 +174,10 @@ def get_cache_status(
180
174
  return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
181
175
 
182
176
 
183
- def cache_from_uri(
184
- model_spec: RerankModelSpec,
185
- self_hosted_storage: bool = False,
186
- ) -> str:
187
- from fsspec import AbstractFileSystem, filesystem
188
-
189
- from ..utils import copy_from_src_to_dst, parse_uri
190
-
191
- cache_dir = get_cache_dir(model_spec)
192
- if os.path.exists(cache_dir):
193
- logger.info(f"Rerank cache {cache_dir} exists")
194
- return cache_dir
195
-
196
- assert model_spec.model_uri is not None
197
- src_scheme, src_root = parse_uri(model_spec.model_uri)
198
- if src_root.endswith("/"):
199
- # remove trailing path separator.
200
- src_root = src_root[:-1]
201
-
202
- if src_scheme == "file":
203
- if not os.path.isabs(src_root):
204
- raise ValueError(
205
- f"Model URI cannot be a relative path: {model_spec.model_uri}"
206
- )
207
- os.makedirs(XINFERENCE_CACHE_DIR, exist_ok=True)
208
- os.symlink(src_root, cache_dir, target_is_directory=True)
209
- return cache_dir
210
- elif src_scheme in SUPPORTED_SCHEMES:
211
- # use anonymous connection for self-hosted storage.
212
- src_fs: AbstractFileSystem = filesystem(src_scheme, anon=self_hosted_storage)
213
- local_fs: AbstractFileSystem = filesystem("file")
214
-
215
- files_to_download = []
216
- os.makedirs(cache_dir, exist_ok=True)
217
-
218
- for path, _, files in src_fs.walk(model_spec.model_uri):
219
- for file in files:
220
- src_path = f"{path}/{file}"
221
- local_path = src_path.replace(src_root, cache_dir)
222
- files_to_download.append((src_path, local_path))
223
-
224
- from concurrent.futures import ThreadPoolExecutor
225
-
226
- failed = False
227
- with ThreadPoolExecutor(max_workers=min(len(files_to_download), 4)) as executor:
228
- futures = [
229
- (
230
- src_path,
231
- executor.submit(
232
- copy_from_src_to_dst, src_fs, src_path, local_fs, local_path
233
- ),
234
- )
235
- for src_path, local_path in files_to_download
236
- ]
237
- for src_path, future in futures:
238
- if failed:
239
- future.cancel()
240
- else:
241
- try:
242
- future.result()
243
- except:
244
- logger.error(f"Download {src_path} failed", exc_info=True)
245
- failed = True
246
-
247
- if failed:
248
- logger.warning(f"Removing cache directory: {cache_dir}")
249
- shutil.rmtree(cache_dir, ignore_errors=True)
250
- raise RuntimeError(
251
- f"Failed to download rerank model '{model_spec.model_name}' "
252
- )
253
- return cache_dir
254
- else:
255
- raise ValueError(f"Unsupported URL scheme: {src_scheme}")
256
-
257
-
258
177
  def cache(model_spec: RerankModelSpec):
259
- from huggingface_hub import snapshot_download as hf_download
260
- from modelscope.hub.snapshot_download import snapshot_download as ms_download
261
-
262
- from ..utils import retry_download, symlink_local_file
263
-
264
- if (
265
- hasattr(model_spec, "model_uri")
266
- and getattr(model_spec, "model_uri", None) is not None
267
- ):
268
- logger.info(f"Rerank model caching from URI: {model_spec.model_uri}")
269
- return cache_from_uri(model_spec=model_spec)
270
-
271
- cache_dir = get_cache_dir(model_spec)
272
- if not os.path.exists(cache_dir):
273
- os.makedirs(cache_dir, exist_ok=True)
274
- meta_path = os.path.join(cache_dir, "__valid_download")
275
- if valid_model_revision(meta_path, model_spec.model_revision):
276
- return cache_dir
277
-
278
- if model_spec.model_hub == "modelscope":
279
- logger.info(
280
- f"Download {model_spec.model_name} from modelscope {model_spec.model_id}"
281
- )
282
- download_dir = retry_download(
283
- ms_download,
284
- model_spec.model_name,
285
- None,
286
- model_spec.model_id,
287
- revision=model_spec.model_revision,
288
- )
289
- for subdir, dirs, files in os.walk(download_dir):
290
- for file in files:
291
- relpath = os.path.relpath(os.path.join(subdir, file), download_dir)
292
- symlink_local_file(os.path.join(subdir, file), cache_dir, relpath)
293
- else:
294
- logger.info(
295
- f"Download {model_spec.model_name} from huggingface {model_spec.model_id}"
296
- )
297
- retry_download(
298
- hf_download,
299
- model_spec.model_name,
300
- None,
301
- model_spec.model_id,
302
- revision=model_spec.model_revision,
303
- local_dir=cache_dir,
304
- local_dir_use_symlinks=True,
305
- )
306
- with open(meta_path, "w") as f:
307
- import json
178
+ from ..utils import cache
308
179
 
309
- desc = RerankModelDescription(None, None, model_spec)
310
- json.dump(desc.to_dict(), f)
311
- return cache_dir
180
+ return cache(model_spec, RerankModelDescription)
312
181
 
313
182
 
314
183
  def create_rerank_model_instance(