xinference 0.8.4__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (309)
  1. xinference/__init__.py +6 -0
  2. xinference/_compat.py +52 -0
  3. xinference/_version.py +3 -3
  4. xinference/api/oauth2/auth_service.py +2 -3
  5. xinference/api/oauth2/types.py +1 -1
  6. xinference/api/restful_api.py +176 -108
  7. xinference/client/restful/restful_client.py +10 -6
  8. xinference/core/model.py +3 -2
  9. xinference/core/resource.py +4 -2
  10. xinference/core/status_guard.py +2 -1
  11. xinference/core/supervisor.py +37 -12
  12. xinference/core/utils.py +2 -1
  13. xinference/core/worker.py +13 -13
  14. xinference/deploy/worker.py +7 -7
  15. xinference/device_utils.py +100 -0
  16. xinference/fields.py +1 -1
  17. xinference/model/audio/core.py +1 -2
  18. xinference/model/audio/whisper.py +20 -8
  19. xinference/model/core.py +9 -0
  20. xinference/model/embedding/core.py +5 -136
  21. xinference/model/image/__init__.py +13 -1
  22. xinference/model/image/core.py +22 -43
  23. xinference/model/image/model_spec_modelscope.json +94 -0
  24. xinference/model/image/stable_diffusion/core.py +3 -5
  25. xinference/model/llm/ggml/llamacpp.py +1 -1
  26. xinference/model/llm/llm_family.json +333 -3
  27. xinference/model/llm/llm_family.py +11 -5
  28. xinference/model/llm/llm_family_modelscope.json +311 -1
  29. xinference/model/llm/pytorch/compression.py +3 -1
  30. xinference/model/llm/pytorch/core.py +34 -15
  31. xinference/model/llm/pytorch/qwen_vl.py +5 -3
  32. xinference/model/llm/pytorch/spec_decoding_utils.py +3 -1
  33. xinference/model/llm/pytorch/spec_model.py +20 -17
  34. xinference/model/llm/pytorch/utils.py +4 -3
  35. xinference/model/llm/pytorch/yi_vl.py +9 -5
  36. xinference/model/llm/utils.py +10 -1
  37. xinference/model/llm/vllm/core.py +4 -0
  38. xinference/model/rerank/core.py +5 -136
  39. xinference/model/utils.py +143 -18
  40. xinference/thirdparty/llava/mm_utils.py +1 -1
  41. xinference/types.py +3 -3
  42. xinference/web/ui/build/asset-manifest.json +3 -3
  43. xinference/web/ui/build/index.html +1 -1
  44. xinference/web/ui/build/static/js/main.87d39ffb.js +3 -0
  45. xinference/web/ui/build/static/js/main.87d39ffb.js.map +1 -0
  46. xinference/web/ui/node_modules/.cache/babel-loader/{8d95a492a6b08c192e212189567b59b98282ca6f3ed52dd9bd053a6be7ff4e65.json → 027745bdb9bb9fe375f2eef7377e9f7ba82510ed90c05f9e5b34ba314bb93498.json} +1 -1
  47. xinference/web/ui/node_modules/.cache/babel-loader/03d5e7de9b1728d71cd8c483a35f4373eb66932022026142e03005d9de1a2a44.json +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/0738899eefad7f90261125823d87ea9f0d53667b1479a0c1f398aff14f2bbd2a.json +1 -0
  49. xinference/web/ui/node_modules/.cache/babel-loader/09a21ab449599121eadfdd12d6dba40c5a1c5b35dc90be17108eeff80daf79b8.json +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/09d043a8289da33d05768f5face63cc47e9ce9646d67f5589865db511574e8f9.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/0b901c8f077905bf153a1390b207277c31e893f83f9ced5d29a0a0c85a07b5eb.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/0da1ff7c9602609cfa8b82a67140114f3037130fe79583bbf23b69cef81823a6.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +1 -0
  54. xinference/web/ui/node_modules/.cache/babel-loader/0f87caef05125425c0ca418a32ce8a53e9becffd464411b2ff94e3c7a37c7b5b.json +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/{cb50b4d54ce9c249d580f7f1fc2df4beab4f13c4d16f486c89239809570de8fb.json → 0fc333c35ad49c0db7f18f77521984a36067cfce4206df50ce16a2fdc88be1c6.json} +1 -1
  56. xinference/web/ui/node_modules/.cache/babel-loader/13fdd92dd5e2fe11640cf6ec0368e3c098d3f0c43e11703ccf157e8c41410d5b.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/1464f950e5600fe2e5890186a1ae3750cdabbadd8525070cef6273450760bcf6.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/17c53dc44a324629efef425833ab48cc05243d089f4eb3c731da63568fcff35e.json +1 -0
  59. xinference/web/ui/node_modules/.cache/babel-loader/1946a52cb34e675da423de15b703437b4c841e4f5aed6ddd932a091c145e17b0.json +1 -0
  60. xinference/web/ui/node_modules/.cache/babel-loader/1e0da5ce4d8ccdbe33705a47bdc0d5d1d7e4e48c8e5c1d0b8c473e9f491930ac.json +1 -0
  61. xinference/web/ui/node_modules/.cache/babel-loader/1e3ec3302ef2253fad5e86bf429baf6b69e57e769f38ec1aaada87f3114b6a1f.json +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/1f18804827897c337874b289873df611b44869f86ced1d4351fbe0b45ed17518.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/21b64f7fdb74c2c35479811c3f5683ee161d3b64a4fade0cd1fc0044332c79b2.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/22873d21eab4b5a04b183cc3d2cbfafa431e0ce99649aeb900cffcfc33ecbca4.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/{7a5fecf96af770954035948b07dfadeceb33536b4763a2f9e548121ae555b75f.json → 228b78cd0950f1c967cb33b210b11497052a9998f0b1d1f31ecee0fd8b99fb40.json} +1 -1
  66. xinference/web/ui/node_modules/.cache/babel-loader/2328fc2360224ac3eef5c5261fc2a75cdce7fe58c210af8d3e532ec609464f5f.json +1 -0
  67. xinference/web/ui/node_modules/.cache/babel-loader/25fd89450dcf73a205a5f8c82dbc604bf872d574b748427c26ba79b8711ec502.json +1 -0
  68. xinference/web/ui/node_modules/.cache/babel-loader/{b6e95546cdd247e50d9c916dd3bef5216f96c32a267e54b51b23cc22631be0c4.json → 288d6573b09f0e2b0277e1f301bcb57a194c01f3983e3c1e03c8b2ea72653194.json} +1 -1
  69. xinference/web/ui/node_modules/.cache/babel-loader/2c04900a020b813e2e99b15748c3c3ef5a1627c33d5668e45bc6a798f0d28dae.json +1 -0
  70. xinference/web/ui/node_modules/.cache/babel-loader/2cd0fd50d8f213cdd94daffe7d0f997991072b11936ead7c48be2af3fdbd1fda.json +1 -0
  71. xinference/web/ui/node_modules/.cache/babel-loader/2e057c12322da07a537a576a3917d512ac6cb291be5aa29ce23d2679a4fd16b1.json +1 -0
  72. xinference/web/ui/node_modules/.cache/babel-loader/{abd0662d21fbe39e483fbc4d37faa9e24d93889b5b40d42e7e20773c66ee3894.json → 327c21db6124a5209f41996d0d3359b462d5baff3974296f5acfddfc532434e6.json} +1 -1
  73. xinference/web/ui/node_modules/.cache/babel-loader/33294bb0080595100e22c960929a6f3086f5ea632a4bcd9216069f95347a63a9.json +1 -0
  74. xinference/web/ui/node_modules/.cache/babel-loader/3361869e45eddd7d48ff7995e1a12ac454c9eb92d4c87f49ce80c1e9482c1e91.json +1 -0
  75. xinference/web/ui/node_modules/.cache/babel-loader/375d2f6a31e01c5a82fd0e18f42198f9fbb7117491f3e92d70cf36055f159a6e.json +1 -0
  76. xinference/web/ui/node_modules/.cache/babel-loader/3808217efee76345ba19af3ffdba045888e0908b0dc7fc38e54655cdc3504c60.json +1 -0
  77. xinference/web/ui/node_modules/.cache/babel-loader/39db740a99454006312dec9e4845f850c51e043458ca94eba8bb256521fabcd4.json +1 -0
  78. xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +1 -0
  79. xinference/web/ui/node_modules/.cache/babel-loader/3e737bcdbcbc407ccd65b90e199ef0c3214b261e8e41dbf14d921384a717d9ee.json +1 -0
  80. xinference/web/ui/node_modules/.cache/babel-loader/3e87c7d4f1e5c93603eed3e861945e44dd1e664061a89aaef1aa6435f4f4d7d0.json +1 -0
  81. xinference/web/ui/node_modules/.cache/babel-loader/3ecf70018028c8cf6cdc2d24d2b88f2707bd5c23a336fd3d82bd6a1af0db7ded.json +1 -0
  82. xinference/web/ui/node_modules/.cache/babel-loader/43314d1e9fed30ed6cf5cab3f5033da067e58be29e36448e3c0a8127403a9388.json +1 -0
  83. xinference/web/ui/node_modules/.cache/babel-loader/442467c09440131f7c25ddcdeb356d2a87783ad4e32d89f410a16cb6e7d83b22.json +1 -0
  84. xinference/web/ui/node_modules/.cache/babel-loader/487524f2b3494bfcfbd8c12918092a52ff2ce3eefe9bf0a557389ec1c74d3e67.json +1 -0
  85. xinference/web/ui/node_modules/.cache/babel-loader/4de4b0282a3d1ae02bec0bfe7d6658f039141406e8bfdbe86bf1dc5b115a2524.json +1 -0
  86. xinference/web/ui/node_modules/.cache/babel-loader/4ec72faab00e4cbffefdf43b987ae3d15beb60e282b9ca86dad37f4d0fc3f2bd.json +1 -0
  87. xinference/web/ui/node_modules/.cache/babel-loader/51403f1b70af0f4577d4594cbfcf9d9759997e5c49097e7a07219000d1be4ff1.json +1 -0
  88. xinference/web/ui/node_modules/.cache/babel-loader/5282ee05e064b3a80bc991e9003ddef6a4958471d8f4fc65589dc64553365cdd.json +1 -0
  89. xinference/web/ui/node_modules/.cache/babel-loader/566a0829bfeab41e40bbdca55ad056067e172508af4b333403d4fd334a8c8742.json +1 -0
  90. xinference/web/ui/node_modules/.cache/babel-loader/5689d9aee417a0834fed66720cf3b48c34cc26364cd9ab7e3d809997bb9faec9.json +1 -0
  91. xinference/web/ui/node_modules/.cache/babel-loader/5bedd27475bda204eafccdc636a56ba5765353391049a0b947234faaab3d021a.json +1 -0
  92. xinference/web/ui/node_modules/.cache/babel-loader/{94daea23b698bbb26bbbab69bbca8ad521c79fcbd6b495c84c740e4006a2ced2.json → 61014d40df4486972fb6d5909dff45f24bec48573af99ca8bcd79835385c7da0.json} +1 -1
  93. xinference/web/ui/node_modules/.cache/babel-loader/6139c80ec46d77469e9a89924de41d93050f19e9ed541e358f09cbf20ccd7c86.json +1 -0
  94. xinference/web/ui/node_modules/.cache/babel-loader/63ead4dee6a4f4ae47aed6918a2ab3e172739ecdffaa85e063f9a2c9165ff93c.json +1 -0
  95. xinference/web/ui/node_modules/.cache/babel-loader/6bf8c25e0f2051d994990bf73fdcb3e08963e11589842895593f199eab13ba9a.json +1 -0
  96. xinference/web/ui/node_modules/.cache/babel-loader/6d42f63cd673d1c60e534a174cd851e9aefd2d5d45a4aa6128d794457b64de53.json +1 -0
  97. xinference/web/ui/node_modules/.cache/babel-loader/7044e626a91582077b8cbad09bcf0cc118e92e11ebfa2ebc17622baf98aa56bf.json +1 -0
  98. xinference/web/ui/node_modules/.cache/babel-loader/71ec408030881aad69854815b8681185d0174cc0cf67f20fb2c3cd13c74fce07.json +1 -0
  99. xinference/web/ui/node_modules/.cache/babel-loader/7280614d5bddce524182024cc288953eb89d46c1dbe470ed5eb5ecab5de28385.json +1 -0
  100. xinference/web/ui/node_modules/.cache/babel-loader/7293ad8ba7b5fc6ed791aed44a766a396516c782f5d0e564b9ef0997645ec1dd.json +1 -0
  101. xinference/web/ui/node_modules/.cache/babel-loader/{65ca3ba225b8c8dac907210545b51f2fcdb2591f0feeb7195f1c037f2bc956a0.json → 77d4d795f078408fa2dd49da26d1ba1543d51b63cc253e736f4bef2e6014e888.json} +1 -1
  102. xinference/web/ui/node_modules/.cache/babel-loader/79be1e0dd6b1c3cbb3f08b349454f4791edaac451fa4d1f10eaf433e57ecf70f.json +1 -0
  103. xinference/web/ui/node_modules/.cache/babel-loader/7c0571c03aa8c90eb9452ba23133aed9b3b2a049910d4d8276588b438a4caaba.json +1 -0
  104. xinference/web/ui/node_modules/.cache/babel-loader/81ed094096cd9dc36f3ddebe61818766f43d3338c171f2269958f47265e60f24.json +1 -0
  105. xinference/web/ui/node_modules/.cache/babel-loader/83beb31daa7169fb0057453d4f86411f1effd3e3f7af97472cbd22accbfc65bb.json +1 -0
  106. xinference/web/ui/node_modules/.cache/babel-loader/86355e55059ddb15f71c0c651a2e622cee6f3141da3a018775ca00c74dd37dc2.json +1 -0
  107. xinference/web/ui/node_modules/.cache/babel-loader/{d1af6db565bae36218251c01a817c080ef764172dd0694e38d27b525a3ffd1d2.json → 8710fe63bcc6b95502287c36a7bd881b74ee2926982e120e1e8070a2c8a668fc.json} +1 -1
  108. xinference/web/ui/node_modules/.cache/babel-loader/8779104679d8f85017b551d1d108ea1ac701e184ad7bcc8a367526353a6c5175.json +1 -0
  109. xinference/web/ui/node_modules/.cache/babel-loader/881c573e6aa9954e7bd5e3bc5a6ef62c0db9b93815030080f677f6834e431434.json +1 -0
  110. xinference/web/ui/node_modules/.cache/babel-loader/8d24c51a0905b919436aac2bad441eb1651c46c4e7aa17c99eb63c7760ce6615.json +1 -0
  111. xinference/web/ui/node_modules/.cache/babel-loader/93bf5cc8cbe3f0b10f291800e1896f03506a8933cd490e07ee0ff94498f93bce.json +1 -0
  112. xinference/web/ui/node_modules/.cache/babel-loader/9401db0316ac34790aa5b972e9e1c50de58ef313b50840a37e08f0936a7197b6.json +1 -0
  113. xinference/web/ui/node_modules/.cache/babel-loader/98afeafdfc285a744d116fb5f189a16ad2b977adbd4fc0757f7ade1b29b8ad31.json +1 -0
  114. xinference/web/ui/node_modules/.cache/babel-loader/9a4e624d1ce66832fb213ead02745d84661d9bfcc21b62b4c37c680c3c3f5cbb.json +1 -0
  115. xinference/web/ui/node_modules/.cache/babel-loader/9a7b26f85224d33d431354ac8d7849388b7525fc014801824e705561ab2ac7a7.json +1 -0
  116. xinference/web/ui/node_modules/.cache/babel-loader/9dcb04ac6cb341233a2e98f2fb417d059c2ea6a56d2ff6f30bb27dae9bd6c756.json +1 -0
  117. xinference/web/ui/node_modules/.cache/babel-loader/9ebede8a4bccef016eef418a668b3212962779ea6d2531edb23d3dca81163496.json +1 -0
  118. xinference/web/ui/node_modules/.cache/babel-loader/a0efc8db4289116bdb312014df91fc781f66fe171d63027bbd5aae25cd54c325.json +1 -0
  119. xinference/web/ui/node_modules/.cache/babel-loader/a212defc32ca338ba673722c72815d30974275f1a82b67405a898f826fc8d782.json +1 -0
  120. xinference/web/ui/node_modules/.cache/babel-loader/a2685acab57fd6d874bf6069a587e3770b55c72cacc69cb167bb3dbd16a551c5.json +1 -0
  121. xinference/web/ui/node_modules/.cache/babel-loader/a3b7d6233e50dc3832421793a5752c1db99f3a214c9896c97bbd395d007f09b1.json +1 -0
  122. xinference/web/ui/node_modules/.cache/babel-loader/a3d0c810a255108613be0a175d53d673c4b6843c2a87dd0f157b4d3731af2092.json +1 -0
  123. xinference/web/ui/node_modules/.cache/babel-loader/a6633774053545c60721e345820e21cf3b089df219f7ee3fcceaf27af3a9e84b.json +1 -0
  124. xinference/web/ui/node_modules/.cache/babel-loader/a6bb917e4869d7d6ddf50b75932cad64e94b9e33db2aa40c4a489a045819bb36.json +1 -0
  125. xinference/web/ui/node_modules/.cache/babel-loader/a816031fafa564dc4af27383612e51ac29af51354f3bba750a70ac0c03b6bd30.json +1 -0
  126. xinference/web/ui/node_modules/.cache/babel-loader/af6349a1e9761cb98af4b76904440fb7f8d54288ee6ba37116f670226796e689.json +1 -0
  127. xinference/web/ui/node_modules/.cache/babel-loader/afce63eaad32a9968590e4500e123c81b385ea3b6046b2af1ec7ea8a49ba657b.json +1 -0
  128. xinference/web/ui/node_modules/.cache/babel-loader/aff24b61e5d55147d0a36ef1896b9bd35c995c357f44eeee1209f23db2118636.json +1 -0
  129. xinference/web/ui/node_modules/.cache/babel-loader/b01676c5ca97e3409a1b7bac91b6a797be3719a8b7ec72ad06c9beb296c67bb7.json +1 -0
  130. xinference/web/ui/node_modules/.cache/babel-loader/b0c5e967d1af90d545f1d34c9893a841296b0d1dd50de65eeaf19054995d2a94.json +1 -0
  131. xinference/web/ui/node_modules/.cache/babel-loader/b561d429cea6143e1bf7e6e7e690f9e6ce1dce4adfcacb57d7c5578195d1d190.json +1 -0
  132. xinference/web/ui/node_modules/.cache/babel-loader/b70187bb5aecf3bd7afbe8469b49e2a0a94a0c62727530d0dc2f84ee4577bcfc.json +1 -0
  133. xinference/web/ui/node_modules/.cache/babel-loader/bd21f6039e23775fb4631bb56f2eef1d132c03a60cc6fe4530569ecfeef456a3.json +1 -0
  134. xinference/web/ui/node_modules/.cache/babel-loader/be09b5508d0ca5ac0300144a682d4cde2d5b4b96ecb9fc690c1d5e7ee7411b3e.json +1 -0
  135. xinference/web/ui/node_modules/.cache/babel-loader/be8c9e83f0dd4af2e9bcf6844dd8c4445fe634b6c71da6debfa7acabb7a875c0.json +1 -0
  136. xinference/web/ui/node_modules/.cache/babel-loader/c0d1317dfae0c5ddf5a3ba9aef6bd2204f2daf9754f3270c4cab5c4632f78000.json +1 -0
  137. xinference/web/ui/node_modules/.cache/babel-loader/c1e3afdaae5649274f7d34910015b88bc02ba6a00b935e6597f9c670fedc05c0.json +1 -0
  138. xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +1 -0
  139. xinference/web/ui/node_modules/.cache/babel-loader/c29ec02243dd370616a489f7de67e4e479d2f7f12504ba7f77304cffe60befbc.json +1 -0
  140. xinference/web/ui/node_modules/.cache/babel-loader/c2a4d298149e6c66713eed4f56f4bbbfa4022a193891029b84e429baf59cc246.json +1 -0
  141. xinference/web/ui/node_modules/.cache/babel-loader/c3d2fc03602f7604a65e631e8595ce4cfabd8887343489b34a65e3a73c5546b3.json +1 -0
  142. xinference/web/ui/node_modules/.cache/babel-loader/c41078c8692e0635fb52d01bc5157528c7308ae5140c134aa07c1fcda4f7a5e5.json +1 -0
  143. xinference/web/ui/node_modules/.cache/babel-loader/c4b5f1e444ddd41f8cd30e618c185b16235e313ea34d944959628001a91e38a5.json +1 -0
  144. xinference/web/ui/node_modules/.cache/babel-loader/c7ed9bc3125498e475f3170b8126c93368e9cbfd6a5e73ee63437d90e39b69e5.json +1 -0
  145. xinference/web/ui/node_modules/.cache/babel-loader/c996b3dc884130f034a37551845201bc5661f2dcd282aed737329f8d4dd387b5.json +1 -0
  146. xinference/web/ui/node_modules/.cache/babel-loader/c9fef6e7f0a532c2d1cc25dffd496ea8355dd14ce0105a169b391bfeb9277e4f.json +1 -0
  147. xinference/web/ui/node_modules/.cache/babel-loader/cc8f5a2bbe4da9852ccdf64150115271baa2d282d74d7fab854dc5ca641dd519.json +1 -0
  148. xinference/web/ui/node_modules/.cache/babel-loader/ce45adea497443bfc1b6baabd0aa3d85888dd10b1b7fd009edf608e390b2a844.json +1 -0
  149. xinference/web/ui/node_modules/.cache/babel-loader/ce4b82a3855d18ba09e922f6e2cc1b8c20f0e5261b27303c52d4e1ba5a09a078.json +1 -0
  150. xinference/web/ui/node_modules/.cache/babel-loader/cf16614311e144768bdc064da754efad77597cac5efe6852be996b9eaaed695a.json +1 -0
  151. xinference/web/ui/node_modules/.cache/babel-loader/d074bfe592a195f339acbcd8743f8b64cfd09b45ca1013598269227fc092f2db.json +1 -0
  152. xinference/web/ui/node_modules/.cache/babel-loader/d2a95455cc492d4fbf2839a21eeaef00e7fad2aac93825d2443a38127ad02007.json +1 -0
  153. xinference/web/ui/node_modules/.cache/babel-loader/d4e6e712ebbbf0f30745093e2b6d235c13fb43d37b586a0109e87c19eafdf35d.json +1 -0
  154. xinference/web/ui/node_modules/.cache/babel-loader/d88a02264125b07881eecff752d9c8d718edcf2d87e5431b13e54809dc7bf4f2.json +1 -0
  155. xinference/web/ui/node_modules/.cache/babel-loader/d907925fb28912f2e98cccfe758bce680aa4f894cc9d2079d73d7e2d85481d47.json +1 -0
  156. xinference/web/ui/node_modules/.cache/babel-loader/daf050ec3a864b9831dcb32cee1fb2739dafb68b369e9108469ca1e6fd166867.json +1 -0
  157. xinference/web/ui/node_modules/.cache/babel-loader/de9645d55ce8744856862eae4a3d3a1266e9df9d575aa3e2f62a75a5d97444f7.json +1 -0
  158. xinference/web/ui/node_modules/.cache/babel-loader/e04199bdfb87226d6aeae387ef677eae06483f9f135b5a3f1360ec1a243c86cd.json +1 -0
  159. xinference/web/ui/node_modules/.cache/babel-loader/e97d1f31f54263bb90b7c3e71be0e19973d76d945fb5d526bf93fb449708bbf0.json +1 -0
  160. xinference/web/ui/node_modules/.cache/babel-loader/e9f87f5c370a731ee42098ae6ff38ca6ad256396158c03f84c5e770c6b76a43d.json +1 -0
  161. xinference/web/ui/node_modules/.cache/babel-loader/f2ad375dbc8779f54eb9c377fd951cd442f3766d5eb70d40613b952b811643da.json +1 -0
  162. xinference/web/ui/node_modules/.cache/babel-loader/f35df9d5a0e0341bcc0f39a462e46988c330cdb905cc690b08de4a9cf5771f28.json +1 -0
  163. xinference/web/ui/node_modules/.cache/babel-loader/f433e33369483f75d31fe937b8a597ef2d7cdfed339931643ff966a33dbc6158.json +1 -0
  164. xinference/web/ui/node_modules/.cache/babel-loader/f4c164c89cfbcb1d81bc0b60e3e7c8715e07c7a23118acf4e3e16715beac09c5.json +1 -0
  165. xinference/web/ui/node_modules/.cache/babel-loader/f545d937cfb9d974f4aaaa9dc1c12adfed0d4f608de7d0a7247452d9bbbb55a4.json +1 -0
  166. xinference/web/ui/node_modules/.cache/babel-loader/{3cc5ec39e76e1768ffb070a8ae18ee61b68f3ed3593178134d3edda5ed378c6c.json → f8d2c4372093d0ff3a2f78260e004e6c4e2b8ab4f5be9713ebc16c50e9c2ab4b.json} +1 -1
  167. xinference/web/ui/node_modules/.cache/babel-loader/fa41c6c8ffa1047b7f2f52b6532ea4d9f5754e5bbc11d1340bae7f75ae5ed84e.json +1 -0
  168. xinference/web/ui/node_modules/.package-lock.json +45 -45
  169. xinference/web/ui/node_modules/@babel/runtime/package.json +64 -37
  170. xinference/web/ui/node_modules/@mui/private-theming/package.json +7 -6
  171. xinference/web/ui/node_modules/@mui/styled-engine/package.json +6 -5
  172. xinference/web/ui/node_modules/@mui/system/node_modules/clsx/package.json +22 -10
  173. xinference/web/ui/node_modules/@mui/system/package.json +13 -12
  174. xinference/web/ui/node_modules/@mui/types/package.json +3 -2
  175. xinference/web/ui/node_modules/@mui/utils/ClassNameGenerator/package.json +2 -2
  176. xinference/web/ui/node_modules/@mui/utils/HTMLElementType/package.json +2 -2
  177. xinference/web/ui/node_modules/@mui/utils/capitalize/package.json +2 -2
  178. xinference/web/ui/node_modules/@mui/utils/chainPropTypes/package.json +2 -2
  179. xinference/web/ui/node_modules/@mui/utils/clamp/package.json +6 -0
  180. xinference/web/ui/node_modules/@mui/utils/composeClasses/package.json +2 -2
  181. xinference/web/ui/node_modules/@mui/utils/createChainedFunction/package.json +6 -0
  182. xinference/web/ui/node_modules/@mui/utils/debounce/package.json +2 -2
  183. xinference/web/ui/node_modules/@mui/utils/deepmerge/package.json +6 -0
  184. xinference/web/ui/node_modules/@mui/utils/deprecatedPropType/package.json +6 -0
  185. xinference/web/ui/node_modules/@mui/utils/elementAcceptingRef/package.json +6 -0
  186. xinference/web/ui/node_modules/@mui/utils/elementTypeAcceptingRef/package.json +6 -0
  187. xinference/web/ui/node_modules/@mui/utils/exactProp/package.json +2 -2
  188. xinference/web/ui/node_modules/@mui/utils/formatMuiErrorMessage/package.json +6 -0
  189. xinference/web/ui/node_modules/@mui/utils/generateUtilityClass/package.json +2 -2
  190. xinference/web/ui/node_modules/@mui/utils/generateUtilityClasses/package.json +2 -2
  191. xinference/web/ui/node_modules/@mui/utils/getDisplayName/package.json +6 -0
  192. xinference/web/ui/node_modules/@mui/utils/getScrollbarSize/package.json +6 -0
  193. xinference/web/ui/node_modules/@mui/utils/getValidReactChildren/package.json +6 -0
  194. xinference/web/ui/node_modules/@mui/utils/integerPropType/package.json +6 -0
  195. xinference/web/ui/node_modules/@mui/utils/isMuiElement/package.json +6 -0
  196. xinference/web/ui/node_modules/@mui/utils/ownerDocument/package.json +2 -2
  197. xinference/web/ui/node_modules/@mui/utils/ownerWindow/package.json +2 -2
  198. xinference/web/ui/node_modules/@mui/utils/package.json +8 -7
  199. xinference/web/ui/node_modules/@mui/utils/ponyfillGlobal/package.json +2 -2
  200. xinference/web/ui/node_modules/@mui/utils/refType/package.json +6 -0
  201. xinference/web/ui/node_modules/@mui/utils/requirePropFactory/package.json +6 -0
  202. xinference/web/ui/node_modules/@mui/utils/resolveProps/package.json +6 -0
  203. xinference/web/ui/node_modules/@mui/utils/scrollLeft/package.json +6 -0
  204. xinference/web/ui/node_modules/@mui/utils/setRef/package.json +6 -0
  205. xinference/web/ui/node_modules/@mui/utils/unsupportedProp/package.json +6 -0
  206. xinference/web/ui/node_modules/@mui/utils/useControlled/package.json +2 -2
  207. xinference/web/ui/node_modules/@mui/utils/useEnhancedEffect/package.json +2 -2
  208. xinference/web/ui/node_modules/@mui/utils/useEventCallback/package.json +2 -2
  209. xinference/web/ui/node_modules/@mui/utils/useForkRef/package.json +2 -2
  210. xinference/web/ui/node_modules/@mui/utils/useId/package.json +2 -2
  211. xinference/web/ui/node_modules/@mui/utils/useIsFocusVisible/package.json +6 -0
  212. xinference/web/ui/node_modules/@mui/utils/useLazyRef/package.json +6 -0
  213. xinference/web/ui/node_modules/@mui/utils/useOnMount/package.json +6 -0
  214. xinference/web/ui/node_modules/@mui/utils/usePreviousProps/package.json +6 -0
  215. xinference/web/ui/node_modules/@mui/utils/useTimeout/package.json +6 -0
  216. xinference/web/ui/node_modules/@mui/utils/visuallyHidden/package.json +6 -0
  217. xinference/web/ui/node_modules/@types/prop-types/package.json +2 -2
  218. xinference/web/ui/node_modules/csstype/package.json +3 -3
  219. xinference/web/ui/package-lock.json +47 -45
  220. xinference/web/ui/package.json +2 -0
  221. {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/METADATA +6 -3
  222. {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/RECORD +227 -167
  223. xinference/web/ui/build/static/js/main.476e35cc.js +0 -3
  224. xinference/web/ui/build/static/js/main.476e35cc.js.map +0 -1
  225. xinference/web/ui/node_modules/.cache/babel-loader/02f54fcadc6fe9b1be8451435f9a097c82b3d665bae2d2df03ba0b36cebeca0c.json +0 -1
  226. xinference/web/ui/node_modules/.cache/babel-loader/051391807ffd255b652b63dc2ff12e5ce80113e1b712c81b6c1986a11cd85d3c.json +0 -1
  227. xinference/web/ui/node_modules/.cache/babel-loader/05710e94caaf5a2da790d67aa572bf6bc19bae0237668c8d97f4da62ab52fe5c.json +0 -1
  228. xinference/web/ui/node_modules/.cache/babel-loader/08a3470840a55d359a967d0763cd1ae5c1f15f3ca31118e29b37b4fbbdb248c7.json +0 -1
  229. xinference/web/ui/node_modules/.cache/babel-loader/0e68b0e268559a563dde6fa4c654922d98c74f5140854437f3e5a01bf23ba41f.json +0 -1
  230. xinference/web/ui/node_modules/.cache/babel-loader/10dc7661741dfc88df5538bbf835870ae47b1a18f36c2f7b9650651e7304ce72.json +0 -1
  231. xinference/web/ui/node_modules/.cache/babel-loader/18b3d28a2692ab4f2d05254221f53b3c13259435bd98cb90ff5d7250d93f11e0.json +0 -1
  232. xinference/web/ui/node_modules/.cache/babel-loader/1b612ab687e3a4e14db156bd92a75b8a3e96ff04bfbf8f3351c7e801bc61cf90.json +0 -1
  233. xinference/web/ui/node_modules/.cache/babel-loader/1bb839ffeb39ed12367c179fbf3b784e20f6f98fb87c597717bc11ebe3201c59.json +0 -1
  234. xinference/web/ui/node_modules/.cache/babel-loader/1d12e0b0e4e87372b18fef72fcd6e264fa40b0cffdbeddcef004becc4e32cf27.json +0 -1
  235. xinference/web/ui/node_modules/.cache/babel-loader/1d5b806c08ffb55539ee26b5fa9746a373f602354d6e55a0d4379d7e2f903868.json +0 -1
  236. xinference/web/ui/node_modules/.cache/babel-loader/1ecb7ce2bc103bec0d1a9f0cb07132e5ec7731d974998c465798c0e15cafb924.json +0 -1
  237. xinference/web/ui/node_modules/.cache/babel-loader/20cfc57ae2b8ba23571f386a5f2bf194bd02e8a3913113881bb9804ee822ad58.json +0 -1
  238. xinference/web/ui/node_modules/.cache/babel-loader/222a437b6773707bbead06ba5db1ed8869e44d9f051a7f6b1a745aeb654679d0.json +0 -1
  239. xinference/web/ui/node_modules/.cache/babel-loader/249709fb20c1254cecc9ba6666e6bcf37b21e47e7a020d5a7f0513680b49473f.json +0 -1
  240. xinference/web/ui/node_modules/.cache/babel-loader/257ad4979294ee3a7ef400feab3b29ebcd259c47b9410a3011a66f4b33a97904.json +0 -1
  241. xinference/web/ui/node_modules/.cache/babel-loader/2779bdc8ed8372f4015d07a45e4b09756bd953250944c3e3efe6b65bf8eaecd8.json +0 -1
  242. xinference/web/ui/node_modules/.cache/babel-loader/2bee7b8bd3d52976a45d6068e1333df88b943e0e679403c809e45382e3818037.json +0 -1
  243. xinference/web/ui/node_modules/.cache/babel-loader/31c71a85b2d0bbdcdc027e7950a9d1f778288f98888d8595fad5e4fdda5bb31f.json +0 -1
  244. xinference/web/ui/node_modules/.cache/babel-loader/371214b6649e323e7281885b1412c5a515ccc2274f4e1a42921e45777694009c.json +0 -1
  245. xinference/web/ui/node_modules/.cache/babel-loader/3eced030d2b3da645f4cbe3dc4ff49744312316e4306ce170991e802e9bfcf8e.json +0 -1
  246. xinference/web/ui/node_modules/.cache/babel-loader/452ae8e7cff99efc35d134014d32610d13217b031b817d3663bb689b3778ebd2.json +0 -1
  247. xinference/web/ui/node_modules/.cache/babel-loader/48e3f5d01ccedde86348a29392bc9b6e301a51b3f7e2476ae0a95a31eb6bf18d.json +0 -1
  248. xinference/web/ui/node_modules/.cache/babel-loader/4a311b5bd98f16588caf60b8de4c8d91d038198e6bfd496f880f60d4c1ea076d.json +0 -1
  249. xinference/web/ui/node_modules/.cache/babel-loader/4ccd71d3f73490a18a5a4de4ad82d63cf019caff6f8d881fd1434755a033bfc5.json +0 -1
  250. xinference/web/ui/node_modules/.cache/babel-loader/4ee8343d4254c95e685d49601f1649e8222d7423c2a822e1d01c8fc4cd3582a2.json +0 -1
  251. xinference/web/ui/node_modules/.cache/babel-loader/523a712748fbb7abf0c5423df62e65c4659b7e450182c890e847cd78ace7cac5.json +0 -1
  252. xinference/web/ui/node_modules/.cache/babel-loader/5777049bbacf7322891761a5b959e62eec22b0f96a70dbf97e58d530c6258a55.json +0 -1
  253. xinference/web/ui/node_modules/.cache/babel-loader/5905957ce95f7dee64f0739359e990b8b518cff088c6958487641a3d21737d1e.json +0 -1
  254. xinference/web/ui/node_modules/.cache/babel-loader/5ac79df79d2dff64da97b02d5c90b36bc0d44a4a4229f6efed2993f93ee1d7a1.json +0 -1
  255. xinference/web/ui/node_modules/.cache/babel-loader/64d3ef38fd394979b09061e67fc47f04b7295401677825c64c72ea2f44cf910c.json +0 -1
  256. xinference/web/ui/node_modules/.cache/babel-loader/68a5d8e66687770e05aebcae9a8047189dc577d6150fbf0e62d2b2c11c4335c5.json +0 -1
  257. xinference/web/ui/node_modules/.cache/babel-loader/69d5f27e40d5e7040c8e8e303ccaee019b54092345acc3838d6eb943c474ce39.json +0 -1
  258. xinference/web/ui/node_modules/.cache/babel-loader/6a07b8d00482c07a4b7c73f6bebb79fad54bbea35ba358681ede6ff299a09f6b.json +0 -1
  259. xinference/web/ui/node_modules/.cache/babel-loader/70b0307a43b9f8c876bf2a1f7dea4cc56cc0ea33743e863c0b3983bc48109f2d.json +0 -1
  260. xinference/web/ui/node_modules/.cache/babel-loader/710c39a26816820c2cfd00b19cd0112851d64b8ac6b201b18727b43b5a69d92c.json +0 -1
  261. xinference/web/ui/node_modules/.cache/babel-loader/722f4c03da79795d1fb738323f3eb6794ab18711e20f141c0b678c5d4cb3dce9.json +0 -1
  262. xinference/web/ui/node_modules/.cache/babel-loader/8237bdfff695d71496ee2ff83ba0392d753f8e1dff0d002a1b2c9e249f03083c.json +0 -1
  263. xinference/web/ui/node_modules/.cache/babel-loader/8342e3800fed63e8e805850810aee4fd9b0558aca10c8b6b770fe5bff1bed266.json +0 -1
  264. xinference/web/ui/node_modules/.cache/babel-loader/847a71f54f7a059efc71f70c297b6b4182a633130715c80f817eb533de108c12.json +0 -1
  265. xinference/web/ui/node_modules/.cache/babel-loader/8731a959d9b7278ba7a1c044bb4253bfff63079da69b21ff80221ae5e037dbbe.json +0 -1
  266. xinference/web/ui/node_modules/.cache/babel-loader/87b4e94b045df561c8f4bd253c84447c96e0dededbcb6646c0cd180119960990.json +0 -1
  267. xinference/web/ui/node_modules/.cache/babel-loader/894a334446bac96d74385c23c9d6f2d117ee29511897700430cd0e578cb32527.json +0 -1
  268. xinference/web/ui/node_modules/.cache/babel-loader/8c5ba88b620eeedbb99db18a610138353a5a657170fdb8bd6969585f81b34f74.json +0 -1
  269. xinference/web/ui/node_modules/.cache/babel-loader/9004bc083cf179941e64130390a27afcf7593b21dd64a87d03e1e6489032aa13.json +0 -1
  270. xinference/web/ui/node_modules/.cache/babel-loader/9b84631723b52988d3748d80b994e43d1944ca55098a5d908c9061eeb4c84f90.json +0 -1
  271. xinference/web/ui/node_modules/.cache/babel-loader/9ee1f84bde10cd78ca23a4681c7372195748bacd5e41c3b1df8e39d960294d81.json +0 -1
  272. xinference/web/ui/node_modules/.cache/babel-loader/a127f13651a7398f5fc378456a52cfa0c6260f6109611d3fb012aa7c7a4ee17a.json +0 -1
  273. xinference/web/ui/node_modules/.cache/babel-loader/a5e2e9f707eb7039bea096ca117d996b8f9cbc2a5613fd8e0c5b0094444ce23c.json +0 -1
  274. xinference/web/ui/node_modules/.cache/babel-loader/a6131b7fb1b30fd63d798845dab508a22b21c616b361e6214bf711915c5fef3d.json +0 -1
  275. xinference/web/ui/node_modules/.cache/babel-loader/a7b8ccb199158c897ed7d7835942eb37569221be4101eecfa7a7ce62e0c76f1f.json +0 -1
  276. xinference/web/ui/node_modules/.cache/babel-loader/acaf6dbddd57b63a874e9062803850ce4911c8dc63a362eacdc4d2d0f48f8c16.json +0 -1
  277. xinference/web/ui/node_modules/.cache/babel-loader/ade3cd82af85d2d28138bd942228b01e27559a5a4072ee049dcb47612ee9a506.json +0 -1
  278. xinference/web/ui/node_modules/.cache/babel-loader/aea82c71088dc9afd6f4ba71434286af5c6ab5e9b87088b0e84f351146d06489.json +0 -1
  279. xinference/web/ui/node_modules/.cache/babel-loader/afbd547078b20bb14596f20e165cb40413147e3de501b126c97bb417f8b9a53e.json +0 -1
  280. xinference/web/ui/node_modules/.cache/babel-loader/afc597494ebc62d22e4d3ac2a839bd5f30d5e47ce1d0576e0f62407ef6c78652.json +0 -1
  281. xinference/web/ui/node_modules/.cache/babel-loader/affbbac58777e26ba8b6dcda8cea72c76975bd0207f0a510bfde5983d3081f74.json +0 -1
  282. xinference/web/ui/node_modules/.cache/babel-loader/b0aa2ece62a1f5fa015f9e6b59d20b79be727fb5a3f4fc747200163097f320ad.json +0 -1
  283. xinference/web/ui/node_modules/.cache/babel-loader/b10391d3ae53452c8ec97d230f064e03a8377aab2be4b1b3e30cb3e054b2341a.json +0 -1
  284. xinference/web/ui/node_modules/.cache/babel-loader/b4bab0c147360f3e335251166d3d25602e2e0e160fb3734ac019a5b42499179c.json +0 -1
  285. xinference/web/ui/node_modules/.cache/babel-loader/b57dcff363e045fd11ea2bcbc58cce470f1bc22d3ac14c8acbf63a36637050ff.json +0 -1
  286. xinference/web/ui/node_modules/.cache/babel-loader/b80db1012318b97c329c4e3e72454f7512fb107e57c444b437dbe4ba1a3faa5a.json +0 -1
  287. xinference/web/ui/node_modules/.cache/babel-loader/b8de42f7f5dc1df9e802898a0ebb35adb57d269d7af21e6c66a0fde0ff226a08.json +0 -1
  288. xinference/web/ui/node_modules/.cache/babel-loader/be696cadea3bf577c0defc8f8d100232369763dde754b5ba9399f45f42aed215.json +0 -1
  289. xinference/web/ui/node_modules/.cache/babel-loader/c5a30d7c6cb484268e9126dd6d078229a1dcaa07fb064077b87d15224c61a3aa.json +0 -1
  290. xinference/web/ui/node_modules/.cache/babel-loader/c67e2077e70c4751e9e1dd98f378d979abd9c8a39fbf6c69f02fb4e5b04eef87.json +0 -1
  291. xinference/web/ui/node_modules/.cache/babel-loader/c81c1cad35be60a4fad090374edf50a884540e306d464b803120554df61e817e.json +0 -1
  292. xinference/web/ui/node_modules/.cache/babel-loader/d1054fd4d09666318a00ca0aea5ff42a2fdb17b3a42b5f77b93433d7408eb2f4.json +0 -1
  293. xinference/web/ui/node_modules/.cache/babel-loader/d1ec524ce17a48816fc55fe665572c1ef4982a4bd1ecb01badd0fdffc465242c.json +0 -1
  294. xinference/web/ui/node_modules/.cache/babel-loader/d4d61a537c45c4506a6b7e3478206b71dc96e0c6c6410cef9b5d2f47018bc578.json +0 -1
  295. xinference/web/ui/node_modules/.cache/babel-loader/e2e563b606cac9d51661cf68ba1729eb5df7861af521adccedc1b163439b657d.json +0 -1
  296. xinference/web/ui/node_modules/.cache/babel-loader/e3890e804dfaeb19b79f52e1b20198b587da61a9a3a1f55417d2cc5ce4ad7dae.json +0 -1
  297. xinference/web/ui/node_modules/.cache/babel-loader/e8c3fadbd12f0e2c7b70d84acbcabd0a5c96b05380a7b3167b00e8739d829d24.json +0 -1
  298. xinference/web/ui/node_modules/.cache/babel-loader/ebb5ae1a82b33b94e2b0cfec599bec7500aff0a1701f91736a6f3a4f77617560.json +0 -1
  299. xinference/web/ui/node_modules/.cache/babel-loader/f30f0396f4e54f0355112d5eba4b16c3a56f47e456eb117a9dc7fa6218ce21fe.json +0 -1
  300. xinference/web/ui/node_modules/.cache/babel-loader/f318ac15e999a553c00832029245730fe25c2916860b0ff55eee1f0ab926d84e.json +0 -1
  301. xinference/web/ui/node_modules/.cache/babel-loader/f55f4e657cc2d797b7e73a64ce40352f9a5dde31763a3e89c14d3b983307f30e.json +0 -1
  302. xinference/web/ui/node_modules/.cache/babel-loader/f8a68931d652a5e3be4e1ae06210a223de1239f3884ad858cecf9fe8d11c2ffe.json +0 -1
  303. xinference/web/ui/node_modules/.cache/babel-loader/f95a8bd358eeb55fa2f49f1224cc2f4f36006359856744ff09ae4bb295f59ec1.json +0 -1
  304. xinference/web/ui/node_modules/.cache/babel-loader/fb6fbd693a53d3c542d9adf83e0bb458918d39bf101eaf6d6662a2fbc35c1adc.json +0 -1
  305. xinference/web/ui/build/static/js/{main.476e35cc.js.LICENSE.txt → main.87d39ffb.js.LICENSE.txt} +0 -0
  306. {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/LICENSE +0 -0
  307. {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/WHEEL +0 -0
  308. {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/entry_points.txt +0 -0
  309. {xinference-0.8.4.dist-info → xinference-0.9.0.dist-info}/top_level.txt +0 -0
@@ -665,12 +665,16 @@ class Client:
665
665
  def _check_cluster_authenticated(self):
666
666
  url = f"{self.base_url}/v1/cluster/auth"
667
667
  response = requests.get(url)
668
- if response.status_code != 200:
669
- raise RuntimeError(
670
- f"Failed to get cluster information, detail: {response.json()['detail']}"
671
- )
672
- response_data = response.json()
673
- self._cluster_authed = bool(response_data["auth"])
668
+ # compatible with old version of xinference
669
+ if response.status_code == 404:
670
+ self._cluster_authed = False
671
+ else:
672
+ if response.status_code != 200:
673
+ raise RuntimeError(
674
+ f"Failed to get cluster information, detail: {response.json()['detail']}"
675
+ )
676
+ response_data = response.json()
677
+ self._cluster_authed = bool(response_data["auth"])
674
678
 
675
679
  def login(self, username: str, password: str):
676
680
  if not self._cluster_authed:
xinference/core/model.py CHANGED
@@ -44,6 +44,7 @@ import logging
44
44
 
45
45
  logger = logging.getLogger(__name__)
46
46
 
47
+ from ..device_utils import empty_cache
47
48
  from .utils import json_dumps, log_async
48
49
 
49
50
  try:
@@ -130,7 +131,7 @@ class ModelActor(xo.StatelessActor):
130
131
  try:
131
132
  import gc
132
133
 
133
- import torch
134
+ import torch # noqa: F401
134
135
  except ImportError:
135
136
  error_message = "Failed to import module 'torch'"
136
137
  installation_guide = [
@@ -141,7 +142,7 @@ class ModelActor(xo.StatelessActor):
141
142
 
142
143
  del self._model
143
144
  gc.collect()
144
- torch.cuda.empty_cache()
145
+ empty_cache()
145
146
 
146
147
  def __init__(
147
148
  self,
@@ -22,8 +22,9 @@ from .utils import get_nvidia_gpu_info
22
22
 
23
23
  @dataclass
24
24
  class ResourceStatus:
25
- available: float
25
+ usage: float
26
26
  total: float
27
+ memory_used: float
27
28
  memory_available: float
28
29
  memory_total: float
29
30
 
@@ -39,8 +40,9 @@ def gather_node_info() -> Dict[str, Union[ResourceStatus, GPUStatus]]:
39
40
  node_resource = dict()
40
41
  mem_info = psutil.virtual_memory()
41
42
  node_resource["cpu"] = ResourceStatus(
42
- available=psutil.cpu_percent() / 100.0,
43
+ usage=psutil.cpu_percent() / 100.0,
43
44
  total=psutil.cpu_count(),
45
+ memory_used=mem_info.used,
44
46
  memory_available=mem_info.available,
45
47
  memory_total=mem_info.total,
46
48
  )
@@ -16,7 +16,8 @@ from logging import getLogger
16
16
  from typing import Dict, List, Optional
17
17
 
18
18
  import xoscar as xo
19
- from pydantic import BaseModel
19
+
20
+ from .._compat import BaseModel
20
21
 
21
22
  logger = getLogger(__name__)
22
23
 
@@ -15,6 +15,7 @@
15
15
  import asyncio
16
16
  import itertools
17
17
  import time
18
+ import typing
18
19
  from dataclasses import dataclass
19
20
  from logging import getLogger
20
21
  from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Union
@@ -179,12 +180,26 @@ class SupervisorActor(xo.StatelessActor):
179
180
  model_version_infos, self.address
180
181
  )
181
182
 
182
- async def get_cluster_device_info(self) -> List:
183
+ @typing.no_type_check
184
+ async def get_cluster_device_info(self, detailed: bool = False) -> List:
185
+ import psutil
186
+
183
187
  supervisor_device_info = {
184
188
  "ip_address": self.address.split(":")[0],
185
189
  "gpu_count": 0,
186
190
  "gpu_vram_total": 0,
187
191
  }
192
+ if detailed:
193
+ supervisor_device_info["gpu_vram_total"] = 0
194
+ supervisor_device_info["gpu_vram_available"] = 0
195
+ supervisor_device_info["cpu_available"] = psutil.cpu_count() * (
196
+ 1 - psutil.cpu_percent() / 100.0
197
+ )
198
+ supervisor_device_info["cpu_count"] = psutil.cpu_count()
199
+ mem_info = psutil.virtual_memory()
200
+ supervisor_device_info["mem_used"] = mem_info.used
201
+ supervisor_device_info["mem_available"] = mem_info.available
202
+ supervisor_device_info["mem_total"] = mem_info.total
188
203
  res = [{"node_type": "Supervisor", **supervisor_device_info}]
189
204
  for worker_addr, worker_status in self._worker_status.items():
190
205
  vram_total: float = sum(
@@ -193,14 +208,24 @@ class SupervisorActor(xo.StatelessActor):
193
208
  total = (
194
209
  vram_total if vram_total == 0 else f"{int(vram_total / 1024 / 1024)}MiB"
195
210
  )
196
- res.append(
197
- {
198
- "node_type": "Worker",
199
- "ip_address": worker_addr.split(":")[0],
200
- "gpu_count": len(worker_status.status) - 1,
201
- "gpu_vram_total": total,
202
- }
203
- )
211
+ info = {
212
+ "node_type": "Worker",
213
+ "ip_address": worker_addr.split(":")[0],
214
+ "gpu_count": len(worker_status.status) - 1,
215
+ "gpu_vram_total": total,
216
+ }
217
+ if detailed:
218
+ cpu_info = worker_status.status["cpu"]
219
+ info["cpu_available"] = cpu_info.total * (1 - cpu_info.usage)
220
+ info["cpu_count"] = cpu_info.total
221
+ info["mem_used"] = cpu_info.memory_used
222
+ info["mem_available"] = cpu_info.memory_available
223
+ info["mem_total"] = cpu_info.memory_total
224
+ info["gpu_vram_total"] = vram_total
225
+ info["gpu_vram_available"] = sum(
226
+ [v.mem_free for k, v in worker_status.status.items() if k != "cpu"]
227
+ )
228
+ res.append(info)
204
229
  return res
205
230
 
206
231
  @staticmethod
@@ -227,11 +252,11 @@ class SupervisorActor(xo.StatelessActor):
227
252
  }
228
253
 
229
254
  async def get_devices_count(self) -> int:
230
- from ..utils import cuda_count
255
+ from ..device_utils import gpu_count
231
256
 
232
257
  if self.is_local_deployment():
233
- return cuda_count()
234
- # distributed deployment, choose a worker and return its cuda_count.
258
+ return gpu_count()
259
+ # distributed deployment, choose a worker and return its device_count.
235
260
  # Assume that each worker has the same count of cards.
236
261
  worker_ref = await self._choose_worker()
237
262
  return await worker_ref.get_devices_count()
xinference/core/utils.py CHANGED
@@ -19,9 +19,10 @@ import string
19
19
  from typing import Dict, Generator, List, Tuple, Union
20
20
 
21
21
  import orjson
22
- from pydantic import BaseModel
23
22
  from pynvml import nvmlDeviceGetCount, nvmlInit, nvmlShutdown
24
23
 
24
+ from .._compat import BaseModel
25
+
25
26
  logger = logging.getLogger(__name__)
26
27
 
27
28
 
xinference/core/worker.py CHANGED
@@ -30,8 +30,8 @@ from xoscar import MainActorPoolType
30
30
  from ..constants import XINFERENCE_CACHE_DIR
31
31
  from ..core import ModelActor
32
32
  from ..core.status_guard import LaunchStatus
33
+ from ..device_utils import gpu_count
33
34
  from ..model.core import ModelDescription, create_model_instance
34
- from ..utils import cuda_count
35
35
  from .event import Event, EventCollectorActor, EventType
36
36
  from .metrics import launch_metrics_export_server, record_metrics
37
37
  from .resource import gather_node_info
@@ -54,13 +54,13 @@ class WorkerActor(xo.StatelessActor):
54
54
  self,
55
55
  supervisor_address: str,
56
56
  main_pool: MainActorPoolType,
57
- cuda_devices: List[int],
57
+ gpu_devices: List[int],
58
58
  metrics_exporter_host: Optional[str] = None,
59
59
  metrics_exporter_port: Optional[int] = None,
60
60
  ):
61
61
  super().__init__()
62
62
  # static attrs.
63
- self._total_cuda_devices = cuda_devices
63
+ self._total_gpu_devices = gpu_devices
64
64
  self._supervisor_address = supervisor_address
65
65
  self._supervisor_ref = None
66
66
  self._main_pool = main_pool
@@ -244,9 +244,9 @@ class WorkerActor(xo.StatelessActor):
244
244
 
245
245
  @staticmethod
246
246
  def get_devices_count():
247
- from ..utils import cuda_count
247
+ from ..device_utils import gpu_count
248
248
 
249
- return cuda_count()
249
+ return gpu_count()
250
250
 
251
251
  @log_sync(logger=logger)
252
252
  def get_model_count(self) -> int:
@@ -263,7 +263,7 @@ class WorkerActor(xo.StatelessActor):
263
263
  we assume that embedding model only takes 1 GPU slot.
264
264
  """
265
265
  candidates = []
266
- for _dev in self._total_cuda_devices:
266
+ for _dev in self._total_gpu_devices:
267
267
  if _dev not in self._gpu_to_model_uid:
268
268
  candidates.append(_dev)
269
269
  else:
@@ -291,11 +291,11 @@ class WorkerActor(xo.StatelessActor):
291
291
  return device
292
292
 
293
293
  def allocate_devices(self, model_uid: str, n_gpu: int) -> List[int]:
294
- if n_gpu > len(self._total_cuda_devices) - len(self._gpu_to_model_uid):
294
+ if n_gpu > len(self._total_gpu_devices) - len(self._gpu_to_model_uid):
295
295
  raise RuntimeError("No available slot found for the model")
296
296
 
297
297
  devices: List[int] = [
298
- dev for dev in self._total_cuda_devices if dev not in self._gpu_to_model_uid
298
+ dev for dev in self._total_gpu_devices if dev not in self._gpu_to_model_uid
299
299
  ][:n_gpu]
300
300
  for dev in devices:
301
301
  self._gpu_to_model_uid[int(dev)] = model_uid
@@ -324,7 +324,7 @@ class WorkerActor(xo.StatelessActor):
324
324
  ) -> Tuple[str, List[str]]:
325
325
  env = {}
326
326
  devices = []
327
- if isinstance(n_gpu, int) or (n_gpu == "auto" and cuda_count() > 0):
327
+ if isinstance(n_gpu, int) or (n_gpu == "auto" and gpu_count() > 0):
328
328
  # Currently, n_gpu=auto means using 1 GPU
329
329
  gpu_cnt = n_gpu if isinstance(n_gpu, int) else 1
330
330
  devices = (
@@ -396,10 +396,10 @@ class WorkerActor(xo.StatelessActor):
396
396
  n_gpu: Optional[Union[int, str]] = "auto",
397
397
  ):
398
398
  if n_gpu is not None:
399
- if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu > cuda_count()):
399
+ if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu > gpu_count()):
400
400
  raise ValueError(
401
401
  f"The parameter `n_gpu` must be greater than 0 and "
402
- f"not greater than the number of GPUs: {cuda_count()} on the machine."
402
+ f"not greater than the number of GPUs: {gpu_count()} on the machine."
403
403
  )
404
404
  if isinstance(n_gpu, str) and n_gpu != "auto":
405
405
  raise ValueError("Currently `n_gpu` only supports `auto`.")
@@ -504,10 +504,10 @@ class WorkerActor(xo.StatelessActor):
504
504
  launch_args.pop("kwargs")
505
505
  launch_args.update(kwargs)
506
506
  if n_gpu is not None:
507
- if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu > cuda_count()):
507
+ if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu > gpu_count()):
508
508
  raise ValueError(
509
509
  f"The parameter `n_gpu` must be greater than 0 and "
510
- f"not greater than the number of GPUs: {cuda_count()} on the machine."
510
+ f"not greater than the number of GPUs: {gpu_count()} on the machine."
511
511
  )
512
512
  if isinstance(n_gpu, str) and n_gpu != "auto":
513
513
  raise ValueError("Currently `n_gpu` only supports `auto`.")
@@ -21,7 +21,7 @@ import xoscar as xo
21
21
  from xoscar import MainActorPoolType
22
22
 
23
23
  from ..core.worker import WorkerActor
24
- from ..utils import cuda_count
24
+ from ..device_utils import gpu_count
25
25
 
26
26
  logger = logging.getLogger(__name__)
27
27
 
@@ -33,12 +33,12 @@ async def start_worker_components(
33
33
  metrics_exporter_host: Optional[str],
34
34
  metrics_exporter_port: Optional[int],
35
35
  ):
36
- cuda_device_indices = []
37
- cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
38
- if cuda_visible_devices:
39
- cuda_device_indices.extend([int(i) for i in cuda_visible_devices.split(",")])
36
+ gpu_device_indices = []
37
+ cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", None)
38
+ if cuda_visible_devices is not None and cuda_visible_devices != "-1":
39
+ gpu_device_indices.extend([int(i) for i in cuda_visible_devices.split(",")])
40
40
  else:
41
- cuda_device_indices = list(range(cuda_count()))
41
+ gpu_device_indices = list(range(gpu_count()))
42
42
 
43
43
  await xo.create_actor(
44
44
  WorkerActor,
@@ -46,7 +46,7 @@ async def start_worker_components(
46
46
  uid=WorkerActor.uid(),
47
47
  supervisor_address=supervisor_address,
48
48
  main_pool=main_pool,
49
- cuda_devices=cuda_device_indices,
49
+ gpu_devices=gpu_device_indices,
50
50
  metrics_exporter_host=metrics_exporter_host,
51
51
  metrics_exporter_port=metrics_exporter_port,
52
52
  )
@@ -0,0 +1,100 @@
1
+ # Copyright 2022-2023 XProbe Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+
17
+ import torch
18
+ from typing_extensions import Literal, Union
19
+
20
+ DeviceType = Literal["cuda", "mps", "xpu", "cpu"]
21
+
22
+
23
+ def is_xpu_available() -> bool:
24
+ return hasattr(torch, "xpu") and torch.xpu.is_available()
25
+
26
+
27
+ def get_available_device() -> DeviceType:
28
+ if torch.cuda.is_available():
29
+ return "cuda"
30
+ elif torch.backends.mps.is_available():
31
+ return "mps"
32
+ elif is_xpu_available():
33
+ return "xpu"
34
+ return "cpu"
35
+
36
+
37
+ def is_device_available(device: str) -> bool:
38
+ if device == "cuda":
39
+ return torch.cuda.is_available()
40
+ elif device == "mps":
41
+ return torch.backends.mps.is_available()
42
+ elif device == "xpu":
43
+ return is_xpu_available()
44
+ elif device == "cpu":
45
+ return True
46
+
47
+ return False
48
+
49
+
50
+ def move_model_to_available_device(model):
51
+ device = get_available_device()
52
+
53
+ if device == "cpu":
54
+ return model
55
+
56
+ return model.to(device)
57
+
58
+
59
+ def get_device_preferred_dtype(device: str) -> Union[torch.dtype, None]:
60
+ if device == "cpu":
61
+ return torch.float32
62
+ elif device == "cuda" or device == "mps":
63
+ return torch.float16
64
+ elif device == "xpu":
65
+ return torch.bfloat16
66
+
67
+ return None
68
+
69
+
70
+ def is_hf_accelerate_supported(device: str) -> bool:
71
+ return device == "cuda" or device == "xpu"
72
+
73
+
74
+ def empty_cache():
75
+ if torch.cuda.is_available():
76
+ torch.cuda.empty_cache()
77
+ if torch.backends.mps.is_available():
78
+ torch.mps.empty_cache()
79
+ if is_xpu_available():
80
+ torch.xpu.empty_cache()
81
+
82
+
83
+ def gpu_count():
84
+ if torch.cuda.is_available():
85
+ cuda_visible_devices_env = os.getenv("CUDA_VISIBLE_DEVICES", None)
86
+
87
+ if cuda_visible_devices_env is None:
88
+ return torch.cuda.device_count()
89
+
90
+ cuda_visible_devices = (
91
+ cuda_visible_devices_env.split(",") if cuda_visible_devices_env else []
92
+ )
93
+
94
+ return min(torch.cuda.device_count(), len(cuda_visible_devices))
95
+ elif torch.backends.mps.is_available():
96
+ return 1
97
+ elif is_xpu_available():
98
+ return torch.xpu.device_count()
99
+ else:
100
+ return 0
xinference/fields.py CHANGED
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from pydantic import Field
15
+ from ._compat import Field
16
16
 
17
17
  none_field = Field(None)
18
18
 
@@ -16,8 +16,7 @@ import os
16
16
  from collections import defaultdict
17
17
  from typing import Dict, List, Optional, Tuple
18
18
 
19
- from pydantic import BaseModel
20
-
19
+ from ..._compat import BaseModel
21
20
  from ...constants import XINFERENCE_CACHE_DIR
22
21
  from ..core import ModelDescription
23
22
  from ..utils import valid_model_revision
@@ -14,6 +14,12 @@
14
14
  import logging
15
15
  from typing import TYPE_CHECKING, Dict, Optional
16
16
 
17
+ from xinference.device_utils import (
18
+ get_available_device,
19
+ get_device_preferred_dtype,
20
+ is_device_available,
21
+ )
22
+
17
23
  if TYPE_CHECKING:
18
24
  from .core import AudioModelFamilyV1
19
25
 
@@ -37,11 +43,15 @@ class WhisperModel:
37
43
  self._kwargs = kwargs
38
44
 
39
45
  def load(self):
40
- import torch
41
46
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
42
47
 
43
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
44
- torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
48
+ if self._device is None:
49
+ self._device = get_available_device()
50
+ else:
51
+ if not is_device_available(self._device):
52
+ raise ValueError(f"Device {self._device} is not available!")
53
+
54
+ torch_dtype = get_device_preferred_dtype(self._device)
45
55
 
46
56
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
47
57
  self._model_path,
@@ -49,7 +59,7 @@ class WhisperModel:
49
59
  low_cpu_mem_usage=True,
50
60
  use_safetensors=True,
51
61
  )
52
- model.to(device)
62
+ model.to(self._device)
53
63
 
54
64
  processor = AutoProcessor.from_pretrained(self._model_path)
55
65
 
@@ -63,7 +73,7 @@ class WhisperModel:
63
73
  batch_size=16,
64
74
  return_timestamps=False,
65
75
  torch_dtype=torch_dtype,
66
- device=device,
76
+ device=self._device,
67
77
  )
68
78
 
69
79
  def _call_model(
@@ -99,9 +109,11 @@ class WhisperModel:
99
109
  )
100
110
  return self._call_model(
101
111
  audio=audio,
102
- generate_kwargs={"language": language, "task": "transcribe"}
103
- if language is not None
104
- else {"task": "transcribe"},
112
+ generate_kwargs=(
113
+ {"language": language, "task": "transcribe"}
114
+ if language is not None
115
+ else {"task": "transcribe"}
116
+ ),
105
117
  response_format=response_format,
106
118
  )
107
119
 
xinference/model/core.py CHANGED
@@ -15,6 +15,8 @@
15
15
  from abc import ABC, abstractmethod
16
16
  from typing import Any, List, Optional, Tuple
17
17
 
18
+ from .._compat import BaseModel
19
+
18
20
 
19
21
  class ModelDescription(ABC):
20
22
  def __init__(
@@ -94,3 +96,10 @@ def create_model_instance(
94
96
  )
95
97
  else:
96
98
  raise ValueError(f"Unsupported model type: {model_type}.")
99
+
100
+
101
+ class CacheableModelSpec(BaseModel):
102
+ model_name: str
103
+ model_id: str
104
+ model_revision: Optional[str]
105
+ model_hub: str = "huggingface"