xinference 0.8.5__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (287)
  1. xinference/__init__.py +6 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +136 -74
  4. xinference/core/model.py +3 -2
  5. xinference/core/resource.py +4 -2
  6. xinference/core/supervisor.py +37 -12
  7. xinference/core/worker.py +13 -13
  8. xinference/deploy/worker.py +7 -7
  9. xinference/device_utils.py +100 -0
  10. xinference/model/audio/whisper.py +20 -8
  11. xinference/model/image/core.py +5 -1
  12. xinference/model/image/stable_diffusion/core.py +3 -5
  13. xinference/model/llm/llm_family.json +93 -3
  14. xinference/model/llm/llm_family_modelscope.json +46 -10
  15. xinference/model/llm/pytorch/compression.py +3 -1
  16. xinference/model/llm/pytorch/core.py +33 -14
  17. xinference/model/llm/pytorch/qwen_vl.py +5 -3
  18. xinference/model/llm/pytorch/spec_decoding_utils.py +3 -1
  19. xinference/model/llm/pytorch/spec_model.py +20 -17
  20. xinference/model/llm/pytorch/utils.py +3 -2
  21. xinference/model/llm/pytorch/yi_vl.py +9 -3
  22. xinference/model/llm/utils.py +9 -0
  23. xinference/model/utils.py +6 -16
  24. xinference/thirdparty/llava/mm_utils.py +1 -1
  25. xinference/web/ui/build/asset-manifest.json +3 -3
  26. xinference/web/ui/build/index.html +1 -1
  27. xinference/web/ui/build/static/js/main.87d39ffb.js +3 -0
  28. xinference/web/ui/build/static/js/main.87d39ffb.js.map +1 -0
  29. xinference/web/ui/node_modules/.cache/babel-loader/{8d95a492a6b08c192e212189567b59b98282ca6f3ed52dd9bd053a6be7ff4e65.json → 027745bdb9bb9fe375f2eef7377e9f7ba82510ed90c05f9e5b34ba314bb93498.json} +1 -1
  30. xinference/web/ui/node_modules/.cache/babel-loader/03d5e7de9b1728d71cd8c483a35f4373eb66932022026142e03005d9de1a2a44.json +1 -0
  31. xinference/web/ui/node_modules/.cache/babel-loader/0738899eefad7f90261125823d87ea9f0d53667b1479a0c1f398aff14f2bbd2a.json +1 -0
  32. xinference/web/ui/node_modules/.cache/babel-loader/09a21ab449599121eadfdd12d6dba40c5a1c5b35dc90be17108eeff80daf79b8.json +1 -0
  33. xinference/web/ui/node_modules/.cache/babel-loader/09d043a8289da33d05768f5face63cc47e9ce9646d67f5589865db511574e8f9.json +1 -0
  34. xinference/web/ui/node_modules/.cache/babel-loader/0b901c8f077905bf153a1390b207277c31e893f83f9ced5d29a0a0c85a07b5eb.json +1 -0
  35. xinference/web/ui/node_modules/.cache/babel-loader/0da1ff7c9602609cfa8b82a67140114f3037130fe79583bbf23b69cef81823a6.json +1 -0
  36. xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +1 -0
  37. xinference/web/ui/node_modules/.cache/babel-loader/0f87caef05125425c0ca418a32ce8a53e9becffd464411b2ff94e3c7a37c7b5b.json +1 -0
  38. xinference/web/ui/node_modules/.cache/babel-loader/{cb50b4d54ce9c249d580f7f1fc2df4beab4f13c4d16f486c89239809570de8fb.json → 0fc333c35ad49c0db7f18f77521984a36067cfce4206df50ce16a2fdc88be1c6.json} +1 -1
  39. xinference/web/ui/node_modules/.cache/babel-loader/13fdd92dd5e2fe11640cf6ec0368e3c098d3f0c43e11703ccf157e8c41410d5b.json +1 -0
  40. xinference/web/ui/node_modules/.cache/babel-loader/1464f950e5600fe2e5890186a1ae3750cdabbadd8525070cef6273450760bcf6.json +1 -0
  41. xinference/web/ui/node_modules/.cache/babel-loader/17c53dc44a324629efef425833ab48cc05243d089f4eb3c731da63568fcff35e.json +1 -0
  42. xinference/web/ui/node_modules/.cache/babel-loader/1946a52cb34e675da423de15b703437b4c841e4f5aed6ddd932a091c145e17b0.json +1 -0
  43. xinference/web/ui/node_modules/.cache/babel-loader/1e0da5ce4d8ccdbe33705a47bdc0d5d1d7e4e48c8e5c1d0b8c473e9f491930ac.json +1 -0
  44. xinference/web/ui/node_modules/.cache/babel-loader/1e3ec3302ef2253fad5e86bf429baf6b69e57e769f38ec1aaada87f3114b6a1f.json +1 -0
  45. xinference/web/ui/node_modules/.cache/babel-loader/1f18804827897c337874b289873df611b44869f86ced1d4351fbe0b45ed17518.json +1 -0
  46. xinference/web/ui/node_modules/.cache/babel-loader/21b64f7fdb74c2c35479811c3f5683ee161d3b64a4fade0cd1fc0044332c79b2.json +1 -0
  47. xinference/web/ui/node_modules/.cache/babel-loader/22873d21eab4b5a04b183cc3d2cbfafa431e0ce99649aeb900cffcfc33ecbca4.json +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/{7a5fecf96af770954035948b07dfadeceb33536b4763a2f9e548121ae555b75f.json → 228b78cd0950f1c967cb33b210b11497052a9998f0b1d1f31ecee0fd8b99fb40.json} +1 -1
  49. xinference/web/ui/node_modules/.cache/babel-loader/2328fc2360224ac3eef5c5261fc2a75cdce7fe58c210af8d3e532ec609464f5f.json +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/25fd89450dcf73a205a5f8c82dbc604bf872d574b748427c26ba79b8711ec502.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/{b6e95546cdd247e50d9c916dd3bef5216f96c32a267e54b51b23cc22631be0c4.json → 288d6573b09f0e2b0277e1f301bcb57a194c01f3983e3c1e03c8b2ea72653194.json} +1 -1
  52. xinference/web/ui/node_modules/.cache/babel-loader/2c04900a020b813e2e99b15748c3c3ef5a1627c33d5668e45bc6a798f0d28dae.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/2cd0fd50d8f213cdd94daffe7d0f997991072b11936ead7c48be2af3fdbd1fda.json +1 -0
  54. xinference/web/ui/node_modules/.cache/babel-loader/2e057c12322da07a537a576a3917d512ac6cb291be5aa29ce23d2679a4fd16b1.json +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/{abd0662d21fbe39e483fbc4d37faa9e24d93889b5b40d42e7e20773c66ee3894.json → 327c21db6124a5209f41996d0d3359b462d5baff3974296f5acfddfc532434e6.json} +1 -1
  56. xinference/web/ui/node_modules/.cache/babel-loader/33294bb0080595100e22c960929a6f3086f5ea632a4bcd9216069f95347a63a9.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/3361869e45eddd7d48ff7995e1a12ac454c9eb92d4c87f49ce80c1e9482c1e91.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/375d2f6a31e01c5a82fd0e18f42198f9fbb7117491f3e92d70cf36055f159a6e.json +1 -0
  59. xinference/web/ui/node_modules/.cache/babel-loader/3808217efee76345ba19af3ffdba045888e0908b0dc7fc38e54655cdc3504c60.json +1 -0
  60. xinference/web/ui/node_modules/.cache/babel-loader/39db740a99454006312dec9e4845f850c51e043458ca94eba8bb256521fabcd4.json +1 -0
  61. xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/3e737bcdbcbc407ccd65b90e199ef0c3214b261e8e41dbf14d921384a717d9ee.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/3e87c7d4f1e5c93603eed3e861945e44dd1e664061a89aaef1aa6435f4f4d7d0.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/3ecf70018028c8cf6cdc2d24d2b88f2707bd5c23a336fd3d82bd6a1af0db7ded.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/43314d1e9fed30ed6cf5cab3f5033da067e58be29e36448e3c0a8127403a9388.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/442467c09440131f7c25ddcdeb356d2a87783ad4e32d89f410a16cb6e7d83b22.json +1 -0
  67. xinference/web/ui/node_modules/.cache/babel-loader/487524f2b3494bfcfbd8c12918092a52ff2ce3eefe9bf0a557389ec1c74d3e67.json +1 -0
  68. xinference/web/ui/node_modules/.cache/babel-loader/4de4b0282a3d1ae02bec0bfe7d6658f039141406e8bfdbe86bf1dc5b115a2524.json +1 -0
  69. xinference/web/ui/node_modules/.cache/babel-loader/4ec72faab00e4cbffefdf43b987ae3d15beb60e282b9ca86dad37f4d0fc3f2bd.json +1 -0
  70. xinference/web/ui/node_modules/.cache/babel-loader/51403f1b70af0f4577d4594cbfcf9d9759997e5c49097e7a07219000d1be4ff1.json +1 -0
  71. xinference/web/ui/node_modules/.cache/babel-loader/566a0829bfeab41e40bbdca55ad056067e172508af4b333403d4fd334a8c8742.json +1 -0
  72. xinference/web/ui/node_modules/.cache/babel-loader/5689d9aee417a0834fed66720cf3b48c34cc26364cd9ab7e3d809997bb9faec9.json +1 -0
  73. xinference/web/ui/node_modules/.cache/babel-loader/5bedd27475bda204eafccdc636a56ba5765353391049a0b947234faaab3d021a.json +1 -0
  74. xinference/web/ui/node_modules/.cache/babel-loader/{94daea23b698bbb26bbbab69bbca8ad521c79fcbd6b495c84c740e4006a2ced2.json → 61014d40df4486972fb6d5909dff45f24bec48573af99ca8bcd79835385c7da0.json} +1 -1
  75. xinference/web/ui/node_modules/.cache/babel-loader/6139c80ec46d77469e9a89924de41d93050f19e9ed541e358f09cbf20ccd7c86.json +1 -0
  76. xinference/web/ui/node_modules/.cache/babel-loader/63ead4dee6a4f4ae47aed6918a2ab3e172739ecdffaa85e063f9a2c9165ff93c.json +1 -0
  77. xinference/web/ui/node_modules/.cache/babel-loader/6bf8c25e0f2051d994990bf73fdcb3e08963e11589842895593f199eab13ba9a.json +1 -0
  78. xinference/web/ui/node_modules/.cache/babel-loader/6d42f63cd673d1c60e534a174cd851e9aefd2d5d45a4aa6128d794457b64de53.json +1 -0
  79. xinference/web/ui/node_modules/.cache/babel-loader/7044e626a91582077b8cbad09bcf0cc118e92e11ebfa2ebc17622baf98aa56bf.json +1 -0
  80. xinference/web/ui/node_modules/.cache/babel-loader/71ec408030881aad69854815b8681185d0174cc0cf67f20fb2c3cd13c74fce07.json +1 -0
  81. xinference/web/ui/node_modules/.cache/babel-loader/7280614d5bddce524182024cc288953eb89d46c1dbe470ed5eb5ecab5de28385.json +1 -0
  82. xinference/web/ui/node_modules/.cache/babel-loader/7293ad8ba7b5fc6ed791aed44a766a396516c782f5d0e564b9ef0997645ec1dd.json +1 -0
  83. xinference/web/ui/node_modules/.cache/babel-loader/79be1e0dd6b1c3cbb3f08b349454f4791edaac451fa4d1f10eaf433e57ecf70f.json +1 -0
  84. xinference/web/ui/node_modules/.cache/babel-loader/7c0571c03aa8c90eb9452ba23133aed9b3b2a049910d4d8276588b438a4caaba.json +1 -0
  85. xinference/web/ui/node_modules/.cache/babel-loader/81ed094096cd9dc36f3ddebe61818766f43d3338c171f2269958f47265e60f24.json +1 -0
  86. xinference/web/ui/node_modules/.cache/babel-loader/86355e55059ddb15f71c0c651a2e622cee6f3141da3a018775ca00c74dd37dc2.json +1 -0
  87. xinference/web/ui/node_modules/.cache/babel-loader/{d1af6db565bae36218251c01a817c080ef764172dd0694e38d27b525a3ffd1d2.json → 8710fe63bcc6b95502287c36a7bd881b74ee2926982e120e1e8070a2c8a668fc.json} +1 -1
  88. xinference/web/ui/node_modules/.cache/babel-loader/8779104679d8f85017b551d1d108ea1ac701e184ad7bcc8a367526353a6c5175.json +1 -0
  89. xinference/web/ui/node_modules/.cache/babel-loader/881c573e6aa9954e7bd5e3bc5a6ef62c0db9b93815030080f677f6834e431434.json +1 -0
  90. xinference/web/ui/node_modules/.cache/babel-loader/8d24c51a0905b919436aac2bad441eb1651c46c4e7aa17c99eb63c7760ce6615.json +1 -0
  91. xinference/web/ui/node_modules/.cache/babel-loader/93bf5cc8cbe3f0b10f291800e1896f03506a8933cd490e07ee0ff94498f93bce.json +1 -0
  92. xinference/web/ui/node_modules/.cache/babel-loader/9401db0316ac34790aa5b972e9e1c50de58ef313b50840a37e08f0936a7197b6.json +1 -0
  93. xinference/web/ui/node_modules/.cache/babel-loader/98afeafdfc285a744d116fb5f189a16ad2b977adbd4fc0757f7ade1b29b8ad31.json +1 -0
  94. xinference/web/ui/node_modules/.cache/babel-loader/9a4e624d1ce66832fb213ead02745d84661d9bfcc21b62b4c37c680c3c3f5cbb.json +1 -0
  95. xinference/web/ui/node_modules/.cache/babel-loader/9a7b26f85224d33d431354ac8d7849388b7525fc014801824e705561ab2ac7a7.json +1 -0
  96. xinference/web/ui/node_modules/.cache/babel-loader/9dcb04ac6cb341233a2e98f2fb417d059c2ea6a56d2ff6f30bb27dae9bd6c756.json +1 -0
  97. xinference/web/ui/node_modules/.cache/babel-loader/9ebede8a4bccef016eef418a668b3212962779ea6d2531edb23d3dca81163496.json +1 -0
  98. xinference/web/ui/node_modules/.cache/babel-loader/a0efc8db4289116bdb312014df91fc781f66fe171d63027bbd5aae25cd54c325.json +1 -0
  99. xinference/web/ui/node_modules/.cache/babel-loader/a212defc32ca338ba673722c72815d30974275f1a82b67405a898f826fc8d782.json +1 -0
  100. xinference/web/ui/node_modules/.cache/babel-loader/a2685acab57fd6d874bf6069a587e3770b55c72cacc69cb167bb3dbd16a551c5.json +1 -0
  101. xinference/web/ui/node_modules/.cache/babel-loader/a3b7d6233e50dc3832421793a5752c1db99f3a214c9896c97bbd395d007f09b1.json +1 -0
  102. xinference/web/ui/node_modules/.cache/babel-loader/a3d0c810a255108613be0a175d53d673c4b6843c2a87dd0f157b4d3731af2092.json +1 -0
  103. xinference/web/ui/node_modules/.cache/babel-loader/a6633774053545c60721e345820e21cf3b089df219f7ee3fcceaf27af3a9e84b.json +1 -0
  104. xinference/web/ui/node_modules/.cache/babel-loader/a6bb917e4869d7d6ddf50b75932cad64e94b9e33db2aa40c4a489a045819bb36.json +1 -0
  105. xinference/web/ui/node_modules/.cache/babel-loader/a816031fafa564dc4af27383612e51ac29af51354f3bba750a70ac0c03b6bd30.json +1 -0
  106. xinference/web/ui/node_modules/.cache/babel-loader/af6349a1e9761cb98af4b76904440fb7f8d54288ee6ba37116f670226796e689.json +1 -0
  107. xinference/web/ui/node_modules/.cache/babel-loader/afce63eaad32a9968590e4500e123c81b385ea3b6046b2af1ec7ea8a49ba657b.json +1 -0
  108. xinference/web/ui/node_modules/.cache/babel-loader/aff24b61e5d55147d0a36ef1896b9bd35c995c357f44eeee1209f23db2118636.json +1 -0
  109. xinference/web/ui/node_modules/.cache/babel-loader/b01676c5ca97e3409a1b7bac91b6a797be3719a8b7ec72ad06c9beb296c67bb7.json +1 -0
  110. xinference/web/ui/node_modules/.cache/babel-loader/b0c5e967d1af90d545f1d34c9893a841296b0d1dd50de65eeaf19054995d2a94.json +1 -0
  111. xinference/web/ui/node_modules/.cache/babel-loader/b561d429cea6143e1bf7e6e7e690f9e6ce1dce4adfcacb57d7c5578195d1d190.json +1 -0
  112. xinference/web/ui/node_modules/.cache/babel-loader/b70187bb5aecf3bd7afbe8469b49e2a0a94a0c62727530d0dc2f84ee4577bcfc.json +1 -0
  113. xinference/web/ui/node_modules/.cache/babel-loader/bd21f6039e23775fb4631bb56f2eef1d132c03a60cc6fe4530569ecfeef456a3.json +1 -0
  114. xinference/web/ui/node_modules/.cache/babel-loader/be09b5508d0ca5ac0300144a682d4cde2d5b4b96ecb9fc690c1d5e7ee7411b3e.json +1 -0
  115. xinference/web/ui/node_modules/.cache/babel-loader/be8c9e83f0dd4af2e9bcf6844dd8c4445fe634b6c71da6debfa7acabb7a875c0.json +1 -0
  116. xinference/web/ui/node_modules/.cache/babel-loader/c0d1317dfae0c5ddf5a3ba9aef6bd2204f2daf9754f3270c4cab5c4632f78000.json +1 -0
  117. xinference/web/ui/node_modules/.cache/babel-loader/c1e3afdaae5649274f7d34910015b88bc02ba6a00b935e6597f9c670fedc05c0.json +1 -0
  118. xinference/web/ui/node_modules/.cache/babel-loader/c29ec02243dd370616a489f7de67e4e479d2f7f12504ba7f77304cffe60befbc.json +1 -0
  119. xinference/web/ui/node_modules/.cache/babel-loader/c2a4d298149e6c66713eed4f56f4bbbfa4022a193891029b84e429baf59cc246.json +1 -0
  120. xinference/web/ui/node_modules/.cache/babel-loader/c3d2fc03602f7604a65e631e8595ce4cfabd8887343489b34a65e3a73c5546b3.json +1 -0
  121. xinference/web/ui/node_modules/.cache/babel-loader/c41078c8692e0635fb52d01bc5157528c7308ae5140c134aa07c1fcda4f7a5e5.json +1 -0
  122. xinference/web/ui/node_modules/.cache/babel-loader/c4b5f1e444ddd41f8cd30e618c185b16235e313ea34d944959628001a91e38a5.json +1 -0
  123. xinference/web/ui/node_modules/.cache/babel-loader/c7ed9bc3125498e475f3170b8126c93368e9cbfd6a5e73ee63437d90e39b69e5.json +1 -0
  124. xinference/web/ui/node_modules/.cache/babel-loader/c996b3dc884130f034a37551845201bc5661f2dcd282aed737329f8d4dd387b5.json +1 -0
  125. xinference/web/ui/node_modules/.cache/babel-loader/c9fef6e7f0a532c2d1cc25dffd496ea8355dd14ce0105a169b391bfeb9277e4f.json +1 -0
  126. xinference/web/ui/node_modules/.cache/babel-loader/cc8f5a2bbe4da9852ccdf64150115271baa2d282d74d7fab854dc5ca641dd519.json +1 -0
  127. xinference/web/ui/node_modules/.cache/babel-loader/ce45adea497443bfc1b6baabd0aa3d85888dd10b1b7fd009edf608e390b2a844.json +1 -0
  128. xinference/web/ui/node_modules/.cache/babel-loader/ce4b82a3855d18ba09e922f6e2cc1b8c20f0e5261b27303c52d4e1ba5a09a078.json +1 -0
  129. xinference/web/ui/node_modules/.cache/babel-loader/cf16614311e144768bdc064da754efad77597cac5efe6852be996b9eaaed695a.json +1 -0
  130. xinference/web/ui/node_modules/.cache/babel-loader/d074bfe592a195f339acbcd8743f8b64cfd09b45ca1013598269227fc092f2db.json +1 -0
  131. xinference/web/ui/node_modules/.cache/babel-loader/d2a95455cc492d4fbf2839a21eeaef00e7fad2aac93825d2443a38127ad02007.json +1 -0
  132. xinference/web/ui/node_modules/.cache/babel-loader/d4e6e712ebbbf0f30745093e2b6d235c13fb43d37b586a0109e87c19eafdf35d.json +1 -0
  133. xinference/web/ui/node_modules/.cache/babel-loader/d88a02264125b07881eecff752d9c8d718edcf2d87e5431b13e54809dc7bf4f2.json +1 -0
  134. xinference/web/ui/node_modules/.cache/babel-loader/d907925fb28912f2e98cccfe758bce680aa4f894cc9d2079d73d7e2d85481d47.json +1 -0
  135. xinference/web/ui/node_modules/.cache/babel-loader/daf050ec3a864b9831dcb32cee1fb2739dafb68b369e9108469ca1e6fd166867.json +1 -0
  136. xinference/web/ui/node_modules/.cache/babel-loader/de9645d55ce8744856862eae4a3d3a1266e9df9d575aa3e2f62a75a5d97444f7.json +1 -0
  137. xinference/web/ui/node_modules/.cache/babel-loader/e04199bdfb87226d6aeae387ef677eae06483f9f135b5a3f1360ec1a243c86cd.json +1 -0
  138. xinference/web/ui/node_modules/.cache/babel-loader/e97d1f31f54263bb90b7c3e71be0e19973d76d945fb5d526bf93fb449708bbf0.json +1 -0
  139. xinference/web/ui/node_modules/.cache/babel-loader/e9f87f5c370a731ee42098ae6ff38ca6ad256396158c03f84c5e770c6b76a43d.json +1 -0
  140. xinference/web/ui/node_modules/.cache/babel-loader/f2ad375dbc8779f54eb9c377fd951cd442f3766d5eb70d40613b952b811643da.json +1 -0
  141. xinference/web/ui/node_modules/.cache/babel-loader/f35df9d5a0e0341bcc0f39a462e46988c330cdb905cc690b08de4a9cf5771f28.json +1 -0
  142. xinference/web/ui/node_modules/.cache/babel-loader/f433e33369483f75d31fe937b8a597ef2d7cdfed339931643ff966a33dbc6158.json +1 -0
  143. xinference/web/ui/node_modules/.cache/babel-loader/f4c164c89cfbcb1d81bc0b60e3e7c8715e07c7a23118acf4e3e16715beac09c5.json +1 -0
  144. xinference/web/ui/node_modules/.cache/babel-loader/f545d937cfb9d974f4aaaa9dc1c12adfed0d4f608de7d0a7247452d9bbbb55a4.json +1 -0
  145. xinference/web/ui/node_modules/.cache/babel-loader/{3cc5ec39e76e1768ffb070a8ae18ee61b68f3ed3593178134d3edda5ed378c6c.json → f8d2c4372093d0ff3a2f78260e004e6c4e2b8ab4f5be9713ebc16c50e9c2ab4b.json} +1 -1
  146. xinference/web/ui/node_modules/.cache/babel-loader/fa41c6c8ffa1047b7f2f52b6532ea4d9f5754e5bbc11d1340bae7f75ae5ed84e.json +1 -0
  147. xinference/web/ui/node_modules/.package-lock.json +45 -45
  148. xinference/web/ui/node_modules/@babel/runtime/package.json +64 -37
  149. xinference/web/ui/node_modules/@mui/private-theming/package.json +7 -6
  150. xinference/web/ui/node_modules/@mui/styled-engine/package.json +6 -5
  151. xinference/web/ui/node_modules/@mui/system/node_modules/clsx/package.json +22 -10
  152. xinference/web/ui/node_modules/@mui/system/package.json +13 -12
  153. xinference/web/ui/node_modules/@mui/types/package.json +3 -2
  154. xinference/web/ui/node_modules/@mui/utils/ClassNameGenerator/package.json +2 -2
  155. xinference/web/ui/node_modules/@mui/utils/HTMLElementType/package.json +2 -2
  156. xinference/web/ui/node_modules/@mui/utils/capitalize/package.json +2 -2
  157. xinference/web/ui/node_modules/@mui/utils/chainPropTypes/package.json +2 -2
  158. xinference/web/ui/node_modules/@mui/utils/clamp/package.json +6 -0
  159. xinference/web/ui/node_modules/@mui/utils/composeClasses/package.json +2 -2
  160. xinference/web/ui/node_modules/@mui/utils/createChainedFunction/package.json +6 -0
  161. xinference/web/ui/node_modules/@mui/utils/debounce/package.json +2 -2
  162. xinference/web/ui/node_modules/@mui/utils/deepmerge/package.json +6 -0
  163. xinference/web/ui/node_modules/@mui/utils/deprecatedPropType/package.json +6 -0
  164. xinference/web/ui/node_modules/@mui/utils/elementAcceptingRef/package.json +6 -0
  165. xinference/web/ui/node_modules/@mui/utils/elementTypeAcceptingRef/package.json +6 -0
  166. xinference/web/ui/node_modules/@mui/utils/exactProp/package.json +2 -2
  167. xinference/web/ui/node_modules/@mui/utils/formatMuiErrorMessage/package.json +6 -0
  168. xinference/web/ui/node_modules/@mui/utils/generateUtilityClass/package.json +2 -2
  169. xinference/web/ui/node_modules/@mui/utils/generateUtilityClasses/package.json +2 -2
  170. xinference/web/ui/node_modules/@mui/utils/getDisplayName/package.json +6 -0
  171. xinference/web/ui/node_modules/@mui/utils/getScrollbarSize/package.json +6 -0
  172. xinference/web/ui/node_modules/@mui/utils/getValidReactChildren/package.json +6 -0
  173. xinference/web/ui/node_modules/@mui/utils/integerPropType/package.json +6 -0
  174. xinference/web/ui/node_modules/@mui/utils/isMuiElement/package.json +6 -0
  175. xinference/web/ui/node_modules/@mui/utils/ownerDocument/package.json +2 -2
  176. xinference/web/ui/node_modules/@mui/utils/ownerWindow/package.json +2 -2
  177. xinference/web/ui/node_modules/@mui/utils/package.json +8 -7
  178. xinference/web/ui/node_modules/@mui/utils/ponyfillGlobal/package.json +2 -2
  179. xinference/web/ui/node_modules/@mui/utils/refType/package.json +6 -0
  180. xinference/web/ui/node_modules/@mui/utils/requirePropFactory/package.json +6 -0
  181. xinference/web/ui/node_modules/@mui/utils/resolveProps/package.json +6 -0
  182. xinference/web/ui/node_modules/@mui/utils/scrollLeft/package.json +6 -0
  183. xinference/web/ui/node_modules/@mui/utils/setRef/package.json +6 -0
  184. xinference/web/ui/node_modules/@mui/utils/unsupportedProp/package.json +6 -0
  185. xinference/web/ui/node_modules/@mui/utils/useControlled/package.json +2 -2
  186. xinference/web/ui/node_modules/@mui/utils/useEnhancedEffect/package.json +2 -2
  187. xinference/web/ui/node_modules/@mui/utils/useEventCallback/package.json +2 -2
  188. xinference/web/ui/node_modules/@mui/utils/useForkRef/package.json +2 -2
  189. xinference/web/ui/node_modules/@mui/utils/useId/package.json +2 -2
  190. xinference/web/ui/node_modules/@mui/utils/useIsFocusVisible/package.json +6 -0
  191. xinference/web/ui/node_modules/@mui/utils/useLazyRef/package.json +6 -0
  192. xinference/web/ui/node_modules/@mui/utils/useOnMount/package.json +6 -0
  193. xinference/web/ui/node_modules/@mui/utils/usePreviousProps/package.json +6 -0
  194. xinference/web/ui/node_modules/@mui/utils/useTimeout/package.json +6 -0
  195. xinference/web/ui/node_modules/@mui/utils/visuallyHidden/package.json +6 -0
  196. xinference/web/ui/node_modules/@types/prop-types/package.json +2 -2
  197. xinference/web/ui/node_modules/csstype/package.json +3 -3
  198. xinference/web/ui/package-lock.json +47 -45
  199. xinference/web/ui/package.json +2 -0
  200. {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/METADATA +4 -1
  201. {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/RECORD +206 -150
  202. xinference/web/ui/build/static/js/main.9715fe74.js +0 -3
  203. xinference/web/ui/build/static/js/main.9715fe74.js.map +0 -1
  204. xinference/web/ui/node_modules/.cache/babel-loader/02f54fcadc6fe9b1be8451435f9a097c82b3d665bae2d2df03ba0b36cebeca0c.json +0 -1
  205. xinference/web/ui/node_modules/.cache/babel-loader/051391807ffd255b652b63dc2ff12e5ce80113e1b712c81b6c1986a11cd85d3c.json +0 -1
  206. xinference/web/ui/node_modules/.cache/babel-loader/05710e94caaf5a2da790d67aa572bf6bc19bae0237668c8d97f4da62ab52fe5c.json +0 -1
  207. xinference/web/ui/node_modules/.cache/babel-loader/08a3470840a55d359a967d0763cd1ae5c1f15f3ca31118e29b37b4fbbdb248c7.json +0 -1
  208. xinference/web/ui/node_modules/.cache/babel-loader/0e68b0e268559a563dde6fa4c654922d98c74f5140854437f3e5a01bf23ba41f.json +0 -1
  209. xinference/web/ui/node_modules/.cache/babel-loader/10dc7661741dfc88df5538bbf835870ae47b1a18f36c2f7b9650651e7304ce72.json +0 -1
  210. xinference/web/ui/node_modules/.cache/babel-loader/18b3d28a2692ab4f2d05254221f53b3c13259435bd98cb90ff5d7250d93f11e0.json +0 -1
  211. xinference/web/ui/node_modules/.cache/babel-loader/1b612ab687e3a4e14db156bd92a75b8a3e96ff04bfbf8f3351c7e801bc61cf90.json +0 -1
  212. xinference/web/ui/node_modules/.cache/babel-loader/1bb839ffeb39ed12367c179fbf3b784e20f6f98fb87c597717bc11ebe3201c59.json +0 -1
  213. xinference/web/ui/node_modules/.cache/babel-loader/1d12e0b0e4e87372b18fef72fcd6e264fa40b0cffdbeddcef004becc4e32cf27.json +0 -1
  214. xinference/web/ui/node_modules/.cache/babel-loader/1d5b806c08ffb55539ee26b5fa9746a373f602354d6e55a0d4379d7e2f903868.json +0 -1
  215. xinference/web/ui/node_modules/.cache/babel-loader/1ecb7ce2bc103bec0d1a9f0cb07132e5ec7731d974998c465798c0e15cafb924.json +0 -1
  216. xinference/web/ui/node_modules/.cache/babel-loader/20cfc57ae2b8ba23571f386a5f2bf194bd02e8a3913113881bb9804ee822ad58.json +0 -1
  217. xinference/web/ui/node_modules/.cache/babel-loader/222a437b6773707bbead06ba5db1ed8869e44d9f051a7f6b1a745aeb654679d0.json +0 -1
  218. xinference/web/ui/node_modules/.cache/babel-loader/249709fb20c1254cecc9ba6666e6bcf37b21e47e7a020d5a7f0513680b49473f.json +0 -1
  219. xinference/web/ui/node_modules/.cache/babel-loader/257ad4979294ee3a7ef400feab3b29ebcd259c47b9410a3011a66f4b33a97904.json +0 -1
  220. xinference/web/ui/node_modules/.cache/babel-loader/2779bdc8ed8372f4015d07a45e4b09756bd953250944c3e3efe6b65bf8eaecd8.json +0 -1
  221. xinference/web/ui/node_modules/.cache/babel-loader/2bee7b8bd3d52976a45d6068e1333df88b943e0e679403c809e45382e3818037.json +0 -1
  222. xinference/web/ui/node_modules/.cache/babel-loader/31c71a85b2d0bbdcdc027e7950a9d1f778288f98888d8595fad5e4fdda5bb31f.json +0 -1
  223. xinference/web/ui/node_modules/.cache/babel-loader/371214b6649e323e7281885b1412c5a515ccc2274f4e1a42921e45777694009c.json +0 -1
  224. xinference/web/ui/node_modules/.cache/babel-loader/3eced030d2b3da645f4cbe3dc4ff49744312316e4306ce170991e802e9bfcf8e.json +0 -1
  225. xinference/web/ui/node_modules/.cache/babel-loader/452ae8e7cff99efc35d134014d32610d13217b031b817d3663bb689b3778ebd2.json +0 -1
  226. xinference/web/ui/node_modules/.cache/babel-loader/48e3f5d01ccedde86348a29392bc9b6e301a51b3f7e2476ae0a95a31eb6bf18d.json +0 -1
  227. xinference/web/ui/node_modules/.cache/babel-loader/4a311b5bd98f16588caf60b8de4c8d91d038198e6bfd496f880f60d4c1ea076d.json +0 -1
  228. xinference/web/ui/node_modules/.cache/babel-loader/4ccd71d3f73490a18a5a4de4ad82d63cf019caff6f8d881fd1434755a033bfc5.json +0 -1
  229. xinference/web/ui/node_modules/.cache/babel-loader/4ee8343d4254c95e685d49601f1649e8222d7423c2a822e1d01c8fc4cd3582a2.json +0 -1
  230. xinference/web/ui/node_modules/.cache/babel-loader/523a712748fbb7abf0c5423df62e65c4659b7e450182c890e847cd78ace7cac5.json +0 -1
  231. xinference/web/ui/node_modules/.cache/babel-loader/5777049bbacf7322891761a5b959e62eec22b0f96a70dbf97e58d530c6258a55.json +0 -1
  232. xinference/web/ui/node_modules/.cache/babel-loader/5905957ce95f7dee64f0739359e990b8b518cff088c6958487641a3d21737d1e.json +0 -1
  233. xinference/web/ui/node_modules/.cache/babel-loader/5ac79df79d2dff64da97b02d5c90b36bc0d44a4a4229f6efed2993f93ee1d7a1.json +0 -1
  234. xinference/web/ui/node_modules/.cache/babel-loader/64d3ef38fd394979b09061e67fc47f04b7295401677825c64c72ea2f44cf910c.json +0 -1
  235. xinference/web/ui/node_modules/.cache/babel-loader/68a5d8e66687770e05aebcae9a8047189dc577d6150fbf0e62d2b2c11c4335c5.json +0 -1
  236. xinference/web/ui/node_modules/.cache/babel-loader/69d5f27e40d5e7040c8e8e303ccaee019b54092345acc3838d6eb943c474ce39.json +0 -1
  237. xinference/web/ui/node_modules/.cache/babel-loader/6a07b8d00482c07a4b7c73f6bebb79fad54bbea35ba358681ede6ff299a09f6b.json +0 -1
  238. xinference/web/ui/node_modules/.cache/babel-loader/70b0307a43b9f8c876bf2a1f7dea4cc56cc0ea33743e863c0b3983bc48109f2d.json +0 -1
  239. xinference/web/ui/node_modules/.cache/babel-loader/710c39a26816820c2cfd00b19cd0112851d64b8ac6b201b18727b43b5a69d92c.json +0 -1
  240. xinference/web/ui/node_modules/.cache/babel-loader/722f4c03da79795d1fb738323f3eb6794ab18711e20f141c0b678c5d4cb3dce9.json +0 -1
  241. xinference/web/ui/node_modules/.cache/babel-loader/8237bdfff695d71496ee2ff83ba0392d753f8e1dff0d002a1b2c9e249f03083c.json +0 -1
  242. xinference/web/ui/node_modules/.cache/babel-loader/8342e3800fed63e8e805850810aee4fd9b0558aca10c8b6b770fe5bff1bed266.json +0 -1
  243. xinference/web/ui/node_modules/.cache/babel-loader/847a71f54f7a059efc71f70c297b6b4182a633130715c80f817eb533de108c12.json +0 -1
  244. xinference/web/ui/node_modules/.cache/babel-loader/8731a959d9b7278ba7a1c044bb4253bfff63079da69b21ff80221ae5e037dbbe.json +0 -1
  245. xinference/web/ui/node_modules/.cache/babel-loader/87b4e94b045df561c8f4bd253c84447c96e0dededbcb6646c0cd180119960990.json +0 -1
  246. xinference/web/ui/node_modules/.cache/babel-loader/894a334446bac96d74385c23c9d6f2d117ee29511897700430cd0e578cb32527.json +0 -1
  247. xinference/web/ui/node_modules/.cache/babel-loader/8c5ba88b620eeedbb99db18a610138353a5a657170fdb8bd6969585f81b34f74.json +0 -1
  248. xinference/web/ui/node_modules/.cache/babel-loader/9004bc083cf179941e64130390a27afcf7593b21dd64a87d03e1e6489032aa13.json +0 -1
  249. xinference/web/ui/node_modules/.cache/babel-loader/9b84631723b52988d3748d80b994e43d1944ca55098a5d908c9061eeb4c84f90.json +0 -1
  250. xinference/web/ui/node_modules/.cache/babel-loader/9ee1f84bde10cd78ca23a4681c7372195748bacd5e41c3b1df8e39d960294d81.json +0 -1
  251. xinference/web/ui/node_modules/.cache/babel-loader/a127f13651a7398f5fc378456a52cfa0c6260f6109611d3fb012aa7c7a4ee17a.json +0 -1
  252. xinference/web/ui/node_modules/.cache/babel-loader/a5e2e9f707eb7039bea096ca117d996b8f9cbc2a5613fd8e0c5b0094444ce23c.json +0 -1
  253. xinference/web/ui/node_modules/.cache/babel-loader/a6131b7fb1b30fd63d798845dab508a22b21c616b361e6214bf711915c5fef3d.json +0 -1
  254. xinference/web/ui/node_modules/.cache/babel-loader/a7b8ccb199158c897ed7d7835942eb37569221be4101eecfa7a7ce62e0c76f1f.json +0 -1
  255. xinference/web/ui/node_modules/.cache/babel-loader/acaf6dbddd57b63a874e9062803850ce4911c8dc63a362eacdc4d2d0f48f8c16.json +0 -1
  256. xinference/web/ui/node_modules/.cache/babel-loader/ade3cd82af85d2d28138bd942228b01e27559a5a4072ee049dcb47612ee9a506.json +0 -1
  257. xinference/web/ui/node_modules/.cache/babel-loader/aea82c71088dc9afd6f4ba71434286af5c6ab5e9b87088b0e84f351146d06489.json +0 -1
  258. xinference/web/ui/node_modules/.cache/babel-loader/afbd547078b20bb14596f20e165cb40413147e3de501b126c97bb417f8b9a53e.json +0 -1
  259. xinference/web/ui/node_modules/.cache/babel-loader/afc597494ebc62d22e4d3ac2a839bd5f30d5e47ce1d0576e0f62407ef6c78652.json +0 -1
  260. xinference/web/ui/node_modules/.cache/babel-loader/affbbac58777e26ba8b6dcda8cea72c76975bd0207f0a510bfde5983d3081f74.json +0 -1
  261. xinference/web/ui/node_modules/.cache/babel-loader/b0aa2ece62a1f5fa015f9e6b59d20b79be727fb5a3f4fc747200163097f320ad.json +0 -1
  262. xinference/web/ui/node_modules/.cache/babel-loader/b10391d3ae53452c8ec97d230f064e03a8377aab2be4b1b3e30cb3e054b2341a.json +0 -1
  263. xinference/web/ui/node_modules/.cache/babel-loader/b4bab0c147360f3e335251166d3d25602e2e0e160fb3734ac019a5b42499179c.json +0 -1
  264. xinference/web/ui/node_modules/.cache/babel-loader/b57dcff363e045fd11ea2bcbc58cce470f1bc22d3ac14c8acbf63a36637050ff.json +0 -1
  265. xinference/web/ui/node_modules/.cache/babel-loader/b8de42f7f5dc1df9e802898a0ebb35adb57d269d7af21e6c66a0fde0ff226a08.json +0 -1
  266. xinference/web/ui/node_modules/.cache/babel-loader/be696cadea3bf577c0defc8f8d100232369763dde754b5ba9399f45f42aed215.json +0 -1
  267. xinference/web/ui/node_modules/.cache/babel-loader/c5a30d7c6cb484268e9126dd6d078229a1dcaa07fb064077b87d15224c61a3aa.json +0 -1
  268. xinference/web/ui/node_modules/.cache/babel-loader/c67e2077e70c4751e9e1dd98f378d979abd9c8a39fbf6c69f02fb4e5b04eef87.json +0 -1
  269. xinference/web/ui/node_modules/.cache/babel-loader/c81c1cad35be60a4fad090374edf50a884540e306d464b803120554df61e817e.json +0 -1
  270. xinference/web/ui/node_modules/.cache/babel-loader/d1054fd4d09666318a00ca0aea5ff42a2fdb17b3a42b5f77b93433d7408eb2f4.json +0 -1
  271. xinference/web/ui/node_modules/.cache/babel-loader/d1ec524ce17a48816fc55fe665572c1ef4982a4bd1ecb01badd0fdffc465242c.json +0 -1
  272. xinference/web/ui/node_modules/.cache/babel-loader/d4d61a537c45c4506a6b7e3478206b71dc96e0c6c6410cef9b5d2f47018bc578.json +0 -1
  273. xinference/web/ui/node_modules/.cache/babel-loader/e2e563b606cac9d51661cf68ba1729eb5df7861af521adccedc1b163439b657d.json +0 -1
  274. xinference/web/ui/node_modules/.cache/babel-loader/e3890e804dfaeb19b79f52e1b20198b587da61a9a3a1f55417d2cc5ce4ad7dae.json +0 -1
  275. xinference/web/ui/node_modules/.cache/babel-loader/e8c3fadbd12f0e2c7b70d84acbcabd0a5c96b05380a7b3167b00e8739d829d24.json +0 -1
  276. xinference/web/ui/node_modules/.cache/babel-loader/ebb5ae1a82b33b94e2b0cfec599bec7500aff0a1701f91736a6f3a4f77617560.json +0 -1
  277. xinference/web/ui/node_modules/.cache/babel-loader/f30f0396f4e54f0355112d5eba4b16c3a56f47e456eb117a9dc7fa6218ce21fe.json +0 -1
  278. xinference/web/ui/node_modules/.cache/babel-loader/f318ac15e999a553c00832029245730fe25c2916860b0ff55eee1f0ab926d84e.json +0 -1
  279. xinference/web/ui/node_modules/.cache/babel-loader/f55f4e657cc2d797b7e73a64ce40352f9a5dde31763a3e89c14d3b983307f30e.json +0 -1
  280. xinference/web/ui/node_modules/.cache/babel-loader/f8a68931d652a5e3be4e1ae06210a223de1239f3884ad858cecf9fe8d11c2ffe.json +0 -1
  281. xinference/web/ui/node_modules/.cache/babel-loader/f95a8bd358eeb55fa2f49f1224cc2f4f36006359856744ff09ae4bb295f59ec1.json +0 -1
  282. xinference/web/ui/node_modules/.cache/babel-loader/fb6fbd693a53d3c542d9adf83e0bb458918d39bf101eaf6d6662a2fbc35c1adc.json +0 -1
  283. /xinference/web/ui/build/static/js/{main.9715fe74.js.LICENSE.txt → main.87d39ffb.js.LICENSE.txt} +0 -0
  284. {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/LICENSE +0 -0
  285. {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/WHEEL +0 -0
  286. {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/entry_points.txt +0 -0
  287. {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/top_level.txt +0 -0
xinference/__init__.py CHANGED
@@ -18,6 +18,12 @@ from . import _version
18
18
  __version__ = _version.get_versions()["version"]
19
19
 
20
20
 
21
+ try:
22
+ import intel_extension_for_pytorch # noqa: F401
23
+ except:
24
+ pass
25
+
26
+
21
27
  def _install():
22
28
  from xoscar.backends.router import Router
23
29
 
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2024-02-06T13:35:33+0800",
11
+ "date": "2024-02-22T15:40:53+0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "e903e05145efa8ad05d61e7e7cac55627f5ace51",
15
- "version": "0.8.5"
14
+ "full-revisionid": "c653c975847f9f6a81382033a9c8f5bd81bf70f2",
15
+ "version": "0.9.0"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
xinference/api/restful_api.py CHANGED
@@ -51,6 +51,7 @@ from uvicorn import Config, Server
51
51
  from xoscar.utils import get_next_port
52
52
 
53
53
  from .._compat import BaseModel, Field
54
+ from .._version import get_versions
54
55
  from ..constants import XINFERENCE_DEFAULT_ENDPOINT_PORT
55
56
  from ..core.event import Event, EventCollectorActor, EventType
56
57
  from ..core.supervisor import SupervisorActor
@@ -221,6 +222,9 @@ class RESTfulAPI:
221
222
  self._router.add_api_route(
222
223
  "/v1/cluster/info", self.get_cluster_device_info, methods=["GET"]
223
224
  )
225
+ self._router.add_api_route(
226
+ "/v1/cluster/version", self.get_cluster_version, methods=["GET"]
227
+ )
224
228
  self._router.add_api_route(
225
229
  "/v1/cluster/devices", self._get_devices_count, methods=["GET"]
226
230
  )
@@ -231,9 +235,11 @@ class RESTfulAPI:
231
235
  "/v1/ui/{model_uid}",
232
236
  self.build_gradio_interface,
233
237
  methods=["POST"],
234
- dependencies=[Security(self._auth_service, scopes=["models:read"])]
235
- if self.is_authenticated()
236
- else None,
238
+ dependencies=(
239
+ [Security(self._auth_service, scopes=["models:read"])]
240
+ if self.is_authenticated()
241
+ else None
242
+ ),
237
243
  )
238
244
  self._router.add_api_route(
239
245
  "/token", self.login_for_access_token, methods=["POST"]
@@ -246,142 +252,176 @@ class RESTfulAPI:
246
252
  "/v1/models/instances",
247
253
  self.get_instance_info,
248
254
  methods=["GET"],
249
- dependencies=[Security(self._auth_service, scopes=["models:list"])]
250
- if self.is_authenticated()
251
- else None,
255
+ dependencies=(
256
+ [Security(self._auth_service, scopes=["models:list"])]
257
+ if self.is_authenticated()
258
+ else None
259
+ ),
252
260
  )
253
261
  self._router.add_api_route(
254
262
  "/v1/models/{model_type}/{model_name}/versions",
255
263
  self.get_model_versions,
256
264
  methods=["GET"],
257
- dependencies=[Security(self._auth_service, scopes=["models:list"])]
258
- if self.is_authenticated()
259
- else None,
265
+ dependencies=(
266
+ [Security(self._auth_service, scopes=["models:list"])]
267
+ if self.is_authenticated()
268
+ else None
269
+ ),
260
270
  )
261
271
  self._router.add_api_route(
262
272
  "/v1/models",
263
273
  self.list_models,
264
274
  methods=["GET"],
265
- dependencies=[Security(self._auth_service, scopes=["models:list"])]
266
- if self.is_authenticated()
267
- else None,
275
+ dependencies=(
276
+ [Security(self._auth_service, scopes=["models:list"])]
277
+ if self.is_authenticated()
278
+ else None
279
+ ),
268
280
  )
269
281
 
270
282
  self._router.add_api_route(
271
283
  "/v1/models/{model_uid}",
272
284
  self.describe_model,
273
285
  methods=["GET"],
274
- dependencies=[Security(self._auth_service, scopes=["models:list"])]
275
- if self.is_authenticated()
276
- else None,
286
+ dependencies=(
287
+ [Security(self._auth_service, scopes=["models:list"])]
288
+ if self.is_authenticated()
289
+ else None
290
+ ),
277
291
  )
278
292
  self._router.add_api_route(
279
293
  "/v1/models/{model_uid}/events",
280
294
  self.get_model_events,
281
295
  methods=["GET"],
282
- dependencies=[Security(self._auth_service, scopes=["models:read"])]
283
- if self.is_authenticated()
284
- else None,
296
+ dependencies=(
297
+ [Security(self._auth_service, scopes=["models:read"])]
298
+ if self.is_authenticated()
299
+ else None
300
+ ),
285
301
  )
286
302
  self._router.add_api_route(
287
303
  "/v1/models/instance",
288
304
  self.launch_model_by_version,
289
305
  methods=["POST"],
290
- dependencies=[Security(self._auth_service, scopes=["models:start"])]
291
- if self.is_authenticated()
292
- else None,
306
+ dependencies=(
307
+ [Security(self._auth_service, scopes=["models:start"])]
308
+ if self.is_authenticated()
309
+ else None
310
+ ),
293
311
  )
294
312
  self._router.add_api_route(
295
313
  "/v1/models",
296
314
  self.launch_model,
297
315
  methods=["POST"],
298
- dependencies=[Security(self._auth_service, scopes=["models:start"])]
299
- if self.is_authenticated()
300
- else None,
316
+ dependencies=(
317
+ [Security(self._auth_service, scopes=["models:start"])]
318
+ if self.is_authenticated()
319
+ else None
320
+ ),
301
321
  )
302
322
  self._router.add_api_route(
303
323
  "/experimental/speculative_llms",
304
324
  self.launch_speculative_llm,
305
325
  methods=["POST"],
306
- dependencies=[Security(self._auth_service, scopes=["models:start"])]
307
- if self.is_authenticated()
308
- else None,
326
+ dependencies=(
327
+ [Security(self._auth_service, scopes=["models:start"])]
328
+ if self.is_authenticated()
329
+ else None
330
+ ),
309
331
  )
310
332
  self._router.add_api_route(
311
333
  "/v1/models/{model_uid}",
312
334
  self.terminate_model,
313
335
  methods=["DELETE"],
314
- dependencies=[Security(self._auth_service, scopes=["models:stop"])]
315
- if self.is_authenticated()
316
- else None,
336
+ dependencies=(
337
+ [Security(self._auth_service, scopes=["models:stop"])]
338
+ if self.is_authenticated()
339
+ else None
340
+ ),
317
341
  )
318
342
  self._router.add_api_route(
319
343
  "/v1/completions",
320
344
  self.create_completion,
321
345
  methods=["POST"],
322
346
  response_model=Completion,
323
- dependencies=[Security(self._auth_service, scopes=["models:read"])]
324
- if self.is_authenticated()
325
- else None,
347
+ dependencies=(
348
+ [Security(self._auth_service, scopes=["models:read"])]
349
+ if self.is_authenticated()
350
+ else None
351
+ ),
326
352
  )
327
353
  self._router.add_api_route(
328
354
  "/v1/embeddings",
329
355
  self.create_embedding,
330
356
  methods=["POST"],
331
- dependencies=[Security(self._auth_service, scopes=["models:read"])]
332
- if self.is_authenticated()
333
- else None,
357
+ dependencies=(
358
+ [Security(self._auth_service, scopes=["models:read"])]
359
+ if self.is_authenticated()
360
+ else None
361
+ ),
334
362
  )
335
363
  self._router.add_api_route(
336
364
  "/v1/rerank",
337
365
  self.rerank,
338
366
  methods=["POST"],
339
- dependencies=[Security(self._auth_service, scopes=["models:read"])]
340
- if self.is_authenticated()
341
- else None,
367
+ dependencies=(
368
+ [Security(self._auth_service, scopes=["models:read"])]
369
+ if self.is_authenticated()
370
+ else None
371
+ ),
342
372
  )
343
373
  self._router.add_api_route(
344
374
  "/v1/audio/transcriptions",
345
375
  self.create_transcriptions,
346
376
  methods=["POST"],
347
- dependencies=[Security(self._auth_service, scopes=["models:read"])]
348
- if self.is_authenticated()
349
- else None,
377
+ dependencies=(
378
+ [Security(self._auth_service, scopes=["models:read"])]
379
+ if self.is_authenticated()
380
+ else None
381
+ ),
350
382
  )
351
383
  self._router.add_api_route(
352
384
  "/v1/audio/translations",
353
385
  self.create_translations,
354
386
  methods=["POST"],
355
- dependencies=[Security(self._auth_service, scopes=["models:read"])]
356
- if self.is_authenticated()
357
- else None,
387
+ dependencies=(
388
+ [Security(self._auth_service, scopes=["models:read"])]
389
+ if self.is_authenticated()
390
+ else None
391
+ ),
358
392
  )
359
393
  self._router.add_api_route(
360
394
  "/v1/images/generations",
361
395
  self.create_images,
362
396
  methods=["POST"],
363
397
  response_model=ImageList,
364
- dependencies=[Security(self._auth_service, scopes=["models:read"])]
365
- if self.is_authenticated()
366
- else None,
398
+ dependencies=(
399
+ [Security(self._auth_service, scopes=["models:read"])]
400
+ if self.is_authenticated()
401
+ else None
402
+ ),
367
403
  )
368
404
  self._router.add_api_route(
369
405
  "/v1/images/variations",
370
406
  self.create_variations,
371
407
  methods=["POST"],
372
408
  response_model=ImageList,
373
- dependencies=[Security(self._auth_service, scopes=["models:read"])]
374
- if self.is_authenticated()
375
- else None,
409
+ dependencies=(
410
+ [Security(self._auth_service, scopes=["models:read"])]
411
+ if self.is_authenticated()
412
+ else None
413
+ ),
376
414
  )
377
415
  self._router.add_api_route(
378
416
  "/v1/chat/completions",
379
417
  self.create_chat_completion,
380
418
  methods=["POST"],
381
419
  response_model=ChatCompletion,
382
- dependencies=[Security(self._auth_service, scopes=["models:read"])]
383
- if self.is_authenticated()
384
- else None,
420
+ dependencies=(
421
+ [Security(self._auth_service, scopes=["models:read"])]
422
+ if self.is_authenticated()
423
+ else None
424
+ ),
385
425
  )
386
426
 
387
427
  # for custom models
@@ -389,33 +429,41 @@ class RESTfulAPI:
389
429
  "/v1/model_registrations/{model_type}",
390
430
  self.register_model,
391
431
  methods=["POST"],
392
- dependencies=[Security(self._auth_service, scopes=["models:register"])]
393
- if self.is_authenticated()
394
- else None,
432
+ dependencies=(
433
+ [Security(self._auth_service, scopes=["models:register"])]
434
+ if self.is_authenticated()
435
+ else None
436
+ ),
395
437
  )
396
438
  self._router.add_api_route(
397
439
  "/v1/model_registrations/{model_type}/{model_name}",
398
440
  self.unregister_model,
399
441
  methods=["DELETE"],
400
- dependencies=[Security(self._auth_service, scopes=["models:unregister"])]
401
- if self.is_authenticated()
402
- else None,
442
+ dependencies=(
443
+ [Security(self._auth_service, scopes=["models:unregister"])]
444
+ if self.is_authenticated()
445
+ else None
446
+ ),
403
447
  )
404
448
  self._router.add_api_route(
405
449
  "/v1/model_registrations/{model_type}",
406
450
  self.list_model_registrations,
407
451
  methods=["GET"],
408
- dependencies=[Security(self._auth_service, scopes=["models:list"])]
409
- if self.is_authenticated()
410
- else None,
452
+ dependencies=(
453
+ [Security(self._auth_service, scopes=["models:list"])]
454
+ if self.is_authenticated()
455
+ else None
456
+ ),
411
457
  )
412
458
  self._router.add_api_route(
413
459
  "/v1/model_registrations/{model_type}/{model_name}",
414
460
  self.get_model_registrations,
415
461
  methods=["GET"],
416
- dependencies=[Security(self._auth_service, scopes=["models:list"])]
417
- if self.is_authenticated()
418
- else None,
462
+ dependencies=(
463
+ [Security(self._auth_service, scopes=["models:list"])]
464
+ if self.is_authenticated()
465
+ else None
466
+ ),
419
467
  )
420
468
 
421
469
  # Clear the global Registry for the MetricsMiddleware, or
@@ -1094,10 +1142,12 @@ class RESTfulAPI:
1094
1142
  if body.logit_bias is not None:
1095
1143
  raise HTTPException(status_code=501, detail="Not implemented")
1096
1144
 
1145
+ messages = body.messages and list(body.messages) or None
1146
+
1097
1147
  if (
1098
- not body.messages
1099
- or body.messages[-1].get("role") not in ["user", "system", "tool"]
1100
- or not body.messages[-1].get("content")
1148
+ not messages
1149
+ or messages[-1].get("role") not in ["user", "system", "tool"]
1150
+ or not messages[-1].get("content")
1101
1151
  ):
1102
1152
  raise HTTPException(
1103
1153
  status_code=400, detail="Invalid input. Please specify the prompt."
@@ -1105,7 +1155,7 @@ class RESTfulAPI:
1105
1155
 
1106
1156
  system_messages = []
1107
1157
  non_system_messages = []
1108
- for msg in body.messages:
1158
+ for msg in messages:
1109
1159
  assert (
1110
1160
  msg.get("content") != SPECIAL_TOOL_PROMPT
1111
1161
  ), f"Invalid message content {SPECIAL_TOOL_PROMPT}"
@@ -1118,13 +1168,13 @@ class RESTfulAPI:
1118
1168
  raise HTTPException(
1119
1169
  status_code=400, detail="Multiple system messages are not supported."
1120
1170
  )
1121
- if len(system_messages) == 1 and body.messages[0]["role"] != "system":
1171
+ if len(system_messages) == 1 and messages[0]["role"] != "system":
1122
1172
  raise HTTPException(
1123
1173
  status_code=400, detail="System message should be the first one."
1124
1174
  )
1125
1175
  assert non_system_messages
1126
1176
 
1127
- has_tool_message = body.messages[-1].get("role") == "tool"
1177
+ has_tool_message = messages[-1].get("role") == "tool"
1128
1178
  if has_tool_message:
1129
1179
  prompt = SPECIAL_TOOL_PROMPT
1130
1180
  system_prompt = system_messages[0]["content"] if system_messages else None
@@ -1298,9 +1348,21 @@ class RESTfulAPI:
1298
1348
  logger.error(e, exc_info=True)
1299
1349
  raise HTTPException(status_code=500, detail=str(e))
1300
1350
 
1301
- async def get_cluster_device_info(self) -> JSONResponse:
1351
+ async def get_cluster_device_info(
1352
+ self, detailed: bool = Query(False)
1353
+ ) -> JSONResponse:
1354
+ try:
1355
+ data = await (await self._get_supervisor_ref()).get_cluster_device_info(
1356
+ detailed=detailed
1357
+ )
1358
+ return JSONResponse(content=data)
1359
+ except Exception as e:
1360
+ logger.error(e, exc_info=True)
1361
+ raise HTTPException(status_code=500, detail=str(e))
1362
+
1363
+ async def get_cluster_version(self) -> JSONResponse:
1302
1364
  try:
1303
- data = await (await self._get_supervisor_ref()).get_cluster_device_info()
1365
+ data = get_versions()
1304
1366
  return JSONResponse(content=data)
1305
1367
  except Exception as e:
1306
1368
  logger.error(e, exc_info=True)
xinference/core/model.py CHANGED
@@ -44,6 +44,7 @@ import logging
44
44
 
45
45
  logger = logging.getLogger(__name__)
46
46
 
47
+ from ..device_utils import empty_cache
47
48
  from .utils import json_dumps, log_async
48
49
 
49
50
  try:
@@ -130,7 +131,7 @@ class ModelActor(xo.StatelessActor):
130
131
  try:
131
132
  import gc
132
133
 
133
- import torch
134
+ import torch # noqa: F401
134
135
  except ImportError:
135
136
  error_message = "Failed to import module 'torch'"
136
137
  installation_guide = [
@@ -141,7 +142,7 @@ class ModelActor(xo.StatelessActor):
141
142
 
142
143
  del self._model
143
144
  gc.collect()
144
- torch.cuda.empty_cache()
145
+ empty_cache()
145
146
 
146
147
  def __init__(
147
148
  self,
xinference/core/resource.py CHANGED
@@ -22,8 +22,9 @@ from .utils import get_nvidia_gpu_info
22
22
 
23
23
  @dataclass
24
24
  class ResourceStatus:
25
- available: float
25
+ usage: float
26
26
  total: float
27
+ memory_used: float
27
28
  memory_available: float
28
29
  memory_total: float
29
30
 
@@ -39,8 +40,9 @@ def gather_node_info() -> Dict[str, Union[ResourceStatus, GPUStatus]]:
39
40
  node_resource = dict()
40
41
  mem_info = psutil.virtual_memory()
41
42
  node_resource["cpu"] = ResourceStatus(
42
- available=psutil.cpu_percent() / 100.0,
43
+ usage=psutil.cpu_percent() / 100.0,
43
44
  total=psutil.cpu_count(),
45
+ memory_used=mem_info.used,
44
46
  memory_available=mem_info.available,
45
47
  memory_total=mem_info.total,
46
48
  )
xinference/core/supervisor.py CHANGED
@@ -15,6 +15,7 @@
15
15
  import asyncio
16
16
  import itertools
17
17
  import time
18
+ import typing
18
19
  from dataclasses import dataclass
19
20
  from logging import getLogger
20
21
  from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Union
@@ -179,12 +180,26 @@ class SupervisorActor(xo.StatelessActor):
179
180
  model_version_infos, self.address
180
181
  )
181
182
 
182
- async def get_cluster_device_info(self) -> List:
183
+ @typing.no_type_check
184
+ async def get_cluster_device_info(self, detailed: bool = False) -> List:
185
+ import psutil
186
+
183
187
  supervisor_device_info = {
184
188
  "ip_address": self.address.split(":")[0],
185
189
  "gpu_count": 0,
186
190
  "gpu_vram_total": 0,
187
191
  }
192
+ if detailed:
193
+ supervisor_device_info["gpu_vram_total"] = 0
194
+ supervisor_device_info["gpu_vram_available"] = 0
195
+ supervisor_device_info["cpu_available"] = psutil.cpu_count() * (
196
+ 1 - psutil.cpu_percent() / 100.0
197
+ )
198
+ supervisor_device_info["cpu_count"] = psutil.cpu_count()
199
+ mem_info = psutil.virtual_memory()
200
+ supervisor_device_info["mem_used"] = mem_info.used
201
+ supervisor_device_info["mem_available"] = mem_info.available
202
+ supervisor_device_info["mem_total"] = mem_info.total
188
203
  res = [{"node_type": "Supervisor", **supervisor_device_info}]
189
204
  for worker_addr, worker_status in self._worker_status.items():
190
205
  vram_total: float = sum(
@@ -193,14 +208,24 @@ class SupervisorActor(xo.StatelessActor):
193
208
  total = (
194
209
  vram_total if vram_total == 0 else f"{int(vram_total / 1024 / 1024)}MiB"
195
210
  )
196
- res.append(
197
- {
198
- "node_type": "Worker",
199
- "ip_address": worker_addr.split(":")[0],
200
- "gpu_count": len(worker_status.status) - 1,
201
- "gpu_vram_total": total,
202
- }
203
- )
211
+ info = {
212
+ "node_type": "Worker",
213
+ "ip_address": worker_addr.split(":")[0],
214
+ "gpu_count": len(worker_status.status) - 1,
215
+ "gpu_vram_total": total,
216
+ }
217
+ if detailed:
218
+ cpu_info = worker_status.status["cpu"]
219
+ info["cpu_available"] = cpu_info.total * (1 - cpu_info.usage)
220
+ info["cpu_count"] = cpu_info.total
221
+ info["mem_used"] = cpu_info.memory_used
222
+ info["mem_available"] = cpu_info.memory_available
223
+ info["mem_total"] = cpu_info.memory_total
224
+ info["gpu_vram_total"] = vram_total
225
+ info["gpu_vram_available"] = sum(
226
+ [v.mem_free for k, v in worker_status.status.items() if k != "cpu"]
227
+ )
228
+ res.append(info)
204
229
  return res
205
230
 
206
231
  @staticmethod
@@ -227,11 +252,11 @@ class SupervisorActor(xo.StatelessActor):
227
252
  }
228
253
 
229
254
  async def get_devices_count(self) -> int:
230
- from ..utils import cuda_count
255
+ from ..device_utils import gpu_count
231
256
 
232
257
  if self.is_local_deployment():
233
- return cuda_count()
234
- # distributed deployment, choose a worker and return its cuda_count.
258
+ return gpu_count()
259
+ # distributed deployment, choose a worker and return its device_count.
235
260
  # Assume that each worker has the same count of cards.
236
261
  worker_ref = await self._choose_worker()
237
262
  return await worker_ref.get_devices_count()
xinference/core/worker.py CHANGED
@@ -30,8 +30,8 @@ from xoscar import MainActorPoolType
30
30
  from ..constants import XINFERENCE_CACHE_DIR
31
31
  from ..core import ModelActor
32
32
  from ..core.status_guard import LaunchStatus
33
+ from ..device_utils import gpu_count
33
34
  from ..model.core import ModelDescription, create_model_instance
34
- from ..utils import cuda_count
35
35
  from .event import Event, EventCollectorActor, EventType
36
36
  from .metrics import launch_metrics_export_server, record_metrics
37
37
  from .resource import gather_node_info
@@ -54,13 +54,13 @@ class WorkerActor(xo.StatelessActor):
54
54
  self,
55
55
  supervisor_address: str,
56
56
  main_pool: MainActorPoolType,
57
- cuda_devices: List[int],
57
+ gpu_devices: List[int],
58
58
  metrics_exporter_host: Optional[str] = None,
59
59
  metrics_exporter_port: Optional[int] = None,
60
60
  ):
61
61
  super().__init__()
62
62
  # static attrs.
63
- self._total_cuda_devices = cuda_devices
63
+ self._total_gpu_devices = gpu_devices
64
64
  self._supervisor_address = supervisor_address
65
65
  self._supervisor_ref = None
66
66
  self._main_pool = main_pool
@@ -244,9 +244,9 @@ class WorkerActor(xo.StatelessActor):
244
244
 
245
245
  @staticmethod
246
246
  def get_devices_count():
247
- from ..utils import cuda_count
247
+ from ..device_utils import gpu_count
248
248
 
249
- return cuda_count()
249
+ return gpu_count()
250
250
 
251
251
  @log_sync(logger=logger)
252
252
  def get_model_count(self) -> int:
@@ -263,7 +263,7 @@ class WorkerActor(xo.StatelessActor):
263
263
  we assume that embedding model only takes 1 GPU slot.
264
264
  """
265
265
  candidates = []
266
- for _dev in self._total_cuda_devices:
266
+ for _dev in self._total_gpu_devices:
267
267
  if _dev not in self._gpu_to_model_uid:
268
268
  candidates.append(_dev)
269
269
  else:
@@ -291,11 +291,11 @@ class WorkerActor(xo.StatelessActor):
291
291
  return device
292
292
 
293
293
  def allocate_devices(self, model_uid: str, n_gpu: int) -> List[int]:
294
- if n_gpu > len(self._total_cuda_devices) - len(self._gpu_to_model_uid):
294
+ if n_gpu > len(self._total_gpu_devices) - len(self._gpu_to_model_uid):
295
295
  raise RuntimeError("No available slot found for the model")
296
296
 
297
297
  devices: List[int] = [
298
- dev for dev in self._total_cuda_devices if dev not in self._gpu_to_model_uid
298
+ dev for dev in self._total_gpu_devices if dev not in self._gpu_to_model_uid
299
299
  ][:n_gpu]
300
300
  for dev in devices:
301
301
  self._gpu_to_model_uid[int(dev)] = model_uid
@@ -324,7 +324,7 @@ class WorkerActor(xo.StatelessActor):
324
324
  ) -> Tuple[str, List[str]]:
325
325
  env = {}
326
326
  devices = []
327
- if isinstance(n_gpu, int) or (n_gpu == "auto" and cuda_count() > 0):
327
+ if isinstance(n_gpu, int) or (n_gpu == "auto" and gpu_count() > 0):
328
328
  # Currently, n_gpu=auto means using 1 GPU
329
329
  gpu_cnt = n_gpu if isinstance(n_gpu, int) else 1
330
330
  devices = (
@@ -396,10 +396,10 @@ class WorkerActor(xo.StatelessActor):
396
396
  n_gpu: Optional[Union[int, str]] = "auto",
397
397
  ):
398
398
  if n_gpu is not None:
399
- if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu > cuda_count()):
399
+ if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu > gpu_count()):
400
400
  raise ValueError(
401
401
  f"The parameter `n_gpu` must be greater than 0 and "
402
- f"not greater than the number of GPUs: {cuda_count()} on the machine."
402
+ f"not greater than the number of GPUs: {gpu_count()} on the machine."
403
403
  )
404
404
  if isinstance(n_gpu, str) and n_gpu != "auto":
405
405
  raise ValueError("Currently `n_gpu` only supports `auto`.")
@@ -504,10 +504,10 @@ class WorkerActor(xo.StatelessActor):
504
504
  launch_args.pop("kwargs")
505
505
  launch_args.update(kwargs)
506
506
  if n_gpu is not None:
507
- if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu > cuda_count()):
507
+ if isinstance(n_gpu, int) and (n_gpu <= 0 or n_gpu > gpu_count()):
508
508
  raise ValueError(
509
509
  f"The parameter `n_gpu` must be greater than 0 and "
510
- f"not greater than the number of GPUs: {cuda_count()} on the machine."
510
+ f"not greater than the number of GPUs: {gpu_count()} on the machine."
511
511
  )
512
512
  if isinstance(n_gpu, str) and n_gpu != "auto":
513
513
  raise ValueError("Currently `n_gpu` only supports `auto`.")
xinference/deploy/worker.py CHANGED
@@ -21,7 +21,7 @@ import xoscar as xo
21
21
  from xoscar import MainActorPoolType
22
22
 
23
23
  from ..core.worker import WorkerActor
24
- from ..utils import cuda_count
24
+ from ..device_utils import gpu_count
25
25
 
26
26
  logger = logging.getLogger(__name__)
27
27
 
@@ -33,12 +33,12 @@ async def start_worker_components(
33
33
  metrics_exporter_host: Optional[str],
34
34
  metrics_exporter_port: Optional[int],
35
35
  ):
36
- cuda_device_indices = []
37
- cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
38
- if cuda_visible_devices:
39
- cuda_device_indices.extend([int(i) for i in cuda_visible_devices.split(",")])
36
+ gpu_device_indices = []
37
+ cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", None)
38
+ if cuda_visible_devices is not None and cuda_visible_devices != "-1":
39
+ gpu_device_indices.extend([int(i) for i in cuda_visible_devices.split(",")])
40
40
  else:
41
- cuda_device_indices = list(range(cuda_count()))
41
+ gpu_device_indices = list(range(gpu_count()))
42
42
 
43
43
  await xo.create_actor(
44
44
  WorkerActor,
@@ -46,7 +46,7 @@ async def start_worker_components(
46
46
  uid=WorkerActor.uid(),
47
47
  supervisor_address=supervisor_address,
48
48
  main_pool=main_pool,
49
- cuda_devices=cuda_device_indices,
49
+ gpu_devices=gpu_device_indices,
50
50
  metrics_exporter_host=metrics_exporter_host,
51
51
  metrics_exporter_port=metrics_exporter_port,
52
52
  )