xinference 0.8.5__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of xinference might be problematic.

Files changed (287)
  1. xinference/__init__.py +6 -0
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +136 -74
  4. xinference/core/model.py +3 -2
  5. xinference/core/resource.py +4 -2
  6. xinference/core/supervisor.py +37 -12
  7. xinference/core/worker.py +13 -13
  8. xinference/deploy/worker.py +7 -7
  9. xinference/device_utils.py +100 -0
  10. xinference/model/audio/whisper.py +20 -8
  11. xinference/model/image/core.py +5 -1
  12. xinference/model/image/stable_diffusion/core.py +3 -5
  13. xinference/model/llm/llm_family.json +93 -3
  14. xinference/model/llm/llm_family_modelscope.json +46 -10
  15. xinference/model/llm/pytorch/compression.py +3 -1
  16. xinference/model/llm/pytorch/core.py +33 -14
  17. xinference/model/llm/pytorch/qwen_vl.py +5 -3
  18. xinference/model/llm/pytorch/spec_decoding_utils.py +3 -1
  19. xinference/model/llm/pytorch/spec_model.py +20 -17
  20. xinference/model/llm/pytorch/utils.py +3 -2
  21. xinference/model/llm/pytorch/yi_vl.py +9 -3
  22. xinference/model/llm/utils.py +9 -0
  23. xinference/model/utils.py +6 -16
  24. xinference/thirdparty/llava/mm_utils.py +1 -1
  25. xinference/web/ui/build/asset-manifest.json +3 -3
  26. xinference/web/ui/build/index.html +1 -1
  27. xinference/web/ui/build/static/js/main.87d39ffb.js +3 -0
  28. xinference/web/ui/build/static/js/main.87d39ffb.js.map +1 -0
  29. xinference/web/ui/node_modules/.cache/babel-loader/{8d95a492a6b08c192e212189567b59b98282ca6f3ed52dd9bd053a6be7ff4e65.json → 027745bdb9bb9fe375f2eef7377e9f7ba82510ed90c05f9e5b34ba314bb93498.json} +1 -1
  30. xinference/web/ui/node_modules/.cache/babel-loader/03d5e7de9b1728d71cd8c483a35f4373eb66932022026142e03005d9de1a2a44.json +1 -0
  31. xinference/web/ui/node_modules/.cache/babel-loader/0738899eefad7f90261125823d87ea9f0d53667b1479a0c1f398aff14f2bbd2a.json +1 -0
  32. xinference/web/ui/node_modules/.cache/babel-loader/09a21ab449599121eadfdd12d6dba40c5a1c5b35dc90be17108eeff80daf79b8.json +1 -0
  33. xinference/web/ui/node_modules/.cache/babel-loader/09d043a8289da33d05768f5face63cc47e9ce9646d67f5589865db511574e8f9.json +1 -0
  34. xinference/web/ui/node_modules/.cache/babel-loader/0b901c8f077905bf153a1390b207277c31e893f83f9ced5d29a0a0c85a07b5eb.json +1 -0
  35. xinference/web/ui/node_modules/.cache/babel-loader/0da1ff7c9602609cfa8b82a67140114f3037130fe79583bbf23b69cef81823a6.json +1 -0
  36. xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +1 -0
  37. xinference/web/ui/node_modules/.cache/babel-loader/0f87caef05125425c0ca418a32ce8a53e9becffd464411b2ff94e3c7a37c7b5b.json +1 -0
  38. xinference/web/ui/node_modules/.cache/babel-loader/{cb50b4d54ce9c249d580f7f1fc2df4beab4f13c4d16f486c89239809570de8fb.json → 0fc333c35ad49c0db7f18f77521984a36067cfce4206df50ce16a2fdc88be1c6.json} +1 -1
  39. xinference/web/ui/node_modules/.cache/babel-loader/13fdd92dd5e2fe11640cf6ec0368e3c098d3f0c43e11703ccf157e8c41410d5b.json +1 -0
  40. xinference/web/ui/node_modules/.cache/babel-loader/1464f950e5600fe2e5890186a1ae3750cdabbadd8525070cef6273450760bcf6.json +1 -0
  41. xinference/web/ui/node_modules/.cache/babel-loader/17c53dc44a324629efef425833ab48cc05243d089f4eb3c731da63568fcff35e.json +1 -0
  42. xinference/web/ui/node_modules/.cache/babel-loader/1946a52cb34e675da423de15b703437b4c841e4f5aed6ddd932a091c145e17b0.json +1 -0
  43. xinference/web/ui/node_modules/.cache/babel-loader/1e0da5ce4d8ccdbe33705a47bdc0d5d1d7e4e48c8e5c1d0b8c473e9f491930ac.json +1 -0
  44. xinference/web/ui/node_modules/.cache/babel-loader/1e3ec3302ef2253fad5e86bf429baf6b69e57e769f38ec1aaada87f3114b6a1f.json +1 -0
  45. xinference/web/ui/node_modules/.cache/babel-loader/1f18804827897c337874b289873df611b44869f86ced1d4351fbe0b45ed17518.json +1 -0
  46. xinference/web/ui/node_modules/.cache/babel-loader/21b64f7fdb74c2c35479811c3f5683ee161d3b64a4fade0cd1fc0044332c79b2.json +1 -0
  47. xinference/web/ui/node_modules/.cache/babel-loader/22873d21eab4b5a04b183cc3d2cbfafa431e0ce99649aeb900cffcfc33ecbca4.json +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/{7a5fecf96af770954035948b07dfadeceb33536b4763a2f9e548121ae555b75f.json → 228b78cd0950f1c967cb33b210b11497052a9998f0b1d1f31ecee0fd8b99fb40.json} +1 -1
  49. xinference/web/ui/node_modules/.cache/babel-loader/2328fc2360224ac3eef5c5261fc2a75cdce7fe58c210af8d3e532ec609464f5f.json +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/25fd89450dcf73a205a5f8c82dbc604bf872d574b748427c26ba79b8711ec502.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/{b6e95546cdd247e50d9c916dd3bef5216f96c32a267e54b51b23cc22631be0c4.json → 288d6573b09f0e2b0277e1f301bcb57a194c01f3983e3c1e03c8b2ea72653194.json} +1 -1
  52. xinference/web/ui/node_modules/.cache/babel-loader/2c04900a020b813e2e99b15748c3c3ef5a1627c33d5668e45bc6a798f0d28dae.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/2cd0fd50d8f213cdd94daffe7d0f997991072b11936ead7c48be2af3fdbd1fda.json +1 -0
  54. xinference/web/ui/node_modules/.cache/babel-loader/2e057c12322da07a537a576a3917d512ac6cb291be5aa29ce23d2679a4fd16b1.json +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/{abd0662d21fbe39e483fbc4d37faa9e24d93889b5b40d42e7e20773c66ee3894.json → 327c21db6124a5209f41996d0d3359b462d5baff3974296f5acfddfc532434e6.json} +1 -1
  56. xinference/web/ui/node_modules/.cache/babel-loader/33294bb0080595100e22c960929a6f3086f5ea632a4bcd9216069f95347a63a9.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/3361869e45eddd7d48ff7995e1a12ac454c9eb92d4c87f49ce80c1e9482c1e91.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/375d2f6a31e01c5a82fd0e18f42198f9fbb7117491f3e92d70cf36055f159a6e.json +1 -0
  59. xinference/web/ui/node_modules/.cache/babel-loader/3808217efee76345ba19af3ffdba045888e0908b0dc7fc38e54655cdc3504c60.json +1 -0
  60. xinference/web/ui/node_modules/.cache/babel-loader/39db740a99454006312dec9e4845f850c51e043458ca94eba8bb256521fabcd4.json +1 -0
  61. xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/3e737bcdbcbc407ccd65b90e199ef0c3214b261e8e41dbf14d921384a717d9ee.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/3e87c7d4f1e5c93603eed3e861945e44dd1e664061a89aaef1aa6435f4f4d7d0.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/3ecf70018028c8cf6cdc2d24d2b88f2707bd5c23a336fd3d82bd6a1af0db7ded.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/43314d1e9fed30ed6cf5cab3f5033da067e58be29e36448e3c0a8127403a9388.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/442467c09440131f7c25ddcdeb356d2a87783ad4e32d89f410a16cb6e7d83b22.json +1 -0
  67. xinference/web/ui/node_modules/.cache/babel-loader/487524f2b3494bfcfbd8c12918092a52ff2ce3eefe9bf0a557389ec1c74d3e67.json +1 -0
  68. xinference/web/ui/node_modules/.cache/babel-loader/4de4b0282a3d1ae02bec0bfe7d6658f039141406e8bfdbe86bf1dc5b115a2524.json +1 -0
  69. xinference/web/ui/node_modules/.cache/babel-loader/4ec72faab00e4cbffefdf43b987ae3d15beb60e282b9ca86dad37f4d0fc3f2bd.json +1 -0
  70. xinference/web/ui/node_modules/.cache/babel-loader/51403f1b70af0f4577d4594cbfcf9d9759997e5c49097e7a07219000d1be4ff1.json +1 -0
  71. xinference/web/ui/node_modules/.cache/babel-loader/566a0829bfeab41e40bbdca55ad056067e172508af4b333403d4fd334a8c8742.json +1 -0
  72. xinference/web/ui/node_modules/.cache/babel-loader/5689d9aee417a0834fed66720cf3b48c34cc26364cd9ab7e3d809997bb9faec9.json +1 -0
  73. xinference/web/ui/node_modules/.cache/babel-loader/5bedd27475bda204eafccdc636a56ba5765353391049a0b947234faaab3d021a.json +1 -0
  74. xinference/web/ui/node_modules/.cache/babel-loader/{94daea23b698bbb26bbbab69bbca8ad521c79fcbd6b495c84c740e4006a2ced2.json → 61014d40df4486972fb6d5909dff45f24bec48573af99ca8bcd79835385c7da0.json} +1 -1
  75. xinference/web/ui/node_modules/.cache/babel-loader/6139c80ec46d77469e9a89924de41d93050f19e9ed541e358f09cbf20ccd7c86.json +1 -0
  76. xinference/web/ui/node_modules/.cache/babel-loader/63ead4dee6a4f4ae47aed6918a2ab3e172739ecdffaa85e063f9a2c9165ff93c.json +1 -0
  77. xinference/web/ui/node_modules/.cache/babel-loader/6bf8c25e0f2051d994990bf73fdcb3e08963e11589842895593f199eab13ba9a.json +1 -0
  78. xinference/web/ui/node_modules/.cache/babel-loader/6d42f63cd673d1c60e534a174cd851e9aefd2d5d45a4aa6128d794457b64de53.json +1 -0
  79. xinference/web/ui/node_modules/.cache/babel-loader/7044e626a91582077b8cbad09bcf0cc118e92e11ebfa2ebc17622baf98aa56bf.json +1 -0
  80. xinference/web/ui/node_modules/.cache/babel-loader/71ec408030881aad69854815b8681185d0174cc0cf67f20fb2c3cd13c74fce07.json +1 -0
  81. xinference/web/ui/node_modules/.cache/babel-loader/7280614d5bddce524182024cc288953eb89d46c1dbe470ed5eb5ecab5de28385.json +1 -0
  82. xinference/web/ui/node_modules/.cache/babel-loader/7293ad8ba7b5fc6ed791aed44a766a396516c782f5d0e564b9ef0997645ec1dd.json +1 -0
  83. xinference/web/ui/node_modules/.cache/babel-loader/79be1e0dd6b1c3cbb3f08b349454f4791edaac451fa4d1f10eaf433e57ecf70f.json +1 -0
  84. xinference/web/ui/node_modules/.cache/babel-loader/7c0571c03aa8c90eb9452ba23133aed9b3b2a049910d4d8276588b438a4caaba.json +1 -0
  85. xinference/web/ui/node_modules/.cache/babel-loader/81ed094096cd9dc36f3ddebe61818766f43d3338c171f2269958f47265e60f24.json +1 -0
  86. xinference/web/ui/node_modules/.cache/babel-loader/86355e55059ddb15f71c0c651a2e622cee6f3141da3a018775ca00c74dd37dc2.json +1 -0
  87. xinference/web/ui/node_modules/.cache/babel-loader/{d1af6db565bae36218251c01a817c080ef764172dd0694e38d27b525a3ffd1d2.json → 8710fe63bcc6b95502287c36a7bd881b74ee2926982e120e1e8070a2c8a668fc.json} +1 -1
  88. xinference/web/ui/node_modules/.cache/babel-loader/8779104679d8f85017b551d1d108ea1ac701e184ad7bcc8a367526353a6c5175.json +1 -0
  89. xinference/web/ui/node_modules/.cache/babel-loader/881c573e6aa9954e7bd5e3bc5a6ef62c0db9b93815030080f677f6834e431434.json +1 -0
  90. xinference/web/ui/node_modules/.cache/babel-loader/8d24c51a0905b919436aac2bad441eb1651c46c4e7aa17c99eb63c7760ce6615.json +1 -0
  91. xinference/web/ui/node_modules/.cache/babel-loader/93bf5cc8cbe3f0b10f291800e1896f03506a8933cd490e07ee0ff94498f93bce.json +1 -0
  92. xinference/web/ui/node_modules/.cache/babel-loader/9401db0316ac34790aa5b972e9e1c50de58ef313b50840a37e08f0936a7197b6.json +1 -0
  93. xinference/web/ui/node_modules/.cache/babel-loader/98afeafdfc285a744d116fb5f189a16ad2b977adbd4fc0757f7ade1b29b8ad31.json +1 -0
  94. xinference/web/ui/node_modules/.cache/babel-loader/9a4e624d1ce66832fb213ead02745d84661d9bfcc21b62b4c37c680c3c3f5cbb.json +1 -0
  95. xinference/web/ui/node_modules/.cache/babel-loader/9a7b26f85224d33d431354ac8d7849388b7525fc014801824e705561ab2ac7a7.json +1 -0
  96. xinference/web/ui/node_modules/.cache/babel-loader/9dcb04ac6cb341233a2e98f2fb417d059c2ea6a56d2ff6f30bb27dae9bd6c756.json +1 -0
  97. xinference/web/ui/node_modules/.cache/babel-loader/9ebede8a4bccef016eef418a668b3212962779ea6d2531edb23d3dca81163496.json +1 -0
  98. xinference/web/ui/node_modules/.cache/babel-loader/a0efc8db4289116bdb312014df91fc781f66fe171d63027bbd5aae25cd54c325.json +1 -0
  99. xinference/web/ui/node_modules/.cache/babel-loader/a212defc32ca338ba673722c72815d30974275f1a82b67405a898f826fc8d782.json +1 -0
  100. xinference/web/ui/node_modules/.cache/babel-loader/a2685acab57fd6d874bf6069a587e3770b55c72cacc69cb167bb3dbd16a551c5.json +1 -0
  101. xinference/web/ui/node_modules/.cache/babel-loader/a3b7d6233e50dc3832421793a5752c1db99f3a214c9896c97bbd395d007f09b1.json +1 -0
  102. xinference/web/ui/node_modules/.cache/babel-loader/a3d0c810a255108613be0a175d53d673c4b6843c2a87dd0f157b4d3731af2092.json +1 -0
  103. xinference/web/ui/node_modules/.cache/babel-loader/a6633774053545c60721e345820e21cf3b089df219f7ee3fcceaf27af3a9e84b.json +1 -0
  104. xinference/web/ui/node_modules/.cache/babel-loader/a6bb917e4869d7d6ddf50b75932cad64e94b9e33db2aa40c4a489a045819bb36.json +1 -0
  105. xinference/web/ui/node_modules/.cache/babel-loader/a816031fafa564dc4af27383612e51ac29af51354f3bba750a70ac0c03b6bd30.json +1 -0
  106. xinference/web/ui/node_modules/.cache/babel-loader/af6349a1e9761cb98af4b76904440fb7f8d54288ee6ba37116f670226796e689.json +1 -0
  107. xinference/web/ui/node_modules/.cache/babel-loader/afce63eaad32a9968590e4500e123c81b385ea3b6046b2af1ec7ea8a49ba657b.json +1 -0
  108. xinference/web/ui/node_modules/.cache/babel-loader/aff24b61e5d55147d0a36ef1896b9bd35c995c357f44eeee1209f23db2118636.json +1 -0
  109. xinference/web/ui/node_modules/.cache/babel-loader/b01676c5ca97e3409a1b7bac91b6a797be3719a8b7ec72ad06c9beb296c67bb7.json +1 -0
  110. xinference/web/ui/node_modules/.cache/babel-loader/b0c5e967d1af90d545f1d34c9893a841296b0d1dd50de65eeaf19054995d2a94.json +1 -0
  111. xinference/web/ui/node_modules/.cache/babel-loader/b561d429cea6143e1bf7e6e7e690f9e6ce1dce4adfcacb57d7c5578195d1d190.json +1 -0
  112. xinference/web/ui/node_modules/.cache/babel-loader/b70187bb5aecf3bd7afbe8469b49e2a0a94a0c62727530d0dc2f84ee4577bcfc.json +1 -0
  113. xinference/web/ui/node_modules/.cache/babel-loader/bd21f6039e23775fb4631bb56f2eef1d132c03a60cc6fe4530569ecfeef456a3.json +1 -0
  114. xinference/web/ui/node_modules/.cache/babel-loader/be09b5508d0ca5ac0300144a682d4cde2d5b4b96ecb9fc690c1d5e7ee7411b3e.json +1 -0
  115. xinference/web/ui/node_modules/.cache/babel-loader/be8c9e83f0dd4af2e9bcf6844dd8c4445fe634b6c71da6debfa7acabb7a875c0.json +1 -0
  116. xinference/web/ui/node_modules/.cache/babel-loader/c0d1317dfae0c5ddf5a3ba9aef6bd2204f2daf9754f3270c4cab5c4632f78000.json +1 -0
  117. xinference/web/ui/node_modules/.cache/babel-loader/c1e3afdaae5649274f7d34910015b88bc02ba6a00b935e6597f9c670fedc05c0.json +1 -0
  118. xinference/web/ui/node_modules/.cache/babel-loader/c29ec02243dd370616a489f7de67e4e479d2f7f12504ba7f77304cffe60befbc.json +1 -0
  119. xinference/web/ui/node_modules/.cache/babel-loader/c2a4d298149e6c66713eed4f56f4bbbfa4022a193891029b84e429baf59cc246.json +1 -0
  120. xinference/web/ui/node_modules/.cache/babel-loader/c3d2fc03602f7604a65e631e8595ce4cfabd8887343489b34a65e3a73c5546b3.json +1 -0
  121. xinference/web/ui/node_modules/.cache/babel-loader/c41078c8692e0635fb52d01bc5157528c7308ae5140c134aa07c1fcda4f7a5e5.json +1 -0
  122. xinference/web/ui/node_modules/.cache/babel-loader/c4b5f1e444ddd41f8cd30e618c185b16235e313ea34d944959628001a91e38a5.json +1 -0
  123. xinference/web/ui/node_modules/.cache/babel-loader/c7ed9bc3125498e475f3170b8126c93368e9cbfd6a5e73ee63437d90e39b69e5.json +1 -0
  124. xinference/web/ui/node_modules/.cache/babel-loader/c996b3dc884130f034a37551845201bc5661f2dcd282aed737329f8d4dd387b5.json +1 -0
  125. xinference/web/ui/node_modules/.cache/babel-loader/c9fef6e7f0a532c2d1cc25dffd496ea8355dd14ce0105a169b391bfeb9277e4f.json +1 -0
  126. xinference/web/ui/node_modules/.cache/babel-loader/cc8f5a2bbe4da9852ccdf64150115271baa2d282d74d7fab854dc5ca641dd519.json +1 -0
  127. xinference/web/ui/node_modules/.cache/babel-loader/ce45adea497443bfc1b6baabd0aa3d85888dd10b1b7fd009edf608e390b2a844.json +1 -0
  128. xinference/web/ui/node_modules/.cache/babel-loader/ce4b82a3855d18ba09e922f6e2cc1b8c20f0e5261b27303c52d4e1ba5a09a078.json +1 -0
  129. xinference/web/ui/node_modules/.cache/babel-loader/cf16614311e144768bdc064da754efad77597cac5efe6852be996b9eaaed695a.json +1 -0
  130. xinference/web/ui/node_modules/.cache/babel-loader/d074bfe592a195f339acbcd8743f8b64cfd09b45ca1013598269227fc092f2db.json +1 -0
  131. xinference/web/ui/node_modules/.cache/babel-loader/d2a95455cc492d4fbf2839a21eeaef00e7fad2aac93825d2443a38127ad02007.json +1 -0
  132. xinference/web/ui/node_modules/.cache/babel-loader/d4e6e712ebbbf0f30745093e2b6d235c13fb43d37b586a0109e87c19eafdf35d.json +1 -0
  133. xinference/web/ui/node_modules/.cache/babel-loader/d88a02264125b07881eecff752d9c8d718edcf2d87e5431b13e54809dc7bf4f2.json +1 -0
  134. xinference/web/ui/node_modules/.cache/babel-loader/d907925fb28912f2e98cccfe758bce680aa4f894cc9d2079d73d7e2d85481d47.json +1 -0
  135. xinference/web/ui/node_modules/.cache/babel-loader/daf050ec3a864b9831dcb32cee1fb2739dafb68b369e9108469ca1e6fd166867.json +1 -0
  136. xinference/web/ui/node_modules/.cache/babel-loader/de9645d55ce8744856862eae4a3d3a1266e9df9d575aa3e2f62a75a5d97444f7.json +1 -0
  137. xinference/web/ui/node_modules/.cache/babel-loader/e04199bdfb87226d6aeae387ef677eae06483f9f135b5a3f1360ec1a243c86cd.json +1 -0
  138. xinference/web/ui/node_modules/.cache/babel-loader/e97d1f31f54263bb90b7c3e71be0e19973d76d945fb5d526bf93fb449708bbf0.json +1 -0
  139. xinference/web/ui/node_modules/.cache/babel-loader/e9f87f5c370a731ee42098ae6ff38ca6ad256396158c03f84c5e770c6b76a43d.json +1 -0
  140. xinference/web/ui/node_modules/.cache/babel-loader/f2ad375dbc8779f54eb9c377fd951cd442f3766d5eb70d40613b952b811643da.json +1 -0
  141. xinference/web/ui/node_modules/.cache/babel-loader/f35df9d5a0e0341bcc0f39a462e46988c330cdb905cc690b08de4a9cf5771f28.json +1 -0
  142. xinference/web/ui/node_modules/.cache/babel-loader/f433e33369483f75d31fe937b8a597ef2d7cdfed339931643ff966a33dbc6158.json +1 -0
  143. xinference/web/ui/node_modules/.cache/babel-loader/f4c164c89cfbcb1d81bc0b60e3e7c8715e07c7a23118acf4e3e16715beac09c5.json +1 -0
  144. xinference/web/ui/node_modules/.cache/babel-loader/f545d937cfb9d974f4aaaa9dc1c12adfed0d4f608de7d0a7247452d9bbbb55a4.json +1 -0
  145. xinference/web/ui/node_modules/.cache/babel-loader/{3cc5ec39e76e1768ffb070a8ae18ee61b68f3ed3593178134d3edda5ed378c6c.json → f8d2c4372093d0ff3a2f78260e004e6c4e2b8ab4f5be9713ebc16c50e9c2ab4b.json} +1 -1
  146. xinference/web/ui/node_modules/.cache/babel-loader/fa41c6c8ffa1047b7f2f52b6532ea4d9f5754e5bbc11d1340bae7f75ae5ed84e.json +1 -0
  147. xinference/web/ui/node_modules/.package-lock.json +45 -45
  148. xinference/web/ui/node_modules/@babel/runtime/package.json +64 -37
  149. xinference/web/ui/node_modules/@mui/private-theming/package.json +7 -6
  150. xinference/web/ui/node_modules/@mui/styled-engine/package.json +6 -5
  151. xinference/web/ui/node_modules/@mui/system/node_modules/clsx/package.json +22 -10
  152. xinference/web/ui/node_modules/@mui/system/package.json +13 -12
  153. xinference/web/ui/node_modules/@mui/types/package.json +3 -2
  154. xinference/web/ui/node_modules/@mui/utils/ClassNameGenerator/package.json +2 -2
  155. xinference/web/ui/node_modules/@mui/utils/HTMLElementType/package.json +2 -2
  156. xinference/web/ui/node_modules/@mui/utils/capitalize/package.json +2 -2
  157. xinference/web/ui/node_modules/@mui/utils/chainPropTypes/package.json +2 -2
  158. xinference/web/ui/node_modules/@mui/utils/clamp/package.json +6 -0
  159. xinference/web/ui/node_modules/@mui/utils/composeClasses/package.json +2 -2
  160. xinference/web/ui/node_modules/@mui/utils/createChainedFunction/package.json +6 -0
  161. xinference/web/ui/node_modules/@mui/utils/debounce/package.json +2 -2
  162. xinference/web/ui/node_modules/@mui/utils/deepmerge/package.json +6 -0
  163. xinference/web/ui/node_modules/@mui/utils/deprecatedPropType/package.json +6 -0
  164. xinference/web/ui/node_modules/@mui/utils/elementAcceptingRef/package.json +6 -0
  165. xinference/web/ui/node_modules/@mui/utils/elementTypeAcceptingRef/package.json +6 -0
  166. xinference/web/ui/node_modules/@mui/utils/exactProp/package.json +2 -2
  167. xinference/web/ui/node_modules/@mui/utils/formatMuiErrorMessage/package.json +6 -0
  168. xinference/web/ui/node_modules/@mui/utils/generateUtilityClass/package.json +2 -2
  169. xinference/web/ui/node_modules/@mui/utils/generateUtilityClasses/package.json +2 -2
  170. xinference/web/ui/node_modules/@mui/utils/getDisplayName/package.json +6 -0
  171. xinference/web/ui/node_modules/@mui/utils/getScrollbarSize/package.json +6 -0
  172. xinference/web/ui/node_modules/@mui/utils/getValidReactChildren/package.json +6 -0
  173. xinference/web/ui/node_modules/@mui/utils/integerPropType/package.json +6 -0
  174. xinference/web/ui/node_modules/@mui/utils/isMuiElement/package.json +6 -0
  175. xinference/web/ui/node_modules/@mui/utils/ownerDocument/package.json +2 -2
  176. xinference/web/ui/node_modules/@mui/utils/ownerWindow/package.json +2 -2
  177. xinference/web/ui/node_modules/@mui/utils/package.json +8 -7
  178. xinference/web/ui/node_modules/@mui/utils/ponyfillGlobal/package.json +2 -2
  179. xinference/web/ui/node_modules/@mui/utils/refType/package.json +6 -0
  180. xinference/web/ui/node_modules/@mui/utils/requirePropFactory/package.json +6 -0
  181. xinference/web/ui/node_modules/@mui/utils/resolveProps/package.json +6 -0
  182. xinference/web/ui/node_modules/@mui/utils/scrollLeft/package.json +6 -0
  183. xinference/web/ui/node_modules/@mui/utils/setRef/package.json +6 -0
  184. xinference/web/ui/node_modules/@mui/utils/unsupportedProp/package.json +6 -0
  185. xinference/web/ui/node_modules/@mui/utils/useControlled/package.json +2 -2
  186. xinference/web/ui/node_modules/@mui/utils/useEnhancedEffect/package.json +2 -2
  187. xinference/web/ui/node_modules/@mui/utils/useEventCallback/package.json +2 -2
  188. xinference/web/ui/node_modules/@mui/utils/useForkRef/package.json +2 -2
  189. xinference/web/ui/node_modules/@mui/utils/useId/package.json +2 -2
  190. xinference/web/ui/node_modules/@mui/utils/useIsFocusVisible/package.json +6 -0
  191. xinference/web/ui/node_modules/@mui/utils/useLazyRef/package.json +6 -0
  192. xinference/web/ui/node_modules/@mui/utils/useOnMount/package.json +6 -0
  193. xinference/web/ui/node_modules/@mui/utils/usePreviousProps/package.json +6 -0
  194. xinference/web/ui/node_modules/@mui/utils/useTimeout/package.json +6 -0
  195. xinference/web/ui/node_modules/@mui/utils/visuallyHidden/package.json +6 -0
  196. xinference/web/ui/node_modules/@types/prop-types/package.json +2 -2
  197. xinference/web/ui/node_modules/csstype/package.json +3 -3
  198. xinference/web/ui/package-lock.json +47 -45
  199. xinference/web/ui/package.json +2 -0
  200. {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/METADATA +4 -1
  201. {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/RECORD +206 -150
  202. xinference/web/ui/build/static/js/main.9715fe74.js +0 -3
  203. xinference/web/ui/build/static/js/main.9715fe74.js.map +0 -1
  204. xinference/web/ui/node_modules/.cache/babel-loader/02f54fcadc6fe9b1be8451435f9a097c82b3d665bae2d2df03ba0b36cebeca0c.json +0 -1
  205. xinference/web/ui/node_modules/.cache/babel-loader/051391807ffd255b652b63dc2ff12e5ce80113e1b712c81b6c1986a11cd85d3c.json +0 -1
  206. xinference/web/ui/node_modules/.cache/babel-loader/05710e94caaf5a2da790d67aa572bf6bc19bae0237668c8d97f4da62ab52fe5c.json +0 -1
  207. xinference/web/ui/node_modules/.cache/babel-loader/08a3470840a55d359a967d0763cd1ae5c1f15f3ca31118e29b37b4fbbdb248c7.json +0 -1
  208. xinference/web/ui/node_modules/.cache/babel-loader/0e68b0e268559a563dde6fa4c654922d98c74f5140854437f3e5a01bf23ba41f.json +0 -1
  209. xinference/web/ui/node_modules/.cache/babel-loader/10dc7661741dfc88df5538bbf835870ae47b1a18f36c2f7b9650651e7304ce72.json +0 -1
  210. xinference/web/ui/node_modules/.cache/babel-loader/18b3d28a2692ab4f2d05254221f53b3c13259435bd98cb90ff5d7250d93f11e0.json +0 -1
  211. xinference/web/ui/node_modules/.cache/babel-loader/1b612ab687e3a4e14db156bd92a75b8a3e96ff04bfbf8f3351c7e801bc61cf90.json +0 -1
  212. xinference/web/ui/node_modules/.cache/babel-loader/1bb839ffeb39ed12367c179fbf3b784e20f6f98fb87c597717bc11ebe3201c59.json +0 -1
  213. xinference/web/ui/node_modules/.cache/babel-loader/1d12e0b0e4e87372b18fef72fcd6e264fa40b0cffdbeddcef004becc4e32cf27.json +0 -1
  214. xinference/web/ui/node_modules/.cache/babel-loader/1d5b806c08ffb55539ee26b5fa9746a373f602354d6e55a0d4379d7e2f903868.json +0 -1
  215. xinference/web/ui/node_modules/.cache/babel-loader/1ecb7ce2bc103bec0d1a9f0cb07132e5ec7731d974998c465798c0e15cafb924.json +0 -1
  216. xinference/web/ui/node_modules/.cache/babel-loader/20cfc57ae2b8ba23571f386a5f2bf194bd02e8a3913113881bb9804ee822ad58.json +0 -1
  217. xinference/web/ui/node_modules/.cache/babel-loader/222a437b6773707bbead06ba5db1ed8869e44d9f051a7f6b1a745aeb654679d0.json +0 -1
  218. xinference/web/ui/node_modules/.cache/babel-loader/249709fb20c1254cecc9ba6666e6bcf37b21e47e7a020d5a7f0513680b49473f.json +0 -1
  219. xinference/web/ui/node_modules/.cache/babel-loader/257ad4979294ee3a7ef400feab3b29ebcd259c47b9410a3011a66f4b33a97904.json +0 -1
  220. xinference/web/ui/node_modules/.cache/babel-loader/2779bdc8ed8372f4015d07a45e4b09756bd953250944c3e3efe6b65bf8eaecd8.json +0 -1
  221. xinference/web/ui/node_modules/.cache/babel-loader/2bee7b8bd3d52976a45d6068e1333df88b943e0e679403c809e45382e3818037.json +0 -1
  222. xinference/web/ui/node_modules/.cache/babel-loader/31c71a85b2d0bbdcdc027e7950a9d1f778288f98888d8595fad5e4fdda5bb31f.json +0 -1
  223. xinference/web/ui/node_modules/.cache/babel-loader/371214b6649e323e7281885b1412c5a515ccc2274f4e1a42921e45777694009c.json +0 -1
  224. xinference/web/ui/node_modules/.cache/babel-loader/3eced030d2b3da645f4cbe3dc4ff49744312316e4306ce170991e802e9bfcf8e.json +0 -1
  225. xinference/web/ui/node_modules/.cache/babel-loader/452ae8e7cff99efc35d134014d32610d13217b031b817d3663bb689b3778ebd2.json +0 -1
  226. xinference/web/ui/node_modules/.cache/babel-loader/48e3f5d01ccedde86348a29392bc9b6e301a51b3f7e2476ae0a95a31eb6bf18d.json +0 -1
  227. xinference/web/ui/node_modules/.cache/babel-loader/4a311b5bd98f16588caf60b8de4c8d91d038198e6bfd496f880f60d4c1ea076d.json +0 -1
  228. xinference/web/ui/node_modules/.cache/babel-loader/4ccd71d3f73490a18a5a4de4ad82d63cf019caff6f8d881fd1434755a033bfc5.json +0 -1
  229. xinference/web/ui/node_modules/.cache/babel-loader/4ee8343d4254c95e685d49601f1649e8222d7423c2a822e1d01c8fc4cd3582a2.json +0 -1
  230. xinference/web/ui/node_modules/.cache/babel-loader/523a712748fbb7abf0c5423df62e65c4659b7e450182c890e847cd78ace7cac5.json +0 -1
  231. xinference/web/ui/node_modules/.cache/babel-loader/5777049bbacf7322891761a5b959e62eec22b0f96a70dbf97e58d530c6258a55.json +0 -1
  232. xinference/web/ui/node_modules/.cache/babel-loader/5905957ce95f7dee64f0739359e990b8b518cff088c6958487641a3d21737d1e.json +0 -1
  233. xinference/web/ui/node_modules/.cache/babel-loader/5ac79df79d2dff64da97b02d5c90b36bc0d44a4a4229f6efed2993f93ee1d7a1.json +0 -1
  234. xinference/web/ui/node_modules/.cache/babel-loader/64d3ef38fd394979b09061e67fc47f04b7295401677825c64c72ea2f44cf910c.json +0 -1
  235. xinference/web/ui/node_modules/.cache/babel-loader/68a5d8e66687770e05aebcae9a8047189dc577d6150fbf0e62d2b2c11c4335c5.json +0 -1
  236. xinference/web/ui/node_modules/.cache/babel-loader/69d5f27e40d5e7040c8e8e303ccaee019b54092345acc3838d6eb943c474ce39.json +0 -1
  237. xinference/web/ui/node_modules/.cache/babel-loader/6a07b8d00482c07a4b7c73f6bebb79fad54bbea35ba358681ede6ff299a09f6b.json +0 -1
  238. xinference/web/ui/node_modules/.cache/babel-loader/70b0307a43b9f8c876bf2a1f7dea4cc56cc0ea33743e863c0b3983bc48109f2d.json +0 -1
  239. xinference/web/ui/node_modules/.cache/babel-loader/710c39a26816820c2cfd00b19cd0112851d64b8ac6b201b18727b43b5a69d92c.json +0 -1
  240. xinference/web/ui/node_modules/.cache/babel-loader/722f4c03da79795d1fb738323f3eb6794ab18711e20f141c0b678c5d4cb3dce9.json +0 -1
  241. xinference/web/ui/node_modules/.cache/babel-loader/8237bdfff695d71496ee2ff83ba0392d753f8e1dff0d002a1b2c9e249f03083c.json +0 -1
  242. xinference/web/ui/node_modules/.cache/babel-loader/8342e3800fed63e8e805850810aee4fd9b0558aca10c8b6b770fe5bff1bed266.json +0 -1
  243. xinference/web/ui/node_modules/.cache/babel-loader/847a71f54f7a059efc71f70c297b6b4182a633130715c80f817eb533de108c12.json +0 -1
  244. xinference/web/ui/node_modules/.cache/babel-loader/8731a959d9b7278ba7a1c044bb4253bfff63079da69b21ff80221ae5e037dbbe.json +0 -1
  245. xinference/web/ui/node_modules/.cache/babel-loader/87b4e94b045df561c8f4bd253c84447c96e0dededbcb6646c0cd180119960990.json +0 -1
  246. xinference/web/ui/node_modules/.cache/babel-loader/894a334446bac96d74385c23c9d6f2d117ee29511897700430cd0e578cb32527.json +0 -1
  247. xinference/web/ui/node_modules/.cache/babel-loader/8c5ba88b620eeedbb99db18a610138353a5a657170fdb8bd6969585f81b34f74.json +0 -1
  248. xinference/web/ui/node_modules/.cache/babel-loader/9004bc083cf179941e64130390a27afcf7593b21dd64a87d03e1e6489032aa13.json +0 -1
  249. xinference/web/ui/node_modules/.cache/babel-loader/9b84631723b52988d3748d80b994e43d1944ca55098a5d908c9061eeb4c84f90.json +0 -1
  250. xinference/web/ui/node_modules/.cache/babel-loader/9ee1f84bde10cd78ca23a4681c7372195748bacd5e41c3b1df8e39d960294d81.json +0 -1
  251. xinference/web/ui/node_modules/.cache/babel-loader/a127f13651a7398f5fc378456a52cfa0c6260f6109611d3fb012aa7c7a4ee17a.json +0 -1
  252. xinference/web/ui/node_modules/.cache/babel-loader/a5e2e9f707eb7039bea096ca117d996b8f9cbc2a5613fd8e0c5b0094444ce23c.json +0 -1
  253. xinference/web/ui/node_modules/.cache/babel-loader/a6131b7fb1b30fd63d798845dab508a22b21c616b361e6214bf711915c5fef3d.json +0 -1
  254. xinference/web/ui/node_modules/.cache/babel-loader/a7b8ccb199158c897ed7d7835942eb37569221be4101eecfa7a7ce62e0c76f1f.json +0 -1
  255. xinference/web/ui/node_modules/.cache/babel-loader/acaf6dbddd57b63a874e9062803850ce4911c8dc63a362eacdc4d2d0f48f8c16.json +0 -1
  256. xinference/web/ui/node_modules/.cache/babel-loader/ade3cd82af85d2d28138bd942228b01e27559a5a4072ee049dcb47612ee9a506.json +0 -1
  257. xinference/web/ui/node_modules/.cache/babel-loader/aea82c71088dc9afd6f4ba71434286af5c6ab5e9b87088b0e84f351146d06489.json +0 -1
  258. xinference/web/ui/node_modules/.cache/babel-loader/afbd547078b20bb14596f20e165cb40413147e3de501b126c97bb417f8b9a53e.json +0 -1
  259. xinference/web/ui/node_modules/.cache/babel-loader/afc597494ebc62d22e4d3ac2a839bd5f30d5e47ce1d0576e0f62407ef6c78652.json +0 -1
  260. xinference/web/ui/node_modules/.cache/babel-loader/affbbac58777e26ba8b6dcda8cea72c76975bd0207f0a510bfde5983d3081f74.json +0 -1
  261. xinference/web/ui/node_modules/.cache/babel-loader/b0aa2ece62a1f5fa015f9e6b59d20b79be727fb5a3f4fc747200163097f320ad.json +0 -1
  262. xinference/web/ui/node_modules/.cache/babel-loader/b10391d3ae53452c8ec97d230f064e03a8377aab2be4b1b3e30cb3e054b2341a.json +0 -1
  263. xinference/web/ui/node_modules/.cache/babel-loader/b4bab0c147360f3e335251166d3d25602e2e0e160fb3734ac019a5b42499179c.json +0 -1
  264. xinference/web/ui/node_modules/.cache/babel-loader/b57dcff363e045fd11ea2bcbc58cce470f1bc22d3ac14c8acbf63a36637050ff.json +0 -1
  265. xinference/web/ui/node_modules/.cache/babel-loader/b8de42f7f5dc1df9e802898a0ebb35adb57d269d7af21e6c66a0fde0ff226a08.json +0 -1
  266. xinference/web/ui/node_modules/.cache/babel-loader/be696cadea3bf577c0defc8f8d100232369763dde754b5ba9399f45f42aed215.json +0 -1
  267. xinference/web/ui/node_modules/.cache/babel-loader/c5a30d7c6cb484268e9126dd6d078229a1dcaa07fb064077b87d15224c61a3aa.json +0 -1
  268. xinference/web/ui/node_modules/.cache/babel-loader/c67e2077e70c4751e9e1dd98f378d979abd9c8a39fbf6c69f02fb4e5b04eef87.json +0 -1
  269. xinference/web/ui/node_modules/.cache/babel-loader/c81c1cad35be60a4fad090374edf50a884540e306d464b803120554df61e817e.json +0 -1
  270. xinference/web/ui/node_modules/.cache/babel-loader/d1054fd4d09666318a00ca0aea5ff42a2fdb17b3a42b5f77b93433d7408eb2f4.json +0 -1
  271. xinference/web/ui/node_modules/.cache/babel-loader/d1ec524ce17a48816fc55fe665572c1ef4982a4bd1ecb01badd0fdffc465242c.json +0 -1
  272. xinference/web/ui/node_modules/.cache/babel-loader/d4d61a537c45c4506a6b7e3478206b71dc96e0c6c6410cef9b5d2f47018bc578.json +0 -1
  273. xinference/web/ui/node_modules/.cache/babel-loader/e2e563b606cac9d51661cf68ba1729eb5df7861af521adccedc1b163439b657d.json +0 -1
  274. xinference/web/ui/node_modules/.cache/babel-loader/e3890e804dfaeb19b79f52e1b20198b587da61a9a3a1f55417d2cc5ce4ad7dae.json +0 -1
  275. xinference/web/ui/node_modules/.cache/babel-loader/e8c3fadbd12f0e2c7b70d84acbcabd0a5c96b05380a7b3167b00e8739d829d24.json +0 -1
  276. xinference/web/ui/node_modules/.cache/babel-loader/ebb5ae1a82b33b94e2b0cfec599bec7500aff0a1701f91736a6f3a4f77617560.json +0 -1
  277. xinference/web/ui/node_modules/.cache/babel-loader/f30f0396f4e54f0355112d5eba4b16c3a56f47e456eb117a9dc7fa6218ce21fe.json +0 -1
  278. xinference/web/ui/node_modules/.cache/babel-loader/f318ac15e999a553c00832029245730fe25c2916860b0ff55eee1f0ab926d84e.json +0 -1
  279. xinference/web/ui/node_modules/.cache/babel-loader/f55f4e657cc2d797b7e73a64ce40352f9a5dde31763a3e89c14d3b983307f30e.json +0 -1
  280. xinference/web/ui/node_modules/.cache/babel-loader/f8a68931d652a5e3be4e1ae06210a223de1239f3884ad858cecf9fe8d11c2ffe.json +0 -1
  281. xinference/web/ui/node_modules/.cache/babel-loader/f95a8bd358eeb55fa2f49f1224cc2f4f36006359856744ff09ae4bb295f59ec1.json +0 -1
  282. xinference/web/ui/node_modules/.cache/babel-loader/fb6fbd693a53d3c542d9adf83e0bb458918d39bf101eaf6d6662a2fbc35c1adc.json +0 -1
  283. /xinference/web/ui/build/static/js/{main.9715fe74.js.LICENSE.txt → main.87d39ffb.js.LICENSE.txt} +0 -0
  284. {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/LICENSE +0 -0
  285. {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/WHEEL +0 -0
  286. {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/entry_points.txt +0 -0
  287. {xinference-0.8.5.dist-info → xinference-0.9.0.dist-info}/top_level.txt +0 -0

xinference/device_utils.py
@@ -0,0 +1,100 @@
+ # Copyright 2022-2023 XProbe Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import os
+
+ import torch
+ from typing_extensions import Literal, Union
+
+ DeviceType = Literal["cuda", "mps", "xpu", "cpu"]
+
+
+ def is_xpu_available() -> bool:
+     return hasattr(torch, "xpu") and torch.xpu.is_available()
+
+
+ def get_available_device() -> DeviceType:
+     if torch.cuda.is_available():
+         return "cuda"
+     elif torch.backends.mps.is_available():
+         return "mps"
+     elif is_xpu_available():
+         return "xpu"
+     return "cpu"
+
+
+ def is_device_available(device: str) -> bool:
+     if device == "cuda":
+         return torch.cuda.is_available()
+     elif device == "mps":
+         return torch.backends.mps.is_available()
+     elif device == "xpu":
+         return is_xpu_available()
+     elif device == "cpu":
+         return True
+
+     return False
+
+
+ def move_model_to_available_device(model):
+     device = get_available_device()
+
+     if device == "cpu":
+         return model
+
+     return model.to(device)
+
+
+ def get_device_preferred_dtype(device: str) -> Union[torch.dtype, None]:
+     if device == "cpu":
+         return torch.float32
+     elif device == "cuda" or device == "mps":
+         return torch.float16
+     elif device == "xpu":
+         return torch.bfloat16
+
+     return None
+
+
+ def is_hf_accelerate_supported(device: str) -> bool:
+     return device == "cuda" or device == "xpu"
+
+
+ def empty_cache():
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()
+     if torch.backends.mps.is_available():
+         torch.mps.empty_cache()
+     if is_xpu_available():
+         torch.xpu.empty_cache()
+
+
+ def gpu_count():
+     if torch.cuda.is_available():
+         cuda_visible_devices_env = os.getenv("CUDA_VISIBLE_DEVICES", None)
+
+         if cuda_visible_devices_env is None:
+             return torch.cuda.device_count()
+
+         cuda_visible_devices = (
+             cuda_visible_devices_env.split(",") if cuda_visible_devices_env else []
+         )
+
+         return min(torch.cuda.device_count(), len(cuda_visible_devices))
+     elif torch.backends.mps.is_available():
+         return 1
+     elif is_xpu_available():
+         return torch.xpu.device_count()
+     else:
+         return 0
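
The new xinference/device_utils.py module centralizes device handling that was previously duplicated per backend. A minimal sketch of how a caller might combine these helpers (the prepare function and the model object are hypothetical; only the imported helper names come from the module above):

from xinference.device_utils import (
    empty_cache,
    get_available_device,
    get_device_preferred_dtype,
    move_model_to_available_device,
)

def prepare(model):
    # Preference order baked into the helpers: cuda > mps > xpu > cpu.
    device = get_available_device()
    dtype = get_device_preferred_dtype(device)  # float16 / bfloat16 / float32
    model = move_model_to_available_device(model)  # returned unchanged on cpu
    return model, device, dtype

# After a generation round, release cached memory on whichever backend is present.
empty_cache()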

xinference/model/audio/whisper.py
@@ -14,6 +14,12 @@
  import logging
  from typing import TYPE_CHECKING, Dict, Optional

+ from xinference.device_utils import (
+     get_available_device,
+     get_device_preferred_dtype,
+     is_device_available,
+ )
+
  if TYPE_CHECKING:
      from .core import AudioModelFamilyV1

@@ -37,11 +43,15 @@ class WhisperModel:
          self._kwargs = kwargs

      def load(self):
-         import torch
          from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

-         device = "cuda:0" if torch.cuda.is_available() else "cpu"
-         torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+         if self._device is None:
+             self._device = get_available_device()
+         else:
+             if not is_device_available(self._device):
+                 raise ValueError(f"Device {self._device} is not available!")
+
+         torch_dtype = get_device_preferred_dtype(self._device)

          model = AutoModelForSpeechSeq2Seq.from_pretrained(
              self._model_path,
@@ -49,7 +59,7 @@ class WhisperModel:
              low_cpu_mem_usage=True,
              use_safetensors=True,
          )
-         model.to(device)
+         model.to(self._device)

          processor = AutoProcessor.from_pretrained(self._model_path)

@@ -63,7 +73,7 @@ class WhisperModel:
              batch_size=16,
              return_timestamps=False,
              torch_dtype=torch_dtype,
-             device=device,
+             device=self._device,
          )

      def _call_model(
@@ -99,9 +109,11 @@ class WhisperModel:
            )
          return self._call_model(
              audio=audio,
-             generate_kwargs={"language": language, "task": "transcribe"}
-             if language is not None
-             else {"task": "transcribe"},
+             generate_kwargs=(
+                 {"language": language, "task": "transcribe"}
+                 if language is not None
+                 else {"task": "transcribe"}
+             ),
              response_format=response_format,
          )

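
The device handling added to WhisperModel.load above follows a simple rule: honor an explicitly requested device if it is usable, otherwise auto-detect. A standalone sketch of that rule (resolve_device is not an xinference API, just an illustration of the hunk):

from typing import Optional

from xinference.device_utils import get_available_device, is_device_available

def resolve_device(requested: Optional[str]) -> str:
    # None means "pick whatever is available"; an explicit request must be valid.
    if requested is None:
        return get_available_device()
    if not is_device_available(requested):
        raise ValueError(f"Device {requested} is not available!")
    return requested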

xinference/model/image/core.py
@@ -56,6 +56,10 @@ class ImageModelDescription(ModelDescription):
          self._model_spec = model_spec

      def to_dict(self):
+         if self._model_spec.controlnet is not None:
+             controlnet = [cn.dict() for cn in self._model_spec.controlnet]
+         else:
+             controlnet = self._model_spec.controlnet
          return {
              "model_type": "image",
              "address": self.address,
@@ -63,7 +67,7 @@
              "model_name": self._model_spec.model_name,
              "model_family": self._model_spec.model_family,
              "model_revision": self._model_spec.model_revision,
-             "controlnet": self._model_spec.controlnet,
+             "controlnet": controlnet,
          }

      def to_version_info(self):
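
The controlnet change above matters because to_dict feeds JSON responses, and spec objects that expose .dict() (pydantic-style models, as the hunk suggests) are not JSON-serializable as-is. A self-contained illustration of the pattern; ControlNetSpec is a made-up stand-in, not the real spec class:

import json
from typing import List, Optional

from pydantic import BaseModel

class ControlNetSpec(BaseModel):  # hypothetical stand-in for the real controlnet spec
    model_name: str
    model_id: str

def serialize(controlnet: Optional[List[ControlNetSpec]]) -> str:
    payload = [cn.dict() for cn in controlnet] if controlnet is not None else controlnet
    return json.dumps({"controlnet": payload})

print(serialize([ControlNetSpec(model_name="canny", model_id="example/canny")]))
print(serialize(None))  # {"controlnet": null}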

xinference/model/image/stable_diffusion/core.py
@@ -24,6 +24,7 @@ from io import BytesIO
  from typing import List, Optional, Union

  from ....constants import XINFERENCE_IMAGE_DIR
+ from ....device_utils import move_model_to_available_device
  from ....types import Image, ImageList

  logger = logging.getLogger(__name__)
@@ -40,7 +41,7 @@ class DiffusionModel:
          self._kwargs = kwargs

      def load(self):
-         import torch
+         # import torch
          from diffusers import AutoPipelineForText2Image

          controlnet = self._kwargs.get("controlnet")
@@ -57,10 +58,7 @@
              # torch_dtype=torch.float16,
              # use_safetensors=True,
          )
-         if torch.cuda.is_available():
-             self._model = self._model.to("cuda")
-         elif torch.backends.mps.is_available():
-             self._model = self._model.to("mps")
+         self._model = move_model_to_available_device(self._model)
          # Recommended if your computer has < 64 GB of RAM
          self._model.enable_attention_slicing()


xinference/model/llm/llm_family.json
@@ -1514,10 +1514,33 @@
              ],
              "model_id": "Qwen/Qwen1.5-72B-Chat-AWQ"
          },
+         {
+             "model_format": "ggufv2",
+             "model_size_in_billions": "0_5",
+             "quantizations": [
+                 "q2_k",
+                 "q3_k_m",
+                 "q4_0",
+                 "q4_k_m",
+                 "q5_0",
+                 "q5_k_m",
+                 "q6_k",
+                 "q8_0"
+             ],
+             "model_id": "Qwen/Qwen1.5-0.5B-Chat-GGUF",
+             "model_file_name_template": "qwen1_5-0_5b-chat-{quantization}.gguf"
+         },
          {
              "model_format": "ggufv2",
              "model_size_in_billions": "1_8",
              "quantizations": [
+                 "q2_k",
+                 "q3_k_m",
+                 "q4_0",
+                 "q4_k_m",
+                 "q5_0",
+                 "q5_k_m",
+                 "q6_k",
                  "q8_0"
              ],
              "model_id": "Qwen/Qwen1.5-1.8B-Chat-GGUF",
@@ -1527,6 +1550,13 @@
              "model_format": "ggufv2",
              "model_size_in_billions": 4,
              "quantizations": [
+                 "q2_k",
+                 "q3_k_m",
+                 "q4_0",
+                 "q4_k_m",
+                 "q5_0",
+                 "q5_k_m",
+                 "q6_k",
                  "q8_0"
              ],
              "model_id": "Qwen/Qwen1.5-4B-Chat-GGUF",
@@ -1536,7 +1566,14 @@
              "model_format": "ggufv2",
              "model_size_in_billions": 7,
              "quantizations": [
-                 "q5_k_m"
+                 "q2_k",
+                 "q3_k_m",
+                 "q4_0",
+                 "q4_k_m",
+                 "q5_0",
+                 "q5_k_m",
+                 "q6_k",
+                 "q8_0"
              ],
              "model_id": "Qwen/Qwen1.5-7B-Chat-GGUF",
              "model_file_name_template": "qwen1_5-7b-chat-{quantization}.gguf"
@@ -1545,7 +1582,14 @@
              "model_format": "ggufv2",
              "model_size_in_billions": 14,
              "quantizations": [
-                 "q5_k_m"
+                 "q2_k",
+                 "q3_k_m",
+                 "q4_0",
+                 "q4_k_m",
+                 "q5_0",
+                 "q5_k_m",
+                 "q6_k",
+                 "q8_0"
              ],
              "model_id": "Qwen/Qwen1.5-14B-Chat-GGUF",
              "model_file_name_template": "qwen1_5-14b-chat-{quantization}.gguf"
@@ -1554,7 +1598,8 @@
              "model_format": "ggufv2",
              "model_size_in_billions": 72,
              "quantizations": [
-                 "q2_k"
+                 "q2_k",
+                 "q3_k_m"
              ],
              "model_id": "Qwen/Qwen1.5-72B-Chat-GGUF",
              "model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf"
@@ -3708,5 +3753,50 @@
                  "<|im_sep|>"
              ]
          }
+     },
+     {
+         "version": 1,
+         "context_length": 8192,
+         "model_name": "gemma-it",
+         "model_lang": [
+             "en"
+         ],
+         "model_ability": [
+             "chat"
+         ],
+         "model_description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.",
+         "model_specs": [
+             {
+                 "model_format": "pytorch",
+                 "model_size_in_billions": 2,
+                 "quantizations": [
+                     "none",
+                     "4-bit",
+                     "8-bit"
+                 ],
+                 "model_id": "google/gemma-2b-it"
+             },
+             {
+                 "model_format": "pytorch",
+                 "model_size_in_billions": 7,
+                 "quantizations": [
+                     "none",
+                     "4-bit",
+                     "8-bit"
+                 ],
+                 "model_id": "google/gemma-7b-it"
+             }
+         ],
+         "prompt_style": {
+             "style_name": "gemma",
+             "roles": [
+                 "user",
+                 "model"
+             ],
+             "stop": [
+                 "<end_of_turn>",
+                 "<start_of_turn>"
+             ]
+         }
      }
  ]
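
For the GGUF specs above, model_file_name_template is expanded with the selected quantization to pick the file to download; a quick sketch of the expansion for the Qwen1.5 7B entry:

template = "qwen1_5-7b-chat-{quantization}.gguf"

for quantization in ("q2_k", "q4_k_m", "q8_0"):
    print(template.format(quantization=quantization))
# qwen1_5-7b-chat-q2_k.gguf
# qwen1_5-7b-chat-q4_k_m.gguf
# qwen1_5-7b-chat-q8_0.gguf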

xinference/model/llm/llm_family_modelscope.json
@@ -29,7 +29,7 @@
                  "Q4_K_M"
              ],
              "model_id": "Xorbits/Llama-2-13b-Chat-GGUF",
-             "model_file_name_template": "llama-2-7b-chat.{quantization}.gguf",
+             "model_file_name_template": "llama-2-13b-chat.{quantization}.gguf",
              "model_hub": "modelscope",
              "model_revision": "v0.0.1"
          },
@@ -1821,61 +1821,97 @@
              "model_format": "ggufv2",
              "model_size_in_billions": "0_5",
              "quantizations": [
+                 "q2_k",
+                 "q3_k_m",
+                 "q4_0",
+                 "q4_k_m",
+                 "q5_0",
+                 "q5_k_m",
+                 "q6_k",
                  "q8_0"
              ],
              "model_id": "qwen/Qwen1.5-0.5B-Chat-GGUF",
              "model_hub": "modelscope",
-             "model_file_name_template": "qwen1.5-0.5b-chat-{quantization}.gguf"
+             "model_file_name_template": "qwen1_5-0_5b-chat-{quantization}.gguf"
          },
          {
              "model_format": "ggufv2",
              "model_size_in_billions": "1_8",
              "quantizations": [
+                 "q2_k",
+                 "q3_k_m",
+                 "q4_0",
+                 "q4_k_m",
+                 "q5_0",
+                 "q5_k_m",
+                 "q6_k",
                  "q8_0"
              ],
              "model_id": "qwen/Qwen1.5-1.8B-Chat-GGUF",
              "model_hub": "modelscope",
-             "model_file_name_template": "qwen1.5-1_8b-chat-{quantization}.gguf"
+             "model_file_name_template": "qwen1_5-1_8b-chat-{quantization}.gguf"
          },
          {
              "model_format": "ggufv2",
              "model_size_in_billions": 4,
              "quantizations": [
+                 "q2_k",
+                 "q3_k_m",
+                 "q4_0",
+                 "q4_k_m",
+                 "q5_0",
+                 "q5_k_m",
+                 "q6_k",
                  "q8_0"
              ],
              "model_id": "qwen/Qwen1.5-4B-Chat-GGUF",
              "model_hub": "modelscope",
-             "model_file_name_template": "qwen1.5-4b-chat-{quantization}.gguf"
+             "model_file_name_template": "qwen1_5-4b-chat-{quantization}.gguf"
          },
          {
              "model_format": "ggufv2",
              "model_size_in_billions": 7,
              "quantizations": [
-                 "q5_k_m"
+                 "q2_k",
+                 "q3_k_m",
+                 "q4_0",
+                 "q4_k_m",
+                 "q5_0",
+                 "q5_k_m",
+                 "q6_k",
+                 "q8_0"
              ],
              "model_id": "qwen/Qwen1.5-7B-Chat-GGUF",
              "model_hub": "modelscope",
-             "model_file_name_template": "qwen1.5-7b-chat-{quantization}.gguf"
+             "model_file_name_template": "qwen1_5-7b-chat-{quantization}.gguf"
          },
          {
              "model_format": "ggufv2",
              "model_size_in_billions": 14,
              "quantizations": [
-                 "q5_k_m"
+                 "q2_k",
+                 "q3_k_m",
+                 "q4_0",
+                 "q4_k_m",
+                 "q5_0",
+                 "q5_k_m",
+                 "q6_k",
+                 "q8_0"
              ],
              "model_id": "qwen/Qwen1.5-14B-Chat-GGUF",
              "model_hub": "modelscope",
-             "model_file_name_template": "qwen1.5-14b-chat-{quantization}.gguf"
+             "model_file_name_template": "qwen1_5-14b-chat-{quantization}.gguf"
          },
          {
              "model_format": "ggufv2",
              "model_size_in_billions": 72,
              "quantizations": [
-                 "q2_k"
+                 "q2_k",
+                 "q3_k_m"
              ],
              "model_id": "qwen/Qwen1.5-72B-Chat-GGUF",
              "model_hub": "modelscope",
-             "model_file_name_template": "qwen1.5-72b-chat-{quantization}.gguf"
+             "model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf"
          }
      ],
      "prompt_style": {

xinference/model/llm/pytorch/compression.py
@@ -25,6 +25,8 @@ from torch.nn import functional as F
  from tqdm import tqdm
  from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

+ from ....device_utils import empty_cache
+

  @dataclasses.dataclass
  class CompressionConfig:
@@ -153,7 +155,7 @@ def load_compress_model(
              tmp_state_dict[name] = None
              tensor = None
              gc.collect()
-             torch.cuda.empty_cache()
+             empty_cache()

      for name in model.state_dict():
          if name not in linear_weights:

xinference/model/llm/pytorch/core.py
@@ -12,10 +12,16 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

+ import json
  import logging
  import os
  from typing import Iterable, Iterator, List, Optional, Union

+ from ....device_utils import (
+     get_device_preferred_dtype,
+     gpu_count,
+     is_hf_accelerate_supported,
+ )
  from ....types import (
      ChatCompletion,
      ChatCompletionChunk,
@@ -115,23 +121,18 @@ class PytorchModel(LLM):
          )
          from .compression import load_compress_model

-         cuda_visible_devices_env = os.getenv("CUDA_VISIBLE_DEVICES", None)
-         cuda_visible_devices = (
-             cuda_visible_devices_env.split(",") if cuda_visible_devices_env else []
-         )
-
          quantization = self.quantization
-         num_gpus = len(cuda_visible_devices) if cuda_visible_devices_env != "-1" else 0
+         num_gpus = gpu_count()
          device = self._pytorch_model_config.get("device", "auto")
          self._pytorch_model_config["device"] = select_device(device)
          self._device = self._pytorch_model_config["device"]

-         if self._device == "cpu":
-             kwargs = {"torch_dtype": torch.float32}
-         elif self._device == "cuda":
-             kwargs = {"torch_dtype": torch.float16}
-         elif self._device == "mps":
-             kwargs = {"torch_dtype": torch.float16}
+         kwargs = {}
+
+         dtype = get_device_preferred_dtype(self._device)
+
+         if dtype is not None:
+             kwargs["torch_dtype"] = dtype
          else:
              raise ValueError(f"Device {self._device} is not supported in temporary")

@@ -142,9 +143,25 @@ class PytorchModel(LLM):
              "trust_remote_code"
          )
          model_format = self.model_spec.model_format
+
+         is_device_map_auto = False
+
+         # This is required for Intel GPU to actually work with accelerate device_map until
+         # https://github.com/intel/intel-extension-for-pytorch/issues/522
+         # is resolved
+         max_memory_env = os.getenv("ACCELERATE_MAX_MEMORY", None)
+
+         if max_memory_env is not None:
+             max_memory_raw = json.loads(max_memory_env)
+             max_memory = {
+                 int(k) if k.isdigit() else k: max_memory_raw[k] for k in max_memory_raw
+             }
+             kwargs["max_memory"] = max_memory
+
          if quantization != "none" and model_format == "pytorch":
              if self._device == "cuda" and self._is_linux():
                  kwargs["device_map"] = "auto"
+                 is_device_map_auto = True
              if quantization == "4-bit":
                  kwargs["load_in_4bit"] = True
                  kwargs["bnb_4bit_compute_dtype"] = torch.float16
@@ -178,11 +195,13 @@ class PytorchModel(LLM):
              logger.debug(f"Model Memory: {self._model.get_memory_footprint()}")
              return

-         if num_gpus > 0 and self._device == "cuda":
+         if num_gpus > 0 and is_hf_accelerate_supported(self._device):
              kwargs.update({"device_map": "auto"})
+             is_device_map_auto = True
+
          self._model, self._tokenizer = self._load_model(**kwargs)

-         if self._device == "mps":
+         if not is_device_map_auto:
              self._model.to(self._device)
          logger.debug(f"Model Memory: {self._model.get_memory_footprint()}")

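
The ACCELERATE_MAX_MEMORY handling above reads a JSON mapping from the environment and coerces digit keys to integers, which is the shape accelerate expects for max_memory (GPU indices as ints plus the "cpu" key). A sketch of setting and parsing it; the memory budgets are purely illustrative:

import json
import os

os.environ["ACCELERATE_MAX_MEMORY"] = json.dumps({"0": "20GiB", "cpu": "48GiB"})

max_memory_raw = json.loads(os.environ["ACCELERATE_MAX_MEMORY"])
max_memory = {int(k) if k.isdigit() else k: max_memory_raw[k] for k in max_memory_raw}
print(max_memory)  # {0: '20GiB', 'cpu': '48GiB'}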

xinference/model/llm/pytorch/qwen_vl.py
@@ -95,9 +95,11 @@ class QwenVLChatModel(PytorchChatModel):
          if not isinstance(content, str):
              # TODO(codingl2k1): Optimize _ensure_url
              content = [
-                 {"image": _ensure_url(c["image_url"]["url"]), "type": "image"}
-                 if c.get("type") == "image_url"
-                 else c
+                 (
+                     {"image": _ensure_url(c["image_url"]["url"]), "type": "image"}
+                     if c.get("type") == "image_url"
+                     else c
+                 )
                  for c in content
              ]
              content = sorted(content, key=operator.itemgetter("type"))

xinference/model/llm/pytorch/spec_decoding_utils.py
@@ -17,6 +17,8 @@ import time
  import uuid
  from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple

+ from ....device_utils import empty_cache
+
  try:
      import torch
      from torch.nn import functional as F
@@ -526,4 +528,4 @@ def speculative_generate_stream(
      del kv_cache
      del draft_kv_cache
      gc.collect()
-     torch.cuda.empty_cache()
+     empty_cache()