xinference 0.12.3__py3-none-any.whl → 0.13.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (101)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +56 -8
  3. xinference/client/restful/restful_client.py +49 -4
  4. xinference/core/model.py +36 -4
  5. xinference/core/scheduler.py +2 -0
  6. xinference/core/supervisor.py +132 -15
  7. xinference/core/worker.py +239 -53
  8. xinference/deploy/cmdline.py +5 -0
  9. xinference/deploy/utils.py +33 -2
  10. xinference/model/audio/chattts.py +6 -6
  11. xinference/model/audio/core.py +23 -15
  12. xinference/model/core.py +12 -3
  13. xinference/model/embedding/core.py +25 -16
  14. xinference/model/flexible/__init__.py +40 -0
  15. xinference/model/flexible/core.py +228 -0
  16. xinference/model/flexible/launchers/__init__.py +15 -0
  17. xinference/model/flexible/launchers/transformers_launcher.py +63 -0
  18. xinference/model/flexible/utils.py +33 -0
  19. xinference/model/image/core.py +18 -14
  20. xinference/model/image/custom.py +1 -1
  21. xinference/model/llm/__init__.py +5 -2
  22. xinference/model/llm/core.py +3 -2
  23. xinference/model/llm/ggml/llamacpp.py +1 -10
  24. xinference/model/llm/llm_family.json +292 -36
  25. xinference/model/llm/llm_family.py +102 -53
  26. xinference/model/llm/llm_family_modelscope.json +247 -27
  27. xinference/model/llm/mlx/__init__.py +13 -0
  28. xinference/model/llm/mlx/core.py +408 -0
  29. xinference/model/llm/pytorch/chatglm.py +2 -9
  30. xinference/model/llm/pytorch/cogvlm2.py +206 -21
  31. xinference/model/llm/pytorch/core.py +213 -120
  32. xinference/model/llm/pytorch/glm4v.py +171 -15
  33. xinference/model/llm/pytorch/qwen_vl.py +168 -7
  34. xinference/model/llm/pytorch/utils.py +53 -62
  35. xinference/model/llm/utils.py +28 -7
  36. xinference/model/rerank/core.py +29 -25
  37. xinference/thirdparty/deepseek_vl/serve/__init__.py +13 -0
  38. xinference/thirdparty/deepseek_vl/serve/app_deepseek.py +510 -0
  39. xinference/thirdparty/deepseek_vl/serve/app_modules/__init__.py +13 -0
  40. xinference/thirdparty/deepseek_vl/serve/app_modules/gradio_utils.py +94 -0
  41. xinference/thirdparty/deepseek_vl/serve/app_modules/overwrites.py +81 -0
  42. xinference/thirdparty/deepseek_vl/serve/app_modules/presets.py +96 -0
  43. xinference/thirdparty/deepseek_vl/serve/app_modules/utils.py +229 -0
  44. xinference/thirdparty/deepseek_vl/serve/inference.py +170 -0
  45. xinference/types.py +0 -1
  46. xinference/web/ui/build/asset-manifest.json +3 -3
  47. xinference/web/ui/build/index.html +1 -1
  48. xinference/web/ui/build/static/js/main.95c1d652.js +3 -0
  49. xinference/web/ui/build/static/js/main.95c1d652.js.map +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/2c63090c842376cdd368c3ded88a333ef40d94785747651343040a6f7872a223.json +1 -0
  54. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +1 -0
  59. xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +1 -0
  60. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +1 -0
  61. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +1 -0
  65. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/METADATA +10 -11
  66. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/RECORD +71 -69
  67. xinference/model/llm/ggml/chatglm.py +0 -457
  68. xinference/thirdparty/ChatTTS/__init__.py +0 -1
  69. xinference/thirdparty/ChatTTS/core.py +0 -200
  70. xinference/thirdparty/ChatTTS/experimental/__init__.py +0 -0
  71. xinference/thirdparty/ChatTTS/experimental/llm.py +0 -40
  72. xinference/thirdparty/ChatTTS/infer/__init__.py +0 -0
  73. xinference/thirdparty/ChatTTS/infer/api.py +0 -125
  74. xinference/thirdparty/ChatTTS/model/__init__.py +0 -0
  75. xinference/thirdparty/ChatTTS/model/dvae.py +0 -155
  76. xinference/thirdparty/ChatTTS/model/gpt.py +0 -265
  77. xinference/thirdparty/ChatTTS/utils/__init__.py +0 -0
  78. xinference/thirdparty/ChatTTS/utils/gpu_utils.py +0 -23
  79. xinference/thirdparty/ChatTTS/utils/infer_utils.py +0 -141
  80. xinference/thirdparty/ChatTTS/utils/io_utils.py +0 -14
  81. xinference/web/ui/build/static/js/main.77dd47c3.js +0 -3
  82. xinference/web/ui/build/static/js/main.77dd47c3.js.map +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/0cd591866aa345566e0b63fb51ff2043e163a770af6fdc2f3bad395d046353e2.json +0 -1
  84. xinference/web/ui/node_modules/.cache/babel-loader/37c1476717199863bbba1530e3513a9368f8f73001b75b4a85c2075956308027.json +0 -1
  85. xinference/web/ui/node_modules/.cache/babel-loader/3da7d55e87882a4af923e187b1351160e34ca102f589086439c15131a227fb6e.json +0 -1
  86. xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +0 -1
  87. xinference/web/ui/node_modules/.cache/babel-loader/46edc1fe657dfedb2e673148332bb442c6eb98f09f2592c389209e376510afa5.json +0 -1
  88. xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +0 -1
  89. xinference/web/ui/node_modules/.cache/babel-loader/72bcecc71c5267250edeb89608859d449b586f13ff9923a5e70e7172976ec403.json +0 -1
  90. xinference/web/ui/node_modules/.cache/babel-loader/82db357f3fd5b32215d747ee593f69ff06c95ad6cde37f71a96c8290aaab64c0.json +0 -1
  91. xinference/web/ui/node_modules/.cache/babel-loader/935efd2867664c58230378fdf2ff1ea85e58d853b7214014e20dfbca8dab7b05.json +0 -1
  92. xinference/web/ui/node_modules/.cache/babel-loader/bc6da27195ec4607bb472bf61f97c928ad4966fa64e4c2247661bedb7400abba.json +0 -1
  93. xinference/web/ui/node_modules/.cache/babel-loader/c2abe75f04ad82fba68f35ed9cbe2e287762c876684fddccccfa73f739489b65.json +0 -1
  94. xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +0 -1
  95. xinference/web/ui/node_modules/.cache/babel-loader/f118f99c22b713c678c1209c4e1dd43fe86e3f6e801a4c0c35d3bbf41fd05fe6.json +0 -1
  96. xinference/web/ui/node_modules/.cache/babel-loader/f51bf63ddaa7afd125ef2254a105789333eecc1c94fdf5157a9b88ef7ad0a5bd.json +0 -1
  97. /xinference/web/ui/build/static/js/{main.77dd47c3.js.LICENSE.txt → main.95c1d652.js.LICENSE.txt} +0 -0
  98. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/LICENSE +0 -0
  99. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/WHEEL +0 -0
  100. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/entry_points.txt +0 -0
  101. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/top_level.txt +0 -0
xinference/core/worker.py CHANGED
@@ -22,7 +22,7 @@ import threading
22
22
  import time
23
23
  from collections import defaultdict
24
24
  from logging import getLogger
25
- from typing import Any, Dict, List, Optional, Set, Tuple, Union
25
+ from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Union
26
26
 
27
27
  import xoscar as xo
28
28
  from async_timeout import timeout
@@ -73,6 +73,9 @@ class WorkerActor(xo.StatelessActor):
73
73
  self._main_pool.recover_sub_pool = self.recover_sub_pool
74
74
 
75
75
  # internal states.
76
+ # temporary placeholder during model launch process:
77
+ self._model_uid_launching_guard: Dict[str, bool] = {}
78
+ # attributes maintained after model launched:
76
79
  self._model_uid_to_model: Dict[str, xo.ActorRefType["ModelActor"]] = {}
77
80
  self._model_uid_to_model_spec: Dict[str, ModelDescription] = {}
78
81
  self._gpu_to_model_uid: Dict[int, str] = {}
@@ -209,48 +212,81 @@ class WorkerActor(xo.StatelessActor):
209
212
 
210
213
  from ..model.audio import (
211
214
  CustomAudioModelFamilyV1,
215
+ generate_audio_description,
212
216
  get_audio_model_descriptions,
213
217
  register_audio,
214
218
  unregister_audio,
215
219
  )
216
220
  from ..model.embedding import (
217
221
  CustomEmbeddingModelSpec,
222
+ generate_embedding_description,
218
223
  get_embedding_model_descriptions,
219
224
  register_embedding,
220
225
  unregister_embedding,
221
226
  )
227
+ from ..model.flexible import (
228
+ FlexibleModelSpec,
229
+ get_flexible_model_descriptions,
230
+ register_flexible_model,
231
+ unregister_flexible_model,
232
+ )
222
233
  from ..model.image import (
223
234
  CustomImageModelFamilyV1,
235
+ generate_image_description,
224
236
  get_image_model_descriptions,
225
237
  register_image,
226
238
  unregister_image,
227
239
  )
228
240
  from ..model.llm import (
229
241
  CustomLLMFamilyV1,
242
+ generate_llm_description,
230
243
  get_llm_model_descriptions,
231
244
  register_llm,
232
245
  unregister_llm,
233
246
  )
234
247
  from ..model.rerank import (
235
248
  CustomRerankModelSpec,
249
+ generate_rerank_description,
236
250
  get_rerank_model_descriptions,
237
251
  register_rerank,
238
252
  unregister_rerank,
239
253
  )
240
254
 
241
255
  self._custom_register_type_to_cls: Dict[str, Tuple] = { # type: ignore
242
- "LLM": (CustomLLMFamilyV1, register_llm, unregister_llm),
256
+ "LLM": (
257
+ CustomLLMFamilyV1,
258
+ register_llm,
259
+ unregister_llm,
260
+ generate_llm_description,
261
+ ),
243
262
  "embedding": (
244
263
  CustomEmbeddingModelSpec,
245
264
  register_embedding,
246
265
  unregister_embedding,
266
+ generate_embedding_description,
267
+ ),
268
+ "rerank": (
269
+ CustomRerankModelSpec,
270
+ register_rerank,
271
+ unregister_rerank,
272
+ generate_rerank_description,
247
273
  ),
248
- "rerank": (CustomRerankModelSpec, register_rerank, unregister_rerank),
249
- "audio": (CustomAudioModelFamilyV1, register_audio, unregister_audio),
250
274
  "image": (
251
275
  CustomImageModelFamilyV1,
252
276
  register_image,
253
277
  unregister_image,
278
+ generate_image_description,
279
+ ),
280
+ "audio": (
281
+ CustomAudioModelFamilyV1,
282
+ register_audio,
283
+ unregister_audio,
284
+ generate_audio_description,
285
+ ),
286
+ "flexible": (
287
+ FlexibleModelSpec,
288
+ register_flexible_model,
289
+ unregister_flexible_model,
254
290
  ),
255
291
  }
256
292
 
@@ -261,6 +297,7 @@ class WorkerActor(xo.StatelessActor):
261
297
  model_version_infos.update(get_rerank_model_descriptions())
262
298
  model_version_infos.update(get_image_model_descriptions())
263
299
  model_version_infos.update(get_audio_model_descriptions())
300
+ model_version_infos.update(get_flexible_model_descriptions())
264
301
  await self._cache_tracker_ref.record_model_version(
265
302
  model_version_infos, self.address
266
303
  )
@@ -511,17 +548,23 @@ class WorkerActor(xo.StatelessActor):
511
548
  raise ValueError(f"{model_name} model can't run on Darwin system.")
512
549
 
513
550
  @log_sync(logger=logger)
514
- def register_model(self, model_type: str, model: str, persist: bool):
551
+ async def register_model(self, model_type: str, model: str, persist: bool):
515
552
  # TODO: centralized model registrations
516
553
  if model_type in self._custom_register_type_to_cls:
517
554
  (
518
555
  model_spec_cls,
519
556
  register_fn,
520
557
  unregister_fn,
558
+ generate_fn,
521
559
  ) = self._custom_register_type_to_cls[model_type]
522
560
  model_spec = model_spec_cls.parse_raw(model)
523
561
  try:
524
562
  register_fn(model_spec, persist)
563
+ await self._cache_tracker_ref.record_model_version(
564
+ generate_fn(model_spec), self.address
565
+ )
566
+ except ValueError as e:
567
+ raise e
525
568
  except Exception as e:
526
569
  unregister_fn(model_spec.model_name, raise_error=False)
527
570
  raise e
@@ -529,14 +572,127 @@ class WorkerActor(xo.StatelessActor):
529
572
  raise ValueError(f"Unsupported model type: {model_type}")
530
573
 
531
574
  @log_sync(logger=logger)
532
- def unregister_model(self, model_type: str, model_name: str):
575
+ async def unregister_model(self, model_type: str, model_name: str):
533
576
  # TODO: centralized model registrations
534
577
  if model_type in self._custom_register_type_to_cls:
535
- _, _, unregister_fn = self._custom_register_type_to_cls[model_type]
536
- unregister_fn(model_name)
578
+ _, _, unregister_fn, _ = self._custom_register_type_to_cls[model_type]
579
+ unregister_fn(model_name, False)
580
+ else:
581
+ raise ValueError(f"Unsupported model type: {model_type}")
582
+
583
+ @log_async(logger=logger)
584
+ async def list_model_registrations(
585
+ self, model_type: str, detailed: bool = False
586
+ ) -> List[Dict[str, Any]]:
587
+ def sort_helper(item):
588
+ assert isinstance(item["model_name"], str)
589
+ return item.get("model_name").lower()
590
+
591
+ if model_type == "LLM":
592
+ from ..model.llm import get_user_defined_llm_families
593
+
594
+ ret = []
595
+
596
+ for family in get_user_defined_llm_families():
597
+ ret.append({"model_name": family.model_name, "is_builtin": False})
598
+
599
+ ret.sort(key=sort_helper)
600
+ return ret
601
+ elif model_type == "embedding":
602
+ from ..model.embedding.custom import get_user_defined_embeddings
603
+
604
+ ret = []
605
+
606
+ for model_spec in get_user_defined_embeddings():
607
+ ret.append({"model_name": model_spec.model_name, "is_builtin": False})
608
+
609
+ ret.sort(key=sort_helper)
610
+ return ret
611
+ elif model_type == "image":
612
+ from ..model.image.custom import get_user_defined_images
613
+
614
+ ret = []
615
+
616
+ for model_spec in get_user_defined_images():
617
+ ret.append({"model_name": model_spec.model_name, "is_builtin": False})
618
+
619
+ ret.sort(key=sort_helper)
620
+ return ret
621
+ elif model_type == "audio":
622
+ from ..model.audio.custom import get_user_defined_audios
623
+
624
+ ret = []
625
+
626
+ for model_spec in get_user_defined_audios():
627
+ ret.append({"model_name": model_spec.model_name, "is_builtin": False})
628
+
629
+ ret.sort(key=sort_helper)
630
+ return ret
631
+ elif model_type == "rerank":
632
+ from ..model.rerank.custom import get_user_defined_reranks
633
+
634
+ ret = []
635
+
636
+ for model_spec in get_user_defined_reranks():
637
+ ret.append({"model_name": model_spec.model_name, "is_builtin": False})
638
+
639
+ ret.sort(key=sort_helper)
640
+ return ret
537
641
  else:
538
642
  raise ValueError(f"Unsupported model type: {model_type}")
539
643
 
644
+ @log_sync(logger=logger)
645
+ async def get_model_registration(self, model_type: str, model_name: str) -> Any:
646
+ if model_type == "LLM":
647
+ from ..model.llm import get_user_defined_llm_families
648
+
649
+ for f in get_user_defined_llm_families():
650
+ if f.model_name == model_name:
651
+ return f
652
+ elif model_type == "embedding":
653
+ from ..model.embedding.custom import get_user_defined_embeddings
654
+
655
+ for f in get_user_defined_embeddings():
656
+ if f.model_name == model_name:
657
+ return f
658
+ elif model_type == "image":
659
+ from ..model.image.custom import get_user_defined_images
660
+
661
+ for f in get_user_defined_images():
662
+ if f.model_name == model_name:
663
+ return f
664
+ elif model_type == "audio":
665
+ from ..model.audio.custom import get_user_defined_audios
666
+
667
+ for f in get_user_defined_audios():
668
+ if f.model_name == model_name:
669
+ return f
670
+ elif model_type == "rerank":
671
+ from ..model.rerank.custom import get_user_defined_reranks
672
+
673
+ for f in get_user_defined_reranks():
674
+ if f.model_name == model_name:
675
+ return f
676
+ return None
677
+
678
+ @log_async(logger=logger)
679
+ async def query_engines_by_model_name(self, model_name: str):
680
+ from copy import deepcopy
681
+
682
+ from ..model.llm.llm_family import LLM_ENGINES
683
+
684
+ if model_name not in LLM_ENGINES:
685
+ return None
686
+
687
+ # filter llm_class
688
+ engine_params = deepcopy(LLM_ENGINES[model_name])
689
+ for engine in engine_params:
690
+ params = engine_params[engine]
691
+ for param in params:
692
+ del param["llm_class"]
693
+
694
+ return engine_params
695
+
540
696
  async def _get_model_ability(self, model: Any, model_type: str) -> List[str]:
541
697
  from ..model.llm.core import LLM
542
698
 
@@ -548,6 +704,8 @@ class WorkerActor(xo.StatelessActor):
548
704
  return ["text_to_image"]
549
705
  elif model_type == "audio":
550
706
  return ["audio_to_text"]
707
+ elif model_type == "flexible":
708
+ return ["flexible"]
551
709
  else:
552
710
  assert model_type == "LLM"
553
711
  assert isinstance(model, LLM)
@@ -584,6 +742,7 @@ class WorkerActor(xo.StatelessActor):
584
742
  peft_model_config: Optional[PeftModelConfig] = None,
585
743
  request_limits: Optional[int] = None,
586
744
  gpu_idx: Optional[Union[int, List[int]]] = None,
745
+ download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
587
746
  **kwargs,
588
747
  ):
589
748
  # !!! Note that The following code must be placed at the very beginning of this function,
@@ -594,10 +753,14 @@ class WorkerActor(xo.StatelessActor):
594
753
  launch_args.pop("kwargs")
595
754
  launch_args.update(kwargs)
596
755
 
597
- event_model_uid, _, __ = parse_replica_model_uid(model_uid)
756
+ try:
757
+ origin_uid, _, _ = parse_replica_model_uid(model_uid)
758
+ except Exception as e:
759
+ logger.exception(e)
760
+ raise
598
761
  try:
599
762
  await self._event_collector_ref.report_event(
600
- event_model_uid,
763
+ origin_uid,
601
764
  Event(
602
765
  event_type=EventType.INFO,
603
766
  event_ts=int(time.time()),
@@ -640,50 +803,56 @@ class WorkerActor(xo.StatelessActor):
640
803
  assert model_uid not in self._model_uid_to_model
641
804
  self._check_model_is_valid(model_name, model_format)
642
805
 
643
- subpool_address, devices = await self._create_subpool(
644
- model_uid, model_type, n_gpu=n_gpu, gpu_idx=gpu_idx
645
- )
806
+ if self.get_model_launch_status(model_uid) is not None:
807
+ raise ValueError(f"{model_uid} is running")
646
808
 
647
809
  try:
648
- origin_uid, _, _ = parse_replica_model_uid(model_uid)
649
- model, model_description = await asyncio.to_thread(
650
- create_model_instance,
651
- subpool_address,
652
- devices,
653
- model_uid,
654
- model_type,
655
- model_name,
656
- model_engine,
657
- model_format,
658
- model_size_in_billions,
659
- quantization,
660
- peft_model_config,
661
- **kwargs,
662
- )
663
- await self.update_cache_status(model_name, model_description)
664
- model_ref = await xo.create_actor(
665
- ModelActor,
666
- address=subpool_address,
667
- uid=model_uid,
668
- worker_address=self.address,
669
- model=model,
670
- model_description=model_description,
671
- request_limits=request_limits,
810
+ self._model_uid_launching_guard[model_uid] = True
811
+ subpool_address, devices = await self._create_subpool(
812
+ model_uid, model_type, n_gpu=n_gpu, gpu_idx=gpu_idx
672
813
  )
673
- await model_ref.load()
674
- except:
675
- logger.error(f"Failed to load model {model_uid}", exc_info=True)
676
- self.release_devices(model_uid=model_uid)
677
- await self._main_pool.remove_sub_pool(subpool_address)
678
- raise
679
814
 
680
- self._model_uid_to_model[model_uid] = model_ref
681
- self._model_uid_to_model_spec[model_uid] = model_description
682
- self._model_uid_to_addr[model_uid] = subpool_address
683
- self._model_uid_to_recover_count.setdefault(
684
- model_uid, MODEL_ACTOR_AUTO_RECOVER_LIMIT
685
- )
686
- self._model_uid_to_launch_args[model_uid] = launch_args
815
+ try:
816
+ model, model_description = await asyncio.to_thread(
817
+ create_model_instance,
818
+ subpool_address,
819
+ devices,
820
+ model_uid,
821
+ model_type,
822
+ model_name,
823
+ model_engine,
824
+ model_format,
825
+ model_size_in_billions,
826
+ quantization,
827
+ peft_model_config,
828
+ download_hub,
829
+ **kwargs,
830
+ )
831
+ await self.update_cache_status(model_name, model_description)
832
+ model_ref = await xo.create_actor(
833
+ ModelActor,
834
+ address=subpool_address,
835
+ uid=model_uid,
836
+ worker_address=self.address,
837
+ model=model,
838
+ model_description=model_description,
839
+ request_limits=request_limits,
840
+ )
841
+ await model_ref.load()
842
+ except:
843
+ logger.error(f"Failed to load model {model_uid}", exc_info=True)
844
+ self.release_devices(model_uid=model_uid)
845
+ await self._main_pool.remove_sub_pool(subpool_address)
846
+ raise
847
+ self._model_uid_to_model[model_uid] = model_ref
848
+ self._model_uid_to_model_spec[model_uid] = model_description
849
+ self._model_uid_to_addr[model_uid] = subpool_address
850
+ self._model_uid_to_recover_count.setdefault(
851
+ model_uid, MODEL_ACTOR_AUTO_RECOVER_LIMIT
852
+ )
853
+ self._model_uid_to_launch_args[model_uid] = launch_args
854
+ finally:
855
+ del self._model_uid_launching_guard[model_uid]
687
856
 
688
857
  # update status to READY
689
858
  abilities = await self._get_model_ability(model, model_type)
@@ -694,10 +863,13 @@ class WorkerActor(xo.StatelessActor):
694
863
 
695
864
  @log_async(logger=logger)
696
865
  async def terminate_model(self, model_uid: str):
697
- event_model_uid, _, __ = parse_replica_model_uid(model_uid)
866
+ # Terminating a model while it is launching is not allowed
867
+ if model_uid in self._model_uid_launching_guard:
868
+ raise ValueError(f"{model_uid} is launching")
869
+ origin_uid, _, __ = parse_replica_model_uid(model_uid)
698
870
  try:
699
871
  await self._event_collector_ref.report_event(
700
- event_model_uid,
872
+ origin_uid,
701
873
  Event(
702
874
  event_type=EventType.INFO,
703
875
  event_ts=int(time.time()),
@@ -708,7 +880,6 @@ class WorkerActor(xo.StatelessActor):
708
880
  # Report callback error can be log and ignore, should not interrupt the Process
709
881
  logger.error("report_event error: %s" % (e))
710
882
 
711
- origin_uid, _, _ = parse_replica_model_uid(model_uid)
712
883
  await self._status_guard_ref.update_instance_info(
713
884
  origin_uid, {"status": LaunchStatus.TERMINATING.name}
714
885
  )
@@ -740,6 +911,21 @@ class WorkerActor(xo.StatelessActor):
740
911
  origin_uid, {"status": LaunchStatus.TERMINATED.name}
741
912
  )
742
913
 
914
+ # Provide an interface for future version of supervisor to call
915
+ def get_model_launch_status(self, model_uid: str) -> Optional[str]:
916
+ """
917
+ returns:
918
+ CREATING: model is launching
919
+ READY: model is running
920
+ None: model is not running (launch error might have happened)
921
+ """
922
+
923
+ if model_uid in self._model_uid_launching_guard:
924
+ return LaunchStatus.CREATING.name
925
+ if model_uid in self._model_uid_to_model:
926
+ return LaunchStatus.READY.name
927
+ return None
928
+
743
929
  @log_async(logger=logger)
744
930
  async def list_models(self) -> Dict[str, Dict[str, Any]]:
745
931
  ret = {}
@@ -370,6 +370,9 @@ def worker(
370
370
  help="Type of model to register (default is 'LLM').",
371
371
  )
372
372
  @click.option("--file", "-f", type=str, help="Path to the model configuration file.")
373
+ @click.option(
374
+ "--worker-ip", "-w", type=str, help="Specify the ip address of the worker."
375
+ )
373
376
  @click.option(
374
377
  "--persist",
375
378
  "-p",
@@ -387,6 +390,7 @@ def register_model(
387
390
  endpoint: Optional[str],
388
391
  model_type: str,
389
392
  file: str,
393
+ worker_ip: str,
390
394
  persist: bool,
391
395
  api_key: Optional[str],
392
396
  ):
@@ -400,6 +404,7 @@ def register_model(
400
404
  client.register_model(
401
405
  model_type=model_type,
402
406
  model=model,
407
+ worker_ip=worker_ip,
403
408
  persist=persist,
404
409
  )
405
410
 
@@ -79,6 +79,12 @@ def get_config_dict(
79
79
  "stream": "ext://sys.stderr",
80
80
  "filters": ["logger_name_filter"],
81
81
  },
82
+ "console_handler": {
83
+ "class": "logging.StreamHandler",
84
+ "formatter": "formatter",
85
+ "level": log_level,
86
+ "stream": "ext://sys.stderr",
87
+ },
82
88
  "file_handler": {
83
89
  "class": "logging.handlers.RotatingFileHandler",
84
90
  "formatter": "formatter",
@@ -95,7 +101,32 @@ def get_config_dict(
95
101
  "handlers": ["stream_handler", "file_handler"],
96
102
  "level": log_level,
97
103
  "propagate": False,
98
- }
104
+ },
105
+ "uvicorn": {
106
+ "handlers": ["stream_handler", "file_handler"],
107
+ "level": log_level,
108
+ "propagate": False,
109
+ },
110
+ "uvicorn.error": {
111
+ "handlers": ["stream_handler", "file_handler"],
112
+ "level": log_level,
113
+ "propagate": False,
114
+ },
115
+ "uvicorn.access": {
116
+ "handlers": ["stream_handler", "file_handler"],
117
+ "level": log_level,
118
+ "propagate": False,
119
+ },
120
+ "transformers": {
121
+ "handlers": ["console_handler", "file_handler"],
122
+ "level": log_level,
123
+ "propagate": False,
124
+ },
125
+ "vllm": {
126
+ "handlers": ["console_handler", "file_handler"],
127
+ "level": log_level,
128
+ "propagate": False,
129
+ },
99
130
  },
100
131
  "root": {
101
132
  "level": "WARN",
@@ -127,7 +158,7 @@ def health_check(address: str, max_attempts: int, sleep_interval: int = 3) -> bo
127
158
  while attempts < max_attempts:
128
159
  time.sleep(sleep_interval)
129
160
  try:
130
- from xinference.core.supervisor import SupervisorActor
161
+ from ..core.supervisor import SupervisorActor
131
162
 
132
163
  supervisor_ref: xo.ActorRefType[SupervisorActor] = await xo.actor_ref( # type: ignore
133
164
  address=address, uid=SupervisorActor.uid()
@@ -38,21 +38,19 @@ class ChatTTSModel:
38
38
  self._kwargs = kwargs
39
39
 
40
40
  def load(self):
41
+ import ChatTTS
41
42
  import torch
42
43
 
43
- from xinference.thirdparty import ChatTTS
44
-
45
44
  torch._dynamo.config.cache_size_limit = 64
46
45
  torch._dynamo.config.suppress_errors = True
47
46
  torch.set_float32_matmul_precision("high")
48
47
  self._model = ChatTTS.Chat()
49
- self._model.load_models(
50
- source="local", local_path=self._model_path, compile=True
51
- )
48
+ self._model.load(source="custom", custom_path=self._model_path, compile=True)
52
49
 
53
50
  def speech(
54
51
  self, input: str, voice: str, response_format: str = "mp3", speed: float = 1.0
55
52
  ):
53
+ import ChatTTS
56
54
  import numpy as np
57
55
  import torch
58
56
  import torchaudio
@@ -71,7 +69,9 @@ class ChatTTSModel:
71
69
 
72
70
  default = 5
73
71
  infer_speed = int(default * speed)
74
- params_infer_code = {"spk_emb": rnd_spk_emb, "prompt": f"[speed_{infer_speed}]"}
72
+ params_infer_code = ChatTTS.Chat.InferCodeParams(
73
+ prompt=f"[speed_{infer_speed}]", spk_emb=rnd_spk_emb
74
+ )
75
75
 
76
76
  assert self._model is not None
77
77
  wavs = self._model.infer([input], params_infer_code=params_infer_code)
@@ -14,7 +14,7 @@
14
14
  import logging
15
15
  import os
16
16
  from collections import defaultdict
17
- from typing import Dict, List, Optional, Tuple, Union
17
+ from typing import Dict, List, Literal, Optional, Tuple, Union
18
18
 
19
19
  from ...constants import XINFERENCE_CACHE_DIR
20
20
  from ..core import CacheableModelSpec, ModelDescription
@@ -94,7 +94,10 @@ def generate_audio_description(
94
94
  return res
95
95
 
96
96
 
97
- def match_audio(model_name: str) -> AudioModelFamilyV1:
97
+ def match_audio(
98
+ model_name: str,
99
+ download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
100
+ ) -> AudioModelFamilyV1:
98
101
  from ..utils import download_from_modelscope
99
102
  from . import BUILTIN_AUDIO_MODELS, MODELSCOPE_AUDIO_MODELS
100
103
  from .custom import get_user_defined_audios
@@ -103,17 +106,17 @@ def match_audio(model_name: str) -> AudioModelFamilyV1:
103
106
  if model_spec.model_name == model_name:
104
107
  return model_spec
105
108
 
106
- if download_from_modelscope():
107
- if model_name in MODELSCOPE_AUDIO_MODELS:
108
- logger.debug(f"Audio model {model_name} found in ModelScope.")
109
- return MODELSCOPE_AUDIO_MODELS[model_name]
110
- else:
111
- logger.debug(
112
- f"Audio model {model_name} not found in ModelScope, "
113
- f"now try to load it via builtin way."
114
- )
115
-
116
- if model_name in BUILTIN_AUDIO_MODELS:
109
+ if download_hub == "huggingface" and model_name in BUILTIN_AUDIO_MODELS:
110
+ logger.debug(f"Audio model {model_name} found in huggingface.")
111
+ return BUILTIN_AUDIO_MODELS[model_name]
112
+ elif download_hub == "modelscope" and model_name in MODELSCOPE_AUDIO_MODELS:
113
+ logger.debug(f"Audio model {model_name} found in ModelScope.")
114
+ return MODELSCOPE_AUDIO_MODELS[model_name]
115
+ elif download_from_modelscope() and model_name in MODELSCOPE_AUDIO_MODELS:
116
+ logger.debug(f"Audio model {model_name} found in ModelScope.")
117
+ return MODELSCOPE_AUDIO_MODELS[model_name]
118
+ elif model_name in BUILTIN_AUDIO_MODELS:
119
+ logger.debug(f"Audio model {model_name} found in huggingface.")
117
120
  return BUILTIN_AUDIO_MODELS[model_name]
118
121
  else:
119
122
  raise ValueError(
@@ -141,9 +144,14 @@ def get_cache_status(
141
144
 
142
145
 
143
146
  def create_audio_model_instance(
144
- subpool_addr: str, devices: List[str], model_uid: str, model_name: str, **kwargs
147
+ subpool_addr: str,
148
+ devices: List[str],
149
+ model_uid: str,
150
+ model_name: str,
151
+ download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
152
+ **kwargs,
145
153
  ) -> Tuple[Union[WhisperModel, ChatTTSModel], AudioModelDescription]:
146
- model_spec = match_audio(model_name)
154
+ model_spec = match_audio(model_name, download_hub)
147
155
  model_path = cache(model_spec)
148
156
  model: Union[WhisperModel, ChatTTSModel]
149
157
  if model_spec.model_family == "whisper":
xinference/model/core.py CHANGED
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from abc import ABC, abstractmethod
16
- from typing import Any, List, Optional, Tuple, Union
16
+ from typing import Any, List, Literal, Optional, Tuple, Union
17
17
 
18
18
  from .._compat import BaseModel
19
19
  from ..types import PeftModelConfig
@@ -55,10 +55,12 @@ def create_model_instance(
55
55
  model_size_in_billions: Optional[Union[int, str]] = None,
56
56
  quantization: Optional[str] = None,
57
57
  peft_model_config: Optional[PeftModelConfig] = None,
58
+ download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
58
59
  **kwargs,
59
60
  ) -> Tuple[Any, ModelDescription]:
60
61
  from .audio.core import create_audio_model_instance
61
62
  from .embedding.core import create_embedding_model_instance
63
+ from .flexible.core import create_flexible_model_instance
62
64
  from .image.core import create_image_model_instance
63
65
  from .llm.core import create_llm_model_instance
64
66
  from .rerank.core import create_rerank_model_instance
@@ -74,13 +76,14 @@ def create_model_instance(
74
76
  model_size_in_billions,
75
77
  quantization,
76
78
  peft_model_config,
79
+ download_hub,
77
80
  **kwargs,
78
81
  )
79
82
  elif model_type == "embedding":
80
83
  # embedding model doesn't accept trust_remote_code
81
84
  kwargs.pop("trust_remote_code", None)
82
85
  return create_embedding_model_instance(
83
- subpool_addr, devices, model_uid, model_name, **kwargs
86
+ subpool_addr, devices, model_uid, model_name, download_hub, **kwargs
84
87
  )
85
88
  elif model_type == "image":
86
89
  kwargs.pop("trust_remote_code", None)
@@ -90,16 +93,22 @@ def create_model_instance(
90
93
  model_uid,
91
94
  model_name,
92
95
  peft_model_config,
96
+ download_hub,
93
97
  **kwargs,
94
98
  )
95
99
  elif model_type == "rerank":
96
100
  kwargs.pop("trust_remote_code", None)
97
101
  return create_rerank_model_instance(
98
- subpool_addr, devices, model_uid, model_name, **kwargs
102
+ subpool_addr, devices, model_uid, model_name, download_hub, **kwargs
99
103
  )
100
104
  elif model_type == "audio":
101
105
  kwargs.pop("trust_remote_code", None)
102
106
  return create_audio_model_instance(
107
+ subpool_addr, devices, model_uid, model_name, download_hub, **kwargs
108
+ )
109
+ elif model_type == "flexible":
110
+ kwargs.pop("trust_remote_code", None)
111
+ return create_flexible_model_instance(
103
112
  subpool_addr, devices, model_uid, model_name, **kwargs
104
113
  )
105
114
  else: