xinference 0.16.1__py3-none-any.whl → 0.16.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.

Potentially problematic release.


This version of xinference might be problematic.

Files changed (29)
  1. xinference/_version.py +3 -3
  2. xinference/constants.py +1 -0
  3. xinference/core/worker.py +3 -1
  4. xinference/model/audio/core.py +6 -2
  5. xinference/model/core.py +3 -1
  6. xinference/model/embedding/core.py +6 -2
  7. xinference/model/image/core.py +6 -2
  8. xinference/model/image/ocr/got_ocr2.py +3 -0
  9. xinference/model/llm/__init__.py +33 -0
  10. xinference/model/llm/core.py +3 -1
  11. xinference/model/llm/llm_family.py +68 -2
  12. xinference/model/llm/llm_family_openmind_hub.json +1359 -0
  13. xinference/model/rerank/core.py +9 -1
  14. xinference/model/utils.py +7 -0
  15. xinference/model/video/core.py +6 -2
  16. xinference/web/ui/build/asset-manifest.json +3 -3
  17. xinference/web/ui/build/index.html +1 -1
  18. xinference/web/ui/build/static/js/{main.b76aeeb7.js → main.2f269bb3.js} +3 -3
  19. xinference/web/ui/build/static/js/main.2f269bb3.js.map +1 -0
  20. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +1 -0
  21. {xinference-0.16.1.dist-info → xinference-0.16.2.dist-info}/METADATA +2 -2
  22. {xinference-0.16.1.dist-info → xinference-0.16.2.dist-info}/RECORD +27 -26
  23. xinference/web/ui/build/static/js/main.b76aeeb7.js.map +0 -1
  24. xinference/web/ui/node_modules/.cache/babel-loader/32ea2c04cf0bba2761b4883d2c40cc259952c94d2d6bb774e510963ca37aac0a.json +0 -1
  25. /xinference/web/ui/build/static/js/{main.b76aeeb7.js.LICENSE.txt → main.2f269bb3.js.LICENSE.txt} +0 -0
  26. {xinference-0.16.1.dist-info → xinference-0.16.2.dist-info}/LICENSE +0 -0
  27. {xinference-0.16.1.dist-info → xinference-0.16.2.dist-info}/WHEEL +0 -0
  28. {xinference-0.16.1.dist-info → xinference-0.16.2.dist-info}/entry_points.txt +0 -0
  29. {xinference-0.16.1.dist-info → xinference-0.16.2.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json

 version_json = '''
 {
- "date": "2024-10-25T12:51:06+0800",
+ "date": "2024-11-01T17:56:47+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "d4cd7b15104c16838e3c562cf2d33337e3d38897",
- "version": "0.16.1"
+ "full-revisionid": "67e97ab485b539dc7a208825bee0504acc37044e",
+ "version": "0.16.2"
 }
 ''' # END VERSION_JSON

xinference/constants.py CHANGED
@@ -39,6 +39,7 @@ def get_xinference_home() -> str:
         # if user has already set `XINFERENCE_HOME` env, change huggingface and modelscope default download path
         os.environ["HUGGINGFACE_HUB_CACHE"] = os.path.join(home_path, "huggingface")
         os.environ["MODELSCOPE_CACHE"] = os.path.join(home_path, "modelscope")
+        os.environ["XDG_CACHE_HOME"] = os.path.join(home_path, "openmind_hub")
         # In multi-tenant mode,
         # gradio's temporary files are stored in their respective home directories,
         # to prevent insufficient permissions
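The new assignment reroutes the cache of openmind_hub (which honors the XDG cache convention) so that, like Hugging Face and ModelScope downloads, it lands under `XINFERENCE_HOME`. A minimal sketch of the effect, with a hypothetical home path:

```python
# Sketch only: reproduces what get_xinference_home() now does for openmind_hub.
import os

home_path = "/data/xinference"  # stands in for the XINFERENCE_HOME value
os.environ["XDG_CACHE_HOME"] = os.path.join(home_path, "openmind_hub")
# An XDG-aware downloader such as openmind_hub will now cache under
# /data/xinference/openmind_hub instead of the default ~/.cache.
```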
xinference/core/worker.py CHANGED
@@ -785,7 +785,9 @@ class WorkerActor(xo.StatelessActor):
         peft_model_config: Optional[PeftModelConfig] = None,
         request_limits: Optional[int] = None,
         gpu_idx: Optional[Union[int, List[int]]] = None,
-        download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+        download_hub: Optional[
+            Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+        ] = None,
         model_path: Optional[str] = None,
         **kwargs,
     ):
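The widened `Literal` means `openmind_hub` can now be requested per launch. A hedged usage sketch, assuming a running server on localhost:9997 and that the RESTful client forwards `download_hub` to the worker; the model name and engine are illustrative:

```python
from xinference.client import Client

client = Client("http://localhost:9997")
model_uid = client.launch_model(
    model_name="qwen2-instruct",   # illustrative model name
    model_engine="transformers",
    model_size_in_billions=7,
    download_hub="openmind_hub",   # literal newly accepted in 0.16.2
)
```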
xinference/model/audio/core.py CHANGED
@@ -100,7 +100,9 @@ def generate_audio_description(

 def match_audio(
     model_name: str,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
 ) -> AudioModelFamilyV1:
     from ..utils import download_from_modelscope
     from . import BUILTIN_AUDIO_MODELS, MODELSCOPE_AUDIO_MODELS
@@ -152,7 +154,9 @@ def create_audio_model_instance(
     devices: List[str],
     model_uid: str,
     model_name: str,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
     model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[
xinference/model/core.py CHANGED
@@ -55,7 +55,9 @@ def create_model_instance(
     model_size_in_billions: Optional[Union[int, str]] = None,
     quantization: Optional[str] = None,
     peft_model_config: Optional[PeftModelConfig] = None,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
     model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[Any, ModelDescription]:
xinference/model/embedding/core.py CHANGED
@@ -433,7 +433,9 @@ class EmbeddingModel:

 def match_embedding(
     model_name: str,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
 ) -> EmbeddingModelSpec:
     from ..utils import download_from_modelscope
     from . import BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS
@@ -469,7 +471,9 @@ def create_embedding_model_instance(
     devices: List[str],
     model_uid: str,
     model_name: str,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
     model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[EmbeddingModel, EmbeddingModelDescription]:
xinference/model/image/core.py CHANGED
@@ -125,7 +125,9 @@ def generate_image_description(

 def match_diffusion(
     model_name: str,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
 ) -> ImageModelFamilyV1:
     from ..utils import download_from_modelscope
     from . import BUILTIN_IMAGE_MODELS, MODELSCOPE_IMAGE_MODELS
@@ -213,7 +215,9 @@ def create_image_model_instance(
     model_uid: str,
     model_name: str,
     peft_model_config: Optional[PeftModelConfig] = None,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
     model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[
xinference/model/image/ocr/got_ocr2.py CHANGED
@@ -71,6 +71,9 @@ class GotOCR2Model:
         logger.info("Got OCR 2.0 kwargs: %s", kwargs)
         if "ocr_type" not in kwargs:
             kwargs["ocr_type"] = "ocr"
+        if image.mode == "RGBA" or image.mode == "CMYK":
+            # convert to RGB
+            image = image.convert("RGB")
         assert self._model is not None
         # This chat API limits the max new tokens inside.
         return self._model.chat(self._tokenizer, image, gradio_input=True, **kwargs)
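The added branch guards against PIL images whose mode is RGBA (an extra alpha channel) or CMYK (a print color space), which a model expecting 3-channel RGB input cannot consume directly. The conversion in isolation, with a hypothetical file name:

```python
from PIL import Image

image = Image.open("scanned_page.png")  # hypothetical input
if image.mode in ("RGBA", "CMYK"):
    # flatten alpha / convert color space down to plain 3-channel RGB
    image = image.convert("RGB")
```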
xinference/model/llm/__init__.py CHANGED
@@ -32,6 +32,7 @@ from .llm_family import (
     BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES,
     BUILTIN_LLM_PROMPT_STYLE,
     BUILTIN_MODELSCOPE_LLM_FAMILIES,
+    BUILTIN_OPENMIND_HUB_LLM_FAMILIES,
     LLAMA_CLASSES,
     LLM_ENGINES,
     LMDEPLOY_CLASSES,
@@ -258,6 +259,36 @@ def _install():
         if "tools" in model_spec.model_ability:
             BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)

+    openmind_hub_json_path = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "llm_family_openmind_hub.json"
+    )
+    for json_obj in json.load(
+        codecs.open(openmind_hub_json_path, "r", encoding="utf-8")
+    ):
+        model_spec = LLMFamilyV1.parse_obj(json_obj)
+        BUILTIN_OPENMIND_HUB_LLM_FAMILIES.append(model_spec)
+
+        # register prompt style, in case that we have something missed
+        # if duplicated with huggingface json, keep it as the huggingface style
+
+        if (
+            "chat" in model_spec.model_ability
+            and isinstance(model_spec.chat_template, str)
+            and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
+        ):
+            BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
+                "chat_template": model_spec.chat_template,
+                "stop_token_ids": model_spec.stop_token_ids,
+                "stop": model_spec.stop,
+            }
+        # register model family
+        if "chat" in model_spec.model_ability:
+            BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
+        else:
+            BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
+        if "tools" in model_spec.model_ability:
+            BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
+
     csghub_json_path = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "llm_family_csghub.json"
     )
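The loop mirrors the existing Hugging Face/ModelScope registration: each JSON entry becomes an `LLMFamilyV1`, with the Hugging Face definition winning on duplicate prompt styles. For orientation, a heavily abbreviated sketch of what one entry in llm_family_openmind_hub.json carries, limited to the fields the code above and in llm_family.py reads; real entries include more (context length, languages, quantizations, and so on), and every value here is hypothetical:

```python
entry = {
    "model_name": "example-chat-model",
    "model_ability": ["chat"],
    "chat_template": "{% for m in messages %}{{ m.content }}{% endfor %}",
    "stop_token_ids": [2],
    "stop": ["</s>"],
    "model_specs": [
        {
            "model_format": "pytorch",   # or "mindspore", per llm_family.py
            "model_size_in_billions": 7,
            "model_id": "Example/example-chat-model",
            "model_hub": "openmind_hub",
        }
    ],
}
# _install() would run LLMFamilyV1.parse_obj(entry) and register it as above.
```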
@@ -288,6 +319,7 @@ def _install():
     for llm_specs in [
         BUILTIN_LLM_FAMILIES,
         BUILTIN_MODELSCOPE_LLM_FAMILIES,
+        BUILTIN_OPENMIND_HUB_LLM_FAMILIES,
         BUILTIN_CSGHUB_LLM_FAMILIES,
     ]:
         for llm_spec in llm_specs:
@@ -298,6 +330,7 @@ def _install():
     for families in [
         BUILTIN_LLM_FAMILIES,
         BUILTIN_MODELSCOPE_LLM_FAMILIES,
+        BUILTIN_OPENMIND_HUB_LLM_FAMILIES,
         BUILTIN_CSGHUB_LLM_FAMILIES,
     ]:
         for family in families:
xinference/model/llm/core.py CHANGED
@@ -193,7 +193,9 @@ def create_llm_model_instance(
     model_size_in_billions: Optional[Union[int, str]] = None,
     quantization: Optional[str] = None,
     peft_model_config: Optional[PeftModelConfig] = None,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
     model_path: Optional[str] = None,
    **kwargs,
 ) -> Tuple[LLM, LLMDescription]:
xinference/model/llm/llm_family.py CHANGED
@@ -41,6 +41,7 @@ from ..utils import (
     create_symlink,
     download_from_csghub,
     download_from_modelscope,
+    download_from_openmind_hub,
     is_valid_model_uri,
     parse_uri,
     retry_download,
@@ -239,6 +240,7 @@ LLAMA_CLASSES: List[Type[LLM]] = []

 BUILTIN_LLM_FAMILIES: List["LLMFamilyV1"] = []
 BUILTIN_MODELSCOPE_LLM_FAMILIES: List["LLMFamilyV1"] = []
+BUILTIN_OPENMIND_HUB_LLM_FAMILIES: List["LLMFamilyV1"] = []
 BUILTIN_CSGHUB_LLM_FAMILIES: List["LLMFamilyV1"] = []

 SGLANG_CLASSES: List[Type[LLM]] = []
@@ -301,6 +303,9 @@ def cache(
     elif llm_spec.model_hub == "modelscope":
         logger.info(f"Caching from Modelscope: {llm_spec.model_id}")
         return cache_from_modelscope(llm_family, llm_spec, quantization)
+    elif llm_spec.model_hub == "openmind_hub":
+        logger.info(f"Caching from openmind_hub: {llm_spec.model_id}")
+        return cache_from_openmind_hub(llm_family, llm_spec, quantization)
     elif llm_spec.model_hub == "csghub":
         logger.info(f"Caching from CSGHub: {llm_spec.model_id}")
         return cache_from_csghub(llm_family, llm_spec, quantization)
@@ -474,7 +479,7 @@ def _skip_download(
     model_revision: Optional[str],
     quantization: Optional[str] = None,
 ) -> bool:
-    if model_format == "pytorch":
+    if model_format in ["pytorch", "mindspore"]:
         model_hub_to_meta_path = {
             "huggingface": _get_meta_path(
                 cache_dir, model_format, "huggingface", quantization
@@ -482,6 +487,9 @@ def _skip_download(
             "modelscope": _get_meta_path(
                 cache_dir, model_format, "modelscope", quantization
             ),
+            "openmind_hub": _get_meta_path(
+                cache_dir, model_format, "openmind_hub", quantization
+            ),
             "csghub": _get_meta_path(cache_dir, model_format, "csghub", quantization),
         }
         if valid_model_revision(model_hub_to_meta_path[model_hub], model_revision):
@@ -702,6 +710,50 @@ def cache_from_modelscope(
     return cache_dir


+def cache_from_openmind_hub(
+    llm_family: LLMFamilyV1,
+    llm_spec: "LLMSpecV1",
+    quantization: Optional[str] = None,
+) -> str:
+    """
+    Cache model from openmind_hub. Return the cache directory.
+    """
+    from openmind_hub import snapshot_download
+
+    cache_dir = _get_cache_dir(llm_family, llm_spec)
+    if _skip_download(
+        cache_dir,
+        llm_spec.model_format,
+        llm_spec.model_hub,
+        llm_spec.model_revision,
+        quantization,
+    ):
+        return cache_dir
+
+    if llm_spec.model_format in ["pytorch", "mindspore"]:
+        download_dir = retry_download(
+            snapshot_download,
+            llm_family.model_name,
+            {
+                "model_size": llm_spec.model_size_in_billions,
+                "model_format": llm_spec.model_format,
+            },
+            llm_spec.model_id,
+            revision=llm_spec.model_revision,
+        )
+        create_symlink(download_dir, cache_dir)
+
+    else:
+        raise ValueError(f"Unsupported format: {llm_spec.model_format}")
+
+    meta_path = _get_meta_path(
+        cache_dir, llm_spec.model_format, llm_spec.model_hub, quantization
+    )
+    _generate_meta_file(meta_path, llm_family, llm_spec, quantization)
+
+    return cache_dir
+
+
 def cache_from_huggingface(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
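Stripped of xinference's retry and symlink bookkeeping, the download step reduces to openmind_hub's `snapshot_download`, whose interface mirrors huggingface_hub's (the call pattern is confirmed by the `retry_download` arguments above). A minimal sketch with an illustrative repo id:

```python
from openmind_hub import snapshot_download

# Downloads the full repository snapshot and returns the local directory;
# xinference then symlinks this directory into its own cache layout.
download_dir = snapshot_download(
    "Example/example-chat-model",  # stands in for llm_spec.model_id
    revision=None,                 # stands in for llm_spec.model_revision
)
print(download_dir)
```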
@@ -893,7 +945,9 @@ def match_llm(
     model_format: Optional[str] = None,
     model_size_in_billions: Optional[Union[int, str]] = None,
     quantization: Optional[str] = None,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
 ) -> Optional[Tuple[LLMFamilyV1, LLMSpecV1, str]]:
     """
     Find an LLM family, spec, and quantization that satisfy given criteria.
@@ -924,6 +978,12 @@ def match_llm(
             + BUILTIN_LLM_FAMILIES
             + user_defined_llm_families
         )
+    elif download_hub == "openmind_hub":
+        all_families = (
+            BUILTIN_OPENMIND_HUB_LLM_FAMILIES
+            + BUILTIN_LLM_FAMILIES
+            + user_defined_llm_families
+        )
     elif download_hub == "csghub":
         all_families = (
             BUILTIN_CSGHUB_LLM_FAMILIES
@@ -938,6 +998,12 @@ def match_llm(
             + BUILTIN_LLM_FAMILIES
             + user_defined_llm_families
         )
+    elif download_from_openmind_hub():
+        all_families = (
+            BUILTIN_OPENMIND_HUB_LLM_FAMILIES
+            + BUILTIN_LLM_FAMILIES
+            + user_defined_llm_families
+        )
     elif download_from_csghub():
         all_families = (
             BUILTIN_CSGHUB_LLM_FAMILIES
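When `download_hub` is not passed explicitly, `match_llm` falls back to environment-driven helpers; `download_from_openmind_hub` comes from the xinference/model/utils.py change in this release (its diff is not shown here). Either way, the selected hub's families are searched first, then the built-in Hugging Face families, then user-defined ones. A hedged sketch of global selection, assuming `download_from_openmind_hub()` keys off the `XINFERENCE_MODEL_SRC` environment variable the same way `download_from_modelscope()` does; verify against utils.py before relying on it:

```python
import os

# Assumption: "openmind_hub" is now an accepted value alongside
# "huggingface" and "modelscope".
os.environ["XINFERENCE_MODEL_SRC"] = "openmind_hub"
# From here on, match_llm() with no download_hub argument searches
# BUILTIN_OPENMIND_HUB_LLM_FAMILIES -> BUILTIN_LLM_FAMILIES -> user-defined.
```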