xinference 0.16.1__py3-none-any.whl → 0.16.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (39)
  1. xinference/_version.py +3 -3
  2. xinference/conftest.py +0 -8
  3. xinference/constants.py +2 -0
  4. xinference/core/model.py +34 -2
  5. xinference/core/supervisor.py +5 -5
  6. xinference/core/utils.py +9 -10
  7. xinference/core/worker.py +8 -5
  8. xinference/deploy/cmdline.py +5 -0
  9. xinference/deploy/utils.py +7 -4
  10. xinference/model/audio/core.py +6 -2
  11. xinference/model/audio/model_spec.json +1 -1
  12. xinference/model/core.py +3 -1
  13. xinference/model/embedding/core.py +6 -2
  14. xinference/model/image/core.py +6 -2
  15. xinference/model/image/ocr/got_ocr2.py +3 -0
  16. xinference/model/llm/__init__.py +33 -0
  17. xinference/model/llm/core.py +4 -4
  18. xinference/model/llm/llm_family.json +87 -0
  19. xinference/model/llm/llm_family.py +68 -2
  20. xinference/model/llm/llm_family_modelscope.json +91 -0
  21. xinference/model/llm/llm_family_openmind_hub.json +1359 -0
  22. xinference/model/llm/vllm/core.py +2 -1
  23. xinference/model/rerank/core.py +9 -1
  24. xinference/model/utils.py +7 -0
  25. xinference/model/video/core.py +6 -2
  26. xinference/web/ui/build/asset-manifest.json +3 -3
  27. xinference/web/ui/build/index.html +1 -1
  28. xinference/web/ui/build/static/js/{main.b76aeeb7.js → main.2f269bb3.js} +3 -3
  29. xinference/web/ui/build/static/js/main.2f269bb3.js.map +1 -0
  30. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +1 -0
  31. {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/METADATA +5 -4
  32. {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/RECORD +37 -36
  33. xinference/web/ui/build/static/js/main.b76aeeb7.js.map +0 -1
  34. xinference/web/ui/node_modules/.cache/babel-loader/32ea2c04cf0bba2761b4883d2c40cc259952c94d2d6bb774e510963ca37aac0a.json +0 -1
  35. /xinference/web/ui/build/static/js/{main.b76aeeb7.js.LICENSE.txt → main.2f269bb3.js.LICENSE.txt} +0 -0
  36. {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/LICENSE +0 -0
  37. {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/WHEEL +0 -0
  38. {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/entry_points.txt +0 -0
  39. {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/top_level.txt +0 -0
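The headline change in 0.16.1 → 0.16.3 is a fourth model download source, openmind_hub, added alongside huggingface, modelscope, and csghub. Judging by the add/delete counts, the hunks reproduced below come from xinference/model/llm/llm_family.py (+68 −2), followed by the new llama-3.2-vision entries in xinference/model/llm/llm_family_modelscope.json (+91 −0). Below is a hedged sketch of selecting the new hub from the client side; whether launch_model forwards a download_hub keyword is an assumption based on the widened Literal in match_llm() and the cmdline.py hunk (+5, not shown):

```python
# Hedged sketch, not verified against 0.16.3: pinning the new openmind_hub
# source when launching a model. The "openmind_hub" value is taken from the
# match_llm() Literal below; the model choice is hypothetical.
from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")
model_uid = client.launch_model(
    model_name="qwen2-instruct",   # hypothetical choice of model
    model_engine="transformers",
    model_format="pytorch",
    download_hub="openmind_hub",   # new literal added in this release
)
print(model_uid)
```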
@@ -41,6 +41,7 @@ from ..utils import (
     create_symlink,
     download_from_csghub,
     download_from_modelscope,
+    download_from_openmind_hub,
     is_valid_model_uri,
     parse_uri,
     retry_download,
@@ -239,6 +240,7 @@ LLAMA_CLASSES: List[Type[LLM]] = []
 
 BUILTIN_LLM_FAMILIES: List["LLMFamilyV1"] = []
 BUILTIN_MODELSCOPE_LLM_FAMILIES: List["LLMFamilyV1"] = []
+BUILTIN_OPENMIND_HUB_LLM_FAMILIES: List["LLMFamilyV1"] = []
 BUILTIN_CSGHUB_LLM_FAMILIES: List["LLMFamilyV1"] = []
 
 SGLANG_CLASSES: List[Type[LLM]] = []
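The new BUILTIN_OPENMIND_HUB_LLM_FAMILIES registry is presumably filled from the new llm_family_openmind_hub.json (+1359 lines in this release) the same way the existing registries are. A minimal loader sketch, assuming the modelscope pattern in xinference/model/llm/__init__.py carries over; the real loader is part of the +33 __init__.py hunk not shown here:

```python
# Hypothetical loader sketch, mirroring how BUILTIN_MODELSCOPE_LLM_FAMILIES
# is populated at package init; names are taken from the hunks shown above.
import codecs
import json
import os

json_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "llm_family_openmind_hub.json"
)
for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
    BUILTIN_OPENMIND_HUB_LLM_FAMILIES.append(LLMFamilyV1.parse_obj(json_obj))
```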
@@ -301,6 +303,9 @@ def cache(
     elif llm_spec.model_hub == "modelscope":
         logger.info(f"Caching from Modelscope: {llm_spec.model_id}")
         return cache_from_modelscope(llm_family, llm_spec, quantization)
+    elif llm_spec.model_hub == "openmind_hub":
+        logger.info(f"Caching from openmind_hub: {llm_spec.model_id}")
+        return cache_from_openmind_hub(llm_family, llm_spec, quantization)
     elif llm_spec.model_hub == "csghub":
         logger.info(f"Caching from CSGHub: {llm_spec.model_id}")
         return cache_from_csghub(llm_family, llm_spec, quantization)
@@ -474,7 +479,7 @@ _skip_download(
     model_revision: Optional[str],
     quantization: Optional[str] = None,
 ) -> bool:
-    if model_format == "pytorch":
+    if model_format in ["pytorch", "mindspore"]:
         model_hub_to_meta_path = {
             "huggingface": _get_meta_path(
                 cache_dir, model_format, "huggingface", quantization
@@ -482,6 +487,9 @@
             "modelscope": _get_meta_path(
                 cache_dir, model_format, "modelscope", quantization
             ),
+            "openmind_hub": _get_meta_path(
+                cache_dir, model_format, "openmind_hub", quantization
+            ),
             "csghub": _get_meta_path(cache_dir, model_format, "csghub", quantization),
         }
         if valid_model_revision(model_hub_to_meta_path[model_hub], model_revision):
@@ -702,6 +710,50 @@ cache_from_modelscope(
     return cache_dir
 
 
+def cache_from_openmind_hub(
+    llm_family: LLMFamilyV1,
+    llm_spec: "LLMSpecV1",
+    quantization: Optional[str] = None,
+) -> str:
+    """
+    Cache model from openmind_hub. Return the cache directory.
+    """
+    from openmind_hub import snapshot_download
+
+    cache_dir = _get_cache_dir(llm_family, llm_spec)
+    if _skip_download(
+        cache_dir,
+        llm_spec.model_format,
+        llm_spec.model_hub,
+        llm_spec.model_revision,
+        quantization,
+    ):
+        return cache_dir
+
+    if llm_spec.model_format in ["pytorch", "mindspore"]:
+        download_dir = retry_download(
+            snapshot_download,
+            llm_family.model_name,
+            {
+                "model_size": llm_spec.model_size_in_billions,
+                "model_format": llm_spec.model_format,
+            },
+            llm_spec.model_id,
+            revision=llm_spec.model_revision,
+        )
+        create_symlink(download_dir, cache_dir)
+
+    else:
+        raise ValueError(f"Unsupported format: {llm_spec.model_format}")
+
+    meta_path = _get_meta_path(
+        cache_dir, llm_spec.model_format, llm_spec.model_hub, quantization
+    )
+    _generate_meta_file(meta_path, llm_family, llm_spec, quantization)
+
+    return cache_dir
+
+
 def cache_from_huggingface(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
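retry_download(f, model_name, model_info, *args, **kwargs) appears to use model_name and model_info only for error reporting and forwards the rest to f, so stripped of the retry wrapper the download step reduces to a single call. A minimal sketch with a hypothetical repo id:

```python
# What cache_from_openmind_hub() effectively runs per spec. The repo id is
# hypothetical, and revision=None falling back to the hub's default revision
# is an assumption carried over from the huggingface_hub-style interface.
from openmind_hub import snapshot_download

local_dir = snapshot_download(
    "SomeOrg/some-model",  # hypothetical openmind_hub model_id
    revision=None,
)
print(local_dir)  # the snapshot path that create_symlink() mirrors into cache_dir
```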
@@ -893,7 +945,9 @@ match_llm(
     model_format: Optional[str] = None,
     model_size_in_billions: Optional[Union[int, str]] = None,
     quantization: Optional[str] = None,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
 ) -> Optional[Tuple[LLMFamilyV1, LLMSpecV1, str]]:
     """
     Find an LLM family, spec, and quantization that satisfy given criteria.
@@ -924,6 +978,12 @@
             + BUILTIN_LLM_FAMILIES
             + user_defined_llm_families
         )
+    elif download_hub == "openmind_hub":
+        all_families = (
+            BUILTIN_OPENMIND_HUB_LLM_FAMILIES
+            + BUILTIN_LLM_FAMILIES
+            + user_defined_llm_families
+        )
     elif download_hub == "csghub":
         all_families = (
             BUILTIN_CSGHUB_LLM_FAMILIES
@@ -938,6 +998,12 @@
             + BUILTIN_LLM_FAMILIES
             + user_defined_llm_families
         )
+    elif download_from_openmind_hub():
+        all_families = (
+            BUILTIN_OPENMIND_HUB_LLM_FAMILIES
+            + BUILTIN_LLM_FAMILIES
+            + user_defined_llm_families
+        )
     elif download_from_csghub():
         all_families = (
             BUILTIN_CSGHUB_LLM_FAMILIES
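Both new branches put the hub-specific registry first and keep BUILTIN_LLM_FAMILIES plus user-defined families as fallbacks, so a model absent from openmind_hub still resolves. A sketch of exercising the widened literal directly; illustrative only, since match_llm is an internal helper and the builtin registries are populated during server startup:

```python
# Assumes the registries have already been loaded (xinference does this at
# startup); the model name is taken from the JSON entries added below.
from xinference.model.llm.llm_family import match_llm

matched = match_llm("llama-3.2-vision-instruct", download_hub="openmind_hub")
if matched is not None:
    family, spec, quantization = matched
    # Because of the fallback ordering, spec.model_hub may still be
    # "huggingface" when no openmind_hub spec exists for this model.
    print(spec.model_hub, spec.model_id, quantization)
```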
@@ -363,6 +363,97 @@
             "<|eom_id|>"
         ]
     },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "llama-3.2-vision-instruct",
+        "model_lang": [
+            "en",
+            "de",
+            "fr",
+            "it",
+            "pt",
+            "hi",
+            "es",
+            "th"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 11,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-11B-Vision-Instruct",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 90,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-90B-Vision-Instruct",
+                "model_hub": "modelscope"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{ '<|eot_id|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+        "stop_token_ids": [
+            128001,
+            128008,
+            128009
+        ],
+        "stop": [
+            "<|end_of_text|>",
+            "<|eot_id|>",
+            "<|eom_id|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "llama-3.2-vision",
+        "model_lang": [
+            "en",
+            "de",
+            "fr",
+            "it",
+            "pt",
+            "hi",
+            "es",
+            "th"
+        ],
+        "model_ability": [
+            "generate",
+            "vision"
+        ],
+        "model_description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 11,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-11B-Vision",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 90,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "LLM-Research/Llama-3.2-90B-Vision",
+                "model_hub": "modelscope"
+            }
+        ]
+    },
     {
         "version": 1,
         "context_length": 2048,