xinference 1.5.0.post2__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.
Files changed (89)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +10 -3
  3. xinference/constants.py +5 -1
  4. xinference/core/supervisor.py +1 -1
  5. xinference/core/utils.py +1 -1
  6. xinference/core/worker.py +2 -2
  7. xinference/deploy/cmdline.py +17 -0
  8. xinference/model/audio/core.py +1 -1
  9. xinference/model/audio/model_spec.json +43 -43
  10. xinference/model/audio/model_spec_modelscope.json +13 -13
  11. xinference/model/llm/__init__.py +3 -5
  12. xinference/model/llm/core.py +14 -0
  13. xinference/model/llm/llama_cpp/core.py +15 -4
  14. xinference/model/llm/llm_family.json +3251 -4304
  15. xinference/model/llm/llm_family.py +62 -6
  16. xinference/model/llm/llm_family_csghub.json +0 -32
  17. xinference/model/llm/llm_family_modelscope.json +1161 -1789
  18. xinference/model/llm/llm_family_openmind_hub.json +19 -325
  19. xinference/model/llm/lmdeploy/core.py +7 -2
  20. xinference/model/llm/mlx/core.py +19 -6
  21. xinference/model/llm/sglang/core.py +25 -10
  22. xinference/model/llm/transformers/chatglm.py +8 -1
  23. xinference/model/llm/transformers/cogagent.py +10 -12
  24. xinference/model/llm/transformers/cogvlm2.py +6 -3
  25. xinference/model/llm/transformers/cogvlm2_video.py +3 -6
  26. xinference/model/llm/transformers/core.py +50 -58
  27. xinference/model/llm/transformers/deepseek_v2.py +4 -2
  28. xinference/model/llm/transformers/deepseek_vl.py +10 -4
  29. xinference/model/llm/transformers/deepseek_vl2.py +9 -4
  30. xinference/model/llm/transformers/gemma3.py +4 -5
  31. xinference/model/llm/transformers/glm4v.py +2 -20
  32. xinference/model/llm/transformers/glm_edge_v.py +3 -20
  33. xinference/model/llm/transformers/intern_vl.py +3 -6
  34. xinference/model/llm/transformers/internlm2.py +1 -1
  35. xinference/model/llm/transformers/minicpmv25.py +4 -2
  36. xinference/model/llm/transformers/minicpmv26.py +5 -3
  37. xinference/model/llm/transformers/omnilmm.py +1 -1
  38. xinference/model/llm/transformers/opt.py +1 -1
  39. xinference/model/llm/transformers/ovis2.py +302 -0
  40. xinference/model/llm/transformers/qwen-omni.py +2 -1
  41. xinference/model/llm/transformers/qwen2_audio.py +3 -1
  42. xinference/model/llm/transformers/qwen2_vl.py +5 -1
  43. xinference/model/llm/transformers/qwen_vl.py +5 -2
  44. xinference/model/llm/utils.py +28 -0
  45. xinference/model/llm/vllm/core.py +73 -9
  46. xinference/model/llm/vllm/distributed_executor.py +8 -7
  47. xinference/model/llm/vllm/xavier/allocator.py +1 -1
  48. xinference/model/llm/vllm/xavier/block_manager.py +1 -1
  49. xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
  50. xinference/model/llm/vllm/xavier/executor.py +1 -1
  51. xinference/model/llm/vllm/xavier/test/test_xavier.py +1 -1
  52. xinference/model/video/diffusers.py +30 -3
  53. xinference/model/video/model_spec.json +46 -0
  54. xinference/model/video/model_spec_modelscope.json +48 -0
  55. xinference/types.py +2 -0
  56. xinference/web/ui/build/asset-manifest.json +6 -6
  57. xinference/web/ui/build/index.html +1 -1
  58. xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
  59. xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
  60. xinference/web/ui/build/static/js/main.91e77b5c.js +3 -0
  61. xinference/web/ui/build/static/js/main.91e77b5c.js.map +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/5e6edb0fb87e3798f142e9abf8dd2dc46bab33a60d31dff525797c0c99887097.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/6087820be1bd5c02c42dff797e7df365448ef35ab26dd5d6bd33e967e05cbfd4.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
  67. xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
  68. xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
  69. xinference/web/ui/src/locales/en.json +1 -0
  70. xinference/web/ui/src/locales/zh.json +1 -0
  71. {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/METADATA +1 -1
  72. {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/RECORD +77 -78
  73. {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/WHEEL +1 -1
  74. xinference/model/llm/transformers/compression.py +0 -258
  75. xinference/model/llm/transformers/yi_vl.py +0 -239
  76. xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
  77. xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
  78. xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
  79. xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
  80. xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
  81. xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
  82. xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
  84. xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
  85. xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
  86. /xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.91e77b5c.js.LICENSE.txt} +0 -0
  87. {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/entry_points.txt +0 -0
  88. {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/licenses/LICENSE +0 -0
  89. {xinference-1.5.0.post2.dist-info → xinference-1.5.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.py +62 -6

@@ -72,8 +72,10 @@ class LlamaCppLLMSpecV1(BaseModel):
     model_hub: str = "huggingface"
     model_uri: Optional[str]
     model_revision: Optional[str]
+    # for MOE model, illustrates the activated model size
+    activated_size_in_billions: Optional[Union[str, int]]
 
-    @validator("model_size_in_billions", pre=False)
+    @validator("model_size_in_billions", "activated_size_in_billions", pre=False)
     def validate_model_size_with_radix(cls, v: object) -> object:
         if isinstance(v, str):
             if (
@@ -94,8 +96,10 @@ class PytorchLLMSpecV1(BaseModel):
     model_hub: str = "huggingface"
     model_uri: Optional[str]
     model_revision: Optional[str]
+    # for MOE model, illustrates the activated model size
+    activated_size_in_billions: Optional[Union[str, int]]
 
-    @validator("model_size_in_billions", pre=False)
+    @validator("model_size_in_billions", "activated_size_in_billions", pre=False)
     def validate_model_size_with_radix(cls, v: object) -> object:
         if isinstance(v, str):
             if (
@@ -116,8 +120,10 @@ class MLXLLMSpecV1(BaseModel):
     model_hub: str = "huggingface"
     model_uri: Optional[str]
     model_revision: Optional[str]
+    # for MOE model, illustrates the activated model size
+    activated_size_in_billions: Optional[Union[str, int]]
 
-    @validator("model_size_in_billions", pre=False)
+    @validator("model_size_in_billions", "activated_size_in_billions", pre=False)
     def validate_model_size_with_radix(cls, v: object) -> object:
         if isinstance(v, str):
             if (
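All three spec classes (llama.cpp, PyTorch, MLX) gain the same optional activated_size_in_billions field, and the existing radix validator is extended to cover it. The hunks only show the first lines of the validator body, so the following is a minimal pydantic-v1-style sketch of the pattern, not the exact upstream code; it assumes the usual xinference convention that a string such as "1_8" spells 1.8 billion with an underscore as the radix point, while plain numeric strings normalize to int.

    from typing import Optional, Union

    from pydantic import BaseModel, validator

    class SpecSketch(BaseModel):
        model_size_in_billions: Optional[Union[str, int]]
        # new in 1.5.1: activated parameter count for MoE models
        activated_size_in_billions: Optional[Union[str, int]]

        # a single validator now guards both fields
        @validator("model_size_in_billions", "activated_size_in_billions", pre=False)
        def validate_model_size_with_radix(cls, v: object) -> object:
            if isinstance(v, str):
                # "1_8" marks 1.8B and stays a string; "3" normalizes to 3
                if "_" not in v:
                    return int(v)
            return v

    s = SpecSketch(model_size_in_billions="1_8", activated_size_in_billions="3")
    print(s.model_size_in_billions, s.activated_size_in_billions)  # 1_8 3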
@@ -370,6 +376,53 @@ def cache_from_uri(
         raise ValueError(f"Unsupported URL scheme: {src_scheme}")
 
 
+def cache_model_tokenizer_and_config(
+    llm_family: LLMFamilyV1,
+    llm_spec: "LLMSpecV1",
+) -> str:
+    """
+    Download model config.json and tokenizers only
+    """
+    cache_dir = _get_cache_dir_for_model_mem(llm_family, llm_spec, "tokenizer_config")
+    os.makedirs(cache_dir, exist_ok=True)
+    if llm_spec.model_hub == "huggingface":
+        from huggingface_hub import snapshot_download
+
+        download_dir = retry_download(
+            snapshot_download,
+            llm_family.model_name,
+            {
+                "model_size": llm_spec.model_size_in_billions,
+                "model_format": llm_spec.model_format,
+            },
+            llm_spec.model_id,
+            revision=llm_spec.model_revision,
+            allow_patterns=["tokenizer*", "config.json"],
+            local_dir=cache_dir,
+        )
+    elif llm_spec.model_hub == "modelscope":
+        from modelscope.hub.snapshot_download import snapshot_download
+
+        download_dir = retry_download(
+            snapshot_download,
+            llm_family.model_name,
+            {
+                "model_size": llm_spec.model_size_in_billions,
+                "model_format": llm_spec.model_format,
+            },
+            llm_spec.model_id,
+            revision=llm_spec.model_revision,
+            allow_patterns=["tokenizer*", "config.json"],
+            local_dir=cache_dir,
+        )
+    else:
+        raise NotImplementedError(
+            f"Does not support download config.json and "
+            f"tokenizer related files via {llm_spec.model_hub}"
+        )
+    return download_dir
+
+
 def cache_model_config(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
@@ -377,7 +430,7 @@ def cache_model_config(
     """Download model config.json into cache_dir,
     returns local filepath
     """
-    cache_dir = _get_cache_dir_for_model_mem(llm_family, llm_spec)
+    cache_dir = _get_cache_dir_for_model_mem(llm_family, llm_spec, "model_mem")
     config_file = os.path.join(cache_dir, "config.json")
     if not os.path.islink(config_file) and not os.path.exists(config_file):
         os.makedirs(cache_dir, exist_ok=True)
@@ -400,10 +453,13 @@
 def _get_cache_dir_for_model_mem(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
+    category: str,
     create_if_not_exist=True,
 ):
     """
-    For cal-model-mem only. (might called from supervisor / cli)
+    Get file dir for special usage, like `cal-model-mem` and download partial files for
+
+    e.g. for cal-model-mem, (might called from supervisor / cli)
     Temporary use separate dir from worker's cache_dir, due to issue of different style of symlink.
     """
     quant_suffix = ""
@@ -418,7 +474,7 @@
     if quant_suffix:
         cache_dir_name += f"-{quant_suffix}"
     cache_dir = os.path.realpath(
-        os.path.join(XINFERENCE_CACHE_DIR, "model_mem", cache_dir_name)
+        os.path.join(XINFERENCE_CACHE_DIR, category, cache_dir_name)
     )
     if create_if_not_exist and not os.path.exists(cache_dir):
         os.makedirs(cache_dir, exist_ok=True)
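Threading the new category argument through _get_cache_dir_for_model_mem is what lets cache_model_config keep its old "model_mem" location while cache_model_tokenizer_and_config writes into a separate "tokenizer_config" tree. A minimal sketch of the resulting layout, assuming the default cache root of ~/.xinference/cache (the exact cache_dir_name is derived from family, format, and size, and is illustrative here):

    import os

    XINFERENCE_CACHE_DIR = os.path.expanduser("~/.xinference/cache")  # assumed default

    def cache_dir_for(category: str, cache_dir_name: str) -> str:
        # before 1.5.1 the "model_mem" path segment was hard-coded
        return os.path.realpath(
            os.path.join(XINFERENCE_CACHE_DIR, category, cache_dir_name)
        )

    print(cache_dir_for("model_mem", "qwen2-instruct-pytorch-0_5b"))         # cal-model-mem
    print(cache_dir_for("tokenizer_config", "qwen2-instruct-pytorch-0_5b"))  # new helper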
xinference/model/llm/llm_family_csghub.json +0 -32

@@ -17,8 +17,6 @@
                 "model_format": "pytorch",
                 "model_size_in_billions": "0_5",
                 "quantizations": [
-                    "4-bit",
-                    "8-bit",
                     "none"
                 ],
                 "model_id": "Qwen/Qwen2-0.5B-Instruct",
@@ -54,35 +52,5 @@
             "<|im_start|>",
             "<|im_end|>"
         ]
-    },
-    {
-        "version": 1,
-        "context_length": 32768,
-        "model_name": "csg-wukong-chat-v0.1",
-        "model_lang": [
-            "en"
-        ],
-        "model_ability": [
-            "chat"
-        ],
-        "model_description": "csg-wukong-1B is a 1 billion-parameter small language model(SLM) pretrained on 1T tokens.",
-        "model_specs": [
-            {
-                "model_format": "pytorch",
-                "model_size_in_billions": 1,
-                "quantizations": [
-                    "none"
-                ],
-                "model_id": "OpenCSG/csg-wukong-1B-chat-v0.1",
-                "model_hub": "csghub"
-            }
-        ],
-        "chat_template": "{% for item in messages %}{% if loop.first and item['role'] == 'system' %}{{ item['content'] + '\n' }}{% elif loop.first %}{{ '<|system|>\nYou are a creative super artificial intelligence assistant, possessing all the knowledge of humankind. Your name is csg-wukong, developed by OpenCSG. You need to understand and infer the true intentions of users based on the topics discussed in the chat history, and respond to user questions correctly as required. You enjoy responding to users with accurate and insightful answers. Please pay attention to the appropriate style and format when replying, try to avoid repetitive words and sentences, and keep your responses as concise and profound as possible. You carefully consider the context of the discussion when replying to users. When the user says \"continue,\" please proceed with the continuation of the previous assistant\\'s response.</s>\n' }}{% endif %}{% if item['role'] == 'user' %}{{ '<|user|>\n' + item['content'] + '</s>\n' }}{% elif item['role'] == 'assistant' %}{{ '<|assistant|>\n' + item['content'] + '</s>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% endif %}",
-        "stop_token_ids": [
-            2
-        ],
-        "stop": [
-            "</s>"
-        ]
     }
 ]
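After these two removals, llm_family_csghub.json retains only its Qwen2 instruct entry, whose sole remaining quantization is "none". A quick sanity check of the trimmed registry file (the path assumes a source checkout):

    import json

    with open("xinference/model/llm/llm_family_csghub.json") as f:
        families = json.load(f)

    # the csg-wukong-chat-v0.1 family was deleted outright ...
    assert "csg-wukong-chat-v0.1" not in [fam["model_name"] for fam in families]

    # ... and the surviving spec no longer advertises 4-bit / 8-bit
    for fam in families:
        for spec in fam["model_specs"]:
            assert spec["quantizations"] == ["none"], spec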