xinference 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +4 -7
- xinference/client/handlers.py +3 -0
- xinference/client/restful/restful_client.py +9 -1
- xinference/core/model.py +19 -0
- xinference/core/resource.py +7 -1
- xinference/core/scheduler.py +4 -7
- xinference/core/status_guard.py +1 -0
- xinference/core/supervisor.py +228 -19
- xinference/core/utils.py +1 -29
- xinference/core/worker.py +28 -2
- xinference/deploy/cmdline.py +33 -3
- xinference/deploy/local.py +2 -1
- xinference/deploy/test/test_cmdline.py +32 -0
- xinference/device_utils.py +43 -1
- xinference/model/audio/core.py +5 -0
- xinference/model/audio/kokoro.py +122 -0
- xinference/model/audio/model_spec.json +8 -0
- xinference/model/audio/model_spec_modelscope.json +9 -0
- xinference/model/image/stable_diffusion/core.py +15 -6
- xinference/model/llm/llama_cpp/core.py +21 -14
- xinference/model/llm/llm_family.json +866 -46
- xinference/model/llm/llm_family.py +7 -2
- xinference/model/llm/llm_family_modelscope.json +873 -16
- xinference/model/llm/mlx/core.py +11 -3
- xinference/model/llm/reasoning_parsers/__init__.py +13 -0
- xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py +98 -0
- xinference/model/llm/reasoning_parsers/deepseek_r1_reasoning_parser.py +140 -0
- xinference/model/llm/sglang/core.py +99 -11
- xinference/model/llm/transformers/core.py +9 -1
- xinference/model/llm/transformers/intern_vl.py +23 -14
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -3
- xinference/model/llm/transformers/utils.py +22 -11
- xinference/model/llm/utils.py +164 -20
- xinference/model/llm/vllm/core.py +36 -4
- xinference/model/llm/vllm/xavier/executor.py +2 -2
- xinference/model/llm/vllm/xavier/scheduler.py +3 -3
- xinference/thirdparty/internvl/conversation.py +26 -17
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.f8177338.css +2 -0
- xinference/web/ui/build/static/css/main.f8177338.css.map +1 -0
- xinference/web/ui/build/static/js/main.ad42919c.js +3 -0
- xinference/web/ui/build/static/js/main.ad42919c.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/074a42304bbbaa79e1bfc3b28502457a390df55708de9006f4cc8e35c60aea87.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0acb065326560592b10888234242f94f67efe28458b90f273d4d4fba9daa0cd2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/279ace390216236a82b3d8995c78eca4d637ac9a523e9f521a2d9c76607a43d7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/630a7bd592596cc6e291fc32238ce7c08238038a64ed8ccee0eb0c13c9902910.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6cb9f6c62ab4042f0b11c5d75e51187188e9d6f5f08b1d63e796e051bafdb457.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/914c33e91c1012e3bcd3e96f3a25884cbef148290632d0266dab972b8cc1e95f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b7939cd3a48adf12fccfdd0803019b5cc235ff7de3a297dae70ce635e0eea13e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/fecf076bcd198a458c2a6ab0e85e40dc1c99994c353164e79c469be162cb74c9.json +1 -0
- xinference/web/ui/src/locales/en.json +14 -1
- xinference/web/ui/src/locales/zh.json +14 -1
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/METADATA +18 -17
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/RECORD +67 -60
- xinference/web/ui/build/static/css/main.51a587ff.css +0 -2
- xinference/web/ui/build/static/css/main.51a587ff.css.map +0 -1
- xinference/web/ui/build/static/js/main.b0936c54.js +0 -3
- xinference/web/ui/build/static/js/main.b0936c54.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0c2fb5375667931c4a331c99e0d87dc145e8f327cea3f44d6e56f54c7c1d4020.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a7f1a71f6580dfe810c685a9c1d68e318f71e1fa258fbe50b87a6ac37cc0a598.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +0 -1
- /xinference/web/ui/build/static/js/{main.b0936c54.js.LICENSE.txt → main.ad42919c.js.LICENSE.txt} +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/LICENSE +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/WHEEL +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.2.1.dist-info → xinference-1.3.0.dist-info}/top_level.txt +0 -0
|
@@ -134,7 +134,7 @@ class LLMFamilyV1(BaseModel):
|
|
|
134
134
|
model_name: str
|
|
135
135
|
model_lang: List[str]
|
|
136
136
|
model_ability: List[
|
|
137
|
-
Literal["embed", "generate", "chat", "tools", "vision", "audio"]
|
|
137
|
+
Literal["embed", "generate", "chat", "tools", "vision", "audio", "reasoning"]
|
|
138
138
|
]
|
|
139
139
|
model_description: Optional[str]
|
|
140
140
|
# reason for not required str here: legacy registration
|
|
@@ -143,6 +143,8 @@ class LLMFamilyV1(BaseModel):
|
|
|
143
143
|
chat_template: Optional[str]
|
|
144
144
|
stop_token_ids: Optional[List[int]]
|
|
145
145
|
stop: Optional[List[str]]
|
|
146
|
+
reasoning_start_tag: Optional[str]
|
|
147
|
+
reasoning_end_tag: Optional[str]
|
|
146
148
|
|
|
147
149
|
|
|
148
150
|
class CustomLLMFamilyV1(LLMFamilyV1):
|
|
@@ -538,7 +540,10 @@ def _generate_model_file_names(
|
|
|
538
540
|
)
|
|
539
541
|
need_merge = False
|
|
540
542
|
|
|
541
|
-
if
|
|
543
|
+
if (
|
|
544
|
+
llm_spec.quantization_parts is None
|
|
545
|
+
or quantization not in llm_spec.quantization_parts
|
|
546
|
+
):
|
|
542
547
|
file_names.append(final_file_name)
|
|
543
548
|
elif quantization is not None and quantization in llm_spec.quantization_parts:
|
|
544
549
|
parts = llm_spec.quantization_parts[quantization]
|