xinference 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +4 -7
- xinference/client/handlers.py +3 -0
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +2 -0
- xinference/core/scheduler.py +4 -7
- xinference/core/supervisor.py +114 -23
- xinference/core/worker.py +70 -4
- xinference/deploy/local.py +2 -1
- xinference/model/audio/core.py +11 -0
- xinference/model/audio/cosyvoice.py +16 -5
- xinference/model/audio/kokoro.py +139 -0
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +80 -0
- xinference/model/audio/model_spec_modelscope.json +18 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/llm/llama_cpp/core.py +21 -14
- xinference/model/llm/llm_family.json +527 -1
- xinference/model/llm/llm_family.py +4 -1
- xinference/model/llm/llm_family_modelscope.json +495 -3
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +24 -6
- xinference/model/llm/transformers/core.py +9 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -3
- xinference/model/llm/transformers/utils.py +22 -11
- xinference/model/llm/utils.py +115 -1
- xinference/model/llm/vllm/core.py +14 -4
- xinference/model/llm/vllm/xavier/block.py +3 -4
- xinference/model/llm/vllm/xavier/block_tracker.py +71 -58
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/executor.py +18 -16
- xinference/model/llm/vllm/xavier/scheduler.py +79 -63
- xinference/model/llm/vllm/xavier/test/test_xavier.py +60 -35
- xinference/model/llm/vllm/xavier/transfer.py +53 -32
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/melo/__init__.py +0 -0
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/__init__.py +0 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/__init__.py +0 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/__init__.py +0 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.1eb206d1.js → main.b0936c54.js} +3 -3
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/METADATA +37 -27
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/RECORD +122 -45
- xinference/web/ui/build/static/js/main.1eb206d1.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/2213d49de260e1f67c888081b18f120f5225462b829ae57c9e05a05cec83689d.json +0 -1
- /xinference/web/ui/build/static/js/{main.1eb206d1.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/LICENSE +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/WHEEL +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/entry_points.txt +0 -0
- {xinference-1.2.0.dist-info → xinference-1.2.2.dist-info}/top_level.txt +0 -0
xinference/model/audio/kokoro.py (new file)

@@ -0,0 +1,139 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from io import BytesIO
+from typing import TYPE_CHECKING, Optional
+
+import numpy as np
+
+from ...device_utils import get_available_device, is_device_available
+
+if TYPE_CHECKING:
+    from .core import AudioModelFamilyV1
+
+logger = logging.getLogger(__name__)
+
+
+class KokoroModel:
+    # The available voices, should keep sync with https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices
+    VOICES = [
+        "af_alloy",
+        "af_aoede",
+        "af_bella",
+        "af_jessica",
+        "af_kore",
+        "af_nicole",
+        "af_nova",
+        "af_river",
+        "af_sarah",
+        "af_sky",
+        "am_adam",
+        "am_echo",
+        "am_eric",
+        "am_fenrir",
+        "am_liam",
+        "am_michael",
+        "am_onyx",
+        "am_puck",
+        "bf_alice",
+        "bf_emma",
+        "bf_isabella",
+        "bf_lily",
+        "bm_daniel",
+        "bm_fable",
+        "bm_george",
+        "bm_lewis",
+    ]
+
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: "AudioModelFamilyV1",
+        device: Optional[str] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._model_spec = model_spec
+        self._device = device
+        self._model = None
+        self._kwargs = kwargs
+
+    @property
+    def model_ability(self):
+        return self._model_spec.model_ability
+
+    def load(self):
+        if self._device is None:
+            self._device = get_available_device()
+        else:
+            if not is_device_available(self._device):
+                raise ValueError(f"Device {self._device} is not available!")
+
+        import os
+
+        from kokoro import KModel, KPipeline
+
+        config_path = os.path.join(self._model_path, "config.json")
+        model_path = os.path.join(self._model_path, "kokoro-v1_0.pth")
+        # LANG_CODES = dict(
+        #     a='American English',
+        #     b='British English',
+        # )
+        lang_code = self._kwargs.get("lang_code", "a")
+        self._model = KPipeline(
+            lang_code=lang_code,
+            model=KModel(config=config_path, model=model_path),
+            device=self._device,
+        )
+
+    def speech(
+        self,
+        input: str,
+        voice: str,
+        response_format: str = "mp3",
+        speed: float = 1.0,
+        stream: bool = False,
+        **kwargs,
+    ):
+        import soundfile
+
+        if stream:
+            raise Exception("Kokoro does not support stream mode.")
+        assert self._model is not None
+        if not voice:
+            voice = next(iter(self.VOICES))
+            logger.info("Auto select speaker: %s", voice)
+        elif not voice.endswith(".pt") and voice not in self.VOICES:
+            raise ValueError(
+                f"Invalid voice: {voice}, available speakers: {self.VOICES}"
+            )
+        else:
+            logger.info("Using custom voice pt: %s", voice)
+        logger.info("Speech kwargs: %s", kwargs)
+        generator = self._model(text=input, voice=voice, speed=speed, **kwargs)
+        results = list(generator)
+        audio = np.concatenate([r[2] for r in results])
+        # Save the generated audio
+        with BytesIO() as out:
+            with soundfile.SoundFile(
+                out,
+                "w",
+                24000,
+                1,
+                format=response_format.upper(),
+            ) as f:
+                f.write(audio)
+            return out.getvalue()
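The new KokoroModel is served through the same audio endpoint as the other text-to-speech backends. A minimal client-side sketch, assuming a locally running endpoint at http://localhost:9997 and the default "a" (American English) lang_code; the voice name must be one of the VOICES entries above or a custom .pt file:

    from xinference.client import Client

    client = Client("http://localhost:9997")
    # Launch the Kokoro-82M audio model newly registered in model_spec.json.
    model_uid = client.launch_model(model_name="Kokoro-82M", model_type="audio")
    model = client.get_model(model_uid)
    # speech() mirrors KokoroModel.speech(): an empty voice auto-selects the first VOICES entry.
    audio_bytes = model.speech("Hello from Kokoro", voice="af_bella", response_format="mp3")
    with open("kokoro.mp3", "wb") as f:
        f.write(audio_bytes)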
xinference/model/audio/melotts.py (new file)

@@ -0,0 +1,110 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from io import BytesIO
+from typing import TYPE_CHECKING, Optional
+
+from ...device_utils import get_available_device, is_device_available
+
+if TYPE_CHECKING:
+    from .core import AudioModelFamilyV1
+
+logger = logging.getLogger(__name__)
+
+
+class MeloTTSModel:
+    def __init__(
+        self,
+        model_uid: str,
+        model_path: str,
+        model_spec: "AudioModelFamilyV1",
+        device: Optional[str] = None,
+        **kwargs,
+    ):
+        self._model_uid = model_uid
+        self._model_path = model_path
+        self._model_spec = model_spec
+        self._device = device
+        self._model = None
+        self._kwargs = kwargs
+
+    @property
+    def model_ability(self):
+        return self._model_spec.model_ability
+
+    def load(self):
+        if self._device is None:
+            self._device = get_available_device()
+        else:
+            if not is_device_available(self._device):
+                raise ValueError(f"Device {self._device} is not available!")
+
+        import os
+        import sys
+
+        import nltk
+
+        # English language requires download averaged_perceptron_tagger_eng
+        nltk.download("averaged_perceptron_tagger_eng")
+
+        # The yaml config loaded from model has hard-coded the import paths. please refer to: load_hyperpyyaml
+        sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../thirdparty"))
+
+        from melo.api import TTS
+
+        config_path = os.path.join(self._model_path, "config.json")
+        ckpt_path = os.path.join(self._model_path, "checkpoint.pth")
+        self._model = TTS(
+            language=self._model_spec.language,
+            device=self._device,
+            config_path=config_path,
+            ckpt_path=ckpt_path,
+        )
+
+    def speech(
+        self,
+        input: str,
+        voice: str,
+        response_format: str = "mp3",
+        speed: float = 1.0,
+        stream: bool = False,
+        **kwargs,
+    ):
+        import soundfile
+
+        if stream:
+            raise Exception("MeloTTS does not support stream mode.")
+        assert self._model is not None
+        speaker_ids = self._model.hps.data.spk2id
+        if not voice:
+            voice = next(iter(speaker_ids.keys()))
+            logger.info("Auto select speaker: %s", voice)
+        elif voice not in speaker_ids:
+            raise ValueError(
+                f"Invalid voice: {voice}, available speakers: {speaker_ids}"
+            )
+        audio = self._model.tts_to_file(
+            text=input, speaker_id=speaker_ids[voice], speed=speed, **kwargs
+        )
+        # Save the generated audio
+        with BytesIO() as out:
+            with soundfile.SoundFile(
+                out,
+                "w",
+                self._model.hps.data.sampling_rate,
+                1,
+                format=response_format.upper(),
+            ) as f:
+                f.write(audio)
+            return out.getvalue()
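MeloTTS follows the same pattern, except that valid voices come from the checkpoint's spk2id table rather than a fixed list. A hedged sketch, assuming the same local endpoint; passing an empty voice triggers the auto-selection branch in MeloTTSModel.speech() above:

    from xinference.client import Client

    client = Client("http://localhost:9997")
    model_uid = client.launch_model(model_name="MeloTTS-English", model_type="audio")
    model = client.get_model(model_uid)
    # Empty voice lets the model pick the first speaker id from spk2id.
    audio_bytes = model.speech("Text to speech with MeloTTS", voice="", response_format="wav")
    with open("melotts.wav", "wb") as f:
        f.write(audio_bytes)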
xinference/model/audio/model_spec.json

@@ -266,5 +266,85 @@
     "model_revision": "7642bb232e3fcacf92c51c786edebb8624da6b93",
     "model_ability": "text-to-audio",
     "multilingual": true
+  },
+  {
+    "model_name": "MeloTTS-English",
+    "model_family": "MeloTTS",
+    "model_id": "myshell-ai/MeloTTS-English",
+    "model_revision": "bb4fb7346d566d277ba8c8c7dbfdf6786139b8ef",
+    "model_ability": "text-to-audio",
+    "multilingual": false,
+    "language": "EN"
+  },
+  {
+    "model_name": "MeloTTS-English-v2",
+    "model_family": "MeloTTS",
+    "model_id": "myshell-ai/MeloTTS-English-v2",
+    "model_revision": "a53e3509c4ee4ff16d79272feb2474ff864e18f3",
+    "model_ability": "text-to-audio",
+    "multilingual": false,
+    "language": "EN"
+  },
+  {
+    "model_name": "MeloTTS-English-v3",
+    "model_family": "MeloTTS",
+    "model_id": "myshell-ai/MeloTTS-English-v3",
+    "model_revision": "f7c4a35392c0e9be24a755f1edb4c3f63040f759",
+    "model_ability": "text-to-audio",
+    "multilingual": false,
+    "language": "EN"
+  },
+  {
+    "model_name": "MeloTTS-French",
+    "model_family": "MeloTTS",
+    "model_id": "myshell-ai/MeloTTS-French",
+    "model_revision": "1e9bf590262392d8bffb679b0a3b0c16b0f9fdaf",
+    "model_ability": "text-to-audio",
+    "multilingual": false,
+    "language": "FR"
+  },
+  {
+    "model_name": "MeloTTS-Japanese",
+    "model_family": "MeloTTS",
+    "model_id": "myshell-ai/MeloTTS-Japanese",
+    "model_revision": "367f8795464b531b4e97c1515bddfc1243e60891",
+    "model_ability": "text-to-audio",
+    "multilingual": false,
+    "language": "JP"
+  },
+  {
+    "model_name": "MeloTTS-Spanish",
+    "model_family": "MeloTTS",
+    "model_id": "myshell-ai/MeloTTS-Spanish",
+    "model_revision": "dbb5496df39d11a66c1d5f5a9ca357c3c9fb95fb",
+    "model_ability": "text-to-audio",
+    "multilingual": false,
+    "language": "ES"
+  },
+  {
+    "model_name": "MeloTTS-Chinese",
+    "model_family": "MeloTTS",
+    "model_id": "myshell-ai/MeloTTS-Chinese",
+    "model_revision": "af5d207a364ea4208c6f589c89f57f88414bdd16",
+    "model_ability": "text-to-audio",
+    "multilingual": false,
+    "language": "ZH"
+  },
+  {
+    "model_name": "MeloTTS-Korean",
+    "model_family": "MeloTTS",
+    "model_id": "myshell-ai/MeloTTS-Korean",
+    "model_revision": "0207e5adfc90129a51b6b03d89be6d84360ed323",
+    "model_ability": "text-to-audio",
+    "multilingual": false,
+    "language": "KR"
+  },
+  {
+    "model_name": "Kokoro-82M",
+    "model_family": "Kokoro",
+    "model_id": "hexgrad/Kokoro-82M",
+    "model_revision": "7a29fcdf8e997bac6d6f5f6f0c2f0b92912f6102",
+    "model_ability": "text-to-audio",
+    "multilingual": true
   }
 ]
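These entries extend the built-in audio registry. The same kind of spec can, in principle, be registered at runtime for a user-supplied checkpoint; the sketch below is an assumption about that flow (the register_model call, the "model_uri" field, and the local path are not part of this diff and are only illustrative):

    import json
    from xinference.client import Client

    # Hypothetical custom spec pointing at a locally downloaded MeloTTS checkpoint.
    custom_spec = {
        "model_name": "my-melotts-english",
        "model_family": "MeloTTS",
        "model_uri": "/path/to/MeloTTS-English",  # assumed local checkpoint directory
        "model_ability": "text-to-audio",
        "multilingual": False,
        "language": "EN",
    }

    client = Client("http://localhost:9997")
    # Assumes register_model accepts audio specs the same way it accepts LLM specs.
    client.register_model(model_type="audio", model=json.dumps(custom_spec), persist=False)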
xinference/model/audio/model_spec_modelscope.json

@@ -17,6 +17,15 @@
     "model_ability": "audio-to-text",
     "multilingual": true
   },
+  {
+    "model_name": "Belle-whisper-large-v3-zh",
+    "model_family": "whisper",
+    "model_hub": "modelscope",
+    "model_id": "Xorbits/Belle-whisper-large-v3-zh",
+    "model_revision": "master",
+    "model_ability": "audio-to-text",
+    "multilingual": false
+  },
   {
     "model_name": "SenseVoiceSmall",
     "model_family": "funasr",
@@ -91,5 +100,14 @@
     "model_revision": "master",
     "model_ability": "text-to-audio",
     "multilingual": true
+  },
+  {
+    "model_name": "Kokoro-82M",
+    "model_family": "Kokoro",
+    "model_hub": "modelscope",
+    "model_id": "AI-ModelScope/Kokoro-82M",
+    "model_revision": "master",
+    "model_ability": "text-to-audio",
+    "multilingual": true
   }
 ]
xinference/model/audio/whisper.py

@@ -13,9 +13,12 @@
 # limitations under the License.
 import logging
 import os
+import typing
 from glob import glob
 from typing import TYPE_CHECKING, Dict, List, Optional, Union
 
+from typing_extensions import TypedDict
+
 from ...device_utils import (
     get_available_device,
     get_device_preferred_dtype,
@@ -28,6 +31,13 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
+class WhisperModelConfig(TypedDict, total=False):
+    chunk_length_s: Optional[float]
+    stride_length_s: Optional[float]
+    return_timestamps: Optional[bool]
+    batch_size: Optional[int]
+
+
 class WhisperModel:
     def __init__(
         self,
@@ -35,6 +45,7 @@ class WhisperModel:
         model_path: str,
         model_spec: "AudioModelFamilyV1",
         device: Optional[str] = None,
+        max_new_tokens: Optional[int] = 128,
         **kwargs,
     ):
         self._model_uid = model_uid
@@ -42,7 +53,21 @@ class WhisperModel:
         self._model_spec = model_spec
         self._device = device
         self._model = None
-        self.
+        self._max_new_tokens = max_new_tokens
+        self._model_config: WhisperModelConfig = self._sanitize_model_config(
+            typing.cast(WhisperModelConfig, kwargs)
+        )
+
+    def _sanitize_model_config(
+        self, model_config: Optional[WhisperModelConfig]
+    ) -> WhisperModelConfig:
+        if model_config is None:
+            model_config = WhisperModelConfig()
+        model_config.setdefault("chunk_length_s", 30)
+        model_config.setdefault("stride_length_s", None)
+        model_config.setdefault("return_timestamps", False)
+        model_config.setdefault("batch_size", 16)
+        return model_config
 
     @property
     def model_ability(self):
@@ -75,10 +100,10 @@ class WhisperModel:
             model=model,
             tokenizer=processor.tokenizer,
             feature_extractor=processor.feature_extractor,
-
-
-
-
+            chunk_length_s=self._model_config.get("chunk_length_s"),
+            stride_length_s=self._model_config.get("stride_length_s"),
+            return_timestamps=self._model_config.get("return_timestamps"),
+            batch_size=self._model_config.get("batch_size"),
             torch_dtype=torch_dtype,
             device=self._device,
         )
@@ -185,13 +210,13 @@ class WhisperModel:
         logger.warning(
             "Prompt for whisper transcriptions will be ignored: %s", prompt
         )
+        generate_kwargs = {"max_new_tokens": self._max_new_tokens, "task": "transcribe"}
+        if language is not None:
+            generate_kwargs["language"] = language
+
         return self._call_model(
             audio=audio,
-            generate_kwargs=
-                {"language": language, "task": "transcribe"}
-                if language is not None
-                else {"task": "transcribe"}
-            ),
+            generate_kwargs=generate_kwargs,
             response_format=response_format,
             temperature=temperature,
             timestamp_granularities=timestamp_granularities,
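With the new WhisperModelConfig, pipeline knobs that used to be fixed at load time (chunk length, stride, batch size, timestamps) are now read from the constructor's **kwargs via _sanitize_model_config, and max_new_tokens becomes an explicit parameter. A hedged sketch of overriding them at launch time, assuming launch-time kwargs are forwarded to the audio model constructor as for other audio backends (model name and endpoint are assumptions):

    from xinference.client import Client

    client = Client("http://localhost:9997")
    # Extra kwargs flow into WhisperModel.__init__ and become the pipeline config.
    model_uid = client.launch_model(
        model_name="whisper-large-v3",
        model_type="audio",
        max_new_tokens=256,       # replaces the previously hard-coded 128
        chunk_length_s=30,
        batch_size=8,
        return_timestamps=True,
    )
    model = client.get_model(model_uid)
    with open("meeting.wav", "rb") as f:
        result = model.transcriptions(f.read())
    print(result["text"])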
xinference/model/llm/llama_cpp/core.py

@@ -28,7 +28,7 @@ from ....types import (
 )
 from ..core import LLM
 from ..llm_family import LLMFamilyV1, LLMSpecV1
-from ..utils import QWEN_TOOL_CALL_FAMILY, ChatModelMixin
+from ..utils import DEEPSEEK_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY, ChatModelMixin
 
 logger = logging.getLogger(__name__)
 
@@ -123,18 +123,22 @@ class LlamaCppModel(LLM):
 
         raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
 
-
-
-        os.path.
-
-
-
-
+        if os.path.isfile(self.model_path):
+            # mostly passed from --model_path
+            model_path = os.path.realpath(self.model_path)
+        else:
+            # handle legacy cache.
+            model_path = os.path.realpath(
+                os.path.join(
+                    self.model_path,
+                    self.model_spec.model_file_name_template.format(
+                        quantization=self.quantization
+                    ),
+                )
             )
-
-
-
-            model_path = legacy_model_file_path
+            legacy_model_file_path = os.path.join(self.model_path, "model.bin")
+            if os.path.exists(legacy_model_file_path):
+                model_path = legacy_model_file_path
 
         try:
             self._llm = Llama(
@@ -272,8 +276,11 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
         model_family = self.model_family.model_family or self.model_family.model_name
         tools = generate_config.pop("tools", []) if generate_config else None
         full_context_kwargs = {}
-        if tools
-
+        if tools:
+            if model_family in QWEN_TOOL_CALL_FAMILY:
+                full_context_kwargs["tools"] = tools
+            elif model_family in DEEPSEEK_TOOL_CALL_FAMILY:
+                self._tools_to_messages_for_deepseek(messages, tools)
 assert self.model_family.chat_template is not None
         full_prompt = self.get_full_context(
             messages, self.model_family.chat_template, **full_context_kwargs