xinference 1.4.1__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +50 -1
- xinference/client/restful/restful_client.py +82 -2
- xinference/constants.py +3 -0
- xinference/core/chat_interface.py +297 -83
- xinference/core/model.py +1 -0
- xinference/core/progress_tracker.py +16 -8
- xinference/core/supervisor.py +45 -1
- xinference/core/worker.py +262 -37
- xinference/deploy/cmdline.py +33 -1
- xinference/model/audio/core.py +11 -1
- xinference/model/audio/megatts.py +105 -0
- xinference/model/audio/model_spec.json +24 -1
- xinference/model/audio/model_spec_modelscope.json +26 -1
- xinference/model/core.py +14 -0
- xinference/model/embedding/core.py +6 -1
- xinference/model/flexible/core.py +6 -1
- xinference/model/image/core.py +6 -1
- xinference/model/image/model_spec.json +17 -1
- xinference/model/image/model_spec_modelscope.json +17 -1
- xinference/model/llm/__init__.py +0 -4
- xinference/model/llm/core.py +4 -0
- xinference/model/llm/llama_cpp/core.py +40 -16
- xinference/model/llm/llm_family.json +413 -84
- xinference/model/llm/llm_family.py +24 -1
- xinference/model/llm/llm_family_modelscope.json +447 -0
- xinference/model/llm/mlx/core.py +16 -2
- xinference/model/llm/transformers/__init__.py +14 -0
- xinference/model/llm/transformers/core.py +30 -6
- xinference/model/llm/transformers/gemma3.py +17 -2
- xinference/model/llm/transformers/intern_vl.py +28 -18
- xinference/model/llm/transformers/minicpmv26.py +21 -2
- xinference/model/llm/transformers/qwen-omni.py +308 -0
- xinference/model/llm/transformers/qwen2_audio.py +1 -1
- xinference/model/llm/transformers/qwen2_vl.py +20 -4
- xinference/model/llm/utils.py +11 -1
- xinference/model/llm/vllm/core.py +35 -0
- xinference/model/llm/vllm/distributed_executor.py +8 -2
- xinference/model/rerank/core.py +6 -1
- xinference/model/utils.py +118 -1
- xinference/model/video/core.py +6 -1
- xinference/thirdparty/megatts3/__init__.py +0 -0
- xinference/thirdparty/megatts3/tts/frontend_function.py +175 -0
- xinference/thirdparty/megatts3/tts/gradio_api.py +93 -0
- xinference/thirdparty/megatts3/tts/infer_cli.py +277 -0
- xinference/thirdparty/megatts3/tts/modules/aligner/whisper_small.py +318 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/ar_dur_predictor.py +362 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/layers.py +64 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/nar_tts_modules.py +73 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rel_transformer.py +403 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/rot_transformer.py +649 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/seq_utils.py +342 -0
- xinference/thirdparty/megatts3/tts/modules/ar_dur/commons/transformer.py +767 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/cfm.py +309 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/dit.py +180 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/time_embedding.py +44 -0
- xinference/thirdparty/megatts3/tts/modules/llm_dit/transformer.py +230 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/diag_gaussian.py +67 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/hifigan_modules.py +283 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/seanet_encoder.py +38 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/decoder/wavvae_v3.py +60 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/conv.py +154 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/lstm.py +51 -0
- xinference/thirdparty/megatts3/tts/modules/wavvae/encoder/common_modules/seanet.py +126 -0
- xinference/thirdparty/megatts3/tts/utils/audio_utils/align.py +36 -0
- xinference/thirdparty/megatts3/tts/utils/audio_utils/io.py +95 -0
- xinference/thirdparty/megatts3/tts/utils/audio_utils/plot.py +90 -0
- xinference/thirdparty/megatts3/tts/utils/commons/ckpt_utils.py +171 -0
- xinference/thirdparty/megatts3/tts/utils/commons/hparams.py +215 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/dict.json +1 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/ph_tone_convert.py +94 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/split_text.py +90 -0
- xinference/thirdparty/megatts3/tts/utils/text_utils/text_encoder.py +280 -0
- xinference/types.py +10 -0
- xinference/utils.py +54 -0
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.0f6523be.css +2 -0
- xinference/web/ui/build/static/css/main.0f6523be.css.map +1 -0
- xinference/web/ui/build/static/js/main.58bd483c.js +3 -0
- xinference/web/ui/build/static/js/main.58bd483c.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3bff8cbe9141f937f4d98879a9771b0f48e0e4e0dbee8e647adbfe23859e7048.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4500b1a622a031011f0a291701e306b87e08cbc749c50e285103536b85b6a914.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/69081049f0c7447544b7cfd73dd13d8846c02fe5febe4d81587e95c89a412d5b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bf2b211b0d1b6465eff512d64c869d748f803c5651a7c24e48de6ea3484a7bfe.json +1 -0
- xinference/web/ui/src/locales/en.json +2 -1
- xinference/web/ui/src/locales/zh.json +2 -1
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/METADATA +127 -114
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/RECORD +96 -60
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/WHEEL +1 -1
- xinference/web/ui/build/static/css/main.b494ae7e.css +0 -2
- xinference/web/ui/build/static/css/main.b494ae7e.css.map +0 -1
- xinference/web/ui/build/static/js/main.5ca4eea1.js +0 -3
- xinference/web/ui/build/static/js/main.5ca4eea1.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f0967acaec5df1d45b80010949c258d64297ebbb0f44b8bb3afcbd45c6f0ec4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/27bcada3ee8f89d21184b359f022fc965f350ffaca52c9814c29f1fc37121173.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/68249645124f37d01eef83b1d897e751f895bea919b6fb466f907c1f87cebc84.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e547bbb18abb4a474b675a8d5782d25617566bea0af8caa9b836ce5649e2250a.json +0 -1
- /xinference/web/ui/build/static/js/{main.5ca4eea1.js.LICENSE.txt → main.58bd483c.js.LICENSE.txt} +0 -0
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info/licenses}/LICENSE +0 -0
- {xinference-1.4.1.dist-info → xinference-1.5.0.dist-info}/top_level.txt +0 -0
|
@@ -47,6 +47,22 @@
|
|
|
47
47
|
"merge_length_s": 15
|
|
48
48
|
}
|
|
49
49
|
},
|
|
50
|
+
{
|
|
51
|
+
"model_name": "paraformer-zh",
|
|
52
|
+
"model_family": "funasr",
|
|
53
|
+
"model_hub": "modelscope",
|
|
54
|
+
"model_id": "iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn",
|
|
55
|
+
"model_revision": "master",
|
|
56
|
+
"model_ability": "audio-to-text",
|
|
57
|
+
"multilingual": false,
|
|
58
|
+
"default_model_config": {
|
|
59
|
+
"vad_model": "fsmn-vad",
|
|
60
|
+
"punc_model": "ct-punc"
|
|
61
|
+
},
|
|
62
|
+
"default_transcription_config": {
|
|
63
|
+
"batch_size_s": 300
|
|
64
|
+
}
|
|
65
|
+
},
|
|
50
66
|
{
|
|
51
67
|
"model_name": "ChatTTS",
|
|
52
68
|
"model_family": "ChatTTS",
|
|
@@ -62,7 +78,7 @@
|
|
|
62
78
|
"model_hub": "modelscope",
|
|
63
79
|
"model_id": "iic/CosyVoice-300M",
|
|
64
80
|
"model_revision": "master",
|
|
65
|
-
"model_ability": "
|
|
81
|
+
"model_ability": "text-to-audio",
|
|
66
82
|
"multilingual": true
|
|
67
83
|
},
|
|
68
84
|
{
|
|
@@ -109,5 +125,14 @@
|
|
|
109
125
|
"model_revision": "master",
|
|
110
126
|
"model_ability": "text-to-audio",
|
|
111
127
|
"multilingual": true
|
|
128
|
+
},
|
|
129
|
+
{
|
|
130
|
+
"model_name": "MegaTTS3",
|
|
131
|
+
"model_family": "MegaTTS",
|
|
132
|
+
"model_hub": "modelscope",
|
|
133
|
+
"model_id": "ByteDance/MegaTTS3",
|
|
134
|
+
"model_revision": "master",
|
|
135
|
+
"model_ability": "text-to-audio",
|
|
136
|
+
"multilingual": true
|
|
112
137
|
}
|
|
113
138
|
]
|
xinference/model/core.py
CHANGED
|
@@ -30,6 +30,11 @@ class ModelDescription(ABC):
|
|
|
30
30
|
self.devices = devices
|
|
31
31
|
self._model_path = model_path
|
|
32
32
|
|
|
33
|
+
@property
|
|
34
|
+
@abstractmethod
|
|
35
|
+
def spec(self):
|
|
36
|
+
pass
|
|
37
|
+
|
|
33
38
|
def to_dict(self):
|
|
34
39
|
"""
|
|
35
40
|
Return a dict to describe some information about model.
|
|
@@ -155,3 +160,12 @@ class CacheableModelSpec(BaseModel):
|
|
|
155
160
|
model_id: str
|
|
156
161
|
model_revision: Optional[str]
|
|
157
162
|
model_hub: str = "huggingface"
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class VirtualEnvSettings(BaseModel):
|
|
166
|
+
packages: List[str]
|
|
167
|
+
inherit_pip_config: bool = True
|
|
168
|
+
index_url: Optional[str] = None
|
|
169
|
+
extra_index_url: Optional[str] = None
|
|
170
|
+
find_links: Optional[str] = None
|
|
171
|
+
trusted_host: Optional[str] = None
|
|
@@ -24,7 +24,7 @@ import torch
|
|
|
24
24
|
from ..._compat import ROOT_KEY, ErrorWrapper, ValidationError
|
|
25
25
|
from ...device_utils import empty_cache
|
|
26
26
|
from ...types import Embedding, EmbeddingData, EmbeddingUsage
|
|
27
|
-
from ..core import CacheableModelSpec, ModelDescription
|
|
27
|
+
from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
|
|
28
28
|
from ..utils import get_cache_dir, is_model_cached
|
|
29
29
|
|
|
30
30
|
logger = logging.getLogger(__name__)
|
|
@@ -57,6 +57,7 @@ class EmbeddingModelSpec(CacheableModelSpec):
|
|
|
57
57
|
model_id: str
|
|
58
58
|
model_revision: Optional[str]
|
|
59
59
|
model_hub: str = "huggingface"
|
|
60
|
+
virtualenv: Optional[VirtualEnvSettings]
|
|
60
61
|
|
|
61
62
|
|
|
62
63
|
class EmbeddingModelDescription(ModelDescription):
|
|
@@ -70,6 +71,10 @@ class EmbeddingModelDescription(ModelDescription):
|
|
|
70
71
|
super().__init__(address, devices, model_path=model_path)
|
|
71
72
|
self._model_spec = model_spec
|
|
72
73
|
|
|
74
|
+
@property
|
|
75
|
+
def spec(self):
|
|
76
|
+
return self._model_spec
|
|
77
|
+
|
|
73
78
|
def to_dict(self):
|
|
74
79
|
return {
|
|
75
80
|
"model_type": "embedding",
|
|
@@ -20,7 +20,7 @@ from threading import Lock
|
|
|
20
20
|
from typing import Dict, List, Optional, Tuple
|
|
21
21
|
|
|
22
22
|
from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
|
|
23
|
-
from ..core import CacheableModelSpec, ModelDescription
|
|
23
|
+
from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
|
|
24
24
|
from .utils import get_launcher
|
|
25
25
|
|
|
26
26
|
logger = logging.getLogger(__name__)
|
|
@@ -34,6 +34,7 @@ class FlexibleModelSpec(CacheableModelSpec):
|
|
|
34
34
|
model_uri: Optional[str]
|
|
35
35
|
launcher: str
|
|
36
36
|
launcher_args: Optional[str]
|
|
37
|
+
virtualenv: Optional[VirtualEnvSettings]
|
|
37
38
|
|
|
38
39
|
def parser_args(self):
|
|
39
40
|
return json.loads(self.launcher_args)
|
|
@@ -50,6 +51,10 @@ class FlexibleModelDescription(ModelDescription):
|
|
|
50
51
|
super().__init__(address, devices, model_path=model_path)
|
|
51
52
|
self._model_spec = model_spec
|
|
52
53
|
|
|
54
|
+
@property
|
|
55
|
+
def spec(self):
|
|
56
|
+
return self._model_spec
|
|
57
|
+
|
|
53
58
|
def to_dict(self):
|
|
54
59
|
return {
|
|
55
60
|
"model_type": "flexible",
|
xinference/model/image/core.py
CHANGED
|
@@ -21,7 +21,7 @@ from typing import Dict, List, Literal, Optional, Tuple, Union
|
|
|
21
21
|
|
|
22
22
|
from ...constants import XINFERENCE_CACHE_DIR
|
|
23
23
|
from ...types import PeftModelConfig
|
|
24
|
-
from ..core import CacheableModelSpec, ModelDescription
|
|
24
|
+
from ..core import CacheableModelSpec, ModelDescription, VirtualEnvSettings
|
|
25
25
|
from ..utils import (
|
|
26
26
|
IS_NEW_HUGGINGFACE_HUB,
|
|
27
27
|
retry_download,
|
|
@@ -59,6 +59,7 @@ class ImageModelFamilyV1(CacheableModelSpec):
|
|
|
59
59
|
gguf_model_id: Optional[str]
|
|
60
60
|
gguf_quantizations: Optional[List[str]]
|
|
61
61
|
gguf_model_file_name_template: Optional[str]
|
|
62
|
+
virtualenv: Optional[VirtualEnvSettings]
|
|
62
63
|
|
|
63
64
|
|
|
64
65
|
class ImageModelDescription(ModelDescription):
|
|
@@ -72,6 +73,10 @@ class ImageModelDescription(ModelDescription):
|
|
|
72
73
|
super().__init__(address, devices, model_path=model_path)
|
|
73
74
|
self._model_spec = model_spec
|
|
74
75
|
|
|
76
|
+
@property
|
|
77
|
+
def spec(self):
|
|
78
|
+
return self._model_spec
|
|
79
|
+
|
|
75
80
|
def to_dict(self):
|
|
76
81
|
if self._model_spec.controlnet is not None:
|
|
77
82
|
controlnet = [cn.dict() for cn in self._model_spec.controlnet]
|
|
@@ -339,6 +339,22 @@
|
|
|
339
339
|
"model_revision": "cf6b7386bc89a54f09785612ba74cb12de6fa17c",
|
|
340
340
|
"model_ability": [
|
|
341
341
|
"ocr"
|
|
342
|
-
]
|
|
342
|
+
],
|
|
343
|
+
"virtualenv": {
|
|
344
|
+
"packages": [
|
|
345
|
+
"transformers==4.37.2",
|
|
346
|
+
"httpx==0.24.0",
|
|
347
|
+
"deepspeed==0.12.3",
|
|
348
|
+
"peft==0.4.0",
|
|
349
|
+
"tiktoken==0.6.0",
|
|
350
|
+
"bitsandbytes==0.41.0",
|
|
351
|
+
"scikit-learn==1.2.2",
|
|
352
|
+
"sentencepiece==0.1.99",
|
|
353
|
+
"einops==0.6.1",
|
|
354
|
+
"einops-exts==0.0.4",
|
|
355
|
+
"timm==0.6.13",
|
|
356
|
+
"numpy==1.26.4"
|
|
357
|
+
]
|
|
358
|
+
}
|
|
343
359
|
}
|
|
344
360
|
]
|
|
@@ -315,6 +315,22 @@
|
|
|
315
315
|
"model_hub": "modelscope",
|
|
316
316
|
"model_ability": [
|
|
317
317
|
"ocr"
|
|
318
|
-
]
|
|
318
|
+
],
|
|
319
|
+
"virtualenv": {
|
|
320
|
+
"packages": [
|
|
321
|
+
"transformers==4.37.2",
|
|
322
|
+
"httpx==0.24.0",
|
|
323
|
+
"deepspeed==0.12.3",
|
|
324
|
+
"peft==0.4.0",
|
|
325
|
+
"tiktoken==0.6.0",
|
|
326
|
+
"bitsandbytes==0.41.0",
|
|
327
|
+
"scikit-learn==1.2.2",
|
|
328
|
+
"sentencepiece==0.1.99",
|
|
329
|
+
"einops==0.6.1",
|
|
330
|
+
"einops-exts==0.0.4",
|
|
331
|
+
"timm==0.6.13",
|
|
332
|
+
"numpy==1.26.4"
|
|
333
|
+
]
|
|
334
|
+
}
|
|
319
335
|
}
|
|
320
336
|
]
|
xinference/model/llm/__init__.py
CHANGED
|
@@ -147,13 +147,11 @@ def _install():
|
|
|
147
147
|
from .transformers.gemma3 import Gemma3ChatModel, Gemma3TextChatModel
|
|
148
148
|
from .transformers.glm4v import Glm4VModel
|
|
149
149
|
from .transformers.glm_edge_v import GlmEdgeVModel
|
|
150
|
-
from .transformers.intern_vl import InternVLChatModel
|
|
151
150
|
from .transformers.internlm2 import Internlm2PytorchChatModel
|
|
152
151
|
from .transformers.minicpmv25 import MiniCPMV25Model
|
|
153
152
|
from .transformers.minicpmv26 import MiniCPMV26Model
|
|
154
153
|
from .transformers.opt import OptPytorchModel
|
|
155
154
|
from .transformers.qwen2_audio import Qwen2AudioChatModel
|
|
156
|
-
from .transformers.qwen2_vl import Qwen2VLChatModel
|
|
157
155
|
from .transformers.qwen_vl import QwenVLChatModel
|
|
158
156
|
from .transformers.yi_vl import YiVLChatModel
|
|
159
157
|
from .vllm.core import VLLMChatModel, VLLMModel, VLLMVisionModel
|
|
@@ -184,12 +182,10 @@ def _install():
|
|
|
184
182
|
PytorchChatModel,
|
|
185
183
|
Internlm2PytorchChatModel,
|
|
186
184
|
QwenVLChatModel,
|
|
187
|
-
Qwen2VLChatModel,
|
|
188
185
|
Qwen2AudioChatModel,
|
|
189
186
|
YiVLChatModel,
|
|
190
187
|
DeepSeekVLChatModel,
|
|
191
188
|
DeepSeekVL2ChatModel,
|
|
192
|
-
InternVLChatModel,
|
|
193
189
|
PytorchModel,
|
|
194
190
|
CogVLM2Model,
|
|
195
191
|
CogVLM2VideoModel,
|
xinference/model/llm/core.py
CHANGED
|
@@ -36,7 +36,7 @@ from ..utils import DEEPSEEK_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY, ChatModelM
|
|
|
36
36
|
|
|
37
37
|
logger = logging.getLogger(__name__)
|
|
38
38
|
|
|
39
|
-
USE_XLLAMACPP = bool(int(os.environ.get("USE_XLLAMACPP",
|
|
39
|
+
USE_XLLAMACPP = bool(int(os.environ.get("USE_XLLAMACPP", 1)))
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
class _Done:
|
|
@@ -142,24 +142,38 @@ class XllamaCppModel(LLM, ChatModelMixin):
|
|
|
142
142
|
|
|
143
143
|
if os.path.isfile(self.model_path):
|
|
144
144
|
# mostly passed from --model_path
|
|
145
|
-
model_path =
|
|
145
|
+
model_path = self.model_path
|
|
146
146
|
else:
|
|
147
147
|
# handle legacy cache.
|
|
148
|
-
|
|
149
|
-
|
|
148
|
+
if (
|
|
149
|
+
self.model_spec.model_file_name_split_template
|
|
150
|
+
and self.model_spec.quantization_parts
|
|
151
|
+
):
|
|
152
|
+
part = self.model_spec.quantization_parts[self.quantization]
|
|
153
|
+
model_path = os.path.join(
|
|
154
|
+
self.model_path,
|
|
155
|
+
self.model_spec.model_file_name_split_template.format(
|
|
156
|
+
quantization=self.quantization, part=part[0]
|
|
157
|
+
),
|
|
158
|
+
)
|
|
159
|
+
else:
|
|
160
|
+
model_path = os.path.join(
|
|
150
161
|
self.model_path,
|
|
151
162
|
self.model_spec.model_file_name_template.format(
|
|
152
163
|
quantization=self.quantization
|
|
153
164
|
),
|
|
154
165
|
)
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
model_path = legacy_model_file_path
|
|
166
|
+
legacy_model_file_path = os.path.join(self.model_path, "model.bin")
|
|
167
|
+
if os.path.exists(legacy_model_file_path):
|
|
168
|
+
model_path = legacy_model_file_path
|
|
159
169
|
|
|
160
170
|
try:
|
|
161
171
|
params = CommonParams()
|
|
162
|
-
|
|
172
|
+
# Compatible with xllamacpp changes
|
|
173
|
+
try:
|
|
174
|
+
params.model = model_path
|
|
175
|
+
except Exception:
|
|
176
|
+
params.model.path = model_path
|
|
163
177
|
if self.model_family.chat_template:
|
|
164
178
|
params.chat_template = self.model_family.chat_template
|
|
165
179
|
# This is the default value, could be overwritten by _llamacpp_model_config
|
|
@@ -415,20 +429,30 @@ class LlamaCppModel(LLM):
|
|
|
415
429
|
|
|
416
430
|
if os.path.isfile(self.model_path):
|
|
417
431
|
# mostly passed from --model_path
|
|
418
|
-
model_path =
|
|
432
|
+
model_path = self.model_path
|
|
419
433
|
else:
|
|
420
434
|
# handle legacy cache.
|
|
421
|
-
|
|
422
|
-
|
|
435
|
+
if (
|
|
436
|
+
self.model_spec.model_file_name_split_template
|
|
437
|
+
and self.model_spec.quantization_parts
|
|
438
|
+
):
|
|
439
|
+
part = self.model_spec.quantization_parts[self.quantization]
|
|
440
|
+
model_path = os.path.join(
|
|
441
|
+
self.model_path,
|
|
442
|
+
self.model_spec.model_file_name_split_template.format(
|
|
443
|
+
quantization=self.quantization, part=part[0]
|
|
444
|
+
),
|
|
445
|
+
)
|
|
446
|
+
else:
|
|
447
|
+
model_path = os.path.join(
|
|
423
448
|
self.model_path,
|
|
424
449
|
self.model_spec.model_file_name_template.format(
|
|
425
450
|
quantization=self.quantization
|
|
426
451
|
),
|
|
427
452
|
)
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
model_path = legacy_model_file_path
|
|
453
|
+
legacy_model_file_path = os.path.join(self.model_path, "model.bin")
|
|
454
|
+
if os.path.exists(legacy_model_file_path):
|
|
455
|
+
model_path = legacy_model_file_path
|
|
432
456
|
|
|
433
457
|
try:
|
|
434
458
|
self._llm = Llama(
|