xinference 0.13.0__py3-none-any.whl → 0.13.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (66)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +50 -2
  3. xinference/client/restful/restful_client.py +49 -2
  4. xinference/core/model.py +15 -0
  5. xinference/core/supervisor.py +132 -15
  6. xinference/core/worker.py +165 -8
  7. xinference/deploy/cmdline.py +5 -0
  8. xinference/model/audio/chattts.py +6 -6
  9. xinference/model/audio/core.py +23 -15
  10. xinference/model/core.py +12 -3
  11. xinference/model/embedding/core.py +25 -16
  12. xinference/model/flexible/__init__.py +40 -0
  13. xinference/model/flexible/core.py +228 -0
  14. xinference/model/flexible/launchers/__init__.py +15 -0
  15. xinference/model/flexible/launchers/transformers_launcher.py +63 -0
  16. xinference/model/flexible/utils.py +33 -0
  17. xinference/model/image/core.py +18 -14
  18. xinference/model/image/custom.py +1 -1
  19. xinference/model/llm/__init__.py +0 -2
  20. xinference/model/llm/core.py +3 -2
  21. xinference/model/llm/ggml/llamacpp.py +1 -10
  22. xinference/model/llm/llm_family.json +52 -35
  23. xinference/model/llm/llm_family.py +71 -46
  24. xinference/model/llm/llm_family_modelscope.json +55 -27
  25. xinference/model/llm/pytorch/core.py +0 -80
  26. xinference/model/llm/utils.py +4 -2
  27. xinference/model/rerank/core.py +24 -25
  28. xinference/types.py +0 -1
  29. xinference/web/ui/build/asset-manifest.json +3 -3
  30. xinference/web/ui/build/index.html +1 -1
  31. xinference/web/ui/build/static/js/{main.0fb6f3ab.js → main.95c1d652.js} +3 -3
  32. xinference/web/ui/build/static/js/main.95c1d652.js.map +1 -0
  33. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +1 -0
  34. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +1 -0
  35. xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +1 -0
  36. xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +1 -0
  37. xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +1 -0
  38. xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +1 -0
  39. {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/METADATA +7 -11
  40. {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/RECORD +45 -54
  41. xinference/model/llm/ggml/chatglm.py +0 -457
  42. xinference/thirdparty/ChatTTS/__init__.py +0 -1
  43. xinference/thirdparty/ChatTTS/core.py +0 -200
  44. xinference/thirdparty/ChatTTS/experimental/__init__.py +0 -0
  45. xinference/thirdparty/ChatTTS/experimental/llm.py +0 -40
  46. xinference/thirdparty/ChatTTS/infer/__init__.py +0 -0
  47. xinference/thirdparty/ChatTTS/infer/api.py +0 -125
  48. xinference/thirdparty/ChatTTS/model/__init__.py +0 -0
  49. xinference/thirdparty/ChatTTS/model/dvae.py +0 -155
  50. xinference/thirdparty/ChatTTS/model/gpt.py +0 -265
  51. xinference/thirdparty/ChatTTS/utils/__init__.py +0 -0
  52. xinference/thirdparty/ChatTTS/utils/gpu_utils.py +0 -23
  53. xinference/thirdparty/ChatTTS/utils/infer_utils.py +0 -141
  54. xinference/thirdparty/ChatTTS/utils/io_utils.py +0 -14
  55. xinference/web/ui/build/static/js/main.0fb6f3ab.js.map +0 -1
  56. xinference/web/ui/node_modules/.cache/babel-loader/0f6b391abec76271137faad13a3793fe7acc1024e8cd2269c147b653ecd3a73b.json +0 -1
  57. xinference/web/ui/node_modules/.cache/babel-loader/30a0c79d8025d6441eb75b2df5bc2750a14f30119c869ef02570d294dff65c2f.json +0 -1
  58. xinference/web/ui/node_modules/.cache/babel-loader/40486e655c3c5801f087e2cf206c0b5511aaa0dfdba78046b7181bf9c17e54c5.json +0 -1
  59. xinference/web/ui/node_modules/.cache/babel-loader/b5507cd57f16a3a230aa0128e39fe103e928de139ea29e2679e4c64dcbba3b3a.json +0 -1
  60. xinference/web/ui/node_modules/.cache/babel-loader/d779b915f83f9c7b5a72515b6932fdd114f1822cef90ae01cc0d12bca59abc2d.json +0 -1
  61. xinference/web/ui/node_modules/.cache/babel-loader/d87824cb266194447a9c0c69ebab2d507bfc3e3148976173760d18c035e9dd26.json +0 -1
  62. /xinference/web/ui/build/static/js/{main.0fb6f3ab.js.LICENSE.txt → main.95c1d652.js.LICENSE.txt} +0 -0
  63. {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/LICENSE +0 -0
  64. {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/WHEEL +0 -0
  65. {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/entry_points.txt +0 -0
  66. {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/top_level.txt +0 -0

xinference/model/flexible/core.py (new file)
@@ -0,0 +1,228 @@
+ # Copyright 2022-2024 XProbe Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import json
+ import logging
+ import os
+ from collections import defaultdict
+ from threading import Lock
+ from typing import Dict, List, Optional, Tuple
+
+ from ...constants import XINFERENCE_CACHE_DIR, XINFERENCE_MODEL_DIR
+ from ..core import CacheableModelSpec, ModelDescription
+ from .utils import get_launcher
+
+ logger = logging.getLogger(__name__)
+
+ FLEXIBLE_MODEL_LOCK = Lock()
+
+
+ class FlexibleModelSpec(CacheableModelSpec):
+     model_id: Optional[str]  # type: ignore
+     model_description: Optional[str]
+     model_uri: Optional[str]
+     launcher: str
+     launcher_args: Optional[str]
+
+     def parser_args(self):
+         return json.loads(self.launcher_args)
+
+
+ class FlexibleModelDescription(ModelDescription):
+     def __init__(
+         self,
+         address: Optional[str],
+         devices: Optional[List[str]],
+         model_spec: FlexibleModelSpec,
+         model_path: Optional[str] = None,
+     ):
+         super().__init__(address, devices, model_path=model_path)
+         self._model_spec = model_spec
+
+     def to_dict(self):
+         return {
+             "model_type": "flexible",
+             "address": self.address,
+             "accelerators": self.devices,
+             "model_name": self._model_spec.model_name,
+             "launcher": self._model_spec.launcher,
+             "launcher_args": self._model_spec.launcher_args,
+         }
+
+     def get_model_version(self) -> str:
+         return f"{self._model_spec.model_name}"
+
+     def to_version_info(self):
+         return {
+             "model_version": self.get_model_version(),
+             "cache_status": True,
+             "model_file_location": self._model_spec.model_uri,
+             "launcher": self._model_spec.launcher,
+             "launcher_args": self._model_spec.launcher_args,
+         }
+
+
+ def generate_flexible_model_description(
+     model_spec: FlexibleModelSpec,
+ ) -> Dict[str, List[Dict]]:
+     res = defaultdict(list)
+     res[model_spec.model_name].append(
+         FlexibleModelDescription(None, None, model_spec).to_version_info()
+     )
+     return res
+
+
+ FLEXIBLE_MODELS: List[FlexibleModelSpec] = []
+ FLEXIBLE_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
+
+
+ def get_flexible_models():
+     with FLEXIBLE_MODEL_LOCK:
+         return FLEXIBLE_MODELS.copy()
+
+
+ def get_flexible_model_descriptions():
+     import copy
+
+     return copy.deepcopy(FLEXIBLE_MODEL_DESCRIPTIONS)
+
+
+ def register_flexible_model(model_spec: FlexibleModelSpec, persist: bool):
+     from ..utils import is_valid_model_name
+
+     if not is_valid_model_name(model_spec.model_name):
+         raise ValueError(f"Invalid model name {model_spec.model_name}.")
+
+     if model_spec.launcher_args:
+         try:
+             model_spec.parser_args()
+         except Exception:
+             raise ValueError(f"Invalid model launcher args {model_spec.launcher_args}.")
+
+     with FLEXIBLE_MODEL_LOCK:
+         for model_name in [spec.model_name for spec in FLEXIBLE_MODELS]:
+             if model_spec.model_name == model_name:
+                 raise ValueError(
+                     f"Model name conflicts with existing model {model_spec.model_name}"
+                 )
+         FLEXIBLE_MODELS.append(model_spec)
+
+     if persist:
+         persist_path = os.path.join(
+             XINFERENCE_MODEL_DIR, "flexible", f"{model_spec.model_name}.json"
+         )
+         os.makedirs(os.path.dirname(persist_path), exist_ok=True)
+         with open(persist_path, mode="w") as fd:
+             fd.write(model_spec.json())
+
+
+ def unregister_flexible_model(model_name: str, raise_error: bool = True):
+     with FLEXIBLE_MODEL_LOCK:
+         model_spec = None
+         for i, f in enumerate(FLEXIBLE_MODELS):
+             if f.model_name == model_name:
+                 model_spec = f
+                 break
+         if model_spec:
+             FLEXIBLE_MODELS.remove(model_spec)
+
+             persist_path = os.path.join(
+                 XINFERENCE_MODEL_DIR, "flexible", f"{model_spec.model_name}.json"
+             )
+             if os.path.exists(persist_path):
+                 os.remove(persist_path)
+
+             cache_dir = os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
+             if os.path.exists(cache_dir):
+                 logger.warning(
+                     f"Remove the cache of user-defined model {model_spec.model_name}. "
+                     f"Cache directory: {cache_dir}"
+                 )
+                 if os.path.islink(cache_dir):
+                     os.remove(cache_dir)
+                 else:
+                     logger.warning(
+                         f"Cache directory is not a soft link, please remove it manually."
+                     )
+         else:
+             if raise_error:
+                 raise ValueError(f"Model {model_name} not found")
+             else:
+                 logger.warning(f"Model {model_name} not found")
+
+
+ class FlexibleModel:
+     def __init__(
+         self,
+         model_uid: str,
+         model_path: str,
+         device: Optional[str] = None,
+         config: Optional[Dict] = None,
+     ):
+         self._model_uid = model_uid
+         self._model_path = model_path
+         self._device = device
+         self._config = config
+
+     def load(self):
+         """
+         Load the model.
+         """
+
+     def infer(self, **kwargs):
+         """
+         Call model to inference.
+         """
+         raise NotImplementedError("infer method not implemented.")
+
+     @property
+     def model_uid(self):
+         return self._model_uid
+
+     @property
+     def model_path(self):
+         return self._model_path
+
+     @property
+     def device(self):
+         return self._device
+
+     @property
+     def config(self):
+         return self._config
+
+
+ def match_flexible_model(model_name):
+     for model_spec in get_flexible_models():
+         if model_name == model_spec.model_name:
+             return model_spec
+
+
+ def create_flexible_model_instance(
+     subpool_addr: str, devices: List[str], model_uid: str, model_name: str, **kwargs
+ ) -> Tuple[FlexibleModel, FlexibleModelDescription]:
+     model_spec = match_flexible_model(model_name)
+     model_path = model_spec.model_uri
+     launcher_name = model_spec.launcher
+     launcher_args = model_spec.parser_args()
+     kwargs.update(launcher_args)
+
+     model = get_launcher(launcher_name)(
+         model_uid=model_uid, model_spec=model_spec, **kwargs
+     )
+
+     model_description = FlexibleModelDescription(
+         subpool_addr, devices, model_spec, model_path=model_path
+     )
+     return model, model_description
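
To make the new registry concrete, here is a minimal usage sketch. It assumes CacheableModelSpec imposes no further required fields beyond those shown; the model name, URI, and launcher args below are illustrative, not values shipped in this release.

# Hedged sketch: registering a flexible model with the new API.
from xinference.model.flexible.core import (
    FlexibleModelSpec,
    get_flexible_models,
    register_flexible_model,
)

spec = FlexibleModelSpec(
    model_name="my-classifier",  # illustrative; must pass is_valid_model_name
    model_uri="/data/models/my-classifier",  # illustrative local path
    launcher="xinference.model.flexible.launchers.transformers",
    launcher_args='{"task": "text-classification"}',  # JSON string, parsed by parser_args()
)

register_flexible_model(spec, persist=False)  # persist=True also writes a JSON spec file
assert any(s.model_name == "my-classifier" for s in get_flexible_models())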

xinference/model/flexible/launchers/__init__.py (new file)
@@ -0,0 +1,15 @@
+ # Copyright 2022-2024 XProbe Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from .transformers_launcher import launcher as transformers

xinference/model/flexible/launchers/transformers_launcher.py (new file)
@@ -0,0 +1,63 @@
+ # Copyright 2022-2024 XProbe Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from transformers import pipeline
+
+ from ..core import FlexibleModel, FlexibleModelSpec
+
+
+ class MockModel(FlexibleModel):
+     def infer(self, **kwargs):
+         return kwargs
+
+
+ class AutoModel(FlexibleModel):
+     def load(self):
+         config = self.config or {}
+         self._pipeline = pipeline(model=self.model_path, device=self.device, **config)
+
+     def infer(self, **kwargs):
+         return self._pipeline(**kwargs)
+
+
+ class TransformersTextClassificationModel(FlexibleModel):
+     def load(self):
+         config = self.config or {}
+
+         self._pipeline = pipeline(model=self._model_path, device=self._device, **config)
+
+     def infer(self, **kwargs):
+         return self._pipeline(**kwargs)
+
+
+ def launcher(model_uid: str, model_spec: FlexibleModelSpec, **kwargs) -> FlexibleModel:
+     task = kwargs.get("task")
+     device = kwargs.get("device")
+
+     model_path = model_spec.model_uri
+     if model_path is None:
+         raise ValueError("model_path required")
+
+     if task == "text-classification":
+         return TransformersTextClassificationModel(
+             model_uid=model_uid, model_path=model_path, device=device, config=kwargs
+         )
+     elif task == "mock":
+         return MockModel(
+             model_uid=model_uid, model_path=model_path, device=device, config=kwargs
+         )
+     else:
+         return AutoModel(
+             model_uid=model_uid, model_path=model_path, device=device, config=kwargs
+         )
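
The launcher contract above is small: return a FlexibleModel whose load() and infer() do the work. A hypothetical third-party launcher (not part of this release; EchoModel is invented for illustration) would look like:

# Hedged sketch of a custom launcher module following the same pattern.
from xinference.model.flexible.core import FlexibleModel, FlexibleModelSpec


class EchoModel(FlexibleModel):
    def load(self):
        pass  # nothing to load for this toy model

    def infer(self, **kwargs):
        # Echo the inputs back, tagged with the model uid.
        return {"model_uid": self.model_uid, "inputs": kwargs}


def launcher(model_uid: str, model_spec: FlexibleModelSpec, **kwargs) -> FlexibleModel:
    return EchoModel(
        model_uid=model_uid,
        model_path=model_spec.model_uri or "",
        device=kwargs.get("device"),
        config=kwargs,
    )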

xinference/model/flexible/utils.py (new file)
@@ -0,0 +1,33 @@
+ # Copyright 2022-2024 XProbe Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import importlib
+
+
+ def get_launcher(launcher_name: str):
+     try:
+         i = launcher_name.rfind(".")
+         if i != -1:
+             module = importlib.import_module(launcher_name[:i])
+             fn = getattr(module, launcher_name[i + 1 :])
+         else:
+             importlib.import_module(launcher_name)
+             fn = locals().get(launcher_name)
+
+         if fn is None:
+             raise ValueError(f"Launcher {launcher_name} not found.")
+
+         return fn
+     except ImportError as e:
+         raise ImportError(f"Failed to import {launcher_name}: {e}")
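
get_launcher splits on the last dot, imports the module part, and looks up the trailing segment as an attribute. Note that the dot-free branch consults locals(), which will not contain the freshly imported module, so a fully qualified dotted name is the path that works in practice:

# Resolving the built-in transformers launcher by dotted path.
from xinference.model.flexible.utils import get_launcher

fn = get_launcher("xinference.model.flexible.launchers.transformers")
# Imports xinference.model.flexible.launchers, then getattr(..., "transformers").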

xinference/model/image/core.py
@@ -15,7 +15,7 @@ import collections.abc
  import logging
  import os
  from collections import defaultdict
- from typing import Dict, List, Optional, Tuple
+ from typing import Dict, List, Literal, Optional, Tuple

  from ...constants import XINFERENCE_CACHE_DIR
  from ...types import PeftModelConfig
@@ -117,7 +117,10 @@ def generate_image_description(
      return res


- def match_diffusion(model_name: str) -> ImageModelFamilyV1:
+ def match_diffusion(
+     model_name: str,
+     download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+ ) -> ImageModelFamilyV1:
      from ..utils import download_from_modelscope
      from . import BUILTIN_IMAGE_MODELS, MODELSCOPE_IMAGE_MODELS
      from .custom import get_user_defined_images
@@ -126,17 +129,17 @@ def match_diffusion(model_name: str) -> ImageModelFamilyV1:
          if model_spec.model_name == model_name:
              return model_spec

-     if download_from_modelscope():
-         if model_name in MODELSCOPE_IMAGE_MODELS:
-             logger.debug(f"Image model {model_name} found in ModelScope.")
-             return MODELSCOPE_IMAGE_MODELS[model_name]
-         else:
-             logger.debug(
-                 f"Image model {model_name} not found in ModelScope, "
-                 f"now try to load it via builtin way."
-             )
-
-     if model_name in BUILTIN_IMAGE_MODELS:
+     if download_hub == "modelscope" and model_name in MODELSCOPE_IMAGE_MODELS:
+         logger.debug(f"Image model {model_name} found in ModelScope.")
+         return MODELSCOPE_IMAGE_MODELS[model_name]
+     elif download_hub == "huggingface" and model_name in BUILTIN_IMAGE_MODELS:
+         logger.debug(f"Image model {model_name} found in Huggingface.")
+         return BUILTIN_IMAGE_MODELS[model_name]
+     elif download_from_modelscope() and model_name in MODELSCOPE_IMAGE_MODELS:
+         logger.debug(f"Image model {model_name} found in ModelScope.")
+         return MODELSCOPE_IMAGE_MODELS[model_name]
+     elif model_name in BUILTIN_IMAGE_MODELS:
+         logger.debug(f"Image model {model_name} found in Huggingface.")
          return BUILTIN_IMAGE_MODELS[model_name]
      else:
          raise ValueError(
@@ -183,9 +186,10 @@ def create_image_model_instance(
      model_uid: str,
      model_name: str,
      peft_model_config: Optional[PeftModelConfig] = None,
+     download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
      **kwargs,
  ) -> Tuple[DiffusionModel, ImageModelDescription]:
-     model_spec = match_diffusion(model_name)
+     model_spec = match_diffusion(model_name, download_hub)
      controlnet = kwargs.get("controlnet")
      # Handle controlnet
      if controlnet is not None:
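
The effect of the new parameter is that hub selection can be forced per call instead of being inferred from the environment. A hedged sketch, with an illustrative model name:

# Explicit hub selection with the new download_hub parameter.
from xinference.model.image.core import match_diffusion

spec = match_diffusion("stable-diffusion-v1.5", download_hub="modelscope")
# When download_hub is None, the old download_from_modelscope() heuristic
# applies first, then the built-in (Huggingface) table.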

xinference/model/image/custom.py
@@ -66,7 +66,7 @@ def register_image(model_spec: CustomImageModelFamilyV1, persist: bool):
          raise ValueError(f"Invalid model URI {model_uri}")

      persist_path = os.path.join(
-         XINFERENCE_MODEL_DIR, "image", f"{model_spec.model_id}.json"
+         XINFERENCE_MODEL_DIR, "image", f"{model_spec.model_name}.json"
      )
      os.makedirs(os.path.dirname(persist_path), exist_ok=True)
      with open(persist_path, "w") as f:

xinference/model/llm/__init__.py
@@ -112,7 +112,6 @@ def generate_engine_config_by_model_family(model_family):


  def _install():
-     from .ggml.chatglm import ChatglmCppChatModel
      from .ggml.llamacpp import LlamaCppChatModel, LlamaCppModel
      from .mlx.core import MLXChatModel, MLXModel
      from .pytorch.baichuan import BaichuanPytorchChatModel
@@ -143,7 +142,6 @@ def _install():
      # register llm classes.
      LLAMA_CLASSES.extend(
          [
-             ChatglmCppChatModel,
              LlamaCppChatModel,
              LlamaCppModel,
          ]

xinference/model/llm/core.py
@@ -20,7 +20,7 @@ import platform
  from abc import abstractmethod
  from collections import defaultdict
  from functools import lru_cache
- from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
+ from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union

  from ...core.utils import parse_replica_model_uid
  from ...types import PeftModelConfig
@@ -193,6 +193,7 @@ def create_llm_model_instance(
      model_size_in_billions: Optional[Union[int, str]] = None,
      quantization: Optional[str] = None,
      peft_model_config: Optional[PeftModelConfig] = None,
+     download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
      **kwargs,
  ) -> Tuple[LLM, LLMDescription]:
      from .llm_family import cache, check_engine_by_spec_parameters, match_llm
@@ -200,7 +201,7 @@ def create_llm_model_instance(
      if model_engine is None:
          raise ValueError("model_engine is required for LLM model")
      match_result = match_llm(
-         model_name, model_format, model_size_in_billions, quantization
+         model_name, model_format, model_size_in_billions, quantization, download_hub
      )

      if not match_result:
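
download_hub is threaded straight through to match_llm, whose call gains a fifth argument. A hedged sketch of the new call shape, passed positionally as in the hunk above (argument values are illustrative):

# New match_llm call shape with download_hub appended.
from xinference.model.llm.llm_family import match_llm

match_result = match_llm(
    "glm4-chat",    # illustrative model name
    "ggufv2",       # model_format
    9,              # model_size_in_billions
    "Q4_K",         # quantization
    "huggingface",  # download_hub
)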

xinference/model/llm/ggml/llamacpp.py
@@ -25,7 +25,6 @@ from ....types import (
      CompletionChunk,
      CompletionUsage,
      CreateCompletionLlamaCpp,
-     Embedding,
      LlamaCppGenerateConfig,
      LlamaCppModelConfig,
  )
@@ -65,7 +64,6 @@ class LlamaCppModel(LLM):

          if self.model_family.context_length:
              llamacpp_model_config.setdefault("n_ctx", self.model_family.context_length)
-         llamacpp_model_config.setdefault("embedding", True)
          llamacpp_model_config.setdefault("use_mmap", False)
          llamacpp_model_config.setdefault("use_mlock", True)

@@ -185,7 +183,7 @@ class LlamaCppModel(LLM):
      ) -> bool:
          if llm_spec.model_format not in ["ggmlv3", "ggufv2"]:
              return False
-         if "chatglm" in llm_family.model_name or "qwen" in llm_family.model_name:
+         if "qwen" in llm_family.model_name:
              return False
          if "generate" not in llm_family.model_ability:
              return False
@@ -261,11 +259,6 @@ class LlamaCppModel(LLM):
          else:
              return generator_wrapper(prompt, generate_config)

-     def create_embedding(self, input: Union[str, List[str]]) -> Embedding:
-         assert self._llm is not None
-         embedding = self._llm.create_embedding(input)
-         return embedding
-

  class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
      def __init__(
@@ -292,8 +285,6 @@ class LlamaCppChatModel(LlamaCppModel, ChatModelMixin):
      ) -> bool:
          if llm_spec.model_format not in ["ggmlv3", "ggufv2"]:
              return False
-         if "chatglm" in llm_family.model_name:
-             return False
          if "chat" not in llm_family.model_ability:
              return False
          return True

xinference/model/llm/llm_family.json
@@ -574,19 +574,6 @@
      ],
      "model_description": "ChatGLM is an open-source General Language Model (GLM) based LLM trained on both Chinese and English data.",
      "model_specs": [
-       {
-         "model_format": "ggmlv3",
-         "model_size_in_billions": 6,
-         "quantizations": [
-           "q4_0",
-           "q4_1",
-           "q5_0",
-           "q5_1",
-           "q8_0"
-         ],
-         "model_id": "Xorbits/chatglm-6B-GGML",
-         "model_file_name_template": "chatglm-ggml-{quantization}.bin"
-       },
        {
          "model_format": "pytorch",
          "model_size_in_billions": 6,
@@ -622,19 +609,6 @@
      ],
      "model_description": "ChatGLM2 is the second generation of ChatGLM, still open-source and trained on Chinese and English data.",
      "model_specs": [
-       {
-         "model_format": "ggmlv3",
-         "model_size_in_billions": 6,
-         "quantizations": [
-           "q4_0",
-           "q4_1",
-           "q5_0",
-           "q5_1",
-           "q8_0"
-         ],
-         "model_id": "Xorbits/chatglm2-6B-GGML",
-         "model_file_name_template": "chatglm2-ggml-{quantization}.bin"
-       },
        {
          "model_format": "pytorch",
          "model_size_in_billions": 6,
@@ -706,15 +680,6 @@
      ],
      "model_description": "ChatGLM3 is the third generation of ChatGLM, still open-source and trained on Chinese and English data.",
      "model_specs": [
-       {
-         "model_format": "ggmlv3",
-         "model_size_in_billions": 6,
-         "quantizations": [
-           "q4_0"
-         ],
-         "model_id": "Xorbits/chatglm3-6B-GGML",
-         "model_file_name_template": "chatglm3-ggml-{quantization}.bin"
-       },
        {
          "model_format": "pytorch",
          "model_size_in_billions": 6,
@@ -855,6 +820,32 @@
          ],
          "model_id": "THUDM/glm-4-9b-chat",
          "model_revision": "b84dc74294ccd507a3d78bde8aebf628221af9bd"
+       },
+       {
+         "model_format": "ggufv2",
+         "model_size_in_billions": 9,
+         "quantizations": [
+           "Q2_K",
+           "IQ3_XS",
+           "IQ3_S",
+           "IQ3_M",
+           "Q3_K_S",
+           "Q3_K_L",
+           "Q3_K",
+           "IQ4_XS",
+           "IQ4_NL",
+           "Q4_K_S",
+           "Q4_K",
+           "Q5_K_S",
+           "Q5_K",
+           "Q6_K",
+           "Q8_0",
+           "BF16",
+           "FP16"
+         ],
+         "model_file_name_template": "glm-4-9b-chat.{quantization}.gguf",
+         "model_id": "legraphista/glm-4-9b-chat-GGUF",
+         "model_revision": "0155a14edf0176863e9a003cdd78ce599e4d62c0"
        }
      ],
      "prompt_style": {
@@ -900,6 +891,32 @@
          ],
          "model_id": "THUDM/glm-4-9b-chat-1m",
          "model_revision": "715ddbe91082f976ff6a4ca06d59e5bbff6c3642"
+       },
+       {
+         "model_format": "ggufv2",
+         "model_size_in_billions": 9,
+         "quantizations": [
+           "Q2_K",
+           "IQ3_XS",
+           "IQ3_S",
+           "IQ3_M",
+           "Q3_K_S",
+           "Q3_K_L",
+           "Q3_K",
+           "IQ4_XS",
+           "IQ4_NL",
+           "Q4_K_S",
+           "Q4_K",
+           "Q5_K_S",
+           "Q5_K",
+           "Q6_K",
+           "Q8_0",
+           "BF16",
+           "FP16"
+         ],
+         "model_file_name_template": "glm-4-9b-chat-1m.{quantization}.gguf",
+         "model_id": "legraphista/glm-4-9b-chat-1m-GGUF",
+         "model_revision": "782e28bd5eee3c514c07108da15e0b5e06dcf776"
        }
      ],
      "prompt_style": {