xinference 0.16.1__py3-none-any.whl → 0.16.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference has been flagged as possibly problematic.

Files changed (39)
  1. xinference/_version.py +3 -3
  2. xinference/conftest.py +0 -8
  3. xinference/constants.py +2 -0
  4. xinference/core/model.py +34 -2
  5. xinference/core/supervisor.py +5 -5
  6. xinference/core/utils.py +9 -10
  7. xinference/core/worker.py +8 -5
  8. xinference/deploy/cmdline.py +5 -0
  9. xinference/deploy/utils.py +7 -4
  10. xinference/model/audio/core.py +6 -2
  11. xinference/model/audio/model_spec.json +1 -1
  12. xinference/model/core.py +3 -1
  13. xinference/model/embedding/core.py +6 -2
  14. xinference/model/image/core.py +6 -2
  15. xinference/model/image/ocr/got_ocr2.py +3 -0
  16. xinference/model/llm/__init__.py +33 -0
  17. xinference/model/llm/core.py +4 -4
  18. xinference/model/llm/llm_family.json +87 -0
  19. xinference/model/llm/llm_family.py +68 -2
  20. xinference/model/llm/llm_family_modelscope.json +91 -0
  21. xinference/model/llm/llm_family_openmind_hub.json +1359 -0
  22. xinference/model/llm/vllm/core.py +2 -1
  23. xinference/model/rerank/core.py +9 -1
  24. xinference/model/utils.py +7 -0
  25. xinference/model/video/core.py +6 -2
  26. xinference/web/ui/build/asset-manifest.json +3 -3
  27. xinference/web/ui/build/index.html +1 -1
  28. xinference/web/ui/build/static/js/{main.b76aeeb7.js → main.2f269bb3.js} +3 -3
  29. xinference/web/ui/build/static/js/main.2f269bb3.js.map +1 -0
  30. xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +1 -0
  31. {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/METADATA +5 -4
  32. {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/RECORD +37 -36
  33. xinference/web/ui/build/static/js/main.b76aeeb7.js.map +0 -1
  34. xinference/web/ui/node_modules/.cache/babel-loader/32ea2c04cf0bba2761b4883d2c40cc259952c94d2d6bb774e510963ca37aac0a.json +0 -1
  35. /xinference/web/ui/build/static/js/{main.b76aeeb7.js.LICENSE.txt → main.2f269bb3.js.LICENSE.txt} +0 -0
  36. {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/LICENSE +0 -0
  37. {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/WHEEL +0 -0
  38. {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/entry_points.txt +0 -0
  39. {xinference-0.16.1.dist-info → xinference-0.16.3.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2024-10-25T12:51:06+0800",
+ "date": "2024-11-07T16:55:36+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "d4cd7b15104c16838e3c562cf2d33337e3d38897",
- "version": "0.16.1"
+ "full-revisionid": "85ab86bf1c0967e45fbec995534cd5a0c9a9c439",
+ "version": "0.16.3"
 }
 ''' # END VERSION_JSON
 
xinference/conftest.py CHANGED
@@ -58,10 +58,6 @@ TEST_LOGGING_CONF = {
             "propagate": False,
         }
     },
-    "root": {
-        "level": "WARN",
-        "handlers": ["stream_handler"],
-    },
 }
 
 TEST_LOG_FILE_PATH = get_log_file(f"test_{get_timestamp_ms()}")
@@ -102,10 +98,6 @@ TEST_FILE_LOGGING_CONF = {
             "propagate": False,
         }
     },
-    "root": {
-        "level": "WARN",
-        "handlers": ["stream_handler", "file_handler"],
-    },
 }
 
 
xinference/constants.py CHANGED
@@ -39,6 +39,7 @@ def get_xinference_home() -> str:
         # if user has already set `XINFERENCE_HOME` env, change huggingface and modelscope default download path
         os.environ["HUGGINGFACE_HUB_CACHE"] = os.path.join(home_path, "huggingface")
         os.environ["MODELSCOPE_CACHE"] = os.path.join(home_path, "modelscope")
+        os.environ["XDG_CACHE_HOME"] = os.path.join(home_path, "openmind_hub")
         # In multi-tenant mode,
         # gradio's temporary files are stored in their respective home directories,
         # to prevent insufficient permissions
@@ -86,3 +87,4 @@ XINFERENCE_DOWNLOAD_MAX_ATTEMPTS = int(
 XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE = os.environ.get(
     XINFERENCE_ENV_TEXT_TO_IMAGE_BATCHING_SIZE, None
 )
+XINFERENCE_LAUNCH_MODEL_RETRY = 3
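For reference, the effect of the added line when XINFERENCE_HOME is set: openMind Hub downloads follow XDG_CACHE_HOME, so they now land under the Xinference home next to the HuggingFace and ModelScope caches. A minimal sketch of that behaviour (the default home path below is an illustrative assumption, not taken from the diff):

import os

# Mirrors what get_xinference_home() does once XINFERENCE_HOME is set;
# the "openmind_hub" subdirectory is the part added in 0.16.3.
home_path = os.environ.get("XINFERENCE_HOME", os.path.expanduser("~/.xinference"))
os.environ["HUGGINGFACE_HUB_CACHE"] = os.path.join(home_path, "huggingface")
os.environ["MODELSCOPE_CACHE"] = os.path.join(home_path, "modelscope")
os.environ["XDG_CACHE_HOME"] = os.path.join(home_path, "openmind_hub")  # new in this release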
xinference/core/model.py CHANGED
@@ -40,7 +40,10 @@ from typing import (
 import sse_starlette.sse
 import xoscar as xo
 
-from ..constants import XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE
+from ..constants import (
+    XINFERENCE_LAUNCH_MODEL_RETRY,
+    XINFERENCE_TEXT_TO_IMAGE_BATCHING_SIZE,
+)
 
 if TYPE_CHECKING:
     from .progress_tracker import ProgressTrackerActor
@@ -134,6 +137,8 @@ def oom_check(fn):
 
 
 class ModelActor(xo.StatelessActor):
+    _replica_model_uid: Optional[str]
+
     @classmethod
     def gen_uid(cls, model: "LLM"):
         return f"{model.__class__}-model-actor"
@@ -192,6 +197,7 @@ class ModelActor(xo.StatelessActor):
         supervisor_address: str,
         worker_address: str,
         model: "LLM",
+        replica_model_uid: str,
         model_description: Optional["ModelDescription"] = None,
         request_limits: Optional[int] = None,
     ):
@@ -203,6 +209,7 @@
 
         self._supervisor_address = supervisor_address
         self._worker_address = worker_address
+        self._replica_model_uid = replica_model_uid
         self._model = model
         self._model_description = (
             model_description.to_dict() if model_description else {}
@@ -257,6 +264,9 @@
                 uid=FluxBatchSchedulerActor.gen_uid(self.model_uid()),
             )
 
+    def __repr__(self) -> str:
+        return f"ModelActor({self._replica_model_uid})"
+
     async def _record_completion_metrics(
         self, duration, completion_tokens, prompt_tokens
     ):
@@ -374,7 +384,28 @@
         return condition
 
     async def load(self):
-        self._model.load()
+        try:
+            # Change process title for model
+            import setproctitle
+
+            setproctitle.setproctitle(f"Model: {self._replica_model_uid}")
+        except ImportError:
+            pass
+        i = 0
+        while True:
+            i += 1
+            try:
+                self._model.load()
+                break
+            except Exception as e:
+                if (
+                    i < XINFERENCE_LAUNCH_MODEL_RETRY
+                    and str(e).find("busy or unavailable") >= 0
+                ):
+                    await asyncio.sleep(5)
+                    logger.warning("Retry to load model {model_uid}: %d times", i)
+                    continue
+                raise
         if self.allow_batching():
             await self._scheduler_ref.set_model(self._model)
             logger.debug(
@@ -385,6 +416,7 @@
             logger.debug(
                 f"Batching enabled for model: {self.model_uid()}, max_num_images: {self._model.get_max_num_images_for_batching()}"
             )
+        logger.info(f"{self} loaded")
 
     def model_uid(self):
         return (
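The rewritten load() retries transient failures up to XINFERENCE_LAUNCH_MODEL_RETRY times, sleeping five seconds between attempts, and only when the error message contains "busy or unavailable". A standalone sketch of the same bounded-retry idea (function and constant names here are illustrative, not the xinference API):

import asyncio

MAX_ATTEMPTS = 3         # mirrors XINFERENCE_LAUNCH_MODEL_RETRY
RETRY_DELAY_SECONDS = 5  # mirrors the hard-coded sleep in load()

async def load_with_retry(load_fn):
    # Retry only errors that look transient; anything else is re-raised immediately.
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            return load_fn()
        except Exception as e:
            if attempt < MAX_ATTEMPTS and "busy or unavailable" in str(e):
                await asyncio.sleep(RETRY_DELAY_SECONDS)
                continue
            raise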
xinference/core/supervisor.py CHANGED
@@ -970,7 +970,7 @@ class SupervisorActor(xo.StatelessActor):
             raise ValueError(
                 f"Model is already in the model list, uid: {_replica_model_uid}"
             )
-        replica_gpu_idx = assign_replica_gpu(_replica_model_uid, gpu_idx)
+        replica_gpu_idx = assign_replica_gpu(_replica_model_uid, replica, gpu_idx)
         nonlocal model_type
 
         worker_ref = (
@@ -1084,7 +1084,7 @@
                 dead_models,
             )
             for replica_model_uid in dead_models:
-                model_uid, _, _ = parse_replica_model_uid(replica_model_uid)
+                model_uid, _ = parse_replica_model_uid(replica_model_uid)
                 self._model_uid_to_replica_info.pop(model_uid, None)
                 self._replica_model_uid_to_worker.pop(
                     replica_model_uid, None
@@ -1137,7 +1137,7 @@
             raise ValueError(f"Model not found in the model list, uid: {model_uid}")
 
         replica_model_uid = build_replica_model_uid(
-            model_uid, replica_info.replica, next(replica_info.scheduler)
+            model_uid, next(replica_info.scheduler)
        )
 
         worker_ref = self._replica_model_uid_to_worker.get(replica_model_uid, None)
@@ -1154,7 +1154,7 @@
            raise ValueError(f"Model not found in the model list, uid: {model_uid}")
        # Use rep id 0 to instead of next(replica_info.scheduler) to avoid
        # consuming the generator.
-        replica_model_uid = build_replica_model_uid(model_uid, replica_info.replica, 0)
+        replica_model_uid = build_replica_model_uid(model_uid, 0)
        worker_ref = self._replica_model_uid_to_worker.get(replica_model_uid, None)
        if worker_ref is None:
            raise ValueError(
@@ -1260,7 +1260,7 @@
            uids_to_remove.append(model_uid)
 
        for replica_model_uid in uids_to_remove:
-            model_uid, _, _ = parse_replica_model_uid(replica_model_uid)
+            model_uid, _ = parse_replica_model_uid(replica_model_uid)
            self._model_uid_to_replica_info.pop(model_uid, None)
            self._replica_model_uid_to_worker.pop(replica_model_uid, None)
 
xinference/core/utils.py CHANGED
@@ -146,27 +146,26 @@ def iter_replica_model_uid(model_uid: str, replica: int) -> Generator[str, None,
     """
     replica = int(replica)
     for rep_id in range(replica):
-        yield f"{model_uid}-{replica}-{rep_id}"
+        yield f"{model_uid}-{rep_id}"
 
 
-def build_replica_model_uid(model_uid: str, replica: int, rep_id: int) -> str:
+def build_replica_model_uid(model_uid: str, rep_id: int) -> str:
     """
     Build a replica model uid.
     """
-    return f"{model_uid}-{replica}-{rep_id}"
+    return f"{model_uid}-{rep_id}"
 
 
-def parse_replica_model_uid(replica_model_uid: str) -> Tuple[str, int, int]:
+def parse_replica_model_uid(replica_model_uid: str) -> Tuple[str, int]:
     """
-    Parse replica model uid to model uid, replica and rep id.
+    Parse replica model uid to model uid and rep id.
     """
     parts = replica_model_uid.split("-")
     if len(parts) == 1:
-        return replica_model_uid, -1, -1
+        return replica_model_uid, -1
     rep_id = int(parts.pop())
-    replica = int(parts.pop())
     model_uid = "-".join(parts)
-    return model_uid, replica, rep_id
+    return model_uid, rep_id
 
 
 def is_valid_model_uid(model_uid: str) -> bool:
@@ -261,9 +260,9 @@ def get_nvidia_gpu_info() -> Dict:
 
 
 def assign_replica_gpu(
-    _replica_model_uid: str, gpu_idx: Union[int, List[int]]
+    _replica_model_uid: str, replica: int, gpu_idx: Union[int, List[int]]
 ) -> List[int]:
-    model_uid, replica, rep_id = parse_replica_model_uid(_replica_model_uid)
+    model_uid, rep_id = parse_replica_model_uid(_replica_model_uid)
     rep_id, replica = int(rep_id), int(replica)
     if isinstance(gpu_idx, int):
         gpu_idx = [gpu_idx]
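The replica uid format changes from "{model_uid}-{replica}-{rep_id}" to "{model_uid}-{rep_id}", and parse_replica_model_uid now returns a 2-tuple; the supervisor and worker call sites above are updated accordingly. A quick illustration of the new round trip (the model uid is a made-up example):

# New two-part format
build_replica_model_uid("qwen2-instruct", 1)   # -> "qwen2-instruct-1"
parse_replica_model_uid("qwen2-instruct-1")    # -> ("qwen2-instruct", 1)
parse_replica_model_uid("plainuid")            # -> ("plainuid", -1), no rep id encoded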
xinference/core/worker.py CHANGED
@@ -157,7 +157,7 @@ class WorkerActor(xo.StatelessActor):
                 model_uid,
                 recover_count - 1,
             )
            event_model_uid, _, __ = parse_replica_model_uid(model_uid)
-            event_model_uid, _, __ = parse_replica_model_uid(model_uid)
+            event_model_uid, _ = parse_replica_model_uid(model_uid)
            try:
                if self._event_collector_ref is not None:
                    await self._event_collector_ref.report_event(
@@ -377,7 +377,7 @@
        return len(self._model_uid_to_model)
 
    async def is_model_vllm_backend(self, model_uid: str) -> bool:
-        _model_uid, _, _ = parse_replica_model_uid(model_uid)
+        _model_uid, _ = parse_replica_model_uid(model_uid)
        supervisor_ref = await self.get_supervisor_ref()
        model_ref = await supervisor_ref.get_model(_model_uid)
        return await model_ref.is_vllm_backend()
@@ -785,7 +785,9 @@
        peft_model_config: Optional[PeftModelConfig] = None,
        request_limits: Optional[int] = None,
        gpu_idx: Optional[Union[int, List[int]]] = None,
-        download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+        download_hub: Optional[
+            Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+        ] = None,
        model_path: Optional[str] = None,
        **kwargs,
    ):
@@ -798,7 +800,7 @@
        launch_args.update(kwargs)
 
        try:
-            origin_uid, _, _ = parse_replica_model_uid(model_uid)
+            origin_uid, _ = parse_replica_model_uid(model_uid)
        except Exception as e:
            logger.exception(e)
            raise
@@ -887,6 +889,7 @@
            uid=model_uid,
            supervisor_address=self._supervisor_address,
            worker_address=self.address,
+            replica_model_uid=model_uid,
            model=model,
            model_description=model_description,
            request_limits=request_limits,
@@ -924,7 +927,7 @@
        # Terminate model while its launching is not allow
        if model_uid in self._model_uid_launching_guard:
            raise ValueError(f"{model_uid} is launching")
-        origin_uid, _, __ = parse_replica_model_uid(model_uid)
+        origin_uid, _ = parse_replica_model_uid(model_uid)
        try:
            _ = await self.get_supervisor_ref()
            if self._event_collector_ref is not None:
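launch_builtin_model now accepts "openmind_hub" as a download_hub value. A hedged usage sketch through the Python client (the model name, engine, and exact client signature are assumptions for illustration; only the "openmind_hub" value itself comes from this diff):

from xinference.client import RESTfulClient

client = RESTfulClient("http://127.0.0.1:9997")
model_uid = client.launch_model(
    model_name="qwen2-instruct",   # placeholder model name
    model_engine="transformers",   # placeholder engine
    download_hub="openmind_hub",   # new hub option introduced in this release
)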
xinference/deploy/cmdline.py CHANGED
@@ -43,6 +43,7 @@ from .utils import (
     get_log_file,
     get_timestamp_ms,
     handle_click_args_type,
+    set_envs,
 )
 
 try:
@@ -106,6 +107,8 @@ def start_local_cluster(
        XINFERENCE_LOG_MAX_BYTES,
    )
    logging.config.dictConfig(dict_config)  # type: ignore
+    # refer to https://huggingface.co/docs/transformers/main_classes/logging
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())
 
    main(
        host=host,
@@ -280,6 +283,7 @@ def supervisor(
        XINFERENCE_LOG_MAX_BYTES,
    )
    logging.config.dictConfig(dict_config)  # type: ignore
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())
 
    main(
        host=host,
@@ -342,6 +346,7 @@ def worker(
        XINFERENCE_LOG_MAX_BYTES,
    )
    logging.config.dictConfig(dict_config)  # type: ignore
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())
 
    endpoint = get_endpoint(endpoint)
 
xinference/deploy/utils.py CHANGED
@@ -134,10 +134,6 @@ def get_config_dict(
                "propagate": False,
            },
        },
-        "root": {
-            "level": "WARN",
-            "handlers": ["stream_handler", "file_handler"],
-        },
    }
    return config_dict
 
@@ -220,3 +216,10 @@ def handle_click_args_type(arg: str) -> Any:
        pass
 
    return arg
+
+
+def set_envs(key: str, value: str):
+    """
+    Environment variables are set by the parent process and inherited by child processes
+    """
+    os.environ[key] = value
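set_envs is a thin wrapper over os.environ: it works because child processes inherit the parent's environment, which is how the CLI pushes TRANSFORMERS_VERBOSITY down to the processes it spawns. A minimal sketch (the subprocess part is illustrative only):

import os
import subprocess

def set_envs(key: str, value: str):
    # Same idea as the new helper: mutate the parent environment so that
    # anything spawned afterwards inherits the value.
    os.environ[key] = value

set_envs("TRANSFORMERS_VERBOSITY", "info")
# A child process started after this point sees the variable:
subprocess.run(
    ["python", "-c", "import os; print(os.environ['TRANSFORMERS_VERBOSITY'])"]
)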
xinference/model/audio/core.py CHANGED
@@ -100,7 +100,9 @@ def generate_audio_description(
 
 def match_audio(
     model_name: str,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
 ) -> AudioModelFamilyV1:
     from ..utils import download_from_modelscope
     from . import BUILTIN_AUDIO_MODELS, MODELSCOPE_AUDIO_MODELS
@@ -152,7 +154,9 @@ def create_audio_model_instance(
     devices: List[str],
     model_uid: str,
     model_name: str,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
     model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[
xinference/model/audio/model_spec.json CHANGED
@@ -127,7 +127,7 @@
     "model_name": "ChatTTS",
     "model_family": "ChatTTS",
     "model_id": "2Noise/ChatTTS",
-    "model_revision": "3b34118f6d25850440b8901cef3e71c6ef8619c8",
+    "model_revision": "1a3c04a8b0651689bd9242fbb55b1f4b5a9aef84",
     "model_ability": "text-to-audio",
     "multilingual": true
   },
xinference/model/core.py CHANGED
@@ -55,7 +55,9 @@ def create_model_instance(
     model_size_in_billions: Optional[Union[int, str]] = None,
     quantization: Optional[str] = None,
     peft_model_config: Optional[PeftModelConfig] = None,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
     model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[Any, ModelDescription]:
xinference/model/embedding/core.py CHANGED
@@ -433,7 +433,9 @@ class EmbeddingModel:
 
 def match_embedding(
     model_name: str,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
 ) -> EmbeddingModelSpec:
     from ..utils import download_from_modelscope
     from . import BUILTIN_EMBEDDING_MODELS, MODELSCOPE_EMBEDDING_MODELS
@@ -469,7 +471,9 @@ def create_embedding_model_instance(
     devices: List[str],
     model_uid: str,
     model_name: str,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
     model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[EmbeddingModel, EmbeddingModelDescription]:
xinference/model/image/core.py CHANGED
@@ -125,7 +125,9 @@ def generate_image_description(
 
 def match_diffusion(
     model_name: str,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
 ) -> ImageModelFamilyV1:
     from ..utils import download_from_modelscope
     from . import BUILTIN_IMAGE_MODELS, MODELSCOPE_IMAGE_MODELS
@@ -213,7 +215,9 @@ def create_image_model_instance(
     model_uid: str,
     model_name: str,
     peft_model_config: Optional[PeftModelConfig] = None,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
     model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[
xinference/model/image/ocr/got_ocr2.py CHANGED
@@ -71,6 +71,9 @@ class GotOCR2Model:
         logger.info("Got OCR 2.0 kwargs: %s", kwargs)
         if "ocr_type" not in kwargs:
             kwargs["ocr_type"] = "ocr"
+        if image.mode == "RGBA" or image.mode == "CMYK":
+            # convert to RGB
+            image = image.convert("RGB")
         assert self._model is not None
         # This chat API limits the max new tokens inside.
         return self._model.chat(self._tokenizer, image, gradio_input=True, **kwargs)
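The added guard normalizes images with an alpha channel or CMYK colour space before they reach the OCR model. The same conversion in isolation with Pillow (the file name is an example):

from PIL import Image

image = Image.open("scan.png")       # e.g. a PNG with an alpha channel
if image.mode in ("RGBA", "CMYK"):
    image = image.convert("RGB")     # drop alpha / convert colour space before OCR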
xinference/model/llm/__init__.py CHANGED
@@ -32,6 +32,7 @@ from .llm_family import (
     BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES,
     BUILTIN_LLM_PROMPT_STYLE,
     BUILTIN_MODELSCOPE_LLM_FAMILIES,
+    BUILTIN_OPENMIND_HUB_LLM_FAMILIES,
     LLAMA_CLASSES,
     LLM_ENGINES,
     LMDEPLOY_CLASSES,
@@ -258,6 +259,36 @@ def _install():
            if "tools" in model_spec.model_ability:
                BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
 
+    openmind_hub_json_path = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "llm_family_openmind_hub.json"
+    )
+    for json_obj in json.load(
+        codecs.open(openmind_hub_json_path, "r", encoding="utf-8")
+    ):
+        model_spec = LLMFamilyV1.parse_obj(json_obj)
+        BUILTIN_OPENMIND_HUB_LLM_FAMILIES.append(model_spec)
+
+        # register prompt style, in case that we have something missed
+        # if duplicated with huggingface json, keep it as the huggingface style
+
+        if (
+            "chat" in model_spec.model_ability
+            and isinstance(model_spec.chat_template, str)
+            and model_spec.model_name not in BUILTIN_LLM_PROMPT_STYLE
+        ):
+            BUILTIN_LLM_PROMPT_STYLE[model_spec.model_name] = {
+                "chat_template": model_spec.chat_template,
+                "stop_token_ids": model_spec.stop_token_ids,
+                "stop": model_spec.stop,
+            }
+        # register model family
+        if "chat" in model_spec.model_ability:
+            BUILTIN_LLM_MODEL_CHAT_FAMILIES.add(model_spec.model_name)
+        else:
+            BUILTIN_LLM_MODEL_GENERATE_FAMILIES.add(model_spec.model_name)
+        if "tools" in model_spec.model_ability:
+            BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES.add(model_spec.model_name)
+
    csghub_json_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "llm_family_csghub.json"
    )
@@ -288,6 +319,7 @@ def _install():
    for llm_specs in [
        BUILTIN_LLM_FAMILIES,
        BUILTIN_MODELSCOPE_LLM_FAMILIES,
+        BUILTIN_OPENMIND_HUB_LLM_FAMILIES,
        BUILTIN_CSGHUB_LLM_FAMILIES,
    ]:
        for llm_spec in llm_specs:
@@ -298,6 +330,7 @@ def _install():
    for families in [
        BUILTIN_LLM_FAMILIES,
        BUILTIN_MODELSCOPE_LLM_FAMILIES,
+        BUILTIN_OPENMIND_HUB_LLM_FAMILIES,
        BUILTIN_CSGHUB_LLM_FAMILIES,
    ]:
        for family in families:
xinference/model/llm/core.py CHANGED
@@ -52,9 +52,7 @@ class LLM(abc.ABC):
         *args,
         **kwargs,
     ):
-        self.model_uid, self.replica, self.rep_id = parse_replica_model_uid(
-            replica_model_uid
-        )
+        self.model_uid, self.rep_id = parse_replica_model_uid(replica_model_uid)
         self.model_family = model_family
         self.model_spec = model_spec
         self.quantization = quantization
@@ -193,7 +191,9 @@ def create_llm_model_instance(
     model_size_in_billions: Optional[Union[int, str]] = None,
     quantization: Optional[str] = None,
     peft_model_config: Optional[PeftModelConfig] = None,
-    download_hub: Optional[Literal["huggingface", "modelscope", "csghub"]] = None,
+    download_hub: Optional[
+        Literal["huggingface", "modelscope", "openmind_hub", "csghub"]
+    ] = None,
     model_path: Optional[str] = None,
     **kwargs,
 ) -> Tuple[LLM, LLMDescription]:
xinference/model/llm/llm_family.json CHANGED
@@ -1312,6 +1312,93 @@
         "<|eom_id|>"
       ]
     },
+    {
+      "version": 1,
+      "context_length": 131072,
+      "model_name": "llama-3.2-vision-instruct",
+      "model_lang": [
+        "en",
+        "de",
+        "fr",
+        "it",
+        "pt",
+        "hi",
+        "es",
+        "th"
+      ],
+      "model_ability": [
+        "chat",
+        "vision"
+      ],
+      "model_description": "Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+      "model_specs": [
+        {
+          "model_format": "pytorch",
+          "model_size_in_billions": 11,
+          "quantizations": [
+            "none"
+          ],
+          "model_id": "meta-llama/Llama-3.2-11B-Vision-Instruct"
+        },
+        {
+          "model_format": "pytorch",
+          "model_size_in_billions": 90,
+          "quantizations": [
+            "none"
+          ],
+          "model_id": "meta-llama/Llama-3.2-90B-Vision-Instruct"
+        }
+      ],
+      "chat_template": "{% for message in messages %}{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{ '<|eot_id|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+      "stop_token_ids": [
+        128001,
+        128008,
+        128009
+      ],
+      "stop": [
+        "<|end_of_text|>",
+        "<|eot_id|>",
+        "<|eom_id|>"
+      ]
+    },
+    {
+      "version": 1,
+      "context_length": 131072,
+      "model_name": "llama-3.2-vision",
+      "model_lang": [
+        "en",
+        "de",
+        "fr",
+        "it",
+        "pt",
+        "hi",
+        "es",
+        "th"
+      ],
+      "model_ability": [
+        "generate",
+        "vision"
+      ],
+      "model_description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+      "model_specs": [
+        {
+          "model_format": "pytorch",
+          "model_size_in_billions": 11,
+          "quantizations": [
+            "none"
+          ],
+          "model_id": "meta-llama/Meta-Llama-3.2-11B-Vision"
+        },
+        {
+          "model_format": "pytorch",
+          "model_size_in_billions": 90,
+          "quantizations": [
+            "none"
+          ],
+          "model_id": "meta-llama/Meta-Llama-3.2-90B-Vision"
+        }
+      ]
+    },
     {
       "version": 1,
       "context_length": 2048,