xinference 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

This version of xinference might be problematic.
Files changed (46)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +4 -4
  3. xinference/deploy/utils.py +3 -1
  4. xinference/model/embedding/__init__.py +5 -1
  5. xinference/model/embedding/core.py +7 -7
  6. xinference/model/llm/ggml/chatglm.py +7 -5
  7. xinference/model/llm/llm_family.py +41 -15
  8. xinference/model/llm/pytorch/core.py +1 -0
  9. xinference/model/llm/pytorch/utils.py +3 -0
  10. xinference/model/llm/vllm/core.py +5 -1
  11. xinference/model/rerank/__init__.py +5 -1
  12. xinference/model/rerank/core.py +7 -6
  13. xinference/model/utils.py +13 -2
  14. xinference/web/ui/build/asset-manifest.json +3 -3
  15. xinference/web/ui/build/index.html +1 -1
  16. xinference/web/ui/build/static/js/main.778615cc.js +3 -0
  17. xinference/web/ui/build/static/js/main.778615cc.js.map +1 -0
  18. xinference/web/ui/node_modules/.cache/babel-loader/35204b265d70210394b0a8571e5b01c8c0f9a748437b8a923961e3560ae3d550.json +1 -0
  19. xinference/web/ui/node_modules/.cache/babel-loader/483eb7e5f01e34c6a42ad7c64dad550ff945ee21053a52c2e5e7ebe108b85411.json +1 -0
  20. xinference/web/ui/node_modules/.cache/babel-loader/667753ce39ce1d4bcbf9a5f1a103d653be1d19d42f4e1fbaceb9b507679a52c7.json +1 -0
  21. xinference/web/ui/node_modules/.cache/babel-loader/6e63957e4e0801705c6cb258379bfda0007ce6c3ddd2e3b62898b68455c3edf4.json +1 -0
  22. xinference/web/ui/node_modules/.cache/babel-loader/8d2b0b3c6988d1894694dcbbe708ef91cfe62d62dac317031f09915ced637953.json +1 -0
  23. xinference/web/ui/node_modules/.cache/babel-loader/8d77975a2735d67a618407026e5325608ccd66f1b379a74faf35b4087db536f3.json +1 -0
  24. xinference/web/ui/node_modules/.cache/babel-loader/b374bf2be1eac3cff5e0a8528a8e816e266ece911f714c123110961798a93a3b.json +1 -0
  25. xinference/web/ui/node_modules/.cache/babel-loader/d8a42e9df7157de9f28eecefdf178fd113bf2280d28471b6e32a8a45276042df.json +1 -0
  26. xinference/web/ui/node_modules/.cache/babel-loader/ed010102f476cd1a22b49be031a7f94f2ab3dd7ba8bf58839a771d46e28ff559.json +1 -0
  27. xinference/web/ui/node_modules/.cache/babel-loader/fe653ca0ca4297b415c0be4013574870d0465a657ae0f3d3f5b66ef6a831390c.json +1 -0
  28. {xinference-0.7.0.dist-info → xinference-0.7.1.dist-info}/METADATA +1 -1
  29. {xinference-0.7.0.dist-info → xinference-0.7.1.dist-info}/RECORD +34 -34
  30. xinference/web/ui/build/static/js/main.8126d441.js +0 -3
  31. xinference/web/ui/build/static/js/main.8126d441.js.map +0 -1
  32. xinference/web/ui/node_modules/.cache/babel-loader/06eb9846159adb398d44df0b0debc256a9fd9e8171a7d68f5c4ee4d655acfa45.json +0 -1
  33. xinference/web/ui/node_modules/.cache/babel-loader/3bda436576ecb05f81f7b6ec475d1cfaf03e2b3066e3a75902fe6e8c4773b43b.json +0 -1
  34. xinference/web/ui/node_modules/.cache/babel-loader/48878f5178bad1a47757e011af41c974a7946efa29485506c4d19f25bf5d522d.json +0 -1
  35. xinference/web/ui/node_modules/.cache/babel-loader/59574eb63cfe9ed2e58d2f5a420e1ae54354e243a602e9bc73deae3147ed4f98.json +0 -1
  36. xinference/web/ui/node_modules/.cache/babel-loader/75a5abcbc92da335fdde530f5689194ec79a4b2345b8cba594f8904d3b88e3c6.json +0 -1
  37. xinference/web/ui/node_modules/.cache/babel-loader/84bfe7afede38da1f8ad569d891276fe4d66cfb87bf5c9ff7a113788ba62bb88.json +0 -1
  38. xinference/web/ui/node_modules/.cache/babel-loader/940ed05006583b955894e2b8f65a4a5ebf34f8149d747f59fae5131f17d65482.json +0 -1
  39. xinference/web/ui/node_modules/.cache/babel-loader/9c5f03db9aa88582a9b69b25c7f1acc78ba7fc61f743c9ed7399abb292d5dbde.json +0 -1
  40. xinference/web/ui/node_modules/.cache/babel-loader/c02e70e9b9efcf3bd056606308104308d6a6ac559f2bc0b4454c11fb5874457c.json +0 -1
  41. xinference/web/ui/node_modules/.cache/babel-loader/e610aefd7000a3f8542a25cb66c64671cc8da18350de4e5b577102ba4bb78d65.json +0 -1
  42. /xinference/web/ui/build/static/js/{main.8126d441.js.LICENSE.txt → main.778615cc.js.LICENSE.txt} +0 -0
  43. {xinference-0.7.0.dist-info → xinference-0.7.1.dist-info}/LICENSE +0 -0
  44. {xinference-0.7.0.dist-info → xinference-0.7.1.dist-info}/WHEEL +0 -0
  45. {xinference-0.7.0.dist-info → xinference-0.7.1.dist-info}/entry_points.txt +0 -0
  46. {xinference-0.7.0.dist-info → xinference-0.7.1.dist-info}/top_level.txt +0 -0
xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
 
 version_json = '''
 {
- "date": "2023-12-08T13:45:18+0800",
+ "date": "2023-12-12T19:35:36+0800",
  "dirty": false,
  "error": null,
- "full-revisionid": "b5a5f0a270f85e451591eba34fe615a0fc8ce4bf",
- "version": "0.7.0"
+ "full-revisionid": "91f5f13c3914e1943977c80281ce485e8e3502cf",
+ "version": "0.7.1"
 }
 ''' # END VERSION_JSON
 
xinference/api/restful_api.py CHANGED
@@ -258,9 +258,6 @@ class RESTfulAPI:
                 f"{pprint.pformat(invalid_routes)}"
             )
 
-        for tp in [CreateChatCompletion, CreateCompletion]:
-            logger.debug("Dump request model fields:\n%s", tp.__fields__)
-
         class SPAStaticFiles(StaticFiles):
             async def get_response(self, path: str, scope):
                 response = await super().get_response(path, scope)
@@ -721,7 +718,10 @@ class RESTfulAPI:
 
         if (
             not body.messages
-            or body.messages[-1].get("role") != "user"
+            or (
+                body.messages[-1].get("role") != "user"
+                and body.messages[-1].get("role") != "system"
+            )
             or not body.messages[-1].get("content")
         ):
            raise HTTPException(
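
A note on the second hunk: the relaxed check lets a request whose last message has role "system" pass validation, where 0.7.0 only accepted "user". A minimal standalone sketch of the equivalent logic (the function name and payloads here are hypothetical, not part of the API):

    # Sketch only: mirrors the relaxed validation above.
    def last_message_ok(messages) -> bool:
        if not messages:
            return False
        last = messages[-1]
        # 0.7.1 accepts "system" as the final role in addition to "user".
        return last.get("role") in ("user", "system") and bool(last.get("content"))

    assert last_message_ok([{"role": "system", "content": "You are helpful."}])
    assert not last_message_ok([{"role": "assistant", "content": "hi"}])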
xinference/deploy/utils.py CHANGED
@@ -60,7 +60,9 @@ def get_config_dict(
         "disable_existing_loggers": False,
         "formatters": {
             "formatter": {
-                "format": "%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s"
+                "format": (
+                    "%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s"
+                )
             },
         },
         "filters": {
xinference/model/embedding/__init__.py CHANGED
@@ -16,7 +16,7 @@ import codecs
 import json
 import os
 
-from .core import EmbeddingModelSpec, get_cache_status
+from .core import MODEL_NAME_TO_REVISION, EmbeddingModelSpec, get_cache_status
 from .custom import CustomEmbeddingModelSpec, register_embedding, unregister_embedding
 
 _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
@@ -27,12 +27,16 @@ BUILTIN_EMBEDDING_MODELS = dict(
     (spec["model_name"], EmbeddingModelSpec(**spec))
     for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
 )
+for model_name, model_spec in BUILTIN_EMBEDDING_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 MODELSCOPE_EMBEDDING_MODELS = dict(
     (spec["model_name"], EmbeddingModelSpec(**spec))
     for spec in json.load(
         codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
     )
 )
+for model_name, model_spec in MODELSCOPE_EMBEDDING_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 
 from ...constants import XINFERENCE_MODEL_DIR
 
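Both loops feed one shared registry: each builtin model name maps to the list of revisions known from either hub, so a cache downloaded from ModelScope still validates when the spec in hand came from Hugging Face (the same loops appear in the rerank registry below). A minimal sketch of the pattern with made-up revision strings:

    from collections import defaultdict
    from typing import Dict, List

    MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)

    # Hypothetical revisions standing in for the real spec files.
    builtin = {"bge-base-en": "hf-rev-123"}
    modelscope = {"bge-base-en": "ms-rev-456"}

    for name, rev in builtin.items():
        MODEL_NAME_TO_REVISION[name].append(rev)
    for name, rev in modelscope.items():
        MODEL_NAME_TO_REVISION[name].append(rev)

    # A model counts as cached if any known revision matches the meta file.
    assert MODEL_NAME_TO_REVISION["bge-base-en"] == ["hf-rev-123", "ms-rev-456"]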
xinference/model/embedding/core.py CHANGED
@@ -15,7 +15,8 @@
 import logging
 import os
 import shutil
-from typing import List, Optional, Tuple, Union, no_type_check
+from collections import defaultdict
+from typing import Dict, List, Optional, Tuple, Union, no_type_check
 
 import numpy as np
 from pydantic import BaseModel
@@ -23,11 +24,14 @@ from pydantic import BaseModel
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import Embedding, EmbeddingData, EmbeddingUsage
 from ..core import ModelDescription
-from ..utils import valid_model_revision
+from ..utils import is_model_cached, valid_model_revision
 
 logger = logging.getLogger(__name__)
 
 SUPPORTED_SCHEMES = ["s3"]
+# Used for check whether the model is cached.
+# Init when registering all the builtin models.
+MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
 
 
 class EmbeddingModelSpec(BaseModel):
@@ -195,11 +199,7 @@ def cache(model_spec: EmbeddingModelSpec):
 def get_cache_status(
     model_spec: EmbeddingModelSpec,
 ) -> bool:
-    cache_dir = os.path.realpath(
-        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-    )
-    meta_path = os.path.join(cache_dir, "__valid_download")
-    return valid_model_revision(meta_path, model_spec.model_revision)
+    return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
 
 
 class EmbeddingModel:
xinference/model/llm/ggml/chatglm.py CHANGED
@@ -134,9 +134,9 @@ class ChatglmCppChatModel(LLM):
                 {
                     "index": 0,
                     "delta": {
-                        "content": token
-                        if isinstance(token, str)
-                        else token.content,
+                        "content": (
+                            token if isinstance(token, str) else token.content
+                        ),
                     },
                     "finish_reason": None,
                 }
@@ -223,8 +223,10 @@ class ChatglmCppChatModel(LLM):
                 chatglm_tools.append(elem["function"])
         return {
             "role": "system",
-            "content": f"Answer the following questions as best as you can. You have access to the following tools:\n"
-            f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}",
+            "content": (
+                f"Answer the following questions as best as you can. You have access to the following tools:\n"
+                f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}"
+            ),
         }
 
     def chat(
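
Both hunks are black-style reflows, but the second one shows how ChatGLM-style function calling is wired up: the tool list is serialized into a system prompt. A runnable illustration with a made-up tool definition (real entries come from the request's tools field):

    import json

    chatglm_tools = [
        {"name": "get_weather", "description": "Look up weather", "parameters": {}}
    ]  # hypothetical tool
    system_message = {
        "role": "system",
        "content": (
            "Answer the following questions as best as you can. "
            "You have access to the following tools:\n"
            + json.dumps(chatglm_tools, indent=4, ensure_ascii=False)
        ),
    }
    print(system_message["content"])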
xinference/model/llm/llm_family.py CHANGED
@@ -588,31 +588,57 @@ def cache_from_huggingface(
     return cache_dir
 
 
+def _check_revision(
+    llm_family: LLMFamilyV1,
+    llm_spec: "LLMSpecV1",
+    builtin: list,
+    meta_path: str,
+) -> bool:
+    for family in builtin:
+        if llm_family.model_name == family.model_name:
+            specs = family.model_specs
+            for spec in specs:
+                if (
+                    spec.model_format == "pytorch"
+                    and spec.model_size_in_billions == llm_spec.model_size_in_billions
+                ):
+                    return valid_model_revision(meta_path, spec.model_revision)
+    return False
+
+
 def get_cache_status(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
 ) -> Union[bool, List[bool]]:
+    """
+    When calling this function from above, `llm_family` is constructed only from BUILTIN_LLM_FAMILIES,
+    so we should check both huggingface and modelscope cache files.
+    """
     cache_dir = _get_cache_dir(llm_family, llm_spec, create_if_not_exist=False)
+    # check revision for pytorch model
     if llm_spec.model_format == "pytorch":
-        return _skip_download(
-            cache_dir,
-            llm_spec.model_format,
-            llm_spec.model_hub,
-            llm_spec.model_revision,
-            "none",
-        )
+        hf_meta_path = _get_meta_path(cache_dir, "pytorch", "huggingface", "none")
+        ms_meta_path = _get_meta_path(cache_dir, "pytorch", "modelscope", "none")
+        revisions = [
+            _check_revision(llm_family, llm_spec, BUILTIN_LLM_FAMILIES, hf_meta_path),
+            _check_revision(
+                llm_family, llm_spec, BUILTIN_MODELSCOPE_LLM_FAMILIES, ms_meta_path
+            ),
+        ]
+        return any(revisions)
+    # just check meta file for ggml and gptq model
     elif llm_spec.model_format in ["ggmlv3", "ggufv2", "gptq"]:
         ret = []
         for q in llm_spec.quantizations:
-            ret.append(
-                _skip_download(
-                    cache_dir,
-                    llm_spec.model_format,
-                    llm_spec.model_hub,
-                    llm_spec.model_revision,
-                    q,
-                )
+            assert q is not None
+            hf_meta_path = _get_meta_path(
+                cache_dir, llm_spec.model_format, "huggingface", q
+            )
+            ms_meta_path = _get_meta_path(
+                cache_dir, llm_spec.model_format, "modelscope", q
             )
+            results = [os.path.exists(hf_meta_path), os.path.exists(ms_meta_path)]
+            ret.append(any(results))
         return ret
     else:
         raise ValueError(f"Unsupported model format: {llm_spec.model_format}")
xinference/model/llm/pytorch/core.py CHANGED
@@ -345,6 +345,7 @@ class PytorchModel(LLM):
             inputs = input
 
         tokenizer = self._tokenizer
+        tokenizer.pad_token = tokenizer.eos_token
         is_llama = "llama" in str(type(self._model))  # llama supports batch inference
         is_chatglm = "chatglm" in str(type(self._model))
         if is_llama:
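
Why assign pad_token: LLaMA-family tokenizers ship without one, and batched encoding needs a pad token, so reusing EOS is the common workaround. A sketch assuming Hugging Face transformers is installed; gpt2 is just a small example model that also lacks a pad token:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # example model, no pad token
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token  # reuse EOS for padding
    batch = tokenizer(["short", "a longer prompt"], padding=True)
    print(batch["input_ids"])  # rows padded to equal length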
xinference/model/llm/pytorch/utils.py CHANGED
@@ -259,6 +259,7 @@ def generate_stream(
             raise ValueError("Invalid stop field type.")
 
         if stream:
+            output = output.strip("�")
             tmp_output_length = len(output)
             output = output[last_output_length:]
             last_output_length = tmp_output_length
@@ -424,6 +425,7 @@ def generate_stream_falcon(
             raise ValueError("Invalid stop field type.")
 
         if stream:
+            output = output.strip("�")
             tmp_output_length = len(output)
             output = output[last_output_length:]
             last_output_length = tmp_output_length
@@ -552,6 +554,7 @@ def generate_stream_chatglm(
         response = process_response(response)
 
         if stream:
+            response = response.strip("�")
             tmp_response_length = len(response)
             response = response[last_response_length:]
             last_response_length = tmp_response_length
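
The stripped character is U+FFFD, the Unicode replacement character: when a detokenizer flushes mid-way through a multi-byte UTF-8 sequence, the dangling bytes decode to "�", and without the strip it would leak into the streamed delta. A pure-Python illustration:

    data = "你好".encode("utf-8")  # 6 bytes, 3 per character
    partial = data[:4].decode("utf-8", errors="replace")
    print(partial)              # '你�' — trailing replacement character
    print(partial.strip("�"))   # '你' — dropped until more bytes arrive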
xinference/model/llm/vllm/core.py CHANGED
@@ -79,6 +79,10 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "internlm-chat-20b",
     "qwen-chat",
     "Yi",
+    "Yi-chat",
+    "code-llama",
+    "code-llama-python",
+    "code-llama-instruct",
     "mistral-instruct-v0.1",
     "chatglm3",
 ]
@@ -345,7 +349,7 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         full_prompt = self.get_prompt(prompt, chat_history, prompt_style)
 
         sanitized = self._sanitize_chat_config(generate_config)
-        stream = sanitized["stream"]
+        stream = sanitized.get("stream", None)
 
         if stream:
             agen = await self.async_generate(full_prompt, sanitized)
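
The one-line fix in the second hunk: subscripting raises KeyError when a caller's generate_config omits "stream", while .get falls back to None and the request takes the non-streaming path. In miniature, with a hypothetical config dict:

    config = {"temperature": 0.7}  # caller never set "stream"
    try:
        config["stream"]           # 0.7.0 behavior: raises KeyError
    except KeyError:
        pass
    assert config.get("stream", None) is None  # 0.7.1: falsy, non-streaming path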
xinference/model/rerank/__init__.py CHANGED
@@ -16,7 +16,7 @@ import codecs
 import json
 import os
 
-from .core import RerankModelSpec, get_cache_status
+from .core import MODEL_NAME_TO_REVISION, RerankModelSpec, get_cache_status
 
 _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
 _model_spec_modelscope_json = os.path.join(
@@ -26,11 +26,15 @@ BUILTIN_RERANK_MODELS = dict(
     (spec["model_name"], RerankModelSpec(**spec))
     for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
 )
+for model_name, model_spec in BUILTIN_RERANK_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 MODELSCOPE_RERANK_MODELS = dict(
     (spec["model_name"], RerankModelSpec(**spec))
     for spec in json.load(
         codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
     )
 )
+for model_name, model_spec in MODELSCOPE_RERANK_MODELS.items():
+    MODEL_NAME_TO_REVISION[model_name].append(model_spec.model_revision)
 del _model_spec_json
 del _model_spec_modelscope_json
xinference/model/rerank/core.py CHANGED
@@ -15,6 +15,7 @@
 import logging
 import os
 import uuid
+from collections import defaultdict
 from typing import Dict, List, Optional, Tuple
 
 import numpy as np
@@ -23,10 +24,14 @@ from pydantic import BaseModel
 from ...constants import XINFERENCE_CACHE_DIR
 from ...types import Document, DocumentObj, Rerank
 from ..core import ModelDescription
-from ..utils import valid_model_revision
+from ..utils import is_model_cached, valid_model_revision
 
 logger = logging.getLogger(__name__)
 
+# Used for check whether the model is cached.
+# Init when registering all the builtin models.
+MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
+
 
 class RerankModelSpec(BaseModel):
     model_name: str
@@ -126,11 +131,7 @@ class RerankModel:
 def get_cache_status(
     model_spec: RerankModelSpec,
 ) -> bool:
-    cache_dir = os.path.realpath(
-        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
-    )
-    meta_path = os.path.join(cache_dir, "__valid_download")
-    return valid_model_revision(meta_path, model_spec.model_revision)
+    return is_model_cached(model_spec, MODEL_NAME_TO_REVISION)
 
 
 def cache(model_spec: RerankModelSpec):
xinference/model/utils.py CHANGED
@@ -16,11 +16,11 @@ import logging
 import os
 from json import JSONDecodeError
 from pathlib import Path
-from typing import Callable, Dict, Optional, Tuple
+from typing import Any, Callable, Dict, Optional, Tuple
 
 from fsspec import AbstractFileSystem
 
-from ..constants import XINFERENCE_ENV_MODEL_SRC
+from ..constants import XINFERENCE_CACHE_DIR, XINFERENCE_ENV_MODEL_SRC
 
 logger = logging.getLogger(__name__)
 MAX_ATTEMPTS = 3
@@ -132,6 +132,17 @@ def valid_model_revision(
     return real_revision == expected_model_revision
 
 
+def is_model_cached(model_spec: Any, name_to_revisions_mapping: Dict):
+    cache_dir = os.path.realpath(
+        os.path.join(XINFERENCE_CACHE_DIR, model_spec.model_name)
+    )
+    meta_path = os.path.join(cache_dir, "__valid_download")
+    revisions = name_to_revisions_mapping[model_spec.model_name]
+    if model_spec.model_revision not in revisions:  # Usually for UT
+        revisions.append(model_spec.model_revision)
+    return any([valid_model_revision(meta_path, revision) for revision in revisions])
+
+
 def is_valid_model_name(model_name: str) -> bool:
     import re
 
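is_model_cached accepts a match against any revision registered for the name, falling back to the spec's own revision so unit tests with ad-hoc specs still pass. A sketch of that behavior with a stand-in for valid_model_revision (the real helper parses the hub's meta JSON):

    import json
    import os
    import tempfile

    def fake_valid_model_revision(meta_path, expected_revision):
        # Stand-in only: the real valid_model_revision lives in this module.
        if not os.path.exists(meta_path):
            return False
        with open(meta_path) as f:
            return json.load(f).get("revision") == expected_revision

    with tempfile.TemporaryDirectory() as cache_dir:
        meta_path = os.path.join(cache_dir, "__valid_download")
        with open(meta_path, "w") as f:
            json.dump({"revision": "ms-rev-456"}, f)
        revisions = ["hf-rev-123", "ms-rev-456"]  # hypothetical hub revisions
        assert any(fake_valid_model_revision(meta_path, r) for r in revisions)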
xinference/web/ui/build/asset-manifest.json CHANGED
@@ -1,11 +1,11 @@
 {
   "files": {
-    "main.js": "./static/js/main.8126d441.js",
+    "main.js": "./static/js/main.778615cc.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
-    "main.8126d441.js.map": "./static/js/main.8126d441.js.map"
+    "main.778615cc.js.map": "./static/js/main.778615cc.js.map"
   },
   "entrypoints": [
-    "static/js/main.8126d441.js"
+    "static/js/main.778615cc.js"
   ]
 }
xinference/web/ui/build/index.html CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.8126d441.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.778615cc.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>