xinference 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (48)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +111 -13
  3. xinference/client/restful/restful_client.py +2 -1
  4. xinference/conftest.py +18 -15
  5. xinference/constants.py +2 -0
  6. xinference/core/image_interface.py +252 -0
  7. xinference/core/supervisor.py +3 -10
  8. xinference/deploy/cmdline.py +69 -4
  9. xinference/deploy/local.py +1 -1
  10. xinference/deploy/supervisor.py +1 -1
  11. xinference/model/image/__init__.py +13 -7
  12. xinference/model/image/core.py +17 -1
  13. xinference/model/llm/__init__.py +2 -0
  14. xinference/model/llm/ggml/llamacpp.py +1 -5
  15. xinference/model/llm/llm_family.json +98 -13
  16. xinference/model/llm/llm_family_modelscope.json +98 -7
  17. xinference/model/llm/pytorch/chatglm.py +2 -1
  18. xinference/model/llm/pytorch/internlm2.py +2 -1
  19. xinference/model/llm/sglang/__init__.py +13 -0
  20. xinference/model/llm/sglang/core.py +365 -0
  21. xinference/model/llm/utils.py +35 -12
  22. xinference/model/llm/vllm/core.py +17 -0
  23. xinference/web/ui/build/asset-manifest.json +3 -3
  24. xinference/web/ui/build/index.html +1 -1
  25. xinference/web/ui/build/static/js/{main.78829790.js → main.66b1c4fb.js} +3 -3
  26. xinference/web/ui/build/static/js/main.66b1c4fb.js.map +1 -0
  27. xinference/web/ui/node_modules/.cache/babel-loader/0bd70b1ecf307e2681318e864f4692305b6350c8683863007f4caf2f9ac33b6e.json +1 -0
  28. xinference/web/ui/node_modules/.cache/babel-loader/3e055de705e397e1d413d7f429589b1a98dd78ef378b97f0cdb462c5f2487d5e.json +1 -0
  29. xinference/web/ui/node_modules/.cache/babel-loader/60c4b98d8ea7479fb0c94cfd19c8128f17bd7e27a1e73e6dd9adf6e9d88d18eb.json +1 -0
  30. xinference/web/ui/node_modules/.cache/babel-loader/7e094845f611802b024b57439cbf911038169d06cdf6c34a72a7277f35aa71a4.json +1 -0
  31. xinference/web/ui/node_modules/.cache/babel-loader/b400cfc9db57fa6c70cd2bad055b73c5079fde0ed37974009d898083f6af8cd8.json +1 -0
  32. xinference/web/ui/node_modules/.cache/babel-loader/e1d9b2ae4e1248658704bc6bfc5d6160dcd1a9e771ea4ae8c1fed0aaddeedd29.json +1 -0
  33. xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +1 -0
  34. {xinference-0.9.2.dist-info → xinference-0.9.4.dist-info}/METADATA +8 -5
  35. {xinference-0.9.2.dist-info → xinference-0.9.4.dist-info}/RECORD +40 -37
  36. {xinference-0.9.2.dist-info → xinference-0.9.4.dist-info}/WHEEL +1 -1
  37. xinference/web/ui/build/static/js/main.78829790.js.map +0 -1
  38. xinference/web/ui/node_modules/.cache/babel-loader/22858de5265f2d279fca9f2f54dfb147e4b2704200dfb5d2ad3ec9769417328f.json +0 -1
  39. xinference/web/ui/node_modules/.cache/babel-loader/30670751f55508ef3b861e13dd71b9e5a10d2561373357a12fc3831a0b77fd93.json +0 -1
  40. xinference/web/ui/node_modules/.cache/babel-loader/396f7ce6ae6900bfdb00e369ade8a05045dc1df025610057ff7436d9e58af81c.json +0 -1
  41. xinference/web/ui/node_modules/.cache/babel-loader/5282ee05e064b3a80bc991e9003ddef6a4958471d8f4fc65589dc64553365cdd.json +0 -1
  42. xinference/web/ui/node_modules/.cache/babel-loader/83beb31daa7169fb0057453d4f86411f1effd3e3f7af97472cbd22accbfc65bb.json +0 -1
  43. xinference/web/ui/node_modules/.cache/babel-loader/ddf597663270471b31251b2abb36e3fa093efe20489387d996f993d2c61be112.json +0 -1
  44. xinference/web/ui/node_modules/.cache/babel-loader/e8687f75d2adacd34852b71c41ca17203d6fb4c8999ea55325bb2939f9d9ea90.json +0 -1
  45. /xinference/web/ui/build/static/js/{main.78829790.js.LICENSE.txt → main.66b1c4fb.js.LICENSE.txt} +0 -0
  46. {xinference-0.9.2.dist-info → xinference-0.9.4.dist-info}/LICENSE +0 -0
  47. {xinference-0.9.2.dist-info → xinference-0.9.4.dist-info}/entry_points.txt +0 -0
  48. {xinference-0.9.2.dist-info → xinference-0.9.4.dist-info}/top_level.txt +0 -0
@@ -360,7 +360,7 @@ def worker(
  )


- @cli.command("register", help="Registers a new model with Xinference for deployment.")
+ @cli.command("register", help="Register a new model with Xinference for deployment.")
  @click.option("--endpoint", "-e", type=str, help="Xinference endpoint.")
  @click.option(
  "--model-type",
@@ -397,7 +397,7 @@ def register_model(

  @cli.command(
  "unregister",
- help="Unregisters a model from Xinference, removing it from deployment.",
+ help="Unregister a model from Xinference, removing it from deployment.",
  )
  @click.option("--endpoint", "-e", type=str, help="Xinference endpoint.")
  @click.option(
@@ -423,7 +423,7 @@ def unregister_model(
  )


- @cli.command("registrations", help="Lists all registered models in Xinference.")
+ @cli.command("registrations", help="List all registered models in Xinference.")
  @click.option(
  "--endpoint",
  "-e",
@@ -488,6 +488,22 @@ def list_model_registrations(
  ),
  file=sys.stderr,
  )
+ elif model_type == "rerank":
+ for registration in registrations:
+ model_name = registration["model_name"]
+ model_family = client.get_model_registration(model_type, model_name)
+ table.append(
+ [
+ model_type,
+ model_family["model_name"],
+ model_family["language"],
+ registration["is_builtin"],
+ ]
+ )
+ print(
+ tabulate(table, headers=["Type", "Name", "Language", "Is-built-in"]),
+ file=sys.stderr,
+ )
  elif model_type == "image":
  for registration in registrations:
  model_name = registration["model_name"]
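
For reference, the new rerank branch can also be exercised through the Python client rather than the CLI. A hedged sketch, assuming a local server; the endpoint is a placeholder, and list_model_registrations/get_model_registration are the same client calls the CLI code above uses:

    # Hypothetical usage of the new rerank listing via the RESTful client.
    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    for registration in client.list_model_registrations("rerank"):
        family = client.get_model_registration("rerank", registration["model_name"])
        print(family["model_name"], family["language"], registration["is_builtin"])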
@@ -711,6 +727,9 @@ def model_list(endpoint: Optional[str]):

  llm_table = []
  embedding_table = []
+ rerank_table = []
+ image_table = []
+ audio_table = []
  models = client.list_models()
  for model_uid, model_spec in models.items():
  if model_spec["model_type"] == "LLM":
@@ -733,6 +752,23 @@ def model_list(endpoint: Optional[str]):
  model_spec["dimensions"],
  ]
  )
+ elif model_spec["model_type"] == "rerank":
+ rerank_table.append(
+ [model_uid, model_spec["model_type"], model_spec["model_name"]]
+ )
+ elif model_spec["model_type"] == "image":
+ image_table.append(
+ [
+ model_uid,
+ model_spec["model_type"],
+ model_spec["model_name"],
+ str(model_spec["controlnet"]),
+ ]
+ )
+ elif model_spec["model_type"] == "audio":
+ audio_table.append(
+ [model_uid, model_spec["model_type"], model_spec["model_name"]]
+ )
  if llm_table:
  print(
  tabulate(
@@ -748,6 +784,7 @@ def model_list(endpoint: Optional[str]):
  ),
  file=sys.stderr,
  )
+ print() # add a blank line for better visual experience
  if embedding_table:
  print(
  tabulate(
@@ -761,6 +798,34 @@ def model_list(endpoint: Optional[str]):
  ),
  file=sys.stderr,
  )
+ print()
+ if rerank_table:
+ print(
+ tabulate(
+ rerank_table,
+ headers=["UID", "Type", "Name"],
+ ),
+ file=sys.stderr,
+ )
+ print()
+ if image_table:
+ print(
+ tabulate(
+ image_table,
+ headers=["UID", "Type", "Name", "Controlnet"],
+ ),
+ file=sys.stderr,
+ )
+ print()
+ if audio_table:
+ print(
+ tabulate(
+ audio_table,
+ headers=["UID", "Type", "Name"],
+ ),
+ file=sys.stderr,
+ )
+ print()


  @cli.command(
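
All of the new per-type tables are rendered with the same tabulate call as the existing LLM and embedding tables. A minimal standalone sketch; the UID and model name below are made-up examples:

    from tabulate import tabulate

    # One illustrative image-model row, matching the headers in the diff.
    image_table = [["my-sd-uid", "image", "stable-diffusion-v1.5", "None"]]
    print(tabulate(image_table, headers=["UID", "Type", "Name", "Controlnet"]))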
@@ -981,7 +1046,7 @@ def model_chat(
  )


- @cli.command("vllm-models", help="Query and display models compatible with VLLM.")
+ @cli.command("vllm-models", help="Query and display models compatible with vLLM.")
  @click.option("--endpoint", "-e", type=str, help="Xinference endpoint.")
  def vllm_models(endpoint: Optional[str]):
  endpoint = get_endpoint(endpoint)
@@ -132,4 +132,4 @@ def main(
  auth_config_file=auth_config_file,
  )
  finally:
- local_cluster.terminate()
+ local_cluster.kill()
@@ -98,4 +98,4 @@ def main(
  auth_config_file=auth_config_file,
  )
  finally:
- local_cluster.terminate()
+ local_cluster.kill()
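
Both deploy entry points now call kill() instead of terminate() on the cluster subprocess. Assuming local_cluster is a multiprocessing.Process (as it appears to be here), the change swaps SIGTERM for SIGKILL on POSIX. A minimal sketch of the difference:

    # terminate() sends SIGTERM, which a child can catch or ignore;
    # kill() sends SIGKILL, which cannot be trapped, so shutdown is immediate.
    import multiprocessing
    import time

    def serve():
        while True:
            time.sleep(1)

    if __name__ == "__main__":
        p = multiprocessing.Process(target=serve)
        p.start()
        p.kill()  # untrappable, unlike p.terminate()
        p.join()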
@@ -18,7 +18,9 @@ import os
  from itertools import chain

  from .core import (
+ BUILTIN_IMAGE_MODELS,
  IMAGE_MODEL_DESCRIPTIONS,
+ MODELSCOPE_IMAGE_MODELS,
  ImageModelFamilyV1,
  generate_image_description,
  get_cache_status,
@@ -29,14 +31,18 @@ _model_spec_json = os.path.join(os.path.dirname(__file__), "model_spec.json")
  _model_spec_modelscope_json = os.path.join(
  os.path.dirname(__file__), "model_spec_modelscope.json"
  )
- BUILTIN_IMAGE_MODELS = dict(
- (spec["model_name"], ImageModelFamilyV1(**spec))
- for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
+ BUILTIN_IMAGE_MODELS.update(
+ dict(
+ (spec["model_name"], ImageModelFamilyV1(**spec))
+ for spec in json.load(codecs.open(_model_spec_json, "r", encoding="utf-8"))
+ )
  )
- MODELSCOPE_IMAGE_MODELS = dict(
- (spec["model_name"], ImageModelFamilyV1(**spec))
- for spec in json.load(
- codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
+ MODELSCOPE_IMAGE_MODELS.update(
+ dict(
+ (spec["model_name"], ImageModelFamilyV1(**spec))
+ for spec in json.load(
+ codecs.open(_model_spec_modelscope_json, "r", encoding="utf-8")
+ )
  )
  )

@@ -27,6 +27,8 @@ MAX_ATTEMPTS = 3
  logger = logging.getLogger(__name__)

  IMAGE_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
+ BUILTIN_IMAGE_MODELS: Dict[str, "ImageModelFamilyV1"] = {}
+ MODELSCOPE_IMAGE_MODELS: Dict[str, "ImageModelFamilyV1"] = {}


  def get_image_model_descriptions():
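
Defining the dicts once in core.py and filling them with update() in __init__.py (the previous hunk) keeps a single shared object: any module that already did `from .core import BUILTIN_IMAGE_MODELS` sees the populated mapping, whereas rebinding the name would leave importers holding an empty dict. A toy sketch of the pattern, with stand-in names:

    # registry stands in for BUILTIN_IMAGE_MODELS as defined in core.py.
    registry: dict = {}

    def load_specs(specs):
        # update() mutates the shared dict in place; `registry = {...}`
        # would rebind only the local name.
        registry.update({spec["model_name"]: spec for spec in specs})

    load_specs([{"model_name": "stable-diffusion-v1.5"}])
    print("stable-diffusion-v1.5" in registry)  # True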
@@ -151,7 +153,21 @@ def get_cache_status(
  ) -> bool:
  cache_dir = get_cache_dir(model_spec)
  meta_path = os.path.join(cache_dir, "__valid_download")
- return valid_model_revision(meta_path, model_spec.model_revision)
+
+ model_name = model_spec.model_name
+ if model_name in BUILTIN_IMAGE_MODELS and model_name in MODELSCOPE_IMAGE_MODELS:
+ hf_spec = BUILTIN_IMAGE_MODELS[model_name]
+ ms_spec = MODELSCOPE_IMAGE_MODELS[model_name]
+
+ return any(
+ [
+ valid_model_revision(meta_path, hf_spec.model_revision),
+ valid_model_revision(meta_path, ms_spec.model_revision),
+ ]
+ )
+ else: # Usually for UT
+ logger.warning(f"Cannot find builtin image model spec: {model_name}")
+ return valid_model_revision(meta_path, model_spec.model_revision)


  def create_image_model_instance(
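
The rewritten get_cache_status treats a cached image model as valid if its recorded revision matches either the HuggingFace spec or the ModelScope spec for the same model name. A condensed restatement of that logic; valid_model_revision follows the diff, the other names are stand-ins:

    def is_cached(meta_path, hf_revision, ms_revision, valid_model_revision):
        # Valid if the stored revision matches either hub's expected revision.
        return any(
            valid_model_revision(meta_path, rev)
            for rev in (hf_revision, ms_revision)
        )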
@@ -60,6 +60,7 @@ def _install():
  from .pytorch.qwen_vl import QwenVLChatModel
  from .pytorch.vicuna import VicunaPytorchChatModel
  from .pytorch.yi_vl import YiVLChatModel
+ from .sglang.core import SGLANGChatModel, SGLANGModel
  from .vllm.core import VLLMChatModel, VLLMModel

  # register llm classes.
@@ -79,6 +80,7 @@ def _install():
  CtransformersModel,
  ]
  )
+ LLM_CLASSES.extend([SGLANGModel, SGLANGChatModel])
  LLM_CLASSES.extend([VLLMModel, VLLMChatModel])
  LLM_CLASSES.extend(
  [
@@ -52,9 +52,6 @@ class LlamaCppModel(LLM):
  )
  self._llm = None

- def _can_apply_metal(self):
- return self.quantization.lower() in ["q4_0", "q4_1", "q4_k_s", "q4_k_m"]
-
  def _can_apply_cublas(self):
  # TODO: figure out the quantizations supported.
  return True
@@ -78,8 +75,7 @@ class LlamaCppModel(LLM):
  llamacpp_model_config["use_mlock"] = False
  llamacpp_model_config["n_gqa"] = 8

- if self._is_darwin_and_apple_silicon() and self._can_apply_metal():
- # TODO: platform.processor() is not safe, need to be replaced to other method.
+ if self._is_darwin_and_apple_silicon():
  llamacpp_model_config.setdefault("n_gpu_layers", -1)
  elif self._is_linux() and self._can_apply_cublas():
  llamacpp_model_config.setdefault("n_gpu_layers", -1)
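
With the quantization allowlist removed, every quantization now gets full GPU offload on Apple silicon. In llama-cpp-python, n_gpu_layers=-1 offloads all layers (Metal on macOS, CUDA on Linux). An illustrative call; the model path is a placeholder:

    from llama_cpp import Llama

    # n_gpu_layers=-1 asks llama.cpp to offload every layer to the GPU.
    llm = Llama(model_path="/path/to/model.gguf", n_gpu_layers=-1)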
@@ -98,6 +98,72 @@
  ]
  }
  },
+ {
+ "version": 1,
+ "context_length": 8194,
+ "model_name": "codeshell",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "generate"
+ ],
+ "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University. ",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "WisdomShell/CodeShell-7B",
+ "model_revision": "1c79ab7fd316a62ab41d764facd3548a23fa5dee"
+ }
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 8194,
+ "model_name": "codeshell-chat",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "WisdomShell/CodeShell-7B-Chat",
+ "model_revision": "3cb06f589b7b1e2f8e728c77280b1114191d24de"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "CodeShell",
+ "system_prompt": "",
+ "roles": [
+ "## human:",
+ "## assistant: "
+ ],
+ "intra_message_sep": "",
+ "inter_message_sep": "",
+ "stop_token_ids": [
+ 70000
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "|||",
+ "|<end>|"
+ ]
+ }
+ },
  {
  "version": 1,
  "context_length": 2048,
@@ -573,7 +639,7 @@
  64797,
  2
  ],
- "stop":[
+ "stop": [
  "<|user|>",
  "<|observation|>"
  ]
@@ -616,7 +682,7 @@
  64797,
  2
  ],
- "stop":[
+ "stop": [
  "<|user|>",
  "<|observation|>"
  ]
@@ -667,7 +733,6 @@
  ]
  }
  },
-
  {
  "version": 1,
  "context_length": 2048,
@@ -715,8 +780,7 @@
  "model_revision": "7f1b7394f74c630f50612a19ba90bd021c373989"
  }
  ]
- }
- ,
+ },
  {
  "version": 1,
  "context_length": 4096,
@@ -1606,7 +1670,10 @@
  "model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf",
  "model_file_name_split_template": "qwen1_5-72b-chat-{quantization}.gguf.{part}",
  "quantization_parts": {
- "q4_k_m": ["a", "b"]
+ "q4_k_m": [
+ "a",
+ "b"
+ ]
  }
  }
  ],
@@ -2658,7 +2725,11 @@
  "context_length": 32768,
  "model_name": "mixtral-v0.1",
  "model_lang": [
- "en", "fr", "it", "de", "es"
+ "en",
+ "fr",
+ "it",
+ "de",
+ "es"
  ],
  "model_ability": [
  "generate"
@@ -2699,7 +2770,11 @@
  "context_length": 32768,
  "model_name": "mixtral-instruct-v0.1",
  "model_lang": [
- "en", "fr", "it", "de", "es"
+ "en",
+ "fr",
+ "it",
+ "de",
+ "es"
  ],
  "model_ability": [
  "chat"
@@ -2798,6 +2873,17 @@
  "model_id": "01-ai/Yi-6B",
  "model_revision": "25beebcb1166b9f49458459eb7b68130b9f9cf4d"
  },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "01-ai/Yi-9B",
+ "model_revision": "f70a5ff8b2e51c5d5b20e649d7b5f4238ffe6d5b"
+ },
  {
  "model_format": "pytorch",
  "model_size_in_billions": 34,
@@ -3264,10 +3350,8 @@
  ],
  "intra_message_sep": "\n",
  "inter_message_sep": "\n",
- "stop_token_ids": [
- ],
- "stop": [
- ]
+ "stop_token_ids": [],
+ "stop": []
  }
  },
  {
@@ -3365,7 +3449,8 @@
  "context_length": 4096,
  "model_name": "deepseek-coder-instruct",
  "model_lang": [
- "en", "zh"
+ "en",
+ "zh"
  ],
  "model_ability": [
  "chat"
@@ -338,7 +338,7 @@
  64797,
  2
  ],
- "stop":[
+ "stop": [
  "<|user|>",
  "<|observation|>"
  ]
@@ -382,13 +382,12 @@
  64797,
  2
  ],
- "stop":[
+ "stop": [
  "<|user|>",
  "<|observation|>"
  ]
  }
  },
-
  {
  "version": 1,
  "context_length": 2048,
@@ -728,6 +727,74 @@
  }
  ]
  },
+ {
+ "version": 1,
+ "context_length": 8194,
+ "model_name": "codeshell",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "generate"
+ ],
+ "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University. ",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "WisdomShell/CodeShell-7B",
+ "model_revision": "master",
+ "model_hub": "modelscope"
+ }
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 8194,
+ "model_name": "codeshell-chat",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "CodeShell is a multi-language code LLM developed by the Knowledge Computing Lab of Peking University.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "WisdomShell/CodeShell-7B-Chat",
+ "model_revision": "master",
+ "model_hub": "modelscope"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "CodeShell",
+ "system_prompt": "",
+ "roles": [
+ "## human:",
+ "## assistant: "
+ ],
+ "intra_message_sep": "",
+ "inter_message_sep": "",
+ "stop_token_ids": [
+ 70000
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "|||",
+ "|<end>|"
+ ]
+ }
+ },
  {
  "version": 1,
  "context_length": 100000,
@@ -970,7 +1037,11 @@
  "context_length": 32768,
  "model_name": "mixtral-v0.1",
  "model_lang": [
- "en", "fr", "it", "de", "es"
+ "en",
+ "fr",
+ "it",
+ "de",
+ "es"
  ],
  "model_ability": [
  "generate"
@@ -996,7 +1067,11 @@
  "context_length": 32768,
  "model_name": "mixtral-instruct-v0.1",
  "model_lang": [
- "en", "fr", "it", "de", "es"
+ "en",
+ "fr",
+ "it",
+ "de",
+ "es"
  ],
  "model_ability": [
  "chat"
@@ -1052,6 +1127,18 @@
  "model_id": "01ai/Yi-6B",
  "model_revision": "master"
  },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_hub": "modelscope",
+ "model_id": "01ai/Yi-9B",
+ "model_revision": "master"
+ },
  {
  "model_format": "pytorch",
  "model_size_in_billions": 34,
@@ -1917,7 +2004,10 @@
  "model_file_name_template": "qwen1_5-72b-chat-{quantization}.gguf",
  "model_file_name_split_template": "qwen1_5-72b-chat-{quantization}.gguf.{part}",
  "quantization_parts": {
- "q4_k_m": ["a", "b"]
+ "q4_k_m": [
+ "a",
+ "b"
+ ]
  }
  }
  ],
@@ -1996,7 +2086,8 @@
  "context_length": 4096,
  "model_name": "deepseek-coder-instruct",
  "model_lang": [
- "en", "zh"
+ "en",
+ "zh"
  ],
  "model_ability": [
  "chat"
@@ -148,6 +148,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):

  def _stream_generator():
  last_chunk_text_length = 0
+ chunk_id = "chat-" + str(uuid.uuid1())
  for chunk_text, _ in self._model.stream_chat(
  self._tokenizer, prompt, chat_history, **kwargs
  ):
@@ -157,7 +158,7 @@ class ChatglmPytorchChatModel(PytorchChatModel):
  text=chunk_text, index=0, logprobs=None, finish_reason=None
  )
  yield CompletionChunk(
- id=str(uuid.uuid1()),
+ id=chunk_id,
  object="text_completion",
  created=int(time.time()),
  model=self.model_uid,
@@ -118,6 +118,7 @@ class Internlm2PytorchChatModel(PytorchChatModel):

  def _stream_generator():
  last_chunk_text_length = 0
+ chunk_id = "chat-" + str(uuid.uuid1())
  for chunk_text, _ in self._model.stream_chat(
  self._tokenizer, prompt, input_history, **kwargs
  ):
@@ -127,7 +128,7 @@ class Internlm2PytorchChatModel(PytorchChatModel):
  text=chunk_text, index=0, logprobs=None, finish_reason=None
  )
  yield CompletionChunk(
- id=str(uuid.uuid1()),
+ id=chunk_id,
  object="text_completion",
  created=int(time.time()),
  model=self.model_uid,
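
Both stream fixes (chatglm and internlm2) hoist the uuid out of the generator loop so that every chunk of one streamed completion carries the same id, matching the OpenAI streaming convention. A minimal sketch of the intent:

    import uuid

    def stream_chunks(pieces):
        # Generate the id once per stream, not once per chunk.
        chunk_id = "chat-" + str(uuid.uuid1())
        for text in pieces:
            yield {"id": chunk_id, "object": "text_completion", "text": text}

    assert len({c["id"] for c in stream_chunks(["Hel", "lo"])}) == 1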
@@ -0,0 +1,13 @@
+ # Copyright 2022-2024 XProbe Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.