xinference 0.9.4__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

Files changed (103)
  1. xinference/_version.py +3 -3
  2. xinference/api/oauth2/auth_service.py +47 -18
  3. xinference/api/oauth2/types.py +1 -0
  4. xinference/api/restful_api.py +34 -7
  5. xinference/client/oscar/actor_client.py +4 -3
  6. xinference/client/restful/restful_client.py +20 -4
  7. xinference/conftest.py +13 -2
  8. xinference/core/supervisor.py +48 -1
  9. xinference/core/worker.py +139 -20
  10. xinference/deploy/cmdline.py +119 -20
  11. xinference/model/embedding/core.py +1 -2
  12. xinference/model/llm/__init__.py +4 -6
  13. xinference/model/llm/ggml/llamacpp.py +2 -10
  14. xinference/model/llm/llm_family.json +877 -13
  15. xinference/model/llm/llm_family.py +15 -0
  16. xinference/model/llm/llm_family_modelscope.json +571 -0
  17. xinference/model/llm/pytorch/chatglm.py +2 -0
  18. xinference/model/llm/pytorch/core.py +22 -26
  19. xinference/model/llm/pytorch/deepseek_vl.py +232 -0
  20. xinference/model/llm/pytorch/internlm2.py +2 -0
  21. xinference/model/llm/pytorch/omnilmm.py +153 -0
  22. xinference/model/llm/pytorch/qwen_vl.py +2 -0
  23. xinference/model/llm/pytorch/yi_vl.py +4 -2
  24. xinference/model/llm/utils.py +53 -5
  25. xinference/model/llm/vllm/core.py +54 -6
  26. xinference/model/rerank/core.py +3 -0
  27. xinference/thirdparty/deepseek_vl/__init__.py +31 -0
  28. xinference/thirdparty/deepseek_vl/models/__init__.py +28 -0
  29. xinference/thirdparty/deepseek_vl/models/clip_encoder.py +242 -0
  30. xinference/thirdparty/deepseek_vl/models/image_processing_vlm.py +208 -0
  31. xinference/thirdparty/deepseek_vl/models/modeling_vlm.py +170 -0
  32. xinference/thirdparty/deepseek_vl/models/processing_vlm.py +390 -0
  33. xinference/thirdparty/deepseek_vl/models/projector.py +100 -0
  34. xinference/thirdparty/deepseek_vl/models/sam.py +593 -0
  35. xinference/thirdparty/deepseek_vl/models/siglip_vit.py +681 -0
  36. xinference/thirdparty/deepseek_vl/utils/__init__.py +18 -0
  37. xinference/thirdparty/deepseek_vl/utils/conversation.py +348 -0
  38. xinference/thirdparty/deepseek_vl/utils/io.py +78 -0
  39. xinference/thirdparty/omnilmm/__init__.py +0 -0
  40. xinference/thirdparty/omnilmm/chat.py +216 -0
  41. xinference/thirdparty/omnilmm/constants.py +4 -0
  42. xinference/thirdparty/omnilmm/conversation.py +332 -0
  43. xinference/thirdparty/omnilmm/model/__init__.py +1 -0
  44. xinference/thirdparty/omnilmm/model/omnilmm.py +594 -0
  45. xinference/thirdparty/omnilmm/model/resampler.py +166 -0
  46. xinference/thirdparty/omnilmm/model/utils.py +563 -0
  47. xinference/thirdparty/omnilmm/train/__init__.py +13 -0
  48. xinference/thirdparty/omnilmm/train/train_utils.py +150 -0
  49. xinference/thirdparty/omnilmm/utils.py +134 -0
  50. xinference/types.py +15 -19
  51. xinference/web/ui/build/asset-manifest.json +3 -3
  52. xinference/web/ui/build/index.html +1 -1
  53. xinference/web/ui/build/static/js/main.76ef2b17.js +3 -0
  54. xinference/web/ui/build/static/js/main.76ef2b17.js.map +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/15e2cf8cd8d0989719b6349428ff576f9009ff4c2dcc52378be0bd938e82495e.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/35d0e4a317e5582cbb79d901302e9d706520ac53f8a734c2fd8bfde6eb5a4f02.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/3c2f277c93c5f1638e08db38df0d0fb4e58d1c5571aea03241a5c04ff4094704.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +1 -0
  59. xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +1 -0
  60. xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +1 -0
  61. xinference/web/ui/node_modules/.cache/babel-loader/59ce49eae0f486af4c5034d4d2f9ca77c3ec3a32ecc560085caf5ef482b5f4c9.json +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/b9cbcb6d77ba21b22c6950b6fb5b305d23c19cf747f99f7d48b6b046f8f7b1b0.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/d076fd56cf3b15ed2433e3744b98c6b4e4410a19903d1db4de5bba0e1a1b3347.json +1 -0
  67. xinference/web/ui/node_modules/.cache/babel-loader/daad8131d91134f6d7aef895a0c9c32e1cb928277cb5aa66c01028126d215be0.json +1 -0
  68. xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +1 -0
  69. xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +1 -0
  70. xinference/web/ui/node_modules/.cache/babel-loader/e6eccc9aa641e7da833492e27846dc965f9750281420977dc84654ca6ed221e4.json +1 -0
  71. xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +1 -0
  72. xinference/web/ui/node_modules/.cache/babel-loader/f16aec63602a77bd561d0e67fa00b76469ac54b8033754bba114ec5eb3257964.json +1 -0
  73. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/METADATA +25 -12
  74. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/RECORD +79 -58
  75. xinference/model/llm/ggml/ctransformers.py +0 -281
  76. xinference/model/llm/ggml/ctransformers_util.py +0 -161
  77. xinference/web/ui/build/static/js/main.66b1c4fb.js +0 -3
  78. xinference/web/ui/build/static/js/main.66b1c4fb.js.map +0 -1
  79. xinference/web/ui/node_modules/.cache/babel-loader/0bd70b1ecf307e2681318e864f4692305b6350c8683863007f4caf2f9ac33b6e.json +0 -1
  80. xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +0 -1
  81. xinference/web/ui/node_modules/.cache/babel-loader/18e5d5422e2464abf4a3e6d38164570e2e426e0a921e9a2628bbae81b18da353.json +0 -1
  82. xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/3e055de705e397e1d413d7f429589b1a98dd78ef378b97f0cdb462c5f2487d5e.json +0 -1
  84. xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +0 -1
  85. xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +0 -1
  86. xinference/web/ui/node_modules/.cache/babel-loader/60c4b98d8ea7479fb0c94cfd19c8128f17bd7e27a1e73e6dd9adf6e9d88d18eb.json +0 -1
  87. xinference/web/ui/node_modules/.cache/babel-loader/7e094845f611802b024b57439cbf911038169d06cdf6c34a72a7277f35aa71a4.json +0 -1
  88. xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +0 -1
  89. xinference/web/ui/node_modules/.cache/babel-loader/98b7ef307f436affe13d75a4f265b27e828ccc2b10ffae6513abe2681bc11971.json +0 -1
  90. xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +0 -1
  91. xinference/web/ui/node_modules/.cache/babel-loader/b400cfc9db57fa6c70cd2bad055b73c5079fde0ed37974009d898083f6af8cd8.json +0 -1
  92. xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +0 -1
  93. xinference/web/ui/node_modules/.cache/babel-loader/c2124cfe036b26befcbd386d1d17743b1a58d0b7a041a17bb67f9924400d63c3.json +0 -1
  94. xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +0 -1
  95. xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +0 -1
  96. xinference/web/ui/node_modules/.cache/babel-loader/e1d9b2ae4e1248658704bc6bfc5d6160dcd1a9e771ea4ae8c1fed0aaddeedd29.json +0 -1
  97. xinference/web/ui/node_modules/.cache/babel-loader/fd4a8ae5d192331af1bedd1d2d70efcc569708ee6cc4cb479b225d059482aa81.json +0 -1
  98. xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +0 -1
  99. /xinference/web/ui/build/static/js/{main.66b1c4fb.js.LICENSE.txt → main.76ef2b17.js.LICENSE.txt} +0 -0
  100. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/LICENSE +0 -0
  101. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/WHEEL +0 -0
  102. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/entry_points.txt +0 -0
  103. {xinference-0.9.4.dist-info → xinference-0.10.1.dist-info}/top_level.txt +0 -0
xinference/thirdparty/omnilmm/train/train_utils.py ADDED
@@ -0,0 +1,150 @@
+ import copy
+ import warnings
+ from typing import Dict, Sequence
+
+ import numpy as np
+ import transformers
+
+ IGNORE_INDEX = -100
+ DEFAULT_IMAGE_TOKEN = "<image>"
+ DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>"
+ DEFAULT_IM_START_TOKEN = "<im_start>"
+ DEFAULT_IM_END_TOKEN = "<im_end>"
+
+
+ def _tokenize_fn(
+     strings: Sequence[str], tokenizer: transformers.PreTrainedTokenizer
+ ) -> Dict:
+     """Tokenize a list of strings."""
+     tokenized_list = [
+         tokenizer(
+             text,
+             return_tensors="pt",
+             padding="longest",
+             max_length=tokenizer.model_max_length,
+             truncation=True,
+         )
+         for text in strings
+     ]
+     input_ids = labels = [tokenized.input_ids[0] for tokenized in tokenized_list]
+     input_ids_lens = labels_lens = [
+         tokenized.input_ids.ne(tokenizer.pad_token_id).sum().item()
+         for tokenized in tokenized_list
+     ]
+     return dict(
+         input_ids=input_ids,
+         labels=labels,
+         input_ids_lens=input_ids_lens,
+         labels_lens=labels_lens,
+     )
+
+
+ def omni_preprocess(
+     sources, tokenizer: transformers.PreTrainedTokenizer, generation=False
+ ):
+     system_content = "You are an artificial intelligence assistant, which gives helpful, detailed, and polite answers to the human's questions."
+     ignore_index = -100
+
+     response_template = "\n<|assistant|>\n"
+     instruction_template = "\n<|user|>\n"
+     response_token_ids = tokenizer.encode(response_template, add_special_tokens=False)
+     instruction_token_ids = tokenizer.encode(
+         instruction_template, add_special_tokens=False
+     )
+
+     batch_input_ids = []
+     batch_labels = []
+     for i in range(len(sources)):
+         new_source = []
+         prev_role = "unexpect"
+         for conv_turn in sources[i]:
+             role = conv_turn["from"] if "from" in conv_turn else conv_turn["role"]
+             content = (
+                 conv_turn["value"] if "value" in conv_turn else conv_turn["content"]
+             )
+
+             role = "user" if role == "human" else role
+             role = "assistant" if role == "gpt" else role
+
+             assert role in ["user", "assistant"]
+             assert role != prev_role, f"role={role}, prev_role={prev_role}"
+             prev_role = role
+
+             new_turn = {"role": role, "content": content}
+             new_source.append(new_turn)
+         if new_source[0]["role"] != "system":
+             new_source.insert(0, {"role": "system", "content": system_content})
+
+         # TODO: this automatically add '\n' to the end
+         res_text = tokenizer.apply_chat_template(
+             new_source, tokenize=False, add_generation_prompt=generation
+         )
+         if not generation:
+             res_text = res_text.strip()
+
+         conversations_tokenized = _tokenize_fn([res_text], tokenizer)
+         res_input_ids = conversations_tokenized["input_ids"][0]
+
+         # since labels and input_ids are reference towards the same object
+         res_labels = copy.deepcopy(conversations_tokenized["labels"][0])
+
+         response_token_ids_idxs = []
+         human_token_ids_idxs = []
+
+         for assistant_idx in np.where(res_labels == response_token_ids[0])[0]:
+             # find the indexes of the start of a response.
+             if (
+                 response_token_ids
+                 == res_labels[
+                     assistant_idx : assistant_idx + len(response_token_ids)
+                 ].tolist()
+             ):
+                 response_token_ids_idxs.append(assistant_idx + len(response_token_ids))
+
+         if len(response_token_ids_idxs) == 0:
+             warnings.warn(
+                 f"Could not find response key `{response_template}` in the "
+                 f"following instance: @===>{tokenizer.decode(res_input_ids)}<===@ "
+                 f"Raw text is @===>{res_text}<===@"
+                 f"Raw source is @===>{new_source}<===@"
+                 f"This instance will be ignored in loss calculation. "
+                 f"Note, if this happens often, consider increasing the `max_seq_length`."
+             )
+             res_labels[:] = ignore_index
+
+         human_token_ids = instruction_token_ids
+         for human_idx in np.where(res_labels == human_token_ids[0])[0]:
+             # find the indexes of the start of a human answer.
+             if (
+                 human_token_ids
+                 == res_labels[human_idx : human_idx + len(human_token_ids)].tolist()
+             ):
+                 human_token_ids_idxs.append(human_idx)
+
+         if len(human_token_ids_idxs) == 0:
+             warnings.warn(
+                 f"Could not find instruction key `{instruction_template}` in the "
+                 f"following instance: @===>{tokenizer.decode(res_input_ids)}<===@ "
+                 f"Raw text is @===>{res_text}<===@"
+                 f"Raw source is @===>{new_source}<===@"
+                 f"This instance will be ignored in loss calculation. "
+                 f"Note, if this happens often, consider increasing the `max_seq_length`."
+             )
+             res_labels[:] = ignore_index
+
+         for idx, (start, end) in enumerate(
+             zip(human_token_ids_idxs, response_token_ids_idxs)
+         ):
+             # Make pytorch loss function ignore all non response tokens
+             if idx != 0:
+                 res_labels[start:end] = ignore_index
+             else:
+                 res_labels[:end] = ignore_index
+
+         if len(response_token_ids_idxs) < len(human_token_ids_idxs):
+             res_labels[human_token_ids_idxs[-1] :] = ignore_index
+
+         batch_input_ids.append(res_input_ids)
+         batch_labels.append(res_labels)
+
+     return dict(input_ids=batch_input_ids, labels=batch_labels)
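The `omni_preprocess` helper added above builds supervised-fine-tuning labels from chat transcripts: it normalizes `from`/`value` turns to `role`/`content`, renders the conversation through the tokenizer's chat template, then masks everything except assistant responses with the ignore index (-100) so only model outputs contribute to the loss. A minimal usage sketch, assuming a tokenizer whose chat template emits the `\n<|user|>\n` / `\n<|assistant|>\n` markers this code searches for; the model name and conversation below are illustrative, not part of this diff:

# Hedged sketch, not from this diff: exercising the new omni_preprocess helper.
from transformers import AutoTokenizer

from xinference.thirdparty.omnilmm.train.train_utils import omni_preprocess

# Assumption: this tokenizer's chat template uses the <|user|>/<|assistant|>
# markers that omni_preprocess encodes and searches for in the token stream.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

sources = [
    [
        {"from": "human", "value": "Describe the image."},   # normalized to "user"
        {"from": "gpt", "value": "A cat on a windowsill."},  # normalized to "assistant"
    ]
]

batch = omni_preprocess(sources, tokenizer, generation=False)
labels = batch["labels"][0]
# Only the assistant-response span keeps real token ids; the rest is -100.
print(batch["input_ids"][0].shape, int((labels != -100).sum()))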
xinference/thirdparty/omnilmm/utils.py ADDED
@@ -0,0 +1,134 @@
+ import logging
+ import logging.handlers
+ import os
+ import sys
+
+ import requests
+
+ from .constants import LOGDIR
+
+ server_error_msg = (
+     "**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**"
+ )
+ moderation_msg = (
+     "YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES. PLEASE TRY AGAIN."
+ )
+
+ handler = None
+
+
+ def build_logger(logger_name, logger_filename):
+     global handler
+
+     formatter = logging.Formatter(
+         fmt="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
+         datefmt="%Y-%m-%d %H:%M:%S",
+     )
+
+     # Set the format of root handlers
+     if not logging.getLogger().handlers:
+         logging.basicConfig(level=logging.INFO)
+     logging.getLogger().handlers[0].setFormatter(formatter)
+
+     # Redirect stdout and stderr to loggers
+     stdout_logger = logging.getLogger("stdout")
+     stdout_logger.setLevel(logging.INFO)
+     sl = StreamToLogger(stdout_logger, logging.INFO)
+     sys.stdout = sl
+
+     stderr_logger = logging.getLogger("stderr")
+     stderr_logger.setLevel(logging.ERROR)
+     sl = StreamToLogger(stderr_logger, logging.ERROR)
+     sys.stderr = sl
+
+     # Get logger
+     logger = logging.getLogger(logger_name)
+     logger.setLevel(logging.INFO)
+
+     # Add a file handler for all loggers
+     if handler is None:
+         os.makedirs(LOGDIR, exist_ok=True)
+         filename = os.path.join(LOGDIR, logger_filename)
+         handler = logging.handlers.TimedRotatingFileHandler(
+             filename, when="D", utc=True
+         )
+         handler.setFormatter(formatter)
+
+         for name, item in logging.root.manager.loggerDict.items():
+             if isinstance(item, logging.Logger):
+                 item.addHandler(handler)
+
+     return logger
+
+
+ class StreamToLogger(object):
+     """
+     Fake file-like stream object that redirects writes to a logger instance.
+     """
+
+     def __init__(self, logger, log_level=logging.INFO):
+         self.terminal = sys.stdout
+         self.logger = logger
+         self.log_level = log_level
+         self.linebuf = ""
+
+     def __getattr__(self, attr):
+         return getattr(self.terminal, attr)
+
+     def write(self, buf):
+         temp_linebuf = self.linebuf + buf
+         self.linebuf = ""
+         for line in temp_linebuf.splitlines(True):
+             # From the io.TextIOWrapper docs:
+             #   On output, if newline is None, any '\n' characters written
+             #   are translated to the system default line separator.
+             # By default sys.stdout.write() expects '\n' newlines and then
+             # translates them so this is still cross platform.
+             if line[-1] == "\n":
+                 self.logger.log(self.log_level, line.rstrip())
+             else:
+                 self.linebuf += line
+
+     def flush(self):
+         if self.linebuf != "":
+             self.logger.log(self.log_level, self.linebuf.rstrip())
+         self.linebuf = ""
+
+
+ def disable_torch_init():
+     """
+     Disable the redundant torch default initialization to accelerate model creation.
+     """
+     import torch
+
+     setattr(torch.nn.Linear, "reset_parameters", lambda self: None)
+     setattr(torch.nn.LayerNorm, "reset_parameters", lambda self: None)
+
+
+ def violates_moderation(text):
+     """
+     Check whether the text violates OpenAI moderation API.
+     """
+     url = "https://api.openai.com/v1/moderations"
+     headers = {
+         "Content-Type": "application/json",
+         "Authorization": "Bearer " + os.environ["OPENAI_API_KEY"],
+     }
+     text = text.replace("\n", "")
+     data = "{" + '"input": ' + f'"{text}"' + "}"
+     data = data.encode("utf-8")
+     try:
+         ret = requests.post(url, headers=headers, data=data, timeout=5)
+         flagged = ret.json()["results"][0]["flagged"]
+     except requests.exceptions.RequestException:
+         flagged = False
+     except KeyError:
+         flagged = False
+
+     return flagged
+
+
+ def pretty_print_semaphore(semaphore):
+     if semaphore is None:
+         return "None"
+     return f"Semaphore(value={semaphore._value}, locked={semaphore.locked()})"
xinference/types.py CHANGED
@@ -91,11 +91,23 @@ class CompletionLogprobs(TypedDict):
      top_logprobs: List[Optional[Dict[str, float]]]


+ class ToolCallFunction(TypedDict):
+     name: str
+     arguments: str
+
+
+ class ToolCalls(TypedDict):
+     id: str
+     type: Literal["function"]
+     function: ToolCallFunction
+
+
  class CompletionChoice(TypedDict):
      text: str
      index: int
      logprobs: Optional[CompletionLogprobs]
      finish_reason: Optional[str]
+     tool_calls: NotRequired[List[ToolCalls]]


  class CompletionUsage(TypedDict):
@@ -147,6 +159,7 @@ class ChatCompletion(TypedDict):
  class ChatCompletionChunkDelta(TypedDict):
      role: NotRequired[str]
      content: NotRequired[str]
+     tool_calls: NotRequired[List[ToolCalls]]


  class ChatCompletionChunkChoice(TypedDict):
@@ -232,6 +245,8 @@ class LlamaCppModelConfig(TypedDict, total=False):
      n_ctx: int
      n_parts: int
      n_gpu_layers: int
+     split_mode: int
+     main_gpu: int
      seed: int
      f16_kv: bool
      logits_all: bool
@@ -355,21 +370,6 @@ try:
  except ImportError:
      CreateCompletionLlamaCpp = create_model("CreateCompletionLlamaCpp")

- CreateCompletionCTransformers: BaseModel
- try:
-     from ctransformers.llm import LLM
-
-     CreateCompletionCTransformers = get_pydantic_model_from_method(
-         LLM.generate,
-         exclude_fields=["tokens"],
-         include_fields={
-             "max_tokens": (Optional[int], max_tokens_field),
-             "stream": (Optional[bool], stream_field),
-         },
-     )
- except ImportError:
-     CreateCompletionCTransformers = create_model("CreateCompletionCTransformers")
-

  # This type is for openai API compatibility
  CreateCompletionOpenAI: BaseModel
@@ -415,7 +415,6 @@ class CreateCompletion(
      ModelAndPrompt,
      CreateCompletionTorch,
      CreateCompletionLlamaCpp,
-     CreateCompletionCTransformers,
      CreateCompletionOpenAI,
  ):
      pass
@@ -428,8 +427,6 @@ class CreateChatModel(BaseModel):
  # Currently, chat calls generates, so the params share the same one.
  CreateChatCompletionTorch = CreateCompletionTorch
  CreateChatCompletionLlamaCpp: BaseModel = CreateCompletionLlamaCpp
- CreateChatCompletionCTransformers: BaseModel = CreateCompletionCTransformers
-

  # This type is for openai API compatibility
  CreateChatCompletionOpenAI: BaseModel
@@ -450,7 +447,6 @@ class CreateChatCompletion(
      CreateChatModel,
      CreateChatCompletionTorch,
      CreateChatCompletionLlamaCpp,
-     CreateChatCompletionCTransformers,
      CreateChatCompletionOpenAI,
  ):
      pass
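The new `ToolCallFunction`/`ToolCalls` TypedDicts mirror OpenAI's function-calling schema, and `tool_calls` is now an optional (`NotRequired`) field on both `CompletionChoice` and the streaming `ChatCompletionChunkDelta`. A sketch of a conforming value; the id, tool name, and arguments are invented for illustration:

# Hedged sketch, not from this diff: a CompletionChoice carrying tool_calls.
from xinference.types import CompletionChoice, ToolCalls

call: ToolCalls = {
    "id": "call_0",  # illustrative id
    "type": "function",
    "function": {
        "name": "get_weather",             # hypothetical tool name
        "arguments": '{"city": "Paris"}',  # arguments are a JSON-encoded string
    },
}

choice: CompletionChoice = {
    "text": "",
    "index": 0,
    "logprobs": None,
    "finish_reason": "tool_calls",
    "tool_calls": [call],  # NotRequired: omitted when no tools are invoked
}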
xinference/web/ui/build/asset-manifest.json CHANGED
@@ -1,11 +1,11 @@
  {
    "files": {
-     "main.js": "./static/js/main.66b1c4fb.js",
+     "main.js": "./static/js/main.76ef2b17.js",
      "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
      "index.html": "./index.html",
-     "main.66b1c4fb.js.map": "./static/js/main.66b1c4fb.js.map"
+     "main.76ef2b17.js.map": "./static/js/main.76ef2b17.js.map"
    },
    "entrypoints": [
-     "static/js/main.66b1c4fb.js"
+     "static/js/main.76ef2b17.js"
    ]
  }
xinference/web/ui/build/index.html CHANGED
@@ -1 +1 @@
- <!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.66b1c4fb.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+ <!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.76ef2b17.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>