xinference 1.5.0.post1__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic.

Files changed (89)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +10 -3
  3. xinference/constants.py +5 -1
  4. xinference/core/supervisor.py +12 -3
  5. xinference/core/utils.py +1 -1
  6. xinference/core/worker.py +2 -2
  7. xinference/deploy/cmdline.py +17 -0
  8. xinference/model/audio/core.py +1 -1
  9. xinference/model/audio/model_spec.json +43 -43
  10. xinference/model/audio/model_spec_modelscope.json +13 -13
  11. xinference/model/llm/__init__.py +3 -5
  12. xinference/model/llm/core.py +14 -0
  13. xinference/model/llm/llama_cpp/core.py +15 -4
  14. xinference/model/llm/llm_family.json +3251 -4304
  15. xinference/model/llm/llm_family.py +62 -6
  16. xinference/model/llm/llm_family_csghub.json +0 -32
  17. xinference/model/llm/llm_family_modelscope.json +1161 -1789
  18. xinference/model/llm/llm_family_openmind_hub.json +19 -325
  19. xinference/model/llm/lmdeploy/core.py +7 -2
  20. xinference/model/llm/mlx/core.py +19 -6
  21. xinference/model/llm/sglang/core.py +25 -10
  22. xinference/model/llm/transformers/chatglm.py +8 -1
  23. xinference/model/llm/transformers/cogagent.py +10 -12
  24. xinference/model/llm/transformers/cogvlm2.py +6 -3
  25. xinference/model/llm/transformers/cogvlm2_video.py +3 -6
  26. xinference/model/llm/transformers/core.py +50 -58
  27. xinference/model/llm/transformers/deepseek_v2.py +4 -2
  28. xinference/model/llm/transformers/deepseek_vl.py +10 -4
  29. xinference/model/llm/transformers/deepseek_vl2.py +9 -4
  30. xinference/model/llm/transformers/gemma3.py +15 -7
  31. xinference/model/llm/transformers/glm4v.py +2 -20
  32. xinference/model/llm/transformers/glm_edge_v.py +3 -20
  33. xinference/model/llm/transformers/intern_vl.py +3 -6
  34. xinference/model/llm/transformers/internlm2.py +1 -1
  35. xinference/model/llm/transformers/minicpmv25.py +4 -2
  36. xinference/model/llm/transformers/minicpmv26.py +5 -3
  37. xinference/model/llm/transformers/omnilmm.py +1 -1
  38. xinference/model/llm/transformers/opt.py +1 -1
  39. xinference/model/llm/transformers/ovis2.py +302 -0
  40. xinference/model/llm/transformers/qwen-omni.py +2 -1
  41. xinference/model/llm/transformers/qwen2_audio.py +3 -1
  42. xinference/model/llm/transformers/qwen2_vl.py +5 -1
  43. xinference/model/llm/transformers/qwen_vl.py +5 -2
  44. xinference/model/llm/utils.py +28 -0
  45. xinference/model/llm/vllm/core.py +73 -9
  46. xinference/model/llm/vllm/distributed_executor.py +8 -7
  47. xinference/model/llm/vllm/xavier/allocator.py +1 -1
  48. xinference/model/llm/vllm/xavier/block_manager.py +1 -1
  49. xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
  50. xinference/model/llm/vllm/xavier/executor.py +1 -1
  51. xinference/model/llm/vllm/xavier/test/test_xavier.py +1 -1
  52. xinference/model/video/diffusers.py +30 -3
  53. xinference/model/video/model_spec.json +46 -0
  54. xinference/model/video/model_spec_modelscope.json +48 -0
  55. xinference/types.py +2 -0
  56. xinference/web/ui/build/asset-manifest.json +6 -6
  57. xinference/web/ui/build/index.html +1 -1
  58. xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
  59. xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
  60. xinference/web/ui/build/static/js/main.91e77b5c.js +3 -0
  61. xinference/web/ui/build/static/js/main.91e77b5c.js.map +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/5e6edb0fb87e3798f142e9abf8dd2dc46bab33a60d31dff525797c0c99887097.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/6087820be1bd5c02c42dff797e7df365448ef35ab26dd5d6bd33e967e05cbfd4.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
  66. xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
  67. xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
  68. xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
  69. xinference/web/ui/src/locales/en.json +1 -0
  70. xinference/web/ui/src/locales/zh.json +1 -0
  71. {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/METADATA +1 -1
  72. {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/RECORD +77 -78
  73. {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/WHEEL +1 -1
  74. xinference/model/llm/transformers/compression.py +0 -258
  75. xinference/model/llm/transformers/yi_vl.py +0 -239
  76. xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
  77. xinference/web/ui/build/static/js/main.58bd483c.js +0 -3
  78. xinference/web/ui/build/static/js/main.58bd483c.js.map +0 -1
  79. xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
  80. xinference/web/ui/node_modules/.cache/babel-loader/69081049f0c7447544b7cfd73dd13d8846c02fe5febe4d81587e95c89a412d5b.json +0 -1
  81. xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
  82. xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
  84. xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
  85. xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
  86. /xinference/web/ui/build/static/js/{main.58bd483c.js.LICENSE.txt → main.91e77b5c.js.LICENSE.txt} +0 -0
  87. {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/entry_points.txt +0 -0
  88. {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/licenses/LICENSE +0 -0
  89. {xinference-1.5.0.post1.dist-info → xinference-1.5.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/transformers/compression.py
@@ -1,258 +0,0 @@
- # Copyright 2022-2023 XProbe Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import dataclasses
- import gc
- import glob
- import os
-
- import torch
- import torch.nn as nn
- from huggingface_hub import snapshot_download
- from torch import Tensor
- from torch.nn import functional as F
- from tqdm import tqdm
- from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
-
- from ....device_utils import empty_cache
-
-
- @dataclasses.dataclass
- class CompressionConfig:
-     """Group-wise quantization."""
-
-     num_bits: int
-     group_size: int
-     group_dim: int
-     symmetric: bool
-     enabled: bool = True
-
-
- default_compression_config = CompressionConfig(
-     num_bits=8, group_size=256, group_dim=1, symmetric=True, enabled=True
- )
-
-
- class CLinear(nn.Module):
-     """Compressed Linear Layer."""
-
-     def __init__(self, weight=None, bias=None, device=None):
-         super().__init__()
-         if weight is None:
-             self.weight = None
-         elif isinstance(weight, Tensor):
-             self.weight = compress(weight.data.to(device), default_compression_config)
-         else:
-             self.weight = weight
-         self.bias = bias
-
-     def forward(self, input: Tensor) -> Tensor:
-         weight = decompress(self.weight, default_compression_config)
-         if self.bias is None:
-             return F.linear(input.to(weight.dtype), weight)
-         return F.linear(input.to(weight.dtype), weight, self.bias.to(weight.dtype))
-
-
- def get_compressed_list(module, prefix=""):
-     compressed_list = []
-     for attr_str in dir(module):
-         target_attr = getattr(module, attr_str)
-         if type(target_attr) == torch.nn.Linear:
-             full_name = (
-                 f"{prefix}.{attr_str}.weight" if prefix else f"{attr_str}.weight"
-             )
-             compressed_list.append(full_name)
-     for name, child in module.named_children():
-         child_prefix = f"{prefix}.{name}" if prefix else name
-         for each in get_compressed_list(child, child_prefix):
-             compressed_list.append(each)
-     return compressed_list
-
-
- def apply_compressed_weight(module, compressed_state_dict, target_device, prefix=""):
-     for attr_str in dir(module):
-         target_attr = getattr(module, attr_str)
-         if type(target_attr) == torch.nn.Linear:
-             full_name = (
-                 f"{prefix}.{attr_str}.weight" if prefix else f"{attr_str}.weight"
-             )
-             setattr(
-                 module,
-                 attr_str,
-                 CLinear(
-                     compressed_state_dict[full_name], target_attr.bias, target_device
-                 ),
-             )
-     for name, child in module.named_children():
-         child_prefix = f"{prefix}.{name}" if prefix else name
-         apply_compressed_weight(
-             child, compressed_state_dict, target_device, child_prefix
-         )
-
-
- def load_compress_model(
-     model_path: str,
-     device: str,
-     torch_dtype: torch.dtype,
-     use_fast: bool,
-     revision: str = "main",
- ):
-     from accelerate import init_empty_weights
-     from accelerate.utils import set_module_tensor_to_device
-
-     # partially load model
-     tokenizer = AutoTokenizer.from_pretrained(
-         model_path,
-         use_fast=use_fast,
-         trust_remote_code=True,
-         revision=revision,
-     )
-
-     with init_empty_weights():
-         config = AutoConfig.from_pretrained(
-             model_path,
-             low_cpu_mem_usage=True,
-             torch_dtype=torch_dtype,
-             trust_remote_code=True,
-             revision=revision,
-         )
-         model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
-         linear_weights = get_compressed_list(model)
-
-     if os.path.exists(model_path):
-         # `model_path` is a local folder
-         base_pattern = os.path.join(model_path, "pytorch_model*.bin")
-     else:
-         # `model_path` is a cached Hugging Face repo
-         model_path = snapshot_download(model_path, revision=revision)
-         base_pattern = os.path.join(model_path, "pytorch_model*.bin")
-
-     files = glob.glob(base_pattern)
-
-     compressed_state_dict = {}
-
-     for filename in tqdm(files):
-         tmp_state_dict = torch.load(filename, map_location=torch.device(device))
-         for name in tmp_state_dict:
-             if name in linear_weights:
-                 tensor = tmp_state_dict[name].to(device).data.to(torch_dtype)
-                 compressed_state_dict[name] = compress(
-                     tensor, default_compression_config
-                 )
-             else:
-                 compressed_state_dict[name] = tmp_state_dict[name].to(device)
-             tmp_state_dict[name] = None
-             tensor = None
-             gc.collect()
-             empty_cache()
-
-     for name in model.state_dict():
-         if name not in linear_weights:
-             set_module_tensor_to_device(
-                 model, name, device, value=compressed_state_dict[name]
-             )
-     apply_compressed_weight(model, compressed_state_dict, device)
-
-     model.to(device)
-
-     return model, tokenizer
-
-
- def compress(tensor, config):
-     """Simulate group-wise quantization."""
-     if not config.enabled:
-         return tensor
-
-     group_size, num_bits, group_dim, symmetric = (
-         config.group_size,
-         config.num_bits,
-         config.group_dim,
-         config.symmetric,
-     )
-     assert num_bits <= 8
-
-     original_shape = tensor.shape
-     num_groups = (original_shape[group_dim] + group_size - 1) // group_size
-     new_shape = (
-         original_shape[:group_dim]
-         + (num_groups, group_size)
-         + original_shape[group_dim + 1 :]
-     )
-
-     # Pad
-     pad_len = (group_size - original_shape[group_dim] % group_size) % group_size
-     if pad_len != 0:
-         pad_shape = (
-             original_shape[:group_dim] + (pad_len,) + original_shape[group_dim + 1 :]
-         )
-         tensor = torch.cat(
-             [tensor, torch.zeros(pad_shape, dtype=tensor.dtype, device=tensor.device)],
-             dim=group_dim,
-         )
-     data = tensor.view(new_shape)
-
-     # Quantize
-     if symmetric:
-         B = 2 ** (num_bits - 1) - 1
-         scale = B / torch.max(data.abs(), dim=group_dim + 1, keepdim=True)[0]
-         data = data * scale
-         data = data.clamp_(-B, B).round_().to(torch.int8)
-         return data, scale, original_shape
-     else:
-         B = 2**num_bits - 1
-         mn = torch.min(data, dim=group_dim + 1, keepdim=True)[0]
-         mx = torch.max(data, dim=group_dim + 1, keepdim=True)[0]
-
-         scale = B / (mx - mn)
-         data = data - mn
-         data.mul_(scale)
-
-         data = data.clamp_(0, B).round_().to(torch.uint8)
-         return data, mn, scale, original_shape
-
-
- def decompress(packed_data, config):
-     """Simulate group-wise dequantization."""
-     if not config.enabled:
-         return packed_data
-
-     group_size, _, group_dim, symmetric = (
-         config.group_size,
-         config.num_bits,
-         config.group_dim,
-         config.symmetric,
-     )
-
-     # Dequantize
-     if symmetric:
-         data, scale, original_shape = packed_data
-         data = data / scale
-     else:
-         data, mn, scale, original_shape = packed_data
-         data = data / scale
-         data.add_(mn)
-
-     # Unpad
-     pad_len = (group_size - original_shape[group_dim] % group_size) % group_size
-     if pad_len:
-         padded_original_shape = (
-             original_shape[:group_dim]
-             + (original_shape[group_dim] + pad_len,)
-             + original_shape[group_dim + 1 :]
-         )
-         data = data.reshape(padded_original_shape)
-         indices = [slice(0, x) for x in original_shape]
-         return data[indices].contiguous()
-     else:
-         return data.view(original_shape)
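
For orientation, the deleted compression.py simulated group-wise quantization: each nn.Linear weight was replaced by an int8-packed copy that forward() dequantized on the fly. Below is a minimal sketch of just the symmetric int8 arithmetic under the module's defaults (num_bits=8, group_size=256); the function names and the epsilon clamp are illustrative additions, not part of the xinference API.

import torch


def quantize_groupwise(x: torch.Tensor, group_size: int = 256):
    # Pad the last dimension to a multiple of group_size, as compress() did.
    orig_len = x.shape[-1]
    pad = (group_size - orig_len % group_size) % group_size
    if pad:
        x = torch.nn.functional.pad(x, (0, pad))
    groups = x.reshape(*x.shape[:-1], -1, group_size)
    B = 2 ** (8 - 1) - 1  # 127, the largest int8 magnitude used
    # Per-group scale maps the group's max magnitude onto 127.
    # (clamp_min avoids division by zero for all-zero groups; an assumption,
    # the original code had no such guard.)
    scale = B / groups.abs().amax(dim=-1, keepdim=True).clamp_min(1e-8)
    q = (groups * scale).clamp_(-B, B).round_().to(torch.int8)
    return q, scale, orig_len


def dequantize_groupwise(q, scale, orig_len):
    # Inverse transform; drop the padding that quantize_groupwise added.
    groups = q.to(scale.dtype) / scale
    flat = groups.reshape(*groups.shape[:-2], -1)
    return flat[..., :orig_len]


w = torch.randn(4, 1000)
q, s, n = quantize_groupwise(w)
w_hat = dequantize_groupwise(q, s, n)
print((w - w_hat).abs().max())  # small round-trip error, on the order of 1e-2

Because the scale is chosen per group of 256 values rather than per tensor, one outlier only degrades the precision of its own group, which is the main reason the deleted module grouped before rounding.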
xinference/model/llm/transformers/yi_vl.py
@@ -1,239 +0,0 @@
- # Copyright 2022-2023 XProbe Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import logging
- import uuid
- from concurrent.futures import ThreadPoolExecutor
- from threading import Thread
- from typing import Dict, Iterator, List, Optional, Union
-
- import torch
-
- from ....model.utils import select_device
- from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
- from ..llm_family import LLMFamilyV1, LLMSpecV1
- from ..utils import (
-     _decode_image,
-     generate_chat_completion,
-     generate_completion_chunk,
-     parse_messages,
- )
- from .core import PytorchChatModel, PytorchGenerateConfig
- from .utils import cache_clean
-
- logger = logging.getLogger(__name__)
-
-
- class YiVLChatModel(PytorchChatModel):
-     def __init__(self, *args, **kwargs):
-         super().__init__(*args, **kwargs)
-         self._tokenizer = None
-         self._model = None
-         self._image_processor = None
-
-     @classmethod
-     def match(
-         cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
-     ) -> bool:
-         llm_family = model_family.model_family or model_family.model_name
-         if "yi-vl" in llm_family:
-             return True
-         return False
-
-     def load(self):
-         from ....thirdparty.llava.mm_utils import load_pretrained_model
-         from ....thirdparty.llava.model.constants import key_info
-
-         self._device = self._pytorch_model_config.get("device", "auto")
-         self._device = select_device(self._device)
-         # for multiple GPU, set back to auto to make multiple devices work
-         self._device = "auto" if self._device == "cuda" else self._device
-
-         key_info["model_path"] = self.model_path
-         # Default device_map is auto, it can loads model to multiple cards.
-         # If the device_map is set to cuda, then only 1 card can be used.
-         (
-             self._tokenizer,
-             self._model,
-             self._image_processor,
-             _,
-         ) = load_pretrained_model(self.model_path, device_map=self._device)
-         self._apply_lora()
-
-     @staticmethod
-     def _message_content_to_yi(content) -> Union[str, tuple]:
-         if not isinstance(content, str):
-             from ....thirdparty.llava.model.constants import DEFAULT_IMAGE_TOKEN
-
-             texts = []
-             image_urls = []
-             for c in content:
-                 c_type = c.get("type")
-                 if c_type == "text":
-                     texts.append(c["text"])
-                 elif c_type == "image_url":
-                     image_urls.append(c["image_url"]["url"])
-             image_futures = []
-             with ThreadPoolExecutor() as executor:
-                 for image_url in image_urls:
-                     fut = executor.submit(_decode_image, image_url)
-                     image_futures.append(fut)
-             images = [fut.result() for fut in image_futures]
-             text = " ".join(texts)
-             if DEFAULT_IMAGE_TOKEN not in text:
-                 text = DEFAULT_IMAGE_TOKEN + "\n" + text
-             if len(images) == 0:
-                 return text
-             elif len(images) == 1:
-                 return text, images[0], "Pad"
-             else:
-                 raise RuntimeError("Only one image per message is supported by Yi VL.")
-         return content
-
-     @cache_clean
-     def chat(
-         self,
-         messages: List[Dict],
-         generate_config: Optional[PytorchGenerateConfig] = None,
-     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
-         from transformers import TextIteratorStreamer
-
-         if not generate_config:
-             generate_config = {}
-
-         stream = generate_config.get("stream", False)
-         stream_options = generate_config.pop("stream_options", None)
-         include_usage = (
-             stream_options["include_usage"]
-             if isinstance(stream_options, dict)
-             else False
-         )
-
-         from ....thirdparty.llava.conversation import conv_templates
-         from ....thirdparty.llava.mm_utils import (
-             KeywordsStoppingCriteria,
-             tokenizer_image_token,
-         )
-         from ....thirdparty.llava.model.constants import IMAGE_TOKEN_INDEX
-
-         # Convert chat history to llava state
-         state = conv_templates["mm_default"].copy()
-         prompt, _, chat_history = parse_messages(messages)
-         for message in chat_history:
-             content = self._message_content_to_yi(message["content"])
-             state.append_message(message["role"], content)
-         state.append_message(state.roles[0], self._message_content_to_yi(prompt))
-         state.append_message(state.roles[1], None)
-
-         prompt = state.get_prompt()
-
-         input_ids = (
-             tokenizer_image_token(
-                 prompt, self._tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
-             )
-             .unsqueeze(0)
-             .to(self._model.device)
-         )
-
-         images = state.get_images(return_pil=True)
-         if images:
-             image = images[0]
-             image_tensor = self._image_processor.preprocess(image, return_tensors="pt")[
-                 "pixel_values"
-             ][0]
-
-         stop_str = state.sep
-         keywords = [stop_str]
-         stopping_criteria = KeywordsStoppingCriteria(
-             keywords, self._tokenizer, input_ids
-         )
-         streamer = TextIteratorStreamer(
-             self._tokenizer, timeout=60, skip_prompt=True, skip_special_tokens=True
-         )
-         top_p = generate_config.get("top_p", 0.7)
-         temperature = generate_config.get("temperature", 0.2)
-         max_new_tokens = generate_config.get("max_tokens", 512)
-         generate_kwargs = {
-             "input_ids": input_ids,
-             "images": image_tensor.unsqueeze(0)
-             .to(dtype=torch.bfloat16)
-             .to(self._model.device)
-             if images
-             else None,
-             "streamer": streamer,
-             "do_sample": True,
-             "top_p": float(top_p),
-             "temperature": float(temperature),
-             "stopping_criteria": [stopping_criteria],
-             "use_cache": True,
-             "max_new_tokens": min(int(max_new_tokens), 1536),
-         }
-         t = Thread(target=self._model.generate, kwargs=generate_kwargs)
-         t.start()
-
-         if stream:
-             it = self._generate_stream(streamer, stop_str, input_ids, include_usage)
-             return self._to_chat_completion_chunks(it)
-         else:
-             return self._generate(streamer, stop_str)
-
-     def _generate(self, streamer, stop_str) -> ChatCompletion:
-         generated_text = ""
-         for new_text in streamer:
-             generated_text += new_text
-         if generated_text.endswith(stop_str):
-             generated_text = generated_text[: -len(stop_str)]
-         return generate_chat_completion(self.model_uid, generated_text)
-
-     def _generate_stream(
-         self, streamer, stop_str, input_ids, include_usage
-     ) -> Iterator[CompletionChunk]:
-         completion_id = str(uuid.uuid1())
-         prompt_tokens, completion_tokens, total_tokens = 0, 0, 0
-         prompt_tokens = len(input_ids[0])
-         for i, new_text in enumerate(streamer):
-             if not new_text.endswith(stop_str):
-                 completion_tokens = i
-                 total_tokens = prompt_tokens + completion_tokens
-                 yield generate_completion_chunk(
-                     chunk_text=new_text,
-                     finish_reason=None,
-                     chunk_id=completion_id,
-                     model_uid=self.model_uid,
-                     prompt_tokens=prompt_tokens,
-                     completion_tokens=completion_tokens,
-                     total_tokens=total_tokens,
-                 )
-         yield generate_completion_chunk(
-             chunk_text=None,
-             finish_reason="stop",
-             chunk_id=completion_id,
-             model_uid=self.model_uid,
-             prompt_tokens=prompt_tokens,
-             completion_tokens=completion_tokens,
-             total_tokens=total_tokens,
-             has_choice=True,
-             has_content=False,
-         )
-         if include_usage:
-             yield generate_completion_chunk(
-                 chunk_text=None,
-                 finish_reason=None,
-                 chunk_id=completion_id,
-                 model_uid=self.model_uid,
-                 prompt_tokens=prompt_tokens,
-                 completion_tokens=completion_tokens,
-                 total_tokens=total_tokens,
-                 has_choice=False,
-                 has_content=False,
-             )
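
The deleted YiVLChatModel streamed output by launching model.generate on a background Thread and draining a transformers TextIteratorStreamer. Below is a minimal sketch of that pattern with a generic causal LM ("gpt2" is only a stand-in here; Yi-VL additionally passed image tensors, a llava conversation template, and keyword stopping criteria).

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Streaming works by", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until completion, so it runs on a worker thread;
# the streamer is a thread-safe queue that the caller drains incrementally.
thread = Thread(
    target=model.generate,
    kwargs=dict(**inputs, streamer=streamer, max_new_tokens=30, do_sample=False),
)
thread.start()
for piece in streamer:  # yields decoded text fragments as tokens arrive
    print(piece, end="", flush=True)
thread.join()

This separation is what let the deleted chat() return either a plain completion (join the fragments) or an iterator of chunks (wrap the streamer) from one generate call.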
xinference/web/ui/build/static/css/main.0f6523be.css.map
@@ -1 +0,0 @@
- (deleted: one-line minified source map for static/css/main.0f6523be.css; generated build artifact, contents omitted)