xinference 0.10.0__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of xinference has been flagged as possibly problematic.
Files changed (76)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +25 -6
  3. xinference/client/oscar/actor_client.py +4 -3
  4. xinference/client/restful/restful_client.py +8 -2
  5. xinference/core/supervisor.py +16 -0
  6. xinference/model/embedding/core.py +1 -2
  7. xinference/model/llm/__init__.py +0 -6
  8. xinference/model/llm/ggml/llamacpp.py +2 -10
  9. xinference/model/llm/llm_family.json +244 -7
  10. xinference/model/llm/llm_family.py +15 -0
  11. xinference/model/llm/llm_family_modelscope.json +100 -0
  12. xinference/model/llm/pytorch/chatglm.py +2 -0
  13. xinference/model/llm/pytorch/core.py +22 -28
  14. xinference/model/llm/pytorch/internlm2.py +2 -0
  15. xinference/model/llm/pytorch/qwen_vl.py +2 -0
  16. xinference/model/llm/pytorch/yi_vl.py +4 -2
  17. xinference/model/llm/utils.py +42 -4
  18. xinference/model/llm/vllm/core.py +51 -6
  19. xinference/model/rerank/core.py +3 -0
  20. xinference/thirdparty/omnilmm/chat.py +1 -1
  21. xinference/types.py +15 -19
  22. xinference/web/ui/build/asset-manifest.json +3 -3
  23. xinference/web/ui/build/index.html +1 -1
  24. xinference/web/ui/build/static/js/main.76ef2b17.js +3 -0
  25. xinference/web/ui/build/static/js/main.76ef2b17.js.map +1 -0
  26. xinference/web/ui/node_modules/.cache/babel-loader/15e2cf8cd8d0989719b6349428ff576f9009ff4c2dcc52378be0bd938e82495e.json +1 -0
  27. xinference/web/ui/node_modules/.cache/babel-loader/35d0e4a317e5582cbb79d901302e9d706520ac53f8a734c2fd8bfde6eb5a4f02.json +1 -0
  28. xinference/web/ui/node_modules/.cache/babel-loader/3c2f277c93c5f1638e08db38df0d0fb4e58d1c5571aea03241a5c04ff4094704.json +1 -0
  29. xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +1 -0
  30. xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +1 -0
  31. xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +1 -0
  32. xinference/web/ui/node_modules/.cache/babel-loader/59ce49eae0f486af4c5034d4d2f9ca77c3ec3a32ecc560085caf5ef482b5f4c9.json +1 -0
  33. xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +1 -0
  34. xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +1 -0
  35. xinference/web/ui/node_modules/.cache/babel-loader/b9cbcb6d77ba21b22c6950b6fb5b305d23c19cf747f99f7d48b6b046f8f7b1b0.json +1 -0
  36. xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +1 -0
  37. xinference/web/ui/node_modules/.cache/babel-loader/d076fd56cf3b15ed2433e3744b98c6b4e4410a19903d1db4de5bba0e1a1b3347.json +1 -0
  38. xinference/web/ui/node_modules/.cache/babel-loader/daad8131d91134f6d7aef895a0c9c32e1cb928277cb5aa66c01028126d215be0.json +1 -0
  39. xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +1 -0
  40. xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +1 -0
  41. xinference/web/ui/node_modules/.cache/babel-loader/e6eccc9aa641e7da833492e27846dc965f9750281420977dc84654ca6ed221e4.json +1 -0
  42. xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +1 -0
  43. xinference/web/ui/node_modules/.cache/babel-loader/f16aec63602a77bd561d0e67fa00b76469ac54b8033754bba114ec5eb3257964.json +1 -0
  44. {xinference-0.10.0.dist-info → xinference-0.10.1.dist-info}/METADATA +10 -10
  45. {xinference-0.10.0.dist-info → xinference-0.10.1.dist-info}/RECORD +50 -56
  46. xinference/model/llm/ggml/ctransformers.py +0 -281
  47. xinference/model/llm/ggml/ctransformers_util.py +0 -161
  48. xinference/web/ui/build/static/js/main.98516614.js +0 -3
  49. xinference/web/ui/build/static/js/main.98516614.js.map +0 -1
  50. xinference/web/ui/node_modules/.cache/babel-loader/0bd70b1ecf307e2681318e864f4692305b6350c8683863007f4caf2f9ac33b6e.json +0 -1
  51. xinference/web/ui/node_modules/.cache/babel-loader/0db651c046ef908f45cde73af0dbea0a797d3e35bb57f4a0863b481502103a64.json +0 -1
  52. xinference/web/ui/node_modules/.cache/babel-loader/139969fd25258eb7decc9505f30b779089bba50c402bb5c663008477c7bff73b.json +0 -1
  53. xinference/web/ui/node_modules/.cache/babel-loader/18e5d5422e2464abf4a3e6d38164570e2e426e0a921e9a2628bbae81b18da353.json +0 -1
  54. xinference/web/ui/node_modules/.cache/babel-loader/3d93bd9a74a1ab0cec85af40f9baa5f6a8e7384b9e18c409b95a81a7b45bb7e2.json +0 -1
  55. xinference/web/ui/node_modules/.cache/babel-loader/3e055de705e397e1d413d7f429589b1a98dd78ef378b97f0cdb462c5f2487d5e.json +0 -1
  56. xinference/web/ui/node_modules/.cache/babel-loader/3f357ab57b8e7fade54c667f0e0ebf2787566f72bfdca0fea14e395b5c203753.json +0 -1
  57. xinference/web/ui/node_modules/.cache/babel-loader/4fd24800544873512b540544ae54601240a5bfefd9105ff647855c64f8ad828f.json +0 -1
  58. xinference/web/ui/node_modules/.cache/babel-loader/52aa27272b4b9968f62666262b47661cb1992336a2aff3b13994cc36877b3ec3.json +0 -1
  59. xinference/web/ui/node_modules/.cache/babel-loader/60c4b98d8ea7479fb0c94cfd19c8128f17bd7e27a1e73e6dd9adf6e9d88d18eb.json +0 -1
  60. xinference/web/ui/node_modules/.cache/babel-loader/7e094845f611802b024b57439cbf911038169d06cdf6c34a72a7277f35aa71a4.json +0 -1
  61. xinference/web/ui/node_modules/.cache/babel-loader/95c8cc049fadd23085d8623e1d43d70b614a4e52217676f186a417dca894aa09.json +0 -1
  62. xinference/web/ui/node_modules/.cache/babel-loader/98b7ef307f436affe13d75a4f265b27e828ccc2b10ffae6513abe2681bc11971.json +0 -1
  63. xinference/web/ui/node_modules/.cache/babel-loader/9d7c49815d97539207e5aab2fb967591b5fed7791218a0762539efc9491f36af.json +0 -1
  64. xinference/web/ui/node_modules/.cache/babel-loader/a8070ce4b780b4a044218536e158a9e7192a6c80ff593fdc126fee43f46296b5.json +0 -1
  65. xinference/web/ui/node_modules/.cache/babel-loader/b400cfc9db57fa6c70cd2bad055b73c5079fde0ed37974009d898083f6af8cd8.json +0 -1
  66. xinference/web/ui/node_modules/.cache/babel-loader/bd04667474fd9cac2983b03725c218908a6cc0ee9128a5953cd00d26d4877f60.json +0 -1
  67. xinference/web/ui/node_modules/.cache/babel-loader/c230a727b8f68f0e62616a75e14a3d33026dc4164f2e325a9a8072d733850edb.json +0 -1
  68. xinference/web/ui/node_modules/.cache/babel-loader/d0d0b591d9adaf42b83ad6633f8b7c118541a4b80ea957c303d3bf9b86fbad0a.json +0 -1
  69. xinference/web/ui/node_modules/.cache/babel-loader/d44a6eb6106e09082b691a315c9f6ce17fcfe25beb7547810e0d271ce3301cd2.json +0 -1
  70. xinference/web/ui/node_modules/.cache/babel-loader/e1d9b2ae4e1248658704bc6bfc5d6160dcd1a9e771ea4ae8c1fed0aaddeedd29.json +0 -1
  71. xinference/web/ui/node_modules/.cache/babel-loader/fe5db70859503a54cbe71f9637e5a314cda88b1f0eecb733b6e6f837697db1ef.json +0 -1
  72. /xinference/web/ui/build/static/js/{main.98516614.js.LICENSE.txt → main.76ef2b17.js.LICENSE.txt} +0 -0
  73. {xinference-0.10.0.dist-info → xinference-0.10.1.dist-info}/LICENSE +0 -0
  74. {xinference-0.10.0.dist-info → xinference-0.10.1.dist-info}/WHEEL +0 -0
  75. {xinference-0.10.0.dist-info → xinference-0.10.1.dist-info}/entry_points.txt +0 -0
  76. {xinference-0.10.0.dist-info → xinference-0.10.1.dist-info}/top_level.txt +0 -0
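
The headline change in this release is the removal of the ctransformers backend (files 46 and 47, reproduced in full below); GGML/GGUF models continue to be served by the remaining llama.cpp backend (file 8). As a rough sketch of the replacement path, something like the following should work against a 0.10.1 server; the host, model name, size, and quantization are illustrative placeholders, not values taken from this diff.

from xinference.client import RESTfulClient

# A minimal sketch, assuming a local Xinference 0.10.1 server on the
# default port; the model name and quantization are placeholders.
client = RESTfulClient("http://127.0.0.1:9997")
model_uid = client.launch_model(
    model_name="llama-2-chat",
    model_format="ggufv2",  # served via llama.cpp now that ctransformers is gone
    model_size_in_billions=7,
    quantization="Q4_K_M",
)
model = client.get_model(model_uid)
print(model.generate("Hello,", generate_config={"max_tokens": 32}))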
xinference/model/llm/ggml/ctransformers.py (deleted)
@@ -1,281 +0,0 @@
- # Copyright 2022-2023 XProbe Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import logging
- import os
- from typing import TYPE_CHECKING, Iterator, Optional, Sequence, TypedDict, Union
-
- if TYPE_CHECKING:
-     from ctransformers import AutoConfig
-
- from ....types import Completion, CompletionChunk, CreateCompletionCTransformers
- from ..core import LLM
- from ..llm_family import LLMFamilyV1, LLMSpecV1
- from .ctransformers_util import generate_stream
-
- logger = logging.getLogger(__name__)
-
- # all supported models for Ctransformers with their model type.
- # Please Strictly follows this name format when inputting new model to model_family.
- MODEL_TYPE_FOR_CTRANSFORMERS = {
-     "gpt-2": "gpt2",
-     "gpt-j": "gptj",
-     "gpt4all-j": "gptj",
-     "gpt-neox": "gpt_neox",
-     "stablelm": "gpt_neox",
-     "llama": "llama",
-     "llama-2": "llama",
-     "mpt": "mpt",
-     "dolly-v2": "dolly-v2",
-     "replit": "replit",
-     "starcoder": "starcoder",
-     "starchat": "starcoder",
-     "falcon": "falcon",
- }
-
- # these two constants subjects to change for future development and ctransformers updates.
- CTRANSFORMERS_SUPPORTED_MODEL = ["starcoder", "gpt-2"]
-
- CTRANSFORMERS_GPU_SUPPORT = ["llama", "llama-2", "mpt", "falcon"]
-
- SIZE_TO_GPU_LAYERS = {
-     3: 26,
-     7: 32,
-     13: 40,
-     30: 60,
-     65: 80,
- }
-
-
- class CtransformersModelConfig(TypedDict, total=False):
-     n_ctx: int
-     n_gpu_layers: int
-
-
- class CtransformersGenerateConfig(TypedDict, total=False):
-     max_tokens: Optional[int]
-     top_k: Optional[int]
-     top_p: Optional[float]
-     temperature: Optional[float]
-     repetition_penalty: Optional[float]
-     last_n_tokens: Optional[int]
-     seed: Optional[int]
-     batch_size: Optional[int]
-     threads: Optional[int]
-     stop: Optional[Sequence[str]]
-     stream: Optional[bool]
-     reset: Optional[bool]
-
-
- class CtransformersModel(LLM):
-     def __init__(
-         self,
-         model_uid: str,
-         model_family: "LLMFamilyV1",
-         model_spec: "LLMSpecV1",
-         quantization: str,
-         model_path: str,
-         ctransformers_model_config: Optional[CtransformersModelConfig],
-     ):
-         super().__init__(model_uid, model_family, model_spec, quantization, model_path)
-
-         self._model_type = None
-         closest_size = min(
-             SIZE_TO_GPU_LAYERS.keys(),
-             key=lambda x: abs(
-                 x - self.handle_model_size(model_spec.model_size_in_billions)
-             ),
-         )
-
-         self._model_family = model_family
-         self._model_uid = model_uid
-         self._llm = None
-
-         self._gpu_layers = SIZE_TO_GPU_LAYERS[closest_size]
-         self._ctransformer_model_config = self._sanitize_model_config(
-             model_path, ctransformers_model_config
-         )
-
-     def _sanitize_model_config(
-         self, model_path, ctransformers_model_config: Optional[CtransformersModelConfig]
-     ) -> "AutoConfig":
-         try:
-             from ctransformers import AutoConfig, Config
-         except ImportError:
-             error_message = (
-                 "Failed to import module 'ctransformers - AutoConfig and Config'"
-             )
-
-             installation_guide = [
-                 f"Please make sure 'ctransformers' is installed.",
-                 f"You can install it by checking out the repository for command:"
-                 f"https://github.com/marella/ctransformers",
-             ]
-
-             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
-
-         # if the model have customized config, we update it.
-         model_config_ret = Config()
-         potential_gpu_layers = None
-         if ctransformers_model_config:
-             potential_context_length = ctransformers_model_config.pop("n_ctx", None)
-             potential_gpu_layers = ctransformers_model_config.pop("n_gpu_layers", None)
-
-             model_config_ret.context_length = potential_context_length
-             model_config_ret.gpu_layers = potential_gpu_layers
-
-         # if user does not define gpu layers, we have to set it with our system if applicable.
-         if potential_gpu_layers is None:
-             if self._model_family.model_name not in CTRANSFORMERS_GPU_SUPPORT:
-                 model_config_ret.gpu_layers = -1
-             elif self._is_darwin_and_apple_silicon():
-                 model_config_ret.gpu_layers = 1
-             elif self._has_cuda_device():
-                 model_config_ret.gpu_layers = self._gpu_layers
-
-         return AutoConfig(model_config_ret)
-
-     def _sanitize_generate_config(
-         self,
-         generate_config: Optional[CtransformersGenerateConfig],
-     ) -> CtransformersGenerateConfig:
-         # if the input config is not None, we try to copy the selected attributes to the ctransformersGenerateConfig.
-         if generate_config is None:
-             generate_config = CtransformersGenerateConfig(
-                 **CreateCompletionCTransformers().dict()
-             )
-         else:
-             # Validate generate_config and fill default values to the generate config.
-             generate_config = CtransformersGenerateConfig(
-                 **CreateCompletionCTransformers(**generate_config).dict()
-             )
-
-         # for our system, the threads will have to be set to 4
-         # all other parameters, if not specified, will be set to default when generate.
-         generate_config.setdefault("threads", 4)
-
-         return generate_config
-
-     def load(self):
-         try:
-             from ctransformers import AutoModelForCausalLM
-         except ImportError:
-             error_message = "Failed to import module 'ctransformers'"
-
-             installation_guide = [
-                 f"Please make sure 'ctransformers' is installed.",
-                 f"You can install it by checking out the repository for command."
-                 f"https://github.com/marella/ctransformers",
-             ]
-
-             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
-
-         model_path = os.path.join(
-             self.model_path,
-             self.model_spec.model_file_name_template.format(
-                 quantization=self.quantization
-             ),
-         )
-
-         self._model_type = self._determine_model_type()
-         self._llm = AutoModelForCausalLM.from_pretrained(
-             model_path_or_repo_id=model_path,
-             model_type=self._model_type,
-             config=self._ctransformer_model_config,
-         )
-
-     @classmethod
-     def match(
-         cls, llm_family: LLMFamilyV1, llm_spec: LLMSpecV1, quantization: str
-     ) -> bool:
-         if llm_spec.model_format != "ggmlv3" and llm_spec.model_format != "ggufv2":
-             return False
-         if llm_family.model_name not in CTRANSFORMERS_SUPPORTED_MODEL:
-             return False
-         if "generate" not in llm_family.model_ability:
-             return False
-         return True
-
-     def _determine_model_type(self):
-         if self._model_family.model_name not in MODEL_TYPE_FOR_CTRANSFORMERS:
-             raise ValueError(
-                 f"The current model {self._model_family.model_name} is not supported, check your model name. "
-             )
-         return MODEL_TYPE_FOR_CTRANSFORMERS[self._model_family.model_name]
-
-     def generate(
-         self, prompt: str, generate_config_raw: CtransformersGenerateConfig
-     ) -> Union[Completion, Iterator[CompletionChunk]]:
-         def generator_wrapper(
-             _prompt: str,
-             _max_new_tokens: Union[int, None],
-             _generate_config: CtransformersGenerateConfig,
-         ) -> Iterator[CompletionChunk]:
-             assert self._model_uid is not None
-             for _completion_chunk, _ in generate_stream(
-                 model=self._model_uid,
-                 model_ref=self._llm,
-                 prompt=_prompt,
-                 max_new_tokens=_max_new_tokens,
-                 **_generate_config,
-             ):
-                 yield _completion_chunk
-
-         generate_config = self._sanitize_generate_config(generate_config_raw)
-
-         logger.debug(
-             "Enter generate, prompt: %s, generate config: %s", prompt, generate_config
-         )
-
-         max_new_tokens = generate_config.pop("max_tokens", None)
-
-         stream_or_not = generate_config.get("stream", False)
-         if stream_or_not:
-             return generator_wrapper(
-                 _prompt=prompt,
-                 _max_new_tokens=max_new_tokens,
-                 _generate_config=generate_config,
-             )
-         else:
-             assert self.model_uid is not None
-             completion_chunk = None
-             completion_usage = None
-             for completion_chunk, completion_usage in generate_stream(
-                 model=self.model_uid,
-                 model_ref=self._llm,
-                 prompt=prompt,
-                 max_new_tokens=max_new_tokens,
-                 **generate_config,
-             ):
-                 pass
-
-             assert completion_chunk is not None
-             assert completion_usage is not None
-
-             completion = Completion(
-                 id=completion_chunk["id"],
-                 object=completion_chunk["object"],
-                 created=completion_chunk["created"],
-                 model=completion_chunk["model"],
-                 choices=completion_chunk["choices"],
-                 usage=completion_usage,
-             )
-
-             logger.debug(
-                 "Generated, completion: %s, generate config: %s",
-                 completion,
-                 generate_config,
-             )
-
-             return completion
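
One non-obvious piece of the deleted CtransformersModel is its GPU offload heuristic: it snaps the model size to the nearest key of SIZE_TO_GPU_LAYERS and offloads that many layers. A standalone restatement of that logic (the helper name pick_gpu_layers is ours, not from the original file):

# Choose the entry of SIZE_TO_GPU_LAYERS whose key (model size in
# billions) is closest to the requested size, and offload that many
# layers to the GPU.
SIZE_TO_GPU_LAYERS = {3: 26, 7: 32, 13: 40, 30: 60, 65: 80}

def pick_gpu_layers(model_size_in_billions: float) -> int:
    closest_size = min(
        SIZE_TO_GPU_LAYERS, key=lambda size: abs(size - model_size_in_billions)
    )
    return SIZE_TO_GPU_LAYERS[closest_size]

assert pick_gpu_layers(7) == 32
assert pick_gpu_layers(34) == 60  # 34B snaps to the 30B entry, not 65B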
xinference/model/llm/ggml/ctransformers_util.py (deleted)
@@ -1,161 +0,0 @@
- # Copyright 2022-2023 XProbe Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import logging
- import re
- import time
- import uuid
- from typing import Iterator, Optional, Sequence, Tuple
-
- from ....types import CompletionChoice, CompletionChunk, CompletionUsage
-
- logger = logging.getLogger(__name__)
-
-
- def generate_stream(
-     model,
-     model_ref,
-     prompt: str,
-     *,
-     max_new_tokens: Optional[int] = None,
-     top_k: Optional[int] = None,
-     top_p: Optional[float] = None,
-     temperature: Optional[float] = None,
-     repetition_penalty: Optional[float] = None,
-     last_n_tokens: Optional[int] = None,
-     seed: Optional[int] = None,
-     batch_size: Optional[int] = None,
-     stream: Optional[bool] = False,
-     threads: Optional[int] = None,
-     stop: Optional[Sequence[str]] = None,
-     reset: Optional[bool] = None,
-     **kwargs,
- ) -> Iterator[Tuple[CompletionChunk, CompletionUsage]]:
-     stop = stop or []
-     if isinstance(stop, str):
-         stop = [stop]
-
-     tokens = model_ref.tokenize(prompt)
-
-     stop_regex = re.compile("|".join(map(re.escape, stop)))
-     count = 0
-     text = ""
-     total_text = ""
-     incomplete = b""
-
-     # parameters needed for Xinference.
-     finish_reason = None
-
-     try:
-         from ctransformers.utils import utf8_split_incomplete
-     except ImportError:
-         error_message = (
-             "Failed to import module 'ctransformers - utf8_split_incomplete'"
-         )
-
-         installation_guide = [
-             "Please make sure 'ctransformers' is installed. You can install it by checking out the repository: "
-             "https://github.com/marella/ctransformers",
-         ]
-
-         raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
-
-     for token in model_ref.generate(
-         tokens,
-         top_k=top_k,
-         top_p=top_p,
-         temperature=temperature,
-         repetition_penalty=repetition_penalty,
-         last_n_tokens=last_n_tokens,
-         seed=seed,
-         batch_size=batch_size,
-         threads=threads,
-         reset=reset,
-     ):
-         # Handle incomplete UTF-8 multi-byte characters.
-         incomplete += model_ref.detokenize([token], decode=False)
-         complete, incomplete = utf8_split_incomplete(incomplete)
-         output = complete.decode(errors="ignore")
-         text += output
-         total_text += output
-
-         # https://github.com/abetlen/llama-cpp-python/blob/1a13d76c487df1c8560132d10bda62d6e2f4fa93/llama_cpp/llama.py#L686-L706
-         # Check if one of the stop sequences is part of the text.
-         # Note that the stop sequence may not always be at the end of text.
-         if stop:
-             match = stop_regex.search(text)
-             if match:
-                 text = text[: match.start()]
-                 finish_reason = "stop"
-                 break
-
-         # Avoid sending the longest suffix of text which is also a prefix
-         # of a stop sequence, as it can form a stop sequence with the text
-         # generated later.
-         longest = 0
-         for s in stop:
-             for i in range(len(s), 0, -1):
-                 if text.endswith(s[:i]):
-                     longest = max(i, longest)
-                     break
-
-         end = len(text) - longest
-         if end > 0:
-             output = text[:end]
-             completion_choice = CompletionChoice(
-                 text=output, index=0, logprobs=None, finish_reason=None
-             )
-             completion_chunk = CompletionChunk(
-                 id=str(uuid.uuid1()),
-                 object="text_completion",
-                 created=int(time.time()),
-                 model=model,
-                 choices=[completion_choice],
-             )
-             completion_usage = CompletionUsage(
-                 prompt_tokens=len(tokens),
-                 completion_tokens=count + 1,
-                 total_tokens=count + 1 + len(tokens),
-             )
-
-             yield completion_chunk, completion_usage
-             text = text[end:]
-
-         count += 1
-         if max_new_tokens is not None and count >= max_new_tokens:
-             finish_reason = "length"
-             break
-
-     if stream is False:
-         completion_choice = CompletionChoice(
-             text=total_text, index=0, logprobs=None, finish_reason=finish_reason
-         )
-     else:
-         completion_choice = CompletionChoice(
-             text=text, index=0, logprobs=None, finish_reason=finish_reason
-         )
-
-     completion_chunk = CompletionChunk(
-         id=str(uuid.uuid1()),
-         object="text_completion",
-         created=int(time.time()),
-         model=model,
-         choices=[completion_choice],
-     )
-     completion_usage = CompletionUsage(
-         prompt_tokens=len(tokens),
-         completion_tokens=count,
-         total_tokens=count + len(tokens),
-     )
-
-     yield completion_chunk, completion_usage
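
Two details of the deleted generate_stream are easy to miss: it buffers incomplete UTF-8 byte sequences across tokens (via ctransformers' utf8_split_incomplete) so multi-byte characters are never split mid-stream, and it withholds any trailing text that could still grow into a stop sequence. The withholding logic, extracted here into a self-contained helper for illustration (the function name is ours):

def withheld_suffix_length(text: str, stop: list) -> int:
    # Length of the longest suffix of `text` that is also a prefix of
    # some stop sequence; those characters are held back because tokens
    # generated later could complete the stop sequence.
    longest = 0
    for s in stop:
        for i in range(len(s), 0, -1):
            if text.endswith(s[:i]):
                longest = max(longest, i)
                break
    return longest

# "Hello <" might be the start of the stop sequence "</s>", so one
# character is held back; "Hello" overlaps no stop prefix at all.
assert withheld_suffix_length("Hello <", ["</s>"]) == 1
assert withheld_suffix_length("Hello", ["</s>"]) == 0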