xinference 0.11.0__py3-none-any.whl → 0.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (37)
  1. xinference/_version.py +3 -3
  2. xinference/core/chat_interface.py +10 -4
  3. xinference/core/model.py +2 -2
  4. xinference/fields.py +3 -1
  5. xinference/model/llm/ggml/chatglm.py +98 -13
  6. xinference/model/llm/ggml/llamacpp.py +49 -2
  7. xinference/model/llm/llm_family.json +132 -3
  8. xinference/model/llm/llm_family_modelscope.json +139 -3
  9. xinference/model/llm/pytorch/chatglm.py +48 -0
  10. xinference/model/llm/pytorch/core.py +23 -6
  11. xinference/model/llm/pytorch/deepseek_vl.py +35 -9
  12. xinference/model/llm/pytorch/internlm2.py +32 -1
  13. xinference/model/llm/pytorch/qwen_vl.py +38 -11
  14. xinference/model/llm/pytorch/utils.py +38 -1
  15. xinference/model/llm/pytorch/yi_vl.py +42 -14
  16. xinference/model/llm/sglang/core.py +31 -9
  17. xinference/model/llm/utils.py +25 -5
  18. xinference/model/llm/vllm/core.py +82 -3
  19. xinference/types.py +10 -1
  20. xinference/web/ui/build/asset-manifest.json +3 -3
  21. xinference/web/ui/build/index.html +1 -1
  22. xinference/web/ui/build/static/js/{main.8e44da4b.js → main.551aa479.js} +3 -3
  23. xinference/web/ui/build/static/js/main.551aa479.js.map +1 -0
  24. xinference/web/ui/node_modules/.cache/babel-loader/1fa824d82b2af519de7700c594e50bde4bbca60d13bd3fabff576802e4070304.json +1 -0
  25. xinference/web/ui/node_modules/.cache/babel-loader/23caf6f1e52c43e983ca3bfd4189f41dbd645fa78f2dfdcd7f6b69bc41678665.json +1 -0
  26. xinference/web/ui/node_modules/.cache/babel-loader/a6da6bc3d0d2191adebee87fb58ecebe82d071087bd2f7f3a9c7fdd2ada130f2.json +1 -0
  27. {xinference-0.11.0.dist-info → xinference-0.11.1.dist-info}/METADATA +3 -2
  28. {xinference-0.11.0.dist-info → xinference-0.11.1.dist-info}/RECORD +33 -33
  29. xinference/web/ui/build/static/js/main.8e44da4b.js.map +0 -1
  30. xinference/web/ui/node_modules/.cache/babel-loader/1870cd6f7054d04e049e363c0a85526584fe25519378609d2838e28d7492bbf1.json +0 -1
  31. xinference/web/ui/node_modules/.cache/babel-loader/5393569d846332075b93b55656716a34f50e0a8c970be789502d7e6c49755fd7.json +0 -1
  32. xinference/web/ui/node_modules/.cache/babel-loader/ddaec68b88e5eff792df1e39a4b4b8b737bfc832293c015660c3c69334e3cf5c.json +0 -1
  33. /xinference/web/ui/build/static/js/{main.8e44da4b.js.LICENSE.txt → main.551aa479.js.LICENSE.txt} +0 -0
  34. {xinference-0.11.0.dist-info → xinference-0.11.1.dist-info}/LICENSE +0 -0
  35. {xinference-0.11.0.dist-info → xinference-0.11.1.dist-info}/WHEEL +0 -0
  36. {xinference-0.11.0.dist-info → xinference-0.11.1.dist-info}/entry_points.txt +0 -0
  37. {xinference-0.11.0.dist-info → xinference-0.11.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/pytorch/yi_vl.py CHANGED
@@ -139,6 +139,12 @@ class YiVLChatModel(PytorchChatModel):
             generate_config = {}
 
         stream = generate_config.get("stream", False)
+        stream_options = generate_config.pop("stream_options", None)
+        include_usage = (
+            stream_options["include_usage"]
+            if isinstance(stream_options, dict)
+            else False
+        )
 
         from ....thirdparty.llava.conversation import conv_templates
         from ....thirdparty.llava.mm_utils import (
@@ -166,11 +172,11 @@ class YiVLChatModel(PytorchChatModel):
         )
 
         images = state.get_images(return_pil=True)
-        image = images[0]
-
-        image_tensor = self._image_processor.preprocess(image, return_tensors="pt")[
-            "pixel_values"
-        ][0]
+        if images:
+            image = images[0]
+            image_tensor = self._image_processor.preprocess(image, return_tensors="pt")[
+                "pixel_values"
+            ][0]
 
         stop_str = state.sep
         keywords = [stop_str]
@@ -187,7 +193,9 @@ class YiVLChatModel(PytorchChatModel):
             "input_ids": input_ids,
             "images": image_tensor.unsqueeze(0)
             .to(dtype=torch.bfloat16)
-            .to(self._model.device),
+            .to(self._model.device)
+            if images
+            else None,
             "streamer": streamer,
             "do_sample": True,
             "top_p": float(top_p),
@@ -200,7 +208,7 @@ class YiVLChatModel(PytorchChatModel):
         t.start()
 
         if stream:
-            it = self._generate_stream(streamer, stop_str)
+            it = self._generate_stream(streamer, stop_str, input_ids, include_usage)
             return self._to_chat_completion_chunks(it)
         else:
             c = self._generate(streamer, stop_str)
@@ -229,8 +237,12 @@ class YiVLChatModel(PytorchChatModel):
         )
         return c
 
-    def _generate_stream(self, streamer, stop_str) -> Iterator[CompletionChunk]:
+    def _generate_stream(
+        self, streamer, stop_str, input_ids, include_usage
+    ) -> Iterator[CompletionChunk]:
         completion_id = str(uuid.uuid1())
+        prompt_tokens, completion_tokens, total_tokens = 0, 0, 0
+        prompt_tokens = len(input_ids[0])
         for i, new_text in enumerate(streamer):
             if not new_text.endswith(stop_str):
                 completion_choice = CompletionChoice(
@@ -243,10 +255,12 @@ class YiVLChatModel(PytorchChatModel):
                     model=self.model_uid,
                     choices=[completion_choice],
                 )
+                completion_tokens = i
+                total_tokens = prompt_tokens + completion_tokens
                 completion_usage = CompletionUsage(
-                    prompt_tokens=-1,
-                    completion_tokens=-1,
-                    total_tokens=-1,
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    total_tokens=total_tokens,
                 )
                 chunk["usage"] = completion_usage
                 yield chunk
@@ -262,9 +276,23 @@ class YiVLChatModel(PytorchChatModel):
             choices=[completion_choice],
         )
         completion_usage = CompletionUsage(
-            prompt_tokens=-1,
-            completion_tokens=-1,
-            total_tokens=-1,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=total_tokens,
        )
         chunk["usage"] = completion_usage
         yield chunk
+        if include_usage:
+            chunk = CompletionChunk(
+                id=completion_id,
+                object="text_completion",
+                created=int(time.time()),
+                model=self.model_uid,
+                choices=[],
+            )
+            chunk["usage"] = CompletionUsage(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+            )
+            yield chunk
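
Note on the streaming-usage change above: every streamed chunk now carries real prompt/completion token counts instead of -1, and when the caller passes stream_options={"include_usage": True} one extra chunk with an empty choices list and only a usage payload is emitted at the end, mirroring the OpenAI streaming contract. A minimal, self-contained sketch of that pattern, using plain dicts rather than xinference's typed classes (all names here are illustrative):

import time
import uuid
from typing import Dict, Iterator, List


def stream_with_usage(
    pieces: List[str], prompt_tokens: int, include_usage: bool
) -> Iterator[Dict]:
    # Emit one chunk per generated piece, each carrying cumulative usage counts.
    completion_id = str(uuid.uuid1())
    completion_tokens = 0
    for i, text in enumerate(pieces):
        completion_tokens = i
        yield {
            "id": completion_id,
            "object": "text_completion",
            "created": int(time.time()),
            "model": "demo-model",
            "choices": [
                {"text": text, "index": 0, "logprobs": None, "finish_reason": None}
            ],
            "usage": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
        }
    if include_usage:
        # Trailing usage-only chunk: empty choices, totals only.
        yield {
            "id": completion_id,
            "object": "text_completion",
            "created": int(time.time()),
            "model": "demo-model",
            "choices": [],
            "usage": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
        }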

xinference/model/llm/sglang/core.py CHANGED
@@ -53,6 +53,7 @@ class SGLANGGenerateConfig(TypedDict, total=False):
     stop: Optional[Union[str, List[str]]]
     ignore_eos: bool
     stream: bool
+    stream_options: Optional[Union[dict, None]]
 
 
 try:
@@ -157,6 +158,8 @@ class SGLANGModel(LLM):
         )
         generate_config.setdefault("stop", [])
         generate_config.setdefault("stream", False)
+        stream_options = generate_config.get("stream_options")
+        generate_config.setdefault("stream_options", stream_options)
         generate_config.setdefault("ignore_eos", False)
 
         return generate_config
@@ -192,7 +195,7 @@ class SGLANGModel(LLM):
 
     @staticmethod
     def _convert_state_to_completion_chunk(
-        request_id: str, model: str, output_text: str, meta_info: Dict
+        request_id: str, model: str, output_text: str
     ) -> CompletionChunk:
         choices: List[CompletionChoice] = [
             CompletionChoice(
@@ -209,13 +212,6 @@ class SGLANGModel(LLM):
             model=model,
             choices=choices,
         )
-        prompt_tokens = meta_info["prompt_tokens"]
-        completion_tokens = meta_info["completion_tokens"]
-        chunk["usage"] = CompletionUsage(
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
-            total_tokens=prompt_tokens + completion_tokens,
-        )
         return chunk
 
     @staticmethod
@@ -272,6 +268,9 @@ class SGLANGModel(LLM):
             "Enter generate, prompt: %s, generate config: %s", prompt, generate_config
         )
         stream = sanitized_generate_config.pop("stream")
+        stream_options = sanitized_generate_config.pop("stream_options")
+        if isinstance(stream_options, dict):
+            include_usage = stream_options.pop("include_usage", False)
         request_id = str(uuid.uuid1())
         state = pipeline.run(
             question=prompt,
@@ -289,11 +288,34 @@ class SGLANGModel(LLM):
         else:
 
             async def stream_results() -> AsyncGenerator[CompletionChunk, None]:
+                prompt_tokens, completion_tokens, total_tokens = 0, 0, 0
                 async for out, meta_info in state.text_async_iter(
                     var_name="answer", return_meta_data=True
                 ):
                     chunk = self._convert_state_to_completion_chunk(
-                        request_id, self.model_uid, output_text=out, meta_info=meta_info
+                        request_id, self.model_uid, output_text=out
+                    )
+                    prompt_tokens = meta_info["prompt_tokens"]
+                    completion_tokens = meta_info["completion_tokens"]
+                    total_tokens = prompt_tokens + completion_tokens
+                    chunk["usage"] = CompletionUsage(
+                        prompt_tokens=prompt_tokens,
+                        completion_tokens=completion_tokens,
+                        total_tokens=total_tokens,
+                    )
+                    yield chunk
+                if include_usage:
+                    chunk = CompletionChunk(
+                        id=request_id,
+                        object="text_completion",
+                        created=int(time.time()),
+                        model=self.model_uid,
+                        choices=[],
+                    )
+                    chunk["usage"] = CompletionUsage(
+                        prompt_tokens=prompt_tokens,
+                        completion_tokens=completion_tokens,
+                        total_tokens=total_tokens,
                     )
                     yield chunk
 

xinference/model/llm/utils.py CHANGED
@@ -482,9 +482,6 @@ Begin!"""
                 for i, choice in enumerate(chunk["choices"])
             ],
         }
-        usage = chunk.get("usage")
-        if usage is not None:
-            chat_chunk["usage"] = usage
         return cast(ChatCompletionChunk, chat_chunk)
 
     @classmethod
@@ -508,6 +505,19 @@ Begin!"""
                 for i, choice in enumerate(chunk["choices"])
             ],
         }
+        return cast(ChatCompletionChunk, chat_chunk)
+
+    @classmethod
+    def _get_final_chat_completion_chunk(
+        cls, chunk: CompletionChunk
+    ) -> ChatCompletionChunk:
+        chat_chunk = {
+            "id": "chat" + chunk["id"],
+            "model": chunk["model"],
+            "created": chunk["created"],
+            "object": "chat.completion.chunk",
+            "choices": [],
+        }
         usage = chunk.get("usage")
         if usage is not None:
             chat_chunk["usage"] = usage
@@ -521,7 +531,12 @@ Begin!"""
         for i, chunk in enumerate(chunks):
             if i == 0:
                 yield cls._get_first_chat_completion_chunk(chunk)
-            yield cls._to_chat_completion_chunk(chunk)
+            # usage
+            choices = chunk.get("choices")
+            if not choices:
+                yield cls._get_final_chat_completion_chunk(chunk)
+            else:
+                yield cls._to_chat_completion_chunk(chunk)
 
     @classmethod
     async def _async_to_chat_completion_chunks(
@@ -532,7 +547,12 @@ Begin!"""
         async for chunk in chunks:
             if i == 0:
                 yield cls._get_first_chat_completion_chunk(chunk)
-            yield cls._to_chat_completion_chunk(chunk)
+            # usage
+            choices = chunk.get("choices")
+            if not choices:
+                yield cls._get_final_chat_completion_chunk(chunk)
+            else:
+                yield cls._to_chat_completion_chunk(chunk)
             i += 1
 
     @staticmethod
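
Note on the converter change above: the sync and async chunk converters now route a chunk with an empty choices list (the usage-only chunk produced when include_usage is set) through the new _get_final_chat_completion_chunk, while ordinary chunks keep going through _to_chat_completion_chunk, which no longer copies usage. A rough sketch of the resulting dispatch, using plain dicts with a hypothetical delta shape for illustration (not xinference's exact field layout):

from typing import Dict


def to_chat_chunk(chunk: Dict) -> Dict:
    # Shared envelope for both the per-token chunk and the final usage-only chunk.
    base = {
        "id": "chat" + chunk["id"],
        "model": chunk["model"],
        "created": chunk["created"],
        "object": "chat.completion.chunk",
    }
    choices = chunk.get("choices")
    if not choices:
        # Final chunk: no choices, only usage totals.
        return {**base, "choices": [], "usage": chunk.get("usage")}
    # Ordinary chunk: convert each completion choice into a chat delta.
    return {
        **base,
        "choices": [
            {
                "index": i,
                "delta": {"content": c["text"]},
                "finish_reason": c.get("finish_reason"),
            }
            for i, c in enumerate(choices)
        ],
    }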

xinference/model/llm/vllm/core.py CHANGED
@@ -37,6 +37,7 @@ from ....types import (
     CompletionChoice,
     CompletionChunk,
     CompletionUsage,
+    LoRA,
     ToolCallFunction,
     ToolCalls,
 )
@@ -64,16 +65,19 @@ class VLLMModelConfig(TypedDict, total=False):
 
 
 class VLLMGenerateConfig(TypedDict, total=False):
+    lora_name: Optional[str]
     n: int
     best_of: Optional[int]
     presence_penalty: float
     frequency_penalty: float
     temperature: float
     top_p: float
+    top_k: int
     max_tokens: int
     stop_token_ids: Optional[List[int]]
     stop: Optional[Union[str, List[str]]]
     stream: bool  # non-sampling param, should not be passed to the engine.
+    stream_options: Optional[Union[dict, None]]
 
 
 try:
@@ -90,6 +94,7 @@ VLLM_SUPPORTED_MODELS = [
     "internlm-16k",
     "mistral-v0.1",
     "Yi",
+    "Yi-1.5",
     "code-llama",
     "code-llama-python",
 ]
@@ -106,6 +111,7 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "internlm2-chat",
     "qwen-chat",
     "Yi-chat",
+    "Yi-1.5-chat",
     "code-llama-instruct",
     "mistral-instruct-v0.1",
     "mistral-instruct-v0.2",
@@ -143,16 +149,30 @@ class VLLMModel(LLM):
         quantization: str,
         model_path: str,
         model_config: Optional[VLLMModelConfig],
+        peft_model: Optional[List[LoRA]] = None,
     ):
+        try:
+            from vllm.lora.request import LoRARequest
+        except ImportError:
+            error_message = "Failed to import module 'vllm'"
+            installation_guide = [
+                "Please make sure 'vllm' is installed. ",
+                "You can install it by `pip install vllm`\n",
+            ]
+
+            raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
         super().__init__(model_uid, model_family, model_spec, quantization, model_path)
         self._model_config = model_config
         self._engine = None
+        self.lora_modules = peft_model
+        self.lora_requests: List[LoRARequest] = []
 
     def load(self):
         try:
             import vllm
             from vllm.engine.arg_utils import AsyncEngineArgs
             from vllm.engine.async_llm_engine import AsyncLLMEngine
+            from vllm.lora.request import LoRARequest
         except ImportError:
             error_message = "Failed to import module 'vllm'"
             installation_guide = [
@@ -171,11 +191,33 @@ class VLLMModel(LLM):
             multiprocessing.set_start_method("fork", force=True)
 
         self._model_config = self._sanitize_model_config(self._model_config)
+
+        if self.lora_modules is None:
+            self.lora_requests = []
+        else:
+            self.lora_requests = [
+                LoRARequest(
+                    lora_name=lora.lora_name,
+                    lora_int_id=i,
+                    lora_local_path=lora.local_path,
+                )
+                for i, lora in enumerate(self.lora_modules, start=1)
+            ]
+
+        enable_lora = len(self.lora_requests) > 0
+        max_loras = len(self.lora_requests)
+
         logger.info(
             f"Loading {self.model_uid} with following model config: {self._model_config}"
+            f"Enable lora: {enable_lora}. Lora count: {max_loras}."
         )
 
-        engine_args = AsyncEngineArgs(model=self.model_path, **self._model_config)
+        engine_args = AsyncEngineArgs(
+            model=self.model_path,
+            enable_lora=enable_lora,
+            max_loras=max_loras,
+            **self._model_config,
+        )
         self._engine = AsyncLLMEngine.from_engine_args(engine_args)
 
     def _sanitize_model_config(
@@ -206,6 +248,7 @@ class VLLMModel(LLM):
             generate_config = {}
 
         sanitized = VLLMGenerateConfig()
+        sanitized.setdefault("lora_name", generate_config.get("lora_name", None))
         sanitized.setdefault("n", generate_config.get("n", 1))
         sanitized.setdefault("best_of", generate_config.get("best_of", None))
         sanitized.setdefault(
@@ -216,12 +259,16 @@ class VLLMModel(LLM):
         )
         sanitized.setdefault("temperature", generate_config.get("temperature", 1.0))
         sanitized.setdefault("top_p", generate_config.get("top_p", 1.0))
+        sanitized.setdefault("top_k", generate_config.get("top_k", -1))
         sanitized.setdefault("max_tokens", generate_config.get("max_tokens", 1024))
         sanitized.setdefault("stop", generate_config.get("stop", None))
         sanitized.setdefault(
             "stop_token_ids", generate_config.get("stop_token_ids", None)
         )
-        sanitized.setdefault("stream", generate_config.get("stream", None))
+        sanitized.setdefault("stream", generate_config.get("stream", False))
+        sanitized.setdefault(
+            "stream_options", generate_config.get("stream_options", None)
+        )
 
         return sanitized
 
@@ -338,16 +385,34 @@ class VLLMModel(LLM):
             "Enter generate, prompt: %s, generate config: %s", prompt, generate_config
         )
 
+        lora_model = sanitized_generate_config.pop("lora_name")
+
+        lora_request = None
+        if lora_model is not None:
+            for lora in self.lora_requests:
+                if lora_model == lora.lora_name:
+                    lora_request = lora
+                    break
+
         stream = sanitized_generate_config.pop("stream")
+        stream_options = sanitized_generate_config.pop("stream_options", None)
+        include_usage = (
+            stream_options["include_usage"]
+            if isinstance(stream_options, dict)
+            else False
+        )
         sampling_params = SamplingParams(**sanitized_generate_config)
         request_id = str(uuid.uuid1())
 
         assert self._engine is not None
-        results_generator = self._engine.generate(prompt, sampling_params, request_id)
+        results_generator = self._engine.generate(
+            prompt, sampling_params, request_id, lora_request=lora_request
+        )
 
         async def stream_results() -> AsyncGenerator[CompletionChunk, None]:
             previous_texts = [""] * sanitized_generate_config["n"]
             tools_token_filter = ChatModelMixin._tools_token_filter(self.model_family)
+            prompt_tokens, completion_tokens, total_tokens = 0, 0, 0
             async for _request_output in results_generator:
                 chunk = self._convert_request_output_to_completion_chunk(
                     request_id=request_id,
@@ -398,6 +463,20 @@ class VLLMModel(LLM):
                     total_tokens=total_tokens,
                 )
                 yield chunk
+            if include_usage:
+                chunk = CompletionChunk(
+                    id=request_id,
+                    object="text_completion",
+                    created=int(time.time()),
+                    model=self.model_uid,
+                    choices=[],
+                )
+                chunk["usage"] = CompletionUsage(
+                    prompt_tokens=prompt_tokens,
+                    completion_tokens=completion_tokens,
+                    total_tokens=total_tokens,
+                )
+                yield chunk
 
         if stream:
             return stream_results()
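
Note on the vLLM changes above: the backend can now be constructed with a list of LoRA adapters (peft_model), registers each one as a vllm.lora.request.LoRARequest with a 1-based lora_int_id, and starts the engine with enable_lora/max_loras; at generate time a request selects an adapter by lora_name, and top_k and stream_options are forwarded as well. A hypothetical caller-side config illustrating the new keys (the adapter name is made up and must match the lora_name of a LoRA registered via peft_model):

# Illustrative generate_config exercising the new VLLMGenerateConfig fields.
generate_config = {
    "lora_name": "sql-adapter",  # hypothetical adapter, resolved against self.lora_requests by name
    "top_k": 40,  # now forwarded to vLLM's SamplingParams
    "stream": True,
    "stream_options": {"include_usage": True},  # ask for the trailing usage-only chunk
}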

xinference/types.py CHANGED
@@ -187,6 +187,8 @@ class ChatglmCppGenerateConfig(TypedDict, total=False):
     top_p: float
     temperature: float
     stream: bool
+    lora_name: Optional[str]
+    stream_options: Optional[Union[dict, None]]
 
 
 class QWenCppModelConfig(TypedDict, total=False):
@@ -231,6 +233,7 @@ class LlamaCppGenerateConfig(TypedDict, total=False):
     repetition_penalty: float
     top_k: int
     stream: bool
+    stream_options: Optional[Union[dict, None]]
     tfs_z: float
     mirostat_mode: int
     mirostat_tau: float
@@ -279,6 +282,8 @@ class PytorchGenerateConfig(TypedDict, total=False):
     stream_interval: int
     model: Optional[str]
     tools: Optional[List[Dict]]
+    lora_name: Optional[str]
+    stream_options: Optional[Union[dict, None]]
 
 
 class PytorchModelConfig(TypedDict, total=False):
@@ -350,10 +355,12 @@ class CreateCompletionTorch(BaseModel):
     stop: Optional[Union[str, List[str]]] = stop_field
     stop_token_ids: Optional[Union[int, List[int]]] = none_field
     stream: bool = stream_field
+    stream_options: Optional[Union[dict, None]] = stream_option_field
     stream_interval: int = stream_interval_field
     temperature: float = temperature_field
     top_p: float = top_p_field
     top_k: int = top_k_field
+    lora_name: Optional[str]
 
 
 CreateCompletionLlamaCpp: BaseModel
@@ -366,6 +373,8 @@ try:
         include_fields={
             "grammar": (Optional[Any], None),
             "max_tokens": (Optional[int], max_tokens_field),
+            "lora_name": (Optional[str], None),
+            "stream_options": (Optional[Union[dict, None]], None),
         },
     )
 except ImportError:
@@ -393,7 +402,7 @@ class _CreateCompletionOpenAIFallback(BaseModel):
     seed: Optional[int] = none_field
     stop: Optional[Union[str, List[str]]] = stop_field
     stream: bool = stream_field
-    stream_options: Optional[dict] = stream_option_field
+    stream_options: Optional[Union[dict, None]] = stream_option_field
     suffix: Optional[str] = none_field
     temperature: float = temperature_field
     top_p: float = top_p_field

xinference/web/ui/build/asset-manifest.json CHANGED
@@ -1,14 +1,14 @@
 {
   "files": {
     "main.css": "./static/css/main.54bca460.css",
-    "main.js": "./static/js/main.8e44da4b.js",
+    "main.js": "./static/js/main.551aa479.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
     "main.54bca460.css.map": "./static/css/main.54bca460.css.map",
-    "main.8e44da4b.js.map": "./static/js/main.8e44da4b.js.map"
+    "main.551aa479.js.map": "./static/js/main.551aa479.js.map"
   },
   "entrypoints": [
     "static/css/main.54bca460.css",
-    "static/js/main.8e44da4b.js"
+    "static/js/main.551aa479.js"
   ]
 }

xinference/web/ui/build/index.html CHANGED
@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.8e44da4b.js"></script><link href="./static/css/main.54bca460.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.551aa479.js"></script><link href="./static/css/main.54bca460.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>