xinference 0.13.1__py3-none-any.whl → 0.13.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of xinference was flagged as a potentially problematic release.

xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2024-07-12T17:56:13+0800",
11
+ "date": "2024-07-19T19:15:54+0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "5e3f254d48383f37d849dd16db564ad9449e5163",
15
- "version": "0.13.1"
14
+ "full-revisionid": "880929cbbc73e5206ca069591b03d9d16dd858bf",
15
+ "version": "0.13.2"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
xinference/api/restful_api.py CHANGED
@@ -129,6 +129,7 @@ class SpeechRequest(BaseModel):
129
129
  voice: Optional[str]
130
130
  response_format: Optional[str] = "mp3"
131
131
  speed: Optional[float] = 1.0
132
+ stream: Optional[bool] = False
132
133
 
133
134
 
134
135
  class RegisterModelRequest(BaseModel):
@@ -491,6 +492,17 @@ class RESTfulAPI:
491
492
  else None
492
493
  ),
493
494
  )
495
+ self._router.add_api_route(
496
+ "/v1/images/inpainting",
497
+ self.create_inpainting,
498
+ methods=["POST"],
499
+ response_model=ImageList,
500
+ dependencies=(
501
+ [Security(self._auth_service, scopes=["models:read"])]
502
+ if self.is_authenticated()
503
+ else None
504
+ ),
505
+ )
494
506
  self._router.add_api_route(
495
507
  "/v1/chat/completions",
496
508
  self.create_chat_completion,
@@ -1317,8 +1329,14 @@ class RESTfulAPI:
1317
1329
  voice=body.voice,
1318
1330
  response_format=body.response_format,
1319
1331
  speed=body.speed,
1332
+ stream=body.stream,
1320
1333
  )
1321
- return Response(media_type="application/octet-stream", content=out)
1334
+ if body.stream:
1335
+ return EventSourceResponse(
1336
+ media_type="application/octet-stream", content=out
1337
+ )
1338
+ else:
1339
+ return Response(media_type="application/octet-stream", content=out)
1322
1340
  except RuntimeError as re:
1323
1341
  logger.error(re, exc_info=True)
1324
1342
  await self._report_error_event(model_uid, str(re))
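Editor's note: a minimal sketch of exercising the new streaming branch over plain HTTP. The endpoint, field names, and chunked behaviour come from the hunk above; the host, port, and model UID are placeholders for whatever your deployment uses.

    import requests

    # Placeholder endpoint and model UID; 9997 is only the usual default port.
    url = "http://127.0.0.1:9997/v1/audio/speech"
    payload = {
        "model": "my-audio-model-uid",
        "input": "Hello from xinference.",
        "voice": "",
        "response_format": "mp3",
        "speed": 1.0,
        "stream": True,  # new field added in this release
    }

    with requests.post(url, json=payload, stream=True) as resp:
        resp.raise_for_status()
        with open("speech.mp3", "wb") as f:
            for chunk in resp.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)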
@@ -1410,6 +1428,60 @@ class RESTfulAPI:
1410
1428
  await self._report_error_event(model_uid, str(e))
1411
1429
  raise HTTPException(status_code=500, detail=str(e))
1412
1430
 
1431
+ async def create_inpainting(
1432
+ self,
1433
+ model: str = Form(...),
1434
+ image: UploadFile = File(media_type="application/octet-stream"),
1435
+ mask_image: UploadFile = File(media_type="application/octet-stream"),
1436
+ prompt: Optional[Union[str, List[str]]] = Form(None),
1437
+ negative_prompt: Optional[Union[str, List[str]]] = Form(None),
1438
+ n: Optional[int] = Form(1),
1439
+ response_format: Optional[str] = Form("url"),
1440
+ size: Optional[str] = Form(None),
1441
+ kwargs: Optional[str] = Form(None),
1442
+ ) -> Response:
1443
+ model_uid = model
1444
+ try:
1445
+ model_ref = await (await self._get_supervisor_ref()).get_model(model_uid)
1446
+ except ValueError as ve:
1447
+ logger.error(str(ve), exc_info=True)
1448
+ await self._report_error_event(model_uid, str(ve))
1449
+ raise HTTPException(status_code=400, detail=str(ve))
1450
+ except Exception as e:
1451
+ logger.error(e, exc_info=True)
1452
+ await self._report_error_event(model_uid, str(e))
1453
+ raise HTTPException(status_code=500, detail=str(e))
1454
+
1455
+ try:
1456
+ if kwargs is not None:
1457
+ parsed_kwargs = json.loads(kwargs)
1458
+ else:
1459
+ parsed_kwargs = {}
1460
+ im = Image.open(image.file)
1461
+ mask_im = Image.open(mask_image.file)
1462
+ if not size:
1463
+ w, h = im.size
1464
+ size = f"{w}*{h}"
1465
+ image_list = await model_ref.inpainting(
1466
+ image=im,
1467
+ mask_image=mask_im,
1468
+ prompt=prompt,
1469
+ negative_prompt=negative_prompt,
1470
+ n=n,
1471
+ size=size,
1472
+ response_format=response_format,
1473
+ **parsed_kwargs,
1474
+ )
1475
+ return Response(content=image_list, media_type="application/json")
1476
+ except RuntimeError as re:
1477
+ logger.error(re, exc_info=True)
1478
+ await self._report_error_event(model_uid, str(re))
1479
+ raise HTTPException(status_code=400, detail=str(re))
1480
+ except Exception as e:
1481
+ logger.error(e, exc_info=True)
1482
+ await self._report_error_event(model_uid, str(e))
1483
+ raise HTTPException(status_code=500, detail=str(e))
1484
+
1413
1485
  async def create_flexible_infer(self, request: Request) -> Response:
1414
1486
  payload = await request.json()
1415
1487
 
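Editor's note: a sketch of what a request to the new /v1/images/inpainting route looks like, mirroring the Form/File parameters of the handler above. The model UID, file names, and prompt are placeholders, and the printed response shape is an assumption based on the ImageList response model.

    import json

    import requests

    url = "http://127.0.0.1:9997/v1/images/inpainting"
    files = [
        ("model", (None, "my-inpainting-model-uid")),
        ("prompt", (None, "a small wooden door")),
        ("negative_prompt", (None, "blurry, low quality")),
        ("n", (None, "1")),
        ("response_format", (None, "b64_json")),
        ("size", (None, "512*512")),
        ("kwargs", (None, json.dumps({}))),
        ("image", ("image.png", open("image.png", "rb"), "application/octet-stream")),
        ("mask_image", ("mask.png", open("mask.png", "rb"), "application/octet-stream")),
    ]
    resp = requests.post(url, files=files)
    resp.raise_for_status()
    print(resp.json())  # roughly {"created": ..., "data": [{"b64_json": ...}, ...]}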
xinference/client/restful/restful_client.py CHANGED
@@ -294,6 +294,81 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
294
294
  response_data = response.json()
295
295
  return response_data
296
296
 
297
+ def inpainting(
298
+ self,
299
+ image: Union[str, bytes],
300
+ mask_image: Union[str, bytes],
301
+ prompt: str,
302
+ negative_prompt: Optional[str] = None,
303
+ n: int = 1,
304
+ size: Optional[str] = None,
305
+ response_format: str = "url",
306
+ **kwargs,
307
+ ) -> "ImageList":
308
+ """
309
+ Inpaint an image by the input text.
310
+
311
+ Parameters
312
+ ----------
313
+ image: `Union[str, bytes]`
314
+ an image batch to be inpainted (which parts of the image to
315
+ be masked out with `mask_image` and repainted according to `prompt`). For both numpy array and pytorch
316
+ tensor, the expected value range is between `[0, 1]` If it's a tensor or a list or tensors, the
317
+ expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a list of arrays, the
318
+ expected shape should be `(B, H, W, C)` or `(H, W, C)` It can also accept image latents as `image`, but
319
+ if passing latents directly it is not encoded again.
320
+ mask_image: `Union[str, bytes]`
321
+ representing an image batch to mask `image`. White pixels in the mask
322
+ are repainted while black pixels are preserved. If `mask_image` is a PIL image, it is converted to a
323
+ single channel (luminance) before use. If it's a numpy array or pytorch tensor, it should contain one
324
+ color channel (L) instead of 3, so the expected shape for pytorch tensor would be `(B, 1, H, W)`, `(B,
325
+ H, W)`, `(1, H, W)`, `(H, W)`. And for numpy array would be for `(B, H, W, 1)`, `(B, H, W)`, `(H, W,
326
+ 1)`, or `(H, W)`.
327
+ prompt: `str` or `List[str]`
328
+ The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
329
+ negative_prompt (`str` or `List[str]`, *optional*):
330
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
331
+ `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
332
+ less than `1`).
333
+ n: `int`, defaults to 1
334
+ The number of images to generate per prompt. Must be between 1 and 10.
335
+ size: `str`, defaults to None
336
+ The width*height in pixels of the generated image.
337
+ response_format: `str`, defaults to `url`
338
+ The format in which the generated images are returned. Must be one of url or b64_json.
339
+ Returns
340
+ -------
341
+ ImageList
342
+ A list of image objects.
343
+ :param prompt:
344
+ :param image:
345
+ """
346
+ url = f"{self._base_url}/v1/images/inpainting"
347
+ params = {
348
+ "model": self._model_uid,
349
+ "prompt": prompt,
350
+ "negative_prompt": negative_prompt,
351
+ "n": n,
352
+ "size": size,
353
+ "response_format": response_format,
354
+ "kwargs": json.dumps(kwargs),
355
+ }
356
+ files: List[Any] = []
357
+ for key, value in params.items():
358
+ files.append((key, (None, value)))
359
+ files.append(("image", ("image", image, "application/octet-stream")))
360
+ files.append(
361
+ ("mask_image", ("mask_image", mask_image, "application/octet-stream"))
362
+ )
363
+ response = requests.post(url, files=files, headers=self.auth_headers)
364
+ if response.status_code != 200:
365
+ raise RuntimeError(
366
+ f"Failed to inpaint the images, detail: {_get_error_string(response)}"
367
+ )
368
+
369
+ response_data = response.json()
370
+ return response_data
371
+
297
372
 
298
373
  class RESTfulGenerateModelHandle(RESTfulModelHandle):
299
374
  def generate(
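Editor's note: a usage sketch for the new client method, assuming an inpainting-capable image model has already been launched (a launch sketch follows the model_spec.json hunk further down). The UID and file names are placeholders.

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    model = client.get_model("my-inpainting-model-uid")

    with open("photo.png", "rb") as img, open("mask.png", "rb") as mask:
        result = model.inpainting(
            image=img.read(),
            mask_image=mask.read(),
            prompt="replace the masked region with a stone fireplace",
            n=1,
            size="512*512",
            response_format="b64_json",
        )

    # result follows the ImageList shape: {"created": ..., "data": [{"b64_json": ...}]}
    print(len(result["data"]))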
@@ -692,6 +767,7 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
692
767
  voice: str = "",
693
768
  response_format: str = "mp3",
694
769
  speed: float = 1.0,
770
+ stream: bool = False,
695
771
  ):
696
772
  """
697
773
  Generates audio from the input text.
@@ -707,6 +783,8 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
707
783
  The format to audio in.
708
784
  speed: str
709
785
  The speed of the generated audio.
786
+ stream: bool
787
+ Use stream or not.
710
788
 
711
789
  Returns
712
790
  -------
@@ -720,6 +798,7 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
720
798
  "voice": voice,
721
799
  "response_format": response_format,
722
800
  "speed": speed,
801
+ "stream": stream,
723
802
  }
724
803
  response = requests.post(url, json=params, headers=self.auth_headers)
725
804
  if response.status_code != 200:
@@ -727,6 +806,9 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
727
806
  f"Failed to speech the text, detail: {_get_error_string(response)}"
728
807
  )
729
808
 
809
+ if stream:
810
+ return response.iter_content(chunk_size=1024)
811
+
730
812
  return response.content
731
813
 
732
814
 
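Editor's note: the corresponding client-side usage, as a sketch. With stream=False (the default) the method still returns the complete audio as bytes; with stream=True it returns the 1024-byte chunk iterator shown above. The UID is a placeholder.

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    model = client.get_model("my-chattts-model-uid")

    chunks = model.speech("Hello there", response_format="mp3", stream=True)
    with open("hello.mp3", "wb") as f:
        for chunk in chunks:
            f.write(chunk)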
xinference/core/model.py CHANGED
@@ -310,7 +310,7 @@ class ModelActor(xo.StatelessActor):
310
310
  )
311
311
  )
312
312
 
313
- def _to_json_generator(self, gen: types.GeneratorType):
313
+ def _to_generator(self, output_type: str, gen: types.GeneratorType):
314
314
  start_time = time.time()
315
315
  time_to_first_token = None
316
316
  final_usage = None
@@ -318,8 +318,13 @@ class ModelActor(xo.StatelessActor):
318
318
  for v in gen:
319
319
  if time_to_first_token is None:
320
320
  time_to_first_token = (time.time() - start_time) * 1000
321
- final_usage = v.get("usage", None)
322
- v = dict(data=json.dumps(v, ensure_ascii=False))
321
+ if output_type == "json":
322
+ final_usage = v.get("usage", None)
323
+ v = dict(data=json.dumps(v, ensure_ascii=False))
324
+ else:
325
+ assert (
326
+ output_type == "binary"
327
+ ), f"Unknown output type '{output_type}'"
323
328
  yield sse_starlette.sse.ensure_bytes(v, None)
324
329
  except OutOfMemoryError:
325
330
  logger.exception(
@@ -342,7 +347,7 @@ class ModelActor(xo.StatelessActor):
342
347
  )
343
348
  asyncio.run_coroutine_threadsafe(coro, loop=self._loop)
344
349
 
345
- async def _to_json_async_gen(self, gen: types.AsyncGeneratorType):
350
+ async def _to_async_gen(self, output_type: str, gen: types.AsyncGeneratorType):
346
351
  start_time = time.time()
347
352
  time_to_first_token = None
348
353
  final_usage = None
@@ -351,8 +356,13 @@ class ModelActor(xo.StatelessActor):
351
356
  if time_to_first_token is None:
352
357
  time_to_first_token = (time.time() - start_time) * 1000
353
358
  final_usage = v.get("usage", None)
354
- v = await asyncio.to_thread(json.dumps, v)
355
- v = dict(data=v) # noqa: F821
359
+ if output_type == "json":
360
+ v = await asyncio.to_thread(json.dumps, v, ensure_ascii=False)
361
+ v = dict(data=v) # noqa: F821
362
+ else:
363
+ assert (
364
+ output_type == "binary"
365
+ ), f"Unknown output type '{output_type}'"
356
366
  yield await asyncio.to_thread(sse_starlette.sse.ensure_bytes, v, None)
357
367
  except OutOfMemoryError:
358
368
  logger.exception(
@@ -379,8 +389,14 @@ class ModelActor(xo.StatelessActor):
379
389
  )
380
390
  await asyncio.gather(*coros)
381
391
 
392
+ async def _call_wrapper_json(self, fn: Callable, *args, **kwargs):
393
+ return await self._call_wrapper("json", fn, *args, **kwargs)
394
+
395
+ async def _call_wrapper_binary(self, fn: Callable, *args, **kwargs):
396
+ return await self._call_wrapper("binary", fn, *args, **kwargs)
397
+
382
398
  @oom_check
383
- async def _call_wrapper(self, fn: Callable, *args, **kwargs):
399
+ async def _call_wrapper(self, output_type: str, fn: Callable, *args, **kwargs):
384
400
  if self._lock is None:
385
401
  if inspect.iscoroutinefunction(fn):
386
402
  ret = await fn(*args, **kwargs)
@@ -397,16 +413,18 @@ class ModelActor(xo.StatelessActor):
397
413
  raise Exception("Parallel generation is not supported by ggml.")
398
414
 
399
415
  if inspect.isgenerator(ret):
400
- gen = self._to_json_generator(ret)
416
+ gen = self._to_generator(output_type, ret)
401
417
  self._current_generator = weakref.ref(gen)
402
418
  return gen
403
419
  if inspect.isasyncgen(ret):
404
- gen = self._to_json_async_gen(ret)
420
+ gen = self._to_async_gen(output_type, ret)
405
421
  self._current_generator = weakref.ref(gen)
406
422
  return gen
407
- if isinstance(ret, bytes):
423
+ if output_type == "json":
424
+ return await asyncio.to_thread(json_dumps, ret)
425
+ else:
426
+ assert output_type == "binary", f"Unknown output type '{output_type}'"
408
427
  return ret
409
- return await asyncio.to_thread(json_dumps, ret)
410
428
 
411
429
  @log_async(logger=logger)
412
430
  @request_limit
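Editor's note: a stripped-down sketch of the dispatch pattern these wrappers introduce, not the actual ModelActor code. JSON-producing calls (chat, embeddings, images) are serialized before leaving the actor, while binary-producing calls such as speech pass their bytes through unchanged.

    import json
    from typing import Any, Callable


    def call_wrapper(output_type: str, fn: Callable[..., Any], *args, **kwargs) -> Any:
        ret = fn(*args, **kwargs)
        if output_type == "json":
            # e.g. a chat completion dict -> UTF-8 encoded JSON bytes
            return json.dumps(ret, ensure_ascii=False).encode()
        assert output_type == "binary", f"Unknown output type '{output_type}'"
        return ret  # e.g. speech() already returning encoded audio bytes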
@@ -419,11 +437,11 @@ class ModelActor(xo.StatelessActor):
419
437
  else:
420
438
  kwargs.pop("raw_params", None)
421
439
  if hasattr(self._model, "generate"):
422
- return await self._call_wrapper(
440
+ return await self._call_wrapper_json(
423
441
  self._model.generate, prompt, *args, **kwargs
424
442
  )
425
443
  if hasattr(self._model, "async_generate"):
426
- return await self._call_wrapper(
444
+ return await self._call_wrapper_json(
427
445
  self._model.async_generate, prompt, *args, **kwargs
428
446
  )
429
447
  raise AttributeError(f"Model {self._model.model_spec} is not for generate.")
@@ -471,7 +489,7 @@ class ModelActor(xo.StatelessActor):
471
489
  queue: Queue[Any] = Queue()
472
490
  ret = self._queue_consumer(queue)
473
491
  await self._scheduler_ref.add_request(prompt, queue, *args, **kwargs)
474
- gen = self._to_json_async_gen(ret)
492
+ gen = self._to_async_gen("json", ret)
475
493
  self._current_generator = weakref.ref(gen)
476
494
  return gen
477
495
  else:
@@ -502,12 +520,12 @@ class ModelActor(xo.StatelessActor):
502
520
  else:
503
521
  kwargs.pop("raw_params", None)
504
522
  if hasattr(self._model, "chat"):
505
- response = await self._call_wrapper(
523
+ response = await self._call_wrapper_json(
506
524
  self._model.chat, prompt, *args, **kwargs
507
525
  )
508
526
  return response
509
527
  if hasattr(self._model, "async_chat"):
510
- response = await self._call_wrapper(
528
+ response = await self._call_wrapper_json(
511
529
  self._model.async_chat, prompt, *args, **kwargs
512
530
  )
513
531
  return response
@@ -543,7 +561,7 @@ class ModelActor(xo.StatelessActor):
543
561
  @request_limit
544
562
  async def create_embedding(self, input: Union[str, List[str]], *args, **kwargs):
545
563
  if hasattr(self._model, "create_embedding"):
546
- return await self._call_wrapper(
564
+ return await self._call_wrapper_json(
547
565
  self._model.create_embedding, input, *args, **kwargs
548
566
  )
549
567
 
@@ -565,7 +583,7 @@ class ModelActor(xo.StatelessActor):
565
583
  **kwargs,
566
584
  ):
567
585
  if hasattr(self._model, "rerank"):
568
- return await self._call_wrapper(
586
+ return await self._call_wrapper_json(
569
587
  self._model.rerank,
570
588
  documents,
571
589
  query,
@@ -590,7 +608,7 @@ class ModelActor(xo.StatelessActor):
590
608
  timestamp_granularities: Optional[List[str]] = None,
591
609
  ):
592
610
  if hasattr(self._model, "transcriptions"):
593
- return await self._call_wrapper(
611
+ return await self._call_wrapper_json(
594
612
  self._model.transcriptions,
595
613
  audio,
596
614
  language,
@@ -615,7 +633,7 @@ class ModelActor(xo.StatelessActor):
615
633
  timestamp_granularities: Optional[List[str]] = None,
616
634
  ):
617
635
  if hasattr(self._model, "translations"):
618
- return await self._call_wrapper(
636
+ return await self._call_wrapper_json(
619
637
  self._model.translations,
620
638
  audio,
621
639
  language,
@@ -630,16 +648,23 @@ class ModelActor(xo.StatelessActor):
630
648
 
631
649
  @log_async(logger=logger)
632
650
  @request_limit
651
+ @xo.generator
633
652
  async def speech(
634
- self, input: str, voice: str, response_format: str = "mp3", speed: float = 1.0
653
+ self,
654
+ input: str,
655
+ voice: str,
656
+ response_format: str = "mp3",
657
+ speed: float = 1.0,
658
+ stream: bool = False,
635
659
  ):
636
660
  if hasattr(self._model, "speech"):
637
- return await self._call_wrapper(
661
+ return await self._call_wrapper_binary(
638
662
  self._model.speech,
639
663
  input,
640
664
  voice,
641
665
  response_format,
642
666
  speed,
667
+ stream,
643
668
  )
644
669
  raise AttributeError(
645
670
  f"Model {self._model.model_spec} is not for creating speech."
@@ -657,7 +682,7 @@ class ModelActor(xo.StatelessActor):
657
682
  **kwargs,
658
683
  ):
659
684
  if hasattr(self._model, "text_to_image"):
660
- return await self._call_wrapper(
685
+ return await self._call_wrapper_json(
661
686
  self._model.text_to_image,
662
687
  prompt,
663
688
  n,
@@ -682,7 +707,7 @@ class ModelActor(xo.StatelessActor):
682
707
  **kwargs,
683
708
  ):
684
709
  if hasattr(self._model, "image_to_image"):
685
- return await self._call_wrapper(
710
+ return await self._call_wrapper_json(
686
711
  self._model.image_to_image,
687
712
  image,
688
713
  prompt,
@@ -697,6 +722,35 @@ class ModelActor(xo.StatelessActor):
697
722
  f"Model {self._model.model_spec} is not for creating image."
698
723
  )
699
724
 
725
+ async def inpainting(
726
+ self,
727
+ image: "PIL.Image",
728
+ mask_image: "PIL.Image",
729
+ prompt: str,
730
+ negative_prompt: str,
731
+ n: int = 1,
732
+ size: str = "1024*1024",
733
+ response_format: str = "url",
734
+ *args,
735
+ **kwargs,
736
+ ):
737
+ if hasattr(self._model, "inpainting"):
738
+ return await self._call_wrapper(
739
+ self._model.inpainting,
740
+ image,
741
+ mask_image,
742
+ prompt,
743
+ negative_prompt,
744
+ n,
745
+ size,
746
+ response_format,
747
+ *args,
748
+ **kwargs,
749
+ )
750
+ raise AttributeError(
751
+ f"Model {self._model.model_spec} is not for creating image."
752
+ )
753
+
700
754
  @log_async(logger=logger)
701
755
  @request_limit
702
756
  async def infer(
xinference/model/audio/chattts.py CHANGED
@@ -48,7 +48,12 @@ class ChatTTSModel:
48
48
  self._model.load(source="custom", custom_path=self._model_path, compile=True)
49
49
 
50
50
  def speech(
51
- self, input: str, voice: str, response_format: str = "mp3", speed: float = 1.0
51
+ self,
52
+ input: str,
53
+ voice: str,
54
+ response_format: str = "mp3",
55
+ speed: float = 1.0,
56
+ stream: bool = False,
52
57
  ):
53
58
  import ChatTTS
54
59
  import numpy as np
@@ -74,11 +79,38 @@ class ChatTTSModel:
74
79
  )
75
80
 
76
81
  assert self._model is not None
77
- wavs = self._model.infer([input], params_infer_code=params_infer_code)
78
-
79
- # Save the generated audio
80
- with BytesIO() as out:
81
- torchaudio.save(
82
- out, torch.from_numpy(wavs[0]), 24000, format=response_format
82
+ if stream:
83
+ iter = self._model.infer(
84
+ [input], params_infer_code=params_infer_code, stream=True
83
85
  )
84
- return out.getvalue()
86
+
87
+ def _generator():
88
+ with BytesIO() as out:
89
+ writer = torchaudio.io.StreamWriter(out, format=response_format)
90
+ writer.add_audio_stream(sample_rate=24000, num_channels=1)
91
+ i = 0
92
+ last_pos = 0
93
+ with writer.open():
94
+ for it in iter:
95
+ for itt in it:
96
+ for chunk in itt:
97
+ chunk = np.array([chunk]).transpose()
98
+ writer.write_audio_chunk(i, torch.from_numpy(chunk))
99
+ new_last_pos = out.tell()
100
+ if new_last_pos != last_pos:
101
+ out.seek(last_pos)
102
+ encoded_bytes = out.read()
103
+ print(len(encoded_bytes))
104
+ yield encoded_bytes
105
+ last_pos = new_last_pos
106
+
107
+ return _generator()
108
+ else:
109
+ wavs = self._model.infer([input], params_infer_code=params_infer_code)
110
+
111
+ # Save the generated audio
112
+ with BytesIO() as out:
113
+ torchaudio.save(
114
+ out, torch.from_numpy(wavs[0]), 24000, format=response_format
115
+ )
116
+ return out.getvalue()
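Editor's note: an isolated sketch of the incremental-encoding trick used above, with a synthetic waveform instead of ChatTTS output. It assumes torchaudio's StreamWriter (which needs an FFmpeg-backed torchaudio build); each iteration reads back only the bytes encoded since the previous one, which is what the generator above yields to the caller.

    from io import BytesIO

    import numpy as np
    import torch
    import torchaudio

    buf = BytesIO()
    writer = torchaudio.io.StreamWriter(buf, format="mp3")
    writer.add_audio_stream(sample_rate=24000, num_channels=1)

    # One second of a 440 Hz tone, split into chunks to mimic streaming inference.
    wave = np.sin(2 * np.pi * 440 * np.arange(24000) / 24000).astype(np.float32)

    last_pos = 0
    with writer.open():
        for chunk in np.array_split(wave, 10):
            # StreamWriter expects (frames, channels)
            writer.write_audio_chunk(0, torch.from_numpy(chunk).reshape(-1, 1))
            new_pos = buf.tell()
            if new_pos != last_pos:
                buf.seek(last_pos)
                encoded = buf.read()  # yield these bytes downstream
                last_pos = new_pos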
xinference/model/image/core.py CHANGED
@@ -45,6 +45,7 @@ class ImageModelFamilyV1(CacheableModelSpec):
45
45
  model_id: str
46
46
  model_revision: str
47
47
  model_hub: str = "huggingface"
48
+ ability: Optional[str]
48
49
  controlnet: Optional[List["ImageModelFamilyV1"]]
49
50
 
50
51
 
@@ -71,6 +72,7 @@ class ImageModelDescription(ModelDescription):
71
72
  "model_name": self._model_spec.model_name,
72
73
  "model_family": self._model_spec.model_family,
73
74
  "model_revision": self._model_spec.model_revision,
75
+ "ability": self._model_spec.ability,
74
76
  "controlnet": controlnet,
75
77
  }
76
78
 
@@ -234,6 +236,7 @@ def create_image_model_instance(
234
236
  lora_model_paths=lora_model,
235
237
  lora_load_kwargs=lora_load_kwargs,
236
238
  lora_fuse_kwargs=lora_fuse_kwargs,
239
+ ability=model_spec.ability,
237
240
  **kwargs,
238
241
  )
239
242
  model_description = ImageModelDescription(
xinference/model/image/model_spec.json CHANGED
@@ -92,5 +92,19 @@
92
92
  "model_revision": "62134b9d8e703b5d6f74f1534457287a8bba77ef"
93
93
  }
94
94
  ]
95
+ },
96
+ {
97
+ "model_name": "stable-diffusion-inpainting",
98
+ "model_family": "stable_diffusion",
99
+ "model_id": "runwayml/stable-diffusion-inpainting",
100
+ "model_revision": "51388a731f57604945fddd703ecb5c50e8e7b49d",
101
+ "ability": "inpainting"
102
+ },
103
+ {
104
+ "model_name": "stable-diffusion-2-inpainting",
105
+ "model_family": "stable_diffusion",
106
+ "model_id": "stabilityai/stable-diffusion-2-inpainting",
107
+ "model_revision": "81a84f49b15956b60b4272a405ad3daef3da4590",
108
+ "ability": "inpainting"
95
109
  }
96
110
  ]
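Editor's note: a sketch of launching one of the new built-in specs through the client; the model name comes from the JSON above, while the host and any extra launch kwargs are deployment-specific placeholders.

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    model_uid = client.launch_model(
        model_name="stable-diffusion-inpainting",
        model_type="image",
    )
    print(model_uid)  # use this UID with the inpainting example shown earlier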
xinference/model/image/stable_diffusion/core.py CHANGED
@@ -16,6 +16,7 @@ import base64
16
16
  import logging
17
17
  import os
18
18
  import re
19
+ import sys
19
20
  import time
20
21
  import uuid
21
22
  from concurrent.futures import ThreadPoolExecutor
@@ -39,6 +40,7 @@ class DiffusionModel:
39
40
  lora_model: Optional[List[LoRA]] = None,
40
41
  lora_load_kwargs: Optional[Dict] = None,
41
42
  lora_fuse_kwargs: Optional[Dict] = None,
43
+ ability: Optional[str] = None,
42
44
  **kwargs,
43
45
  ):
44
46
  self._model_uid = model_uid
@@ -48,6 +50,7 @@ class DiffusionModel:
48
50
  self._lora_model = lora_model
49
51
  self._lora_load_kwargs = lora_load_kwargs or {}
50
52
  self._lora_fuse_kwargs = lora_fuse_kwargs or {}
53
+ self._ability = ability
51
54
  self._kwargs = kwargs
52
55
 
53
56
  def _apply_lora(self):
@@ -64,8 +67,14 @@ class DiffusionModel:
64
67
  logger.info(f"Successfully loaded the LoRA for model {self._model_uid}.")
65
68
 
66
69
  def load(self):
67
- # import torch
68
- from diffusers import AutoPipelineForText2Image
70
+ import torch
71
+
72
+ if self._ability in [None, "text2image", "image2image"]:
73
+ from diffusers import AutoPipelineForText2Image as AutoPipelineModel
74
+ elif self._ability == "inpainting":
75
+ from diffusers import AutoPipelineForInpainting as AutoPipelineModel
76
+ else:
77
+ raise ValueError(f"Unknown ability: {self._ability}")
69
78
 
70
79
  controlnet = self._kwargs.get("controlnet")
71
80
  if controlnet is not None:
@@ -74,12 +83,16 @@ class DiffusionModel:
74
83
  logger.debug("Loading controlnet %s", controlnet)
75
84
  self._kwargs["controlnet"] = ControlNetModel.from_pretrained(controlnet)
76
85
 
77
- self._model = AutoPipelineForText2Image.from_pretrained(
86
+ torch_dtype = self._kwargs.get("torch_dtype")
87
+ if sys.platform != "darwin" and torch_dtype is None:
88
+ # The following params crashes on Mac M2
89
+ self._kwargs["torch_dtype"] = torch.float16
90
+ self._kwargs["use_safetensors"] = True
91
+
92
+ logger.debug("Loading model %s", AutoPipelineModel)
93
+ self._model = AutoPipelineModel.from_pretrained(
78
94
  self._model_path,
79
95
  **self._kwargs,
80
- # The following params crashes on Mac M2
81
- # torch_dtype=torch.float16,
82
- # use_safetensors=True,
83
96
  )
84
97
  self._model = move_model_to_available_device(self._model)
85
98
  # Recommended if your computer has < 64 GB of RAM
@@ -174,3 +187,27 @@ class DiffusionModel:
174
187
  response_format=response_format,
175
188
  **kwargs,
176
189
  )
190
+
191
+ def inpainting(
192
+ self,
193
+ image: bytes,
194
+ mask_image: bytes,
195
+ prompt: Optional[Union[str, List[str]]] = None,
196
+ negative_prompt: Optional[Union[str, List[str]]] = None,
197
+ n: int = 1,
198
+ size: str = "1024*1024",
199
+ response_format: str = "url",
200
+ **kwargs,
201
+ ):
202
+ width, height = map(int, re.split(r"[^\d]+", size))
203
+ return self._call_model(
204
+ image=image,
205
+ mask_image=mask_image,
206
+ prompt=prompt,
207
+ negative_prompt=negative_prompt,
208
+ height=height,
209
+ width=width,
210
+ num_images_per_prompt=n,
211
+ response_format=response_format,
212
+ **kwargs,
213
+ )
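Editor's note: for reference, a minimal standalone sketch of what the wrapped pipeline call boils down to when ability is "inpainting", using diffusers directly. The model id, prompt, and file names are placeholders, and fp16 is assumed on a CUDA machine, matching the non-macOS defaults set above.

    import torch
    from diffusers import AutoPipelineForInpainting
    from PIL import Image

    pipe = AutoPipelineForInpainting.from_pretrained(
        "runwayml/stable-diffusion-inpainting",  # or a local cache path
        torch_dtype=torch.float16,
        use_safetensors=True,
    ).to("cuda")

    image = Image.open("photo.png").convert("RGB")
    mask = Image.open("mask.png").convert("L")  # white = repaint, black = keep

    result = pipe(
        prompt="a stone fireplace",
        image=image,
        mask_image=mask,
        height=512,
        width=512,
        num_images_per_prompt=1,
    )
    result.images[0].save("inpainted.png")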
xinference/model/llm/llm_family.json CHANGED
@@ -983,6 +983,65 @@
983
983
  ]
984
984
  }
985
985
  },
986
+ {
987
+ "version": 1,
988
+ "context_length": 131072,
989
+ "model_name": "codegeex4",
990
+ "model_lang": [
991
+ "en",
992
+ "zh"
993
+ ],
994
+ "model_ability": [
995
+ "chat"
996
+ ],
997
+ "model_description": "the open-source version of the latest CodeGeeX4 model series",
998
+ "model_specs": [
999
+ {
1000
+ "model_format": "pytorch",
1001
+ "model_size_in_billions": 9,
1002
+ "quantizations": [
1003
+ "4-bit",
1004
+ "8-bit",
1005
+ "none"
1006
+ ],
1007
+ "model_id": "THUDM/codegeex4-all-9b",
1008
+ "model_revision": "8c4ec1d2f2888412640825a7aa23355939a8f4c6"
1009
+ },
1010
+ {
1011
+ "model_format": "ggufv2",
1012
+ "model_size_in_billions": 9,
1013
+ "quantizations": [
1014
+ "IQ2_M",
1015
+ "IQ3_M",
1016
+ "Q4_K_M",
1017
+ "Q5_K_M",
1018
+ "Q6_K_L",
1019
+ "Q8_0"
1020
+ ],
1021
+ "model_file_name_template": "codegeex4-all-9b-{quantization}.gguf",
1022
+ "model_id": "THUDM/codegeex4-all-9b-GGUF",
1023
+ "model_revision": "6a04071c54c943949826d4815ee00717ed8cf153"
1024
+ }
1025
+ ],
1026
+ "prompt_style": {
1027
+ "style_name": "CHATGLM3",
1028
+ "system_prompt": "",
1029
+ "roles": [
1030
+ "user",
1031
+ "assistant"
1032
+ ],
1033
+ "stop_token_ids": [
1034
+ 151329,
1035
+ 151336,
1036
+ 151338
1037
+ ],
1038
+ "stop": [
1039
+ "<|endoftext|>",
1040
+ "<|user|>",
1041
+ "<|observation|>"
1042
+ ]
1043
+ }
1044
+ },
986
1045
  {
987
1046
  "version": 1,
988
1047
  "context_length": 2048,
@@ -5791,7 +5850,7 @@
5791
5850
  },
5792
5851
  {
5793
5852
  "version": 1,
5794
- "context_length": 204800,
5853
+ "context_length": 32768,
5795
5854
  "model_name": "internlm2-chat",
5796
5855
  "model_lang": [
5797
5856
  "en",
@@ -5839,6 +5898,140 @@
5839
5898
  ]
5840
5899
  }
5841
5900
  },
5901
+ {
5902
+ "version": 1,
5903
+ "context_length": 32768,
5904
+ "model_name": "internlm2.5-chat",
5905
+ "model_lang": [
5906
+ "en",
5907
+ "zh"
5908
+ ],
5909
+ "model_ability": [
5910
+ "chat"
5911
+ ],
5912
+ "model_description": "InternLM2.5 series of the InternLM model.",
5913
+ "model_specs": [
5914
+ {
5915
+ "model_format": "pytorch",
5916
+ "model_size_in_billions": 7,
5917
+ "quantizations": [
5918
+ "none"
5919
+ ],
5920
+ "model_id": "internlm/internlm2_5-7b-chat",
5921
+ "model_revision": "9dc8536a922ab4954726aad1b37fa199004a291a"
5922
+ },
5923
+ {
5924
+ "model_format": "gptq",
5925
+ "model_size_in_billions": 7,
5926
+ "quantizations": [
5927
+ "Int4"
5928
+ ],
5929
+ "model_id": "ModelCloud/internlm-2.5-7b-chat-gptq-4bit",
5930
+ "model_revision": "2e2dda735c326544921a4035bbeb6c6e316a8254"
5931
+ },
5932
+ {
5933
+ "model_format": "ggufv2",
5934
+ "model_size_in_billions": 7,
5935
+ "quantizations": [
5936
+ "q2_k",
5937
+ "q3_k_m",
5938
+ "q4_0",
5939
+ "q4_k_m",
5940
+ "q5_0",
5941
+ "q5_k_m",
5942
+ "q6_k",
5943
+ "q8_0",
5944
+ "fp16"
5945
+ ],
5946
+ "model_id": "internlm/internlm2_5-7b-chat-gguf",
5947
+ "model_file_name_template": "internlm2_5-7b-chat-{quantization}.gguf"
5948
+ }
5949
+ ],
5950
+ "prompt_style": {
5951
+ "style_name": "INTERNLM2",
5952
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
5953
+ "roles": [
5954
+ "<|im_start|>user",
5955
+ "<|im_start|>assistant"
5956
+ ],
5957
+ "intra_message_sep": "<|im_end|>",
5958
+ "stop_token_ids": [
5959
+ 2,
5960
+ 92542
5961
+ ],
5962
+ "stop": [
5963
+ "</s>",
5964
+ "<|im_end|>"
5965
+ ]
5966
+ }
5967
+ },
5968
+ {
5969
+ "version": 1,
5970
+ "context_length": 262144,
5971
+ "model_name": "internlm2.5-chat-1m",
5972
+ "model_lang": [
5973
+ "en",
5974
+ "zh"
5975
+ ],
5976
+ "model_ability": [
5977
+ "chat"
5978
+ ],
5979
+ "model_description": "InternLM2.5 series of the InternLM model supports 1M long-context",
5980
+ "model_specs": [
5981
+ {
5982
+ "model_format": "pytorch",
5983
+ "model_size_in_billions": 7,
5984
+ "quantizations": [
5985
+ "none"
5986
+ ],
5987
+ "model_id": "internlm/internlm2_5-7b-chat-1m",
5988
+ "model_revision": "8d1a709a04d71440ef3df6ebbe204672f411c8b6"
5989
+ },
5990
+ {
5991
+ "model_format": "gptq",
5992
+ "model_size_in_billions": 7,
5993
+ "quantizations": [
5994
+ "Int4"
5995
+ ],
5996
+ "model_id": "ModelCloud/internlm-2.5-7b-chat-1m-gptq-4bit",
5997
+ "model_revision": "022e59cb30f03b271d56178478acb038b2b9b58c"
5998
+ },
5999
+ {
6000
+ "model_format": "ggufv2",
6001
+ "model_size_in_billions": 7,
6002
+ "quantizations": [
6003
+ "q2_k",
6004
+ "q3_k_m",
6005
+ "q4_0",
6006
+ "q4_k_m",
6007
+ "q5_0",
6008
+ "q5_k_m",
6009
+ "q6_k",
6010
+ "q8_0",
6011
+ "fp16"
6012
+ ],
6013
+ "model_id": "internlm/internlm2_5-7b-chat-1m-gguf",
6014
+ "model_file_name_template": "internlm2_5-7b-chat-1m-{quantization}.gguf"
6015
+ }
6016
+ ],
6017
+ "prompt_style": {
6018
+ "style_name": "INTERNLM2",
6019
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
6020
+ "roles": [
6021
+ "<|im_start|>user",
6022
+ "<|im_start|>assistant"
6023
+ ],
6024
+ "intra_message_sep": "<|im_end|>",
6025
+ "stop_token_ids": [
6026
+ 2,
6027
+ 92542
6028
+ ],
6029
+ "stop": [
6030
+ "</s>",
6031
+ "<|im_end|>"
6032
+ ]
6033
+ }
6034
+ },
5842
6035
  {
5843
6036
  "version":1,
5844
6037
  "context_length":2048,
@@ -6192,6 +6385,52 @@
6192
6385
  ],
6193
6386
  "model_id": "google/gemma-2-27b-it"
6194
6387
  },
6388
+ {
6389
+ "model_format": "ggufv2",
6390
+ "model_size_in_billions": 9,
6391
+ "quantizations": [
6392
+ "Q2_K",
6393
+ "Q2_K_L",
6394
+ "Q3_K_L",
6395
+ "Q3_K_M",
6396
+ "Q3_K_S",
6397
+ "Q4_K_L",
6398
+ "Q4_K_M",
6399
+ "Q4_K_S",
6400
+ "Q5_K_L",
6401
+ "Q5_K_M",
6402
+ "Q5_K_S",
6403
+ "Q6_K",
6404
+ "Q6_K_L",
6405
+ "Q8_0",
6406
+ "f32"
6407
+ ],
6408
+ "model_id": "bartowski/gemma-2-9b-it-GGUF",
6409
+ "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf"
6410
+ },
6411
+ {
6412
+ "model_format": "ggufv2",
6413
+ "model_size_in_billions": 27,
6414
+ "quantizations": [
6415
+ "Q2_K",
6416
+ "Q2_K_L",
6417
+ "Q3_K_L",
6418
+ "Q3_K_M",
6419
+ "Q3_K_S",
6420
+ "Q4_K_L",
6421
+ "Q4_K_M",
6422
+ "Q4_K_S",
6423
+ "Q5_K_L",
6424
+ "Q5_K_M",
6425
+ "Q5_K_S",
6426
+ "Q6_K",
6427
+ "Q6_K_L",
6428
+ "Q8_0",
6429
+ "f32"
6430
+ ],
6431
+ "model_id": "bartowski/gemma-2-27b-it-GGUF",
6432
+ "model_file_name_template": "gemma-2-27b-it-{quantization}.gguf"
6433
+ },
6195
6434
  {
6196
6435
  "model_format": "mlx",
6197
6436
  "model_size_in_billions": 9,
xinference/model/llm/llm_family.py CHANGED
@@ -554,16 +554,36 @@ def _get_cache_dir(
554
554
  quant_suffix = q
555
555
  break
556
556
 
557
- cache_dir_name = (
557
+ # some model name includes ".", e.g. qwen1.5-chat
558
+ # if the model does not require trust_remote_code, it's OK
559
+ # because no need to import modeling_xxx.py from the path
560
+ # but when the model need to trust_remote_code,
561
+ # e.g. internlm2.5-chat, the import will fail,
562
+ # but before the model may have been downloaded,
563
+ # thus we check it first, if exist, return it,
564
+ # otherwise, we replace the "." with "_" in model name
565
+ old_cache_dir_name = (
558
566
  f"{llm_family.model_name}-{llm_spec.model_format}"
559
567
  f"-{llm_spec.model_size_in_billions}b"
560
568
  )
561
569
  if quant_suffix:
562
- cache_dir_name += f"-{quant_suffix}"
563
- cache_dir = os.path.realpath(os.path.join(XINFERENCE_CACHE_DIR, cache_dir_name))
564
- if create_if_not_exist and not os.path.exists(cache_dir):
565
- os.makedirs(cache_dir, exist_ok=True)
566
- return cache_dir
570
+ old_cache_dir_name += f"-{quant_suffix}"
571
+ old_cache_dir = os.path.realpath(
572
+ os.path.join(XINFERENCE_CACHE_DIR, old_cache_dir_name)
573
+ )
574
+ if os.path.exists(old_cache_dir):
575
+ return old_cache_dir
576
+ else:
577
+ cache_dir_name = (
578
+ f"{llm_family.model_name.replace('.', '_')}-{llm_spec.model_format}"
579
+ f"-{llm_spec.model_size_in_billions}b"
580
+ )
581
+ if quant_suffix:
582
+ cache_dir_name += f"-{quant_suffix}"
583
+ cache_dir = os.path.realpath(os.path.join(XINFERENCE_CACHE_DIR, cache_dir_name))
584
+ if create_if_not_exist and not os.path.exists(cache_dir):
585
+ os.makedirs(cache_dir, exist_ok=True)
586
+ return cache_dir
567
587
 
568
588
 
569
589
  def _get_meta_path(
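Editor's note: a worked illustration of the naming logic above, not a verbatim excerpt. For internlm2.5-chat in pytorch format at 7B, the legacy dotted directory is reused if it already exists on disk; fresh downloads get the sanitized name so trust_remote_code imports of modeling_*.py are not broken by the dot in the path.

    model_name, model_format, size_b = "internlm2.5-chat", "pytorch", 7

    old_name = f"{model_name}-{model_format}-{size_b}b"
    # -> "internlm2.5-chat-pytorch-7b"   (checked first, reused if present)

    new_name = f"{model_name.replace('.', '_')}-{model_format}-{size_b}b"
    # -> "internlm2_5-chat-pytorch-7b"   (used for new downloads)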
xinference/model/llm/llm_family_modelscope.json CHANGED
@@ -688,6 +688,66 @@
688
688
  ]
689
689
  }
690
690
  },
691
+ {
692
+ "version": 1,
693
+ "context_length": 131072,
694
+ "model_name": "codegeex4",
695
+ "model_lang": [
696
+ "en",
697
+ "zh"
698
+ ],
699
+ "model_ability": [
700
+ "chat"
701
+ ],
702
+ "model_description": "the open-source version of the latest CodeGeeX4 model series",
703
+ "model_specs": [
704
+ {
705
+ "model_format": "pytorch",
706
+ "model_size_in_billions": 9,
707
+ "quantizations": [
708
+ "4-bit",
709
+ "8-bit",
710
+ "none"
711
+ ],
712
+ "model_id": "ZhipuAI/codegeex4-all-9b",
713
+ "model_hub": "modelscope",
714
+ "model_revision": "master"
715
+ },
716
+ {
717
+ "model_format": "ggufv2",
718
+ "model_size_in_billions": 9,
719
+ "quantizations": [
720
+ "IQ2_M",
721
+ "IQ3_M",
722
+ "Q4_K_M",
723
+ "Q5_K_M",
724
+ "Q6_K_L",
725
+ "Q8_0"
726
+ ],
727
+ "model_file_name_template": "codegeex4-all-9b-{quantization}.gguf",
728
+ "model_id": "ZhipuAI/codegeex4-all-9b-GGUF",
729
+ "model_hub": "modelscope"
730
+ }
731
+ ],
732
+ "prompt_style": {
733
+ "style_name": "CHATGLM3",
734
+ "system_prompt": "",
735
+ "roles": [
736
+ "user",
737
+ "assistant"
738
+ ],
739
+ "stop_token_ids": [
740
+ 151329,
741
+ 151336,
742
+ 151338
743
+ ],
744
+ "stop": [
745
+ "<|endoftext|>",
746
+ "<|user|>",
747
+ "<|observation|>"
748
+ ]
749
+ }
750
+ },
691
751
  {
692
752
  "version": 1,
693
753
  "context_length": 2048,
@@ -928,6 +988,88 @@
928
988
  ]
929
989
  }
930
990
  },
991
+ {
992
+ "version": 1,
993
+ "context_length": 32768,
994
+ "model_name": "internlm2.5-chat",
995
+ "model_lang": [
996
+ "en",
997
+ "zh"
998
+ ],
999
+ "model_ability": [
1000
+ "chat"
1001
+ ],
1002
+ "model_description": "InternLM2.5 series of the InternLM model.",
1003
+ "model_specs": [
1004
+ {
1005
+ "model_format": "pytorch",
1006
+ "model_size_in_billions": 7,
1007
+ "quantizations": [
1008
+ "none"
1009
+ ],
1010
+ "model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat",
1011
+ "model_hub": "modelscope"
1012
+ }
1013
+ ],
1014
+ "prompt_style": {
1015
+ "style_name": "INTERNLM2",
1016
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
1017
+ "roles": [
1018
+ "<|im_start|>user",
1019
+ "<|im_start|>assistant"
1020
+ ],
1021
+ "intra_message_sep": "<|im_end|>",
1022
+ "stop_token_ids": [
1023
+ 2,
1024
+ 92542
1025
+ ],
1026
+ "stop": [
1027
+ "</s>",
1028
+ "<|im_end|>"
1029
+ ]
1030
+ }
1031
+ },
1032
+ {
1033
+ "version": 1,
1034
+ "context_length": 262144,
1035
+ "model_name": "internlm2.5-chat-1m",
1036
+ "model_lang": [
1037
+ "en",
1038
+ "zh"
1039
+ ],
1040
+ "model_ability": [
1041
+ "chat"
1042
+ ],
1043
+ "model_description": "InternLM2.5 series of the InternLM model supports 1M long-context",
1044
+ "model_specs": [
1045
+ {
1046
+ "model_format": "pytorch",
1047
+ "model_size_in_billions": 7,
1048
+ "quantizations": [
1049
+ "none"
1050
+ ],
1051
+ "model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat-1m",
1052
+ "model_hub": "modelscope"
1053
+ }
1054
+ ],
1055
+ "prompt_style": {
1056
+ "style_name": "INTERNLM2",
1057
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
1058
+ "roles": [
1059
+ "<|im_start|>user",
1060
+ "<|im_start|>assistant"
1061
+ ],
1062
+ "intra_message_sep": "<|im_end|>",
1063
+ "stop_token_ids": [
1064
+ 2,
1065
+ 92542
1066
+ ],
1067
+ "stop": [
1068
+ "</s>",
1069
+ "<|im_end|>"
1070
+ ]
1071
+ }
1072
+ },
931
1073
  {
932
1074
  "version": 1,
933
1075
  "context_length": 100000,
@@ -3799,6 +3941,29 @@
3799
3941
  ],
3800
3942
  "model_id": "AI-ModelScope/gemma-2-27b-it",
3801
3943
  "model_hub": "modelscope"
3944
+ },
3945
+ {
3946
+ "model_format": "ggufv2",
3947
+ "model_size_in_billions": 9,
3948
+ "quantizations": [
3949
+ "Q2_K",
3950
+ "Q3_K_L",
3951
+ "Q3_K_M",
3952
+ "Q3_K_S",
3953
+ "Q4_K_L",
3954
+ "Q4_K_M",
3955
+ "Q4_K_S",
3956
+ "Q5_K_L",
3957
+ "Q5_K_M",
3958
+ "Q5_K_S",
3959
+ "Q6_K",
3960
+ "Q6_K_L",
3961
+ "Q8_0",
3962
+ "f32"
3963
+ ],
3964
+ "model_id": "LLM-Research/gemma-2-9b-it-GGUF",
3965
+ "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf",
3966
+ "model_hub": "modelscope"
3802
3967
  }
3803
3968
  ],
3804
3969
  "prompt_style": {
xinference/model/llm/sglang/core.py CHANGED
@@ -269,8 +269,13 @@ class SGLANGModel(LLM):
269
269
  )
270
270
  stream = sanitized_generate_config.pop("stream")
271
271
  stream_options = sanitized_generate_config.pop("stream_options")
272
- if isinstance(stream_options, dict):
273
- include_usage = stream_options.pop("include_usage", False)
272
+
273
+ include_usage = (
274
+ stream_options.pop("include_usage")
275
+ if isinstance(stream_options, dict)
276
+ else False
277
+ )
278
+
274
279
  request_id = str(uuid.uuid1())
275
280
  state = pipeline.run(
276
281
  question=prompt,
xinference/model/llm/vllm/core.py CHANGED
@@ -112,6 +112,8 @@ VLLM_SUPPORTED_CHAT_MODELS = [
112
112
  "internlm-chat-8k",
113
113
  "internlm-chat-20b",
114
114
  "internlm2-chat",
115
+ "internlm2.5-chat",
116
+ "internlm2.5-chat-1m",
115
117
  "qwen-chat",
116
118
  "Yi-chat",
117
119
  "Yi-1.5-chat",
@@ -127,6 +129,7 @@ VLLM_SUPPORTED_CHAT_MODELS = [
127
129
  "chatglm3-128k",
128
130
  "glm4-chat",
129
131
  "glm4-chat-1m",
132
+ "codegeex4",
130
133
  "deepseek-chat",
131
134
  "deepseek-coder-instruct",
132
135
  ]
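Editor's note: with internlm2.5-chat and codegeex4 added to the supported lists, they can be launched like any other built-in chat model. A sketch assuming the 0.13-era client API; the host, engine selection, and generate config are deployment-specific placeholders.

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    uid = client.launch_model(
        model_name="internlm2.5-chat",
        model_format="pytorch",
        model_size_in_billions=7,
        quantization="none",
        # model_engine="vLLM",  # pass an engine here if your client version requires one
    )
    model = client.get_model(uid)
    reply = model.chat("Summarize the InternLM2.5 release in one sentence.")
    print(reply["choices"][0]["message"]["content"])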
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xinference
3
- Version: 0.13.1
3
+ Version: 0.13.2
4
4
  Summary: Model Serving Made Easy
5
5
  Home-page: https://github.com/xorbitsai/inference
6
6
  Author: Qin Xuye
@@ -72,6 +72,7 @@ Requires-Dist: nemo-text-processing ; extra == 'all'
72
72
  Requires-Dist: WeTextProcessing ; extra == 'all'
73
73
  Requires-Dist: librosa ; extra == 'all'
74
74
  Requires-Dist: xxhash ; extra == 'all'
75
+ Requires-Dist: torchaudio ; extra == 'all'
75
76
  Requires-Dist: ChatTTS >0.1 ; extra == 'all'
76
77
  Requires-Dist: boto3 <1.28.65,>=1.28.55 ; extra == 'all'
77
78
  Requires-Dist: tensorizer ~=2.9.0 ; extra == 'all'
@@ -86,6 +87,7 @@ Requires-Dist: nemo-text-processing ; extra == 'audio'
86
87
  Requires-Dist: WeTextProcessing ; extra == 'audio'
87
88
  Requires-Dist: librosa ; extra == 'audio'
88
89
  Requires-Dist: xxhash ; extra == 'audio'
90
+ Requires-Dist: torchaudio ; extra == 'audio'
89
91
  Requires-Dist: ChatTTS >0.1 ; extra == 'audio'
90
92
  Provides-Extra: benchmark
91
93
  Requires-Dist: psutil ; extra == 'benchmark'
@@ -1,6 +1,6 @@
1
1
  xinference/__init__.py,sha256=0LgIveLP6CXxoIaSrxhlFyOh0lOqPgJBVcBe0tkWJjc,987
2
2
  xinference/_compat.py,sha256=SQAjZMGxtBIce45qtW7ob7RWzA0zhv2yB3AxT0rb0uU,1778
3
- xinference/_version.py,sha256=uHlOZ7Byu6M1gveNIzmgZapOLatEU9Yocfj3tEKXElM,498
3
+ xinference/_version.py,sha256=npzZYwlsc_rih9EcKTaoGkkL3KLm52-9xfgm8jq_R4A,498
4
4
  xinference/conftest.py,sha256=FF-ZkqkfOxQw4hz_8G7p5aB7gFdsJlr6u2ZdFuuauAA,9744
5
5
  xinference/constants.py,sha256=_uyBB84fgZM64J3mw8_RELVJfm_dgeNRUZF9t9ZuFcM,3541
6
6
  xinference/device_utils.py,sha256=zswJiws3VyTIaNO8z-MOcsJH_UiPoePPiKK5zoNrjTA,3285
@@ -9,7 +9,7 @@ xinference/isolation.py,sha256=uhkzVyL3fSYZSuFexkG6Jm-tRTC5I607uNg000BXAnE,1949
9
9
  xinference/types.py,sha256=mN6lTFGqwFCycCMCwNELtRm2lmvuynvzD7Wwq_NEINY,14255
10
10
  xinference/utils.py,sha256=VSOJMFd9H7kce98OtJZbcDjjpfzRpHAFs8WU0xXPBM8,717
11
11
  xinference/api/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
12
- xinference/api/restful_api.py,sha256=RRU4EItwywwOLvYwEOvbAB9p6eBF_ZX0eDPIDBJE0ag,71697
12
+ xinference/api/restful_api.py,sha256=7n77U-5t0SDzpOOad4SqbFbZx-fSIQJJdM_bLwdozus,74572
13
13
  xinference/api/oauth2/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
14
14
  xinference/api/oauth2/auth_service.py,sha256=74JzB42fbbmBu4Q1dW3A9Fp_N7167KgRGB42Z0NHjAM,6119
15
15
  xinference/api/oauth2/types.py,sha256=K923sv_XySIUtM2Eozl9IG082IJcDOS5SFLrPZ5ELBg,996
@@ -18,14 +18,14 @@ xinference/client/__init__.py,sha256=Gc4HOzAy_1cic5kXlso7hahYgw89CKvZSJDicEU461k
18
18
  xinference/client/common.py,sha256=iciZRs5YjM2gYsXnwACPMaiBZp4_XpawWwfym0Iyu40,1617
19
19
  xinference/client/handlers.py,sha256=3gd9C7u4URbcVdR6Eyv8cpEZ175Ll4q_jGL07CnEIpg,648
20
20
  xinference/client/restful/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
21
- xinference/client/restful/restful_client.py,sha256=3-OxNq9hoxbUrKbpDq0kCi3GssV_BZeenzVDlCeRrvE,50536
21
+ xinference/client/restful/restful_client.py,sha256=fMgazFQRSMefUx0_40Q_9c3o7mf0G39D8HmlkQ9KFhs,54304
22
22
  xinference/core/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
23
23
  xinference/core/cache_tracker.py,sha256=2hk8ANOYruhxAt4MPz482tYEQcvYBh_B7sq0eYd0rTU,6963
24
24
  xinference/core/chat_interface.py,sha256=7SOm6Qi-iFh1otycHpn6CpISq2wTLlJzEUngJtOwMIk,19558
25
25
  xinference/core/event.py,sha256=Lkx_-Ohwyzyt-MBbkrZy9N-7aeYs-wux0fDtZpa2SJY,1632
26
26
  xinference/core/image_interface.py,sha256=G2iK24auEN4MrLkPlu1CAA_gf-BQrGQTjazi_FYqIxE,8825
27
27
  xinference/core/metrics.py,sha256=ScmTG15Uq3h_ob72ybZSMWdnk8P4sUZFcm60f4ikSXc,2631
28
- xinference/core/model.py,sha256=2f0eaKxW0Vk0_or4hnEPrZDKxvBk36tsEcqbDAnw500,24951
28
+ xinference/core/model.py,sha256=QWz9LeUyWwwlgxzE0JQmsNqaMc1uwtU_Q3wm6H430rw,26778
29
29
  xinference/core/resource.py,sha256=FQ0aRt3T4ZQo0P6CZZf5QUKHiCsr5llBvKb1f7wfnxg,1611
30
30
  xinference/core/scheduler.py,sha256=e-fhhMeWmVdx_37sNDf2BOkvHt_17wclNcby7DcUNso,15627
31
31
  xinference/core/status_guard.py,sha256=fF5hisvfn6es9DV6Z6RRD6V_S_uLcb8lHM6PArGgb04,2820
@@ -47,7 +47,7 @@ xinference/model/__init__.py,sha256=IRC3ojiqYkVLIK_xsIxYeKypEeeTTdrovnVzK_4L4eg,
47
47
  xinference/model/core.py,sha256=5dr7y2cq2OS3aFgqIIR3uQbT1ln3xiolUsbXgu2dHGw,3999
48
48
  xinference/model/utils.py,sha256=NGIXgpkUY0dXGxnh-FsfeNq6OS9SPwBzNfASLXWCqUo,15146
49
49
  xinference/model/audio/__init__.py,sha256=QyQwELIYk7DuD5Hen2q45pLMJ4K8iAnto8zlOA9QUSY,2839
50
- xinference/model/audio/chattts.py,sha256=EISJj6mgppAZwjiPmStvUuM2H9ogXP0FXCD5rrL2AwA,2618
50
+ xinference/model/audio/chattts.py,sha256=JZA_0TR4nMGqJ-2WYqwb8DcjhsTC57D0QlkPBl4v788,3973
51
51
  xinference/model/audio/core.py,sha256=uMkZpd5IIs9WK8K0t2FWiGKagcicSjK20w4USKGSCEw,5708
52
52
  xinference/model/audio/custom.py,sha256=01NTD927pairIBWOo9At6Bjqpo1kdcIn3AVijbOdp7Y,5056
53
53
  xinference/model/audio/model_spec.json,sha256=ueOHO14d8lIzuiExJyPUgC3swYA3CfgOgMiDu5L1cOA,3205
@@ -66,19 +66,19 @@ xinference/model/flexible/utils.py,sha256=_GlEarRHKPAxT7o6N39VOd9sB580zKzdSktqjb
66
66
  xinference/model/flexible/launchers/__init__.py,sha256=x_5s73qABN_94hnf5UyrfyxUObayntD6Gh1UOtctCe8,642
67
67
  xinference/model/flexible/launchers/transformers_launcher.py,sha256=OZeeogDfopRUGhulP4PRJ4fZEJ2D9cfv7lcC2qJBoDE,2012
68
68
  xinference/model/image/__init__.py,sha256=lDtP961bpu6h5TK57kJ531Zoch2xU5DM-Eco_YQne-Y,2780
69
- xinference/model/image/core.py,sha256=U70IcFXEFrow0HyUoc5401z5H01l0dQD5aCdq8s11so,8697
69
+ xinference/model/image/core.py,sha256=zpaiym5t5cWrBOOscvFFBBUD4-YWBU_NZLsyuqeeamA,8809
70
70
  xinference/model/image/custom.py,sha256=nn1iZDTYNz68A2gWFXvUuv__Gx8EGdkz_sHvHnPnSoA,3841
71
- xinference/model/image/model_spec.json,sha256=xSEmKnzi4n2hzu9FspusFjeX6pAd8w05ZgvkMV_9HzE,3178
71
+ xinference/model/image/model_spec.json,sha256=kQMWtQo-Z4tawKdgckYFJz1fvbGnXVSZGQsGwjOxa3M,3681
72
72
  xinference/model/image/model_spec_modelscope.json,sha256=vWAoR1gsexay6jn8vnObslYF3YE5SAfqMcJPkYQ-Wc4,3176
73
73
  xinference/model/image/utils.py,sha256=gxg8jJ2nYaDknzCcSC53WCy1slbB5aWU14AbJbfm6Z4,906
74
74
  xinference/model/image/stable_diffusion/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
75
- xinference/model/image/stable_diffusion/core.py,sha256=ib_ZeSg7hzynmRqSnhjtrVuhoLOgZPrR1ZH2LjBmH2E,6063
75
+ xinference/model/image/stable_diffusion/core.py,sha256=e2kpID5iTUTWuMO01aF7z4uGMpAttF1g1nSnwdDNIz4,7349
76
76
  xinference/model/llm/__init__.py,sha256=D9zXjltqlzKahDiOFYyn_EcLoiw_6tO8bhj3u8wnT0A,11462
77
77
  xinference/model/llm/core.py,sha256=ZAzRGphjRZ2KAdTPADIuqSbVU9dTQrHgLvCEgNP9pOk,8088
78
- xinference/model/llm/llm_family.json,sha256=DzRDfpkBqX4VePEMxdCuExuSnMvqgf7sdMfQs8hIhbQ,178453
79
- xinference/model/llm/llm_family.py,sha256=l1utaKl_XXXNIhFbMHzIn0nLI_8JhMZ2005nZ8u-auM,42038
78
+ xinference/model/llm/llm_family.json,sha256=6ZXDEqlZddhrR9A3lnOtzmEdOMd7rfc4DexRzeJVodw,184400
79
+ xinference/model/llm/llm_family.py,sha256=2XykGoXMIffDIOCI1hefprgPJTOvE80r7Rh6Zosb6dY,42934
80
80
  xinference/model/llm/llm_family_csghub.json,sha256=zWiMlX0mbCvuaR7gZh0qDPRPaswFJ-zKssuN6XuAQ6s,1417
81
- xinference/model/llm/llm_family_modelscope.json,sha256=jqTg0YAdN5Px7v0XTnze6BS2gu-v8iga2Y9DozUG0BI,113046
81
+ xinference/model/llm/llm_family_modelscope.json,sha256=BQR99BYPXxXxq0CnFiVlAEUUeuOLXezCTBVPhdZs1Jg,116982
82
82
  xinference/model/llm/memory.py,sha256=PTD8m6TCZVU1zrwc9wepX9cUjCqAXBENj6X7tjua0to,10207
83
83
  xinference/model/llm/utils.py,sha256=3KkpM-HaI97jAFj5Pb1-Kau3BL8-8d-SypDkKCWFqPs,32655
84
84
  xinference/model/llm/ggml/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
@@ -108,9 +108,9 @@ xinference/model/llm/pytorch/utils.py,sha256=HZhJKQG1O1P1qTpxvVzIjBp-2J8aTRxUmS9
108
108
  xinference/model/llm/pytorch/vicuna.py,sha256=avNOgt9fBjwYzahL-j6-EcQS-7km167h8ttJolnNWnE,2334
109
109
  xinference/model/llm/pytorch/yi_vl.py,sha256=MljT7tpgFIhL6n5rdoS3hmq_u0rtHRE6cxXCseujklQ,10911
110
110
  xinference/model/llm/sglang/__init__.py,sha256=-sjSIQ4K6w-TEzx49kVaWeWC443fnZqODU91GCQ_JNo,581
111
- xinference/model/llm/sglang/core.py,sha256=RGHy6t9n0c4zL6Uha8P7t-qPvisPyulFVHw-8Aq8CJ0,14046
111
+ xinference/model/llm/sglang/core.py,sha256=9c4KgEFswu1Fx3qI4VFszv26902FwIifq9AVzMijDa4,14087
112
112
  xinference/model/llm/vllm/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
113
- xinference/model/llm/vllm/core.py,sha256=j0vqOp295TS1l0O2CNkTfjgwmQ-t5KtX-bFfbgSKlYs,22097
113
+ xinference/model/llm/vllm/core.py,sha256=beZeuCR_wCbIjtU-WWM8q0rVPPPiPLO2VINnvNQfq8w,22165
114
114
  xinference/model/rerank/__init__.py,sha256=BXIL1uu3ZpZHX9bODhW9lxKUXudZE7-OkXFmmM5rpMU,2817
115
115
  xinference/model/rerank/core.py,sha256=qAUwOdRHomn0uCzCw6klDxJSZyIDQ4tvgz9pOPm-0GY,12150
116
116
  xinference/model/rerank/custom.py,sha256=NKk7jA7p4xkuwS5WoOs2SY2wdnoAVpyCjBTvv317bBw,3917
@@ -15428,9 +15428,9 @@ xinference/web/ui/node_modules/yargs-parser/package.json,sha256=BSwbOzgetKXMK4u0
15428
15428
  xinference/web/ui/node_modules/yocto-queue/package.json,sha256=6U1XHQPGXJTqsiFvT953ORihUtXTblZy4fXBWP9qxC0,725
15429
15429
  xinference/web/ui/node_modules/yup/package.json,sha256=xRFSROB9NKxqSWHEVFvSTsPs9Ll074uo8OS1zEw0qhA,1206
15430
15430
  xinference/web/ui/node_modules/yup/node_modules/type-fest/package.json,sha256=JTv2zTTVgxQ2H82m1-6qEpdMv08lHjFx4Puf_MsbB_Q,1134
15431
- xinference-0.13.1.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
15432
- xinference-0.13.1.dist-info/METADATA,sha256=GmkfFt_HXs3gvuBke75uA__8UL-jAwj41F47KEBUj4E,16633
15433
- xinference-0.13.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15434
- xinference-0.13.1.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
15435
- xinference-0.13.1.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
15436
- xinference-0.13.1.dist-info/RECORD,,
15431
+ xinference-0.13.2.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
15432
+ xinference-0.13.2.dist-info/METADATA,sha256=EmYaz9n8oJHqQSU8Er7kqRuuN01VWaRBLZ8lgQMCMgc,16721
15433
+ xinference-0.13.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
15434
+ xinference-0.13.2.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
15435
+ xinference-0.13.2.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
15436
+ xinference-0.13.2.dist-info/RECORD,,