xinference 0.13.1__py3-none-any.whl → 0.13.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +73 -1
- xinference/client/restful/restful_client.py +82 -0
- xinference/core/model.py +78 -24
- xinference/model/audio/chattts.py +40 -8
- xinference/model/image/core.py +3 -0
- xinference/model/image/model_spec.json +14 -0
- xinference/model/image/stable_diffusion/core.py +43 -6
- xinference/model/llm/llm_family.json +240 -1
- xinference/model/llm/llm_family.py +26 -6
- xinference/model/llm/llm_family_modelscope.json +165 -0
- xinference/model/llm/sglang/core.py +7 -2
- xinference/model/llm/vllm/core.py +3 -0
- {xinference-0.13.1.dist-info → xinference-0.13.2.dist-info}/METADATA +3 -1
- {xinference-0.13.1.dist-info → xinference-0.13.2.dist-info}/RECORD +19 -19
- {xinference-0.13.1.dist-info → xinference-0.13.2.dist-info}/LICENSE +0 -0
- {xinference-0.13.1.dist-info → xinference-0.13.2.dist-info}/WHEEL +0 -0
- {xinference-0.13.1.dist-info → xinference-0.13.2.dist-info}/entry_points.txt +0 -0
- {xinference-0.13.1.dist-info → xinference-0.13.2.dist-info}/top_level.txt +0 -0
xinference/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json

 version_json = '''
 {
-    "date": "2024-07-
+    "date": "2024-07-19T19:15:54+0800",
     "dirty": false,
     "error": null,
-    "full-revisionid": "
-    "version": "0.13.1"
+    "full-revisionid": "880929cbbc73e5206ca069591b03d9d16dd858bf",
+    "version": "0.13.2"
 }
 ''' # END VERSION_JSON
xinference/api/restful_api.py
CHANGED
@@ -129,6 +129,7 @@ class SpeechRequest(BaseModel):
     voice: Optional[str]
     response_format: Optional[str] = "mp3"
     speed: Optional[float] = 1.0
+    stream: Optional[bool] = False


 class RegisterModelRequest(BaseModel):

@@ -491,6 +492,17 @@ class RESTfulAPI:
                 else None
             ),
         )
+        self._router.add_api_route(
+            "/v1/images/inpainting",
+            self.create_inpainting,
+            methods=["POST"],
+            response_model=ImageList,
+            dependencies=(
+                [Security(self._auth_service, scopes=["models:read"])]
+                if self.is_authenticated()
+                else None
+            ),
+        )
         self._router.add_api_route(
             "/v1/chat/completions",
             self.create_chat_completion,

@@ -1317,8 +1329,14 @@ class RESTfulAPI:
                 voice=body.voice,
                 response_format=body.response_format,
                 speed=body.speed,
+                stream=body.stream,
             )
-            return Response(media_type="application/octet-stream", content=out)
+            if body.stream:
+                return EventSourceResponse(
+                    media_type="application/octet-stream", content=out
+                )
+            else:
+                return Response(media_type="application/octet-stream", content=out)
         except RuntimeError as re:
             logger.error(re, exc_info=True)
             await self._report_error_event(model_uid, str(re))

@@ -1410,6 +1428,60 @@ class RESTfulAPI:
             await self._report_error_event(model_uid, str(e))
             raise HTTPException(status_code=500, detail=str(e))

+    async def create_inpainting(
+        self,
+        model: str = Form(...),
+        image: UploadFile = File(media_type="application/octet-stream"),
+        mask_image: UploadFile = File(media_type="application/octet-stream"),
+        prompt: Optional[Union[str, List[str]]] = Form(None),
+        negative_prompt: Optional[Union[str, List[str]]] = Form(None),
+        n: Optional[int] = Form(1),
+        response_format: Optional[str] = Form("url"),
+        size: Optional[str] = Form(None),
+        kwargs: Optional[str] = Form(None),
+    ) -> Response:
+        model_uid = model
+        try:
+            model_ref = await (await self._get_supervisor_ref()).get_model(model_uid)
+        except ValueError as ve:
+            logger.error(str(ve), exc_info=True)
+            await self._report_error_event(model_uid, str(ve))
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
+        try:
+            if kwargs is not None:
+                parsed_kwargs = json.loads(kwargs)
+            else:
+                parsed_kwargs = {}
+            im = Image.open(image.file)
+            mask_im = Image.open(mask_image.file)
+            if not size:
+                w, h = im.size
+                size = f"{w}*{h}"
+            image_list = await model_ref.inpainting(
+                image=im,
+                mask_image=mask_im,
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                n=n,
+                size=size,
+                response_format=response_format,
+                **parsed_kwargs,
+            )
+            return Response(content=image_list, media_type="application/json")
+        except RuntimeError as re:
+            logger.error(re, exc_info=True)
+            await self._report_error_event(model_uid, str(re))
+            raise HTTPException(status_code=400, detail=str(re))
+        except Exception as e:
+            logger.error(e, exc_info=True)
+            await self._report_error_event(model_uid, str(e))
+            raise HTTPException(status_code=500, detail=str(e))
+
     async def create_flexible_infer(self, request: Request) -> Response:
         payload = await request.json()
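The new /v1/images/inpainting route accepts a multipart form (model, prompt, negative_prompt, n, size, response_format, a JSON-encoded kwargs string) plus image and mask_image files. A minimal sketch of exercising it directly with requests; the endpoint address and model UID below are assumptions for illustration:

    import json
    import requests

    with open("input.png", "rb") as img, open("mask.png", "rb") as mask:
        resp = requests.post(
            "http://127.0.0.1:9997/v1/images/inpainting",  # assumed local endpoint
            data={
                "model": "my-inpainting-model",  # hypothetical model UID
                "prompt": "a vase of flowers on the table",
                "n": 1,
                "size": "512*512",
                "response_format": "b64_json",
                "kwargs": json.dumps({}),
            },
            files={
                "image": ("image", img, "application/octet-stream"),
                "mask_image": ("mask_image", mask, "application/octet-stream"),
            },
        )
    resp.raise_for_status()
    print(resp.json())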
xinference/client/restful/restful_client.py
CHANGED

@@ -294,6 +294,81 @@ class RESTfulImageModelHandle(RESTfulModelHandle):
         response_data = response.json()
         return response_data

+    def inpainting(
+        self,
+        image: Union[str, bytes],
+        mask_image: Union[str, bytes],
+        prompt: str,
+        negative_prompt: Optional[str] = None,
+        n: int = 1,
+        size: Optional[str] = None,
+        response_format: str = "url",
+        **kwargs,
+    ) -> "ImageList":
+        """
+        Inpaint an image by the input text.
+
+        Parameters
+        ----------
+        image: `Union[str, bytes]`
+            an image batch to be inpainted (which parts of the image to
+            be masked out with `mask_image` and repainted according to `prompt`). For both numpy array and pytorch
+            tensor, the expected value range is between `[0, 1]` If it's a tensor or a list or tensors, the
+            expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a list of arrays, the
+            expected shape should be `(B, H, W, C)` or `(H, W, C)` It can also accept image latents as `image`, but
+            if passing latents directly it is not encoded again.
+        mask_image: `Union[str, bytes]`
+            representing an image batch to mask `image`. White pixels in the mask
+            are repainted while black pixels are preserved. If `mask_image` is a PIL image, it is converted to a
+            single channel (luminance) before use. If it's a numpy array or pytorch tensor, it should contain one
+            color channel (L) instead of 3, so the expected shape for pytorch tensor would be `(B, 1, H, W)`, `(B,
+            H, W)`, `(1, H, W)`, `(H, W)`. And for numpy array would be for `(B, H, W, 1)`, `(B, H, W)`, `(H, W,
+            1)`, or `(H, W)`.
+        prompt: `str` or `List[str]`
+            The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
+        negative_prompt (`str` or `List[str]`, *optional*):
+            The prompt or prompts not to guide the image generation. If not defined, one has to pass
+            `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
+            less than `1`).
+        n: `int`, defaults to 1
+            The number of images to generate per prompt. Must be between 1 and 10.
+        size: `str`, defaults to None
+            The width*height in pixels of the generated image.
+        response_format: `str`, defaults to `url`
+            The format in which the generated images are returned. Must be one of url or b64_json.
+        Returns
+        -------
+        ImageList
+            A list of image objects.
+        :param prompt:
+        :param image:
+        """
+        url = f"{self._base_url}/v1/images/inpainting"
+        params = {
+            "model": self._model_uid,
+            "prompt": prompt,
+            "negative_prompt": negative_prompt,
+            "n": n,
+            "size": size,
+            "response_format": response_format,
+            "kwargs": json.dumps(kwargs),
+        }
+        files: List[Any] = []
+        for key, value in params.items():
+            files.append((key, (None, value)))
+        files.append(("image", ("image", image, "application/octet-stream")))
+        files.append(
+            ("mask_image", ("mask_image", mask_image, "application/octet-stream"))
+        )
+        response = requests.post(url, files=files, headers=self.auth_headers)
+        if response.status_code != 200:
+            raise RuntimeError(
+                f"Failed to inpaint the images, detail: {_get_error_string(response)}"
+            )
+
+        response_data = response.json()
+        return response_data
+

 class RESTfulGenerateModelHandle(RESTfulModelHandle):
     def generate(

@@ -692,6 +767,7 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
         voice: str = "",
         response_format: str = "mp3",
         speed: float = 1.0,
+        stream: bool = False,
     ):
         """
         Generates audio from the input text.

@@ -707,6 +783,8 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
            The format to audio in.
         speed: str
            The speed of the generated audio.
+        stream: bool
+           Use stream or not.

         Returns
         -------

@@ -720,6 +798,7 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
             "voice": voice,
             "response_format": response_format,
             "speed": speed,
+            "stream": stream,
         }
         response = requests.post(url, json=params, headers=self.auth_headers)
         if response.status_code != 200:

@@ -727,6 +806,9 @@ class RESTfulAudioModelHandle(RESTfulModelHandle):
                 f"Failed to speech the text, detail: {_get_error_string(response)}"
             )

+        if stream:
+            return response.iter_content(chunk_size=1024)
+
         return response.content
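Client-side, the same features surface as RESTfulImageModelHandle.inpainting and the stream flag on RESTfulAudioModelHandle.speech. A hedged usage sketch, assuming a running server at a local endpoint and hypothetical model UIDs:

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")  # assumed local endpoint

    image_model = client.get_model("my-inpainting-model")  # hypothetical UID
    with open("input.png", "rb") as img, open("mask.png", "rb") as mask:
        result = image_model.inpainting(
            image=img.read(),
            mask_image=mask.read(),
            prompt="a vase of flowers on the table",
            response_format="b64_json",
        )

    audio_model = client.get_model("my-chattts-model")  # hypothetical UID
    with open("out.mp3", "wb") as f:
        for chunk in audio_model.speech("Hello!", stream=True):
            f.write(chunk)  # chunks of up to 1024 bytes, per iter_content above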
xinference/core/model.py
CHANGED
@@ -310,7 +310,7 @@ class ModelActor(xo.StatelessActor):
             )
         )

-    def
+    def _to_generator(self, output_type: str, gen: types.GeneratorType):
         start_time = time.time()
         time_to_first_token = None
         final_usage = None

@@ -318,8 +318,13 @@ class ModelActor(xo.StatelessActor):
             for v in gen:
                 if time_to_first_token is None:
                     time_to_first_token = (time.time() - start_time) * 1000
-
-
+                if output_type == "json":
+                    final_usage = v.get("usage", None)
+                    v = dict(data=json.dumps(v, ensure_ascii=False))
+                else:
+                    assert (
+                        output_type == "binary"
+                    ), f"Unknown output type '{output_type}'"
                 yield sse_starlette.sse.ensure_bytes(v, None)
         except OutOfMemoryError:
             logger.exception(

@@ -342,7 +347,7 @@ class ModelActor(xo.StatelessActor):
             )
             asyncio.run_coroutine_threadsafe(coro, loop=self._loop)

-    async def
+    async def _to_async_gen(self, output_type: str, gen: types.AsyncGeneratorType):
         start_time = time.time()
         time_to_first_token = None
         final_usage = None

@@ -351,8 +356,13 @@ class ModelActor(xo.StatelessActor):
                 if time_to_first_token is None:
                     time_to_first_token = (time.time() - start_time) * 1000
                 final_usage = v.get("usage", None)
-
-
+                if output_type == "json":
+                    v = await asyncio.to_thread(json.dumps, v, ensure_ascii=False)
+                    v = dict(data=v)  # noqa: F821
+                else:
+                    assert (
+                        output_type == "binary"
+                    ), f"Unknown output type '{output_type}'"
                 yield await asyncio.to_thread(sse_starlette.sse.ensure_bytes, v, None)
         except OutOfMemoryError:
             logger.exception(

@@ -379,8 +389,14 @@ class ModelActor(xo.StatelessActor):
             )
             await asyncio.gather(*coros)

+    async def _call_wrapper_json(self, fn: Callable, *args, **kwargs):
+        return await self._call_wrapper("json", fn, *args, **kwargs)
+
+    async def _call_wrapper_binary(self, fn: Callable, *args, **kwargs):
+        return await self._call_wrapper("binary", fn, *args, **kwargs)
+
     @oom_check
-    async def _call_wrapper(self, fn: Callable, *args, **kwargs):
+    async def _call_wrapper(self, output_type: str, fn: Callable, *args, **kwargs):
         if self._lock is None:
             if inspect.iscoroutinefunction(fn):
                 ret = await fn(*args, **kwargs)

@@ -397,16 +413,18 @@ class ModelActor(xo.StatelessActor):
             raise Exception("Parallel generation is not supported by ggml.")

         if inspect.isgenerator(ret):
-            gen = self.
+            gen = self._to_generator(output_type, ret)
             self._current_generator = weakref.ref(gen)
             return gen
         if inspect.isasyncgen(ret):
-            gen = self.
+            gen = self._to_async_gen(output_type, ret)
             self._current_generator = weakref.ref(gen)
             return gen
-        if
+        if output_type == "json":
+            return await asyncio.to_thread(json_dumps, ret)
+        else:
+            assert output_type == "binary", f"Unknown output type '{output_type}'"
             return ret
-        return await asyncio.to_thread(json_dumps, ret)

     @log_async(logger=logger)
     @request_limit

@@ -419,11 +437,11 @@ class ModelActor(xo.StatelessActor):
         else:
             kwargs.pop("raw_params", None)
         if hasattr(self._model, "generate"):
-            return await self.
+            return await self._call_wrapper_json(
                 self._model.generate, prompt, *args, **kwargs
             )
         if hasattr(self._model, "async_generate"):
-            return await self.
+            return await self._call_wrapper_json(
                 self._model.async_generate, prompt, *args, **kwargs
             )
         raise AttributeError(f"Model {self._model.model_spec} is not for generate.")

@@ -471,7 +489,7 @@ class ModelActor(xo.StatelessActor):
             queue: Queue[Any] = Queue()
             ret = self._queue_consumer(queue)
             await self._scheduler_ref.add_request(prompt, queue, *args, **kwargs)
-            gen = self.
+            gen = self._to_async_gen("json", ret)
             self._current_generator = weakref.ref(gen)
             return gen
         else:

@@ -502,12 +520,12 @@ class ModelActor(xo.StatelessActor):
         else:
             kwargs.pop("raw_params", None)
         if hasattr(self._model, "chat"):
-            response = await self.
+            response = await self._call_wrapper_json(
                 self._model.chat, prompt, *args, **kwargs
             )
             return response
         if hasattr(self._model, "async_chat"):
-            response = await self.
+            response = await self._call_wrapper_json(
                 self._model.async_chat, prompt, *args, **kwargs
             )
             return response

@@ -543,7 +561,7 @@ class ModelActor(xo.StatelessActor):
     @request_limit
     async def create_embedding(self, input: Union[str, List[str]], *args, **kwargs):
         if hasattr(self._model, "create_embedding"):
-            return await self.
+            return await self._call_wrapper_json(
                 self._model.create_embedding, input, *args, **kwargs
             )

@@ -565,7 +583,7 @@ class ModelActor(xo.StatelessActor):
         **kwargs,
     ):
         if hasattr(self._model, "rerank"):
-            return await self.
+            return await self._call_wrapper_json(
                 self._model.rerank,
                 documents,
                 query,

@@ -590,7 +608,7 @@ class ModelActor(xo.StatelessActor):
         timestamp_granularities: Optional[List[str]] = None,
     ):
         if hasattr(self._model, "transcriptions"):
-            return await self.
+            return await self._call_wrapper_json(
                 self._model.transcriptions,
                 audio,
                 language,

@@ -615,7 +633,7 @@ class ModelActor(xo.StatelessActor):
         timestamp_granularities: Optional[List[str]] = None,
     ):
         if hasattr(self._model, "translations"):
-            return await self.
+            return await self._call_wrapper_json(
                 self._model.translations,
                 audio,
                 language,

@@ -630,16 +648,23 @@ class ModelActor(xo.StatelessActor):

     @log_async(logger=logger)
     @request_limit
+    @xo.generator
     async def speech(
-        self,
+        self,
+        input: str,
+        voice: str,
+        response_format: str = "mp3",
+        speed: float = 1.0,
+        stream: bool = False,
     ):
         if hasattr(self._model, "speech"):
-            return await self.
+            return await self._call_wrapper_binary(
                 self._model.speech,
                 input,
                 voice,
                 response_format,
                 speed,
+                stream,
             )
         raise AttributeError(
             f"Model {self._model.model_spec} is not for creating speech."

@@ -657,7 +682,7 @@ class ModelActor(xo.StatelessActor):
         **kwargs,
     ):
         if hasattr(self._model, "text_to_image"):
-            return await self.
+            return await self._call_wrapper_json(
                 self._model.text_to_image,
                 prompt,
                 n,

@@ -682,7 +707,7 @@ class ModelActor(xo.StatelessActor):
         **kwargs,
    ):
         if hasattr(self._model, "image_to_image"):
-            return await self.
+            return await self._call_wrapper_json(
                 self._model.image_to_image,
                 image,
                 prompt,

@@ -697,6 +722,35 @@ class ModelActor(xo.StatelessActor):
             f"Model {self._model.model_spec} is not for creating image."
         )

+    async def inpainting(
+        self,
+        image: "PIL.Image",
+        mask_image: "PIL.Image",
+        prompt: str,
+        negative_prompt: str,
+        n: int = 1,
+        size: str = "1024*1024",
+        response_format: str = "url",
+        *args,
+        **kwargs,
+    ):
+        if hasattr(self._model, "inpainting"):
+            return await self._call_wrapper(
+                self._model.inpainting,
+                image,
+                mask_image,
+                prompt,
+                negative_prompt,
+                n,
+                size,
+                response_format,
+                *args,
+                **kwargs,
+            )
+        raise AttributeError(
+            f"Model {self._model.model_spec} is not for creating image."
+        )
+
     @log_async(logger=logger)
     @request_limit
     async def infer(
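The refactor threads an output_type through _to_generator, _to_async_gen, and _call_wrapper so binary payloads (speech audio) bypass JSON serialization while everything else keeps the old behavior. A standalone sketch of that dispatch, with the actor machinery elided; the names mirror the diff but this is an illustration, not the shipped class:

    import asyncio
    import json
    from typing import Any, Callable

    async def call_wrapper(output_type: str, fn: Callable, *args: Any, **kwargs: Any) -> Any:
        # Shared tail of the wrapper: JSON results are serialized,
        # binary results pass through untouched.
        ret = fn(*args, **kwargs)
        if output_type == "json":
            return json.dumps(ret, ensure_ascii=False)
        assert output_type == "binary", f"Unknown output type '{output_type}'"
        return ret

    async def call_wrapper_json(fn: Callable, *args: Any, **kwargs: Any) -> Any:
        return await call_wrapper("json", fn, *args, **kwargs)

    async def call_wrapper_binary(fn: Callable, *args: Any, **kwargs: Any) -> Any:
        return await call_wrapper("binary", fn, *args, **kwargs)

    print(asyncio.run(call_wrapper_json(dict, a=1)))   # {"a": 1}
    print(asyncio.run(call_wrapper_binary(bytes, 4)))  # b'\x00\x00\x00\x00'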
xinference/model/audio/chattts.py
CHANGED

@@ -48,7 +48,12 @@ class ChatTTSModel:
         self._model.load(source="custom", custom_path=self._model_path, compile=True)

     def speech(
-        self,
+        self,
+        input: str,
+        voice: str,
+        response_format: str = "mp3",
+        speed: float = 1.0,
+        stream: bool = False,
     ):
         import ChatTTS
         import numpy as np

@@ -74,11 +79,38 @@ class ChatTTSModel:
         )

         assert self._model is not None
-
-
-
-        with BytesIO() as out:
-            torchaudio.save(
-                out, torch.from_numpy(wavs[0]), 24000, format=response_format
+        if stream:
+            iter = self._model.infer(
+                [input], params_infer_code=params_infer_code, stream=True
             )
-
+
+            def _generator():
+                with BytesIO() as out:
+                    writer = torchaudio.io.StreamWriter(out, format=response_format)
+                    writer.add_audio_stream(sample_rate=24000, num_channels=1)
+                    i = 0
+                    last_pos = 0
+                    with writer.open():
+                        for it in iter:
+                            for itt in it:
+                                for chunk in itt:
+                                    chunk = np.array([chunk]).transpose()
+                                    writer.write_audio_chunk(i, torch.from_numpy(chunk))
+                                    new_last_pos = out.tell()
+                                    if new_last_pos != last_pos:
+                                        out.seek(last_pos)
+                                        encoded_bytes = out.read()
+                                        print(len(encoded_bytes))
+                                        yield encoded_bytes
+                                        last_pos = new_last_pos
+
+            return _generator()
+        else:
+            wavs = self._model.infer([input], params_infer_code=params_infer_code)
+
+            # Save the generated audio
+            with BytesIO() as out:
+                torchaudio.save(
+                    out, torch.from_numpy(wavs[0]), 24000, format=response_format
+                )
+                return out.getvalue()
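The streaming branch encodes PCM chunks on the fly and yields only the bytes written since the previous flush. A standalone sketch of that tell/seek bookkeeping over a growing BytesIO, independent of ChatTTS and torchaudio:

    from io import BytesIO

    def yield_new_bytes(buffer: BytesIO, writes):
        # Yield only the bytes appended by each write, mirroring the
        # last_pos bookkeeping in the ChatTTS streaming generator above.
        last_pos = 0
        for data in writes:
            buffer.write(data)
            new_last_pos = buffer.tell()
            if new_last_pos != last_pos:
                buffer.seek(last_pos)
                yield buffer.read()
                last_pos = new_last_pos

    out = BytesIO()
    for piece in yield_new_bytes(out, [b"abc", b"", b"defg"]):
        print(piece)  # b'abc', then b'defg'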
xinference/model/image/core.py
CHANGED
@@ -45,6 +45,7 @@ class ImageModelFamilyV1(CacheableModelSpec):
     model_id: str
     model_revision: str
     model_hub: str = "huggingface"
+    ability: Optional[str]
     controlnet: Optional[List["ImageModelFamilyV1"]]

@@ -71,6 +72,7 @@ class ImageModelDescription(ModelDescription):
                 "model_name": self._model_spec.model_name,
                 "model_family": self._model_spec.model_family,
                 "model_revision": self._model_spec.model_revision,
+                "ability": self._model_spec.ability,
                 "controlnet": controlnet,
             }

@@ -234,6 +236,7 @@ def create_image_model_instance(
         lora_model_paths=lora_model,
         lora_load_kwargs=lora_load_kwargs,
         lora_fuse_kwargs=lora_fuse_kwargs,
+        ability=model_spec.ability,
         **kwargs,
     )
     model_description = ImageModelDescription(
xinference/model/image/model_spec.json
CHANGED

@@ -92,5 +92,19 @@
       "model_revision": "62134b9d8e703b5d6f74f1534457287a8bba77ef"
     }
   ]
+  },
+  {
+    "model_name": "stable-diffusion-inpainting",
+    "model_family": "stable_diffusion",
+    "model_id": "runwayml/stable-diffusion-inpainting",
+    "model_revision": "51388a731f57604945fddd703ecb5c50e8e7b49d",
+    "ability": "inpainting"
+  },
+  {
+    "model_name": "stable-diffusion-2-inpainting",
+    "model_family": "stable_diffusion",
+    "model_id": "stabilityai/stable-diffusion-2-inpainting",
+    "model_revision": "81a84f49b15956b60b4272a405ad3daef3da4590",
+    "ability": "inpainting"
   }
 ]
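With the two inpainting specs registered, the models can be launched like any other built-in image model. A hedged sketch via the RESTful client; the endpoint address is an assumption for illustration:

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")  # assumed local endpoint
    model_uid = client.launch_model(
        model_name="stable-diffusion-inpainting",
        model_type="image",
    )
    print(model_uid)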
xinference/model/image/stable_diffusion/core.py
CHANGED

@@ -16,6 +16,7 @@ import base64
 import logging
 import os
 import re
+import sys
 import time
 import uuid
 from concurrent.futures import ThreadPoolExecutor

@@ -39,6 +40,7 @@ class DiffusionModel:
         lora_model: Optional[List[LoRA]] = None,
         lora_load_kwargs: Optional[Dict] = None,
         lora_fuse_kwargs: Optional[Dict] = None,
+        ability: Optional[str] = None,
         **kwargs,
     ):
         self._model_uid = model_uid

@@ -48,6 +50,7 @@ class DiffusionModel:
         self._lora_model = lora_model
         self._lora_load_kwargs = lora_load_kwargs or {}
         self._lora_fuse_kwargs = lora_fuse_kwargs or {}
+        self._ability = ability
         self._kwargs = kwargs

     def _apply_lora(self):

@@ -64,8 +67,14 @@ class DiffusionModel:
         logger.info(f"Successfully loaded the LoRA for model {self._model_uid}.")

     def load(self):
-
-
+        import torch
+
+        if self._ability in [None, "text2image", "image2image"]:
+            from diffusers import AutoPipelineForText2Image as AutoPipelineModel
+        elif self._ability == "inpainting":
+            from diffusers import AutoPipelineForInpainting as AutoPipelineModel
+        else:
+            raise ValueError(f"Unknown ability: {self._ability}")

         controlnet = self._kwargs.get("controlnet")
         if controlnet is not None:

@@ -74,12 +83,16 @@ class DiffusionModel:
             logger.debug("Loading controlnet %s", controlnet)
             self._kwargs["controlnet"] = ControlNetModel.from_pretrained(controlnet)

-
+        torch_dtype = self._kwargs.get("torch_dtype")
+        if sys.platform != "darwin" and torch_dtype is None:
+            # The following params crashes on Mac M2
+            self._kwargs["torch_dtype"] = torch.float16
+            self._kwargs["use_safetensors"] = True
+
+        logger.debug("Loading model %s", AutoPipelineModel)
+        self._model = AutoPipelineModel.from_pretrained(
             self._model_path,
             **self._kwargs,
-            # The following params crashes on Mac M2
-            # torch_dtype=torch.float16,
-            # use_safetensors=True,
         )
         self._model = move_model_to_available_device(self._model)
         # Recommended if your computer has < 64 GB of RAM

@@ -174,3 +187,27 @@ class DiffusionModel:
             response_format=response_format,
             **kwargs,
         )
+
+    def inpainting(
+        self,
+        image: bytes,
+        mask_image: bytes,
+        prompt: Optional[Union[str, List[str]]] = None,
+        negative_prompt: Optional[Union[str, List[str]]] = None,
+        n: int = 1,
+        size: str = "1024*1024",
+        response_format: str = "url",
+        **kwargs,
+    ):
+        width, height = map(int, re.split(r"[^\d]+", size))
+        return self._call_model(
+            image=image,
+            mask_image=mask_image,
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            height=height,
+            width=width,
+            num_images_per_prompt=n,
+            response_format=response_format,
+            **kwargs,
+        )
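The size string is split on any run of non-digit characters, so "1024*1024" and similar spellings all parse to a (width, height) pair. A quick check of the exact expression used in inpainting above:

    import re

    for size in ("1024*1024", "512x768"):
        width, height = map(int, re.split(r"[^\d]+", size))
        print(width, height)  # 1024 1024, then 512 768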
xinference/model/llm/llm_family.json
CHANGED

@@ -983,6 +983,65 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "codegeex4",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "the open-source version of the latest CodeGeeX4 model series",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "THUDM/codegeex4-all-9b",
+        "model_revision": "8c4ec1d2f2888412640825a7aa23355939a8f4c6"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "IQ2_M",
+          "IQ3_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K_L",
+          "Q8_0"
+        ],
+        "model_file_name_template": "codegeex4-all-9b-{quantization}.gguf",
+        "model_id": "THUDM/codegeex4-all-9b-GGUF",
+        "model_revision": "6a04071c54c943949826d4815ee00717ed8cf153"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CHATGLM3",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        151329,
+        151336,
+        151338
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|user|>",
+        "<|observation|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 2048,

@@ -5791,7 +5850,7 @@
   },
   {
     "version": 1,
-    "context_length":
+    "context_length": 32768,
     "model_name": "internlm2-chat",
     "model_lang": [
       "en",

@@ -5839,6 +5898,140 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "internlm2.5-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "InternLM2.5 series of the InternLM model.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "internlm/internlm2_5-7b-chat",
+        "model_revision": "9dc8536a922ab4954726aad1b37fa199004a291a"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "ModelCloud/internlm-2.5-7b-chat-gptq-4bit",
+        "model_revision": "2e2dda735c326544921a4035bbeb6c6e316a8254"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "q2_k",
+          "q3_k_m",
+          "q4_0",
+          "q4_k_m",
+          "q5_0",
+          "q5_k_m",
+          "q6_k",
+          "q8_0",
+          "fp16"
+        ],
+        "model_id": "internlm/internlm2_5-7b-chat-gguf",
+        "model_file_name_template": "internlm2_5-7b-chat-{quantization}.gguf"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "INTERNLM2",
+      "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
+      "roles": [
+        "<|im_start|>user",
+        "<|im_start|>assistant"
+      ],
+      "intra_message_sep": "<|im_end|>",
+      "stop_token_ids": [
+        2,
+        92542
+      ],
+      "stop": [
+        "</s>",
+        "<|im_end|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 262144,
+    "model_name": "internlm2.5-chat-1m",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "InternLM2.5 series of the InternLM model supports 1M long-context",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "internlm/internlm2_5-7b-chat-1m",
+        "model_revision": "8d1a709a04d71440ef3df6ebbe204672f411c8b6"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "ModelCloud/internlm-2.5-7b-chat-1m-gptq-4bit",
+        "model_revision": "022e59cb30f03b271d56178478acb038b2b9b58c"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "q2_k",
+          "q3_k_m",
+          "q4_0",
+          "q4_k_m",
+          "q5_0",
+          "q5_k_m",
+          "q6_k",
+          "q8_0",
+          "fp16"
+        ],
+        "model_id": "internlm/internlm2_5-7b-chat-1m-gguf",
+        "model_file_name_template": "internlm2_5-7b-chat-1m-{quantization}.gguf"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "INTERNLM2",
+      "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
+      "roles": [
+        "<|im_start|>user",
+        "<|im_start|>assistant"
+      ],
+      "intra_message_sep": "<|im_end|>",
+      "stop_token_ids": [
+        2,
+        92542
+      ],
+      "stop": [
+        "</s>",
+        "<|im_end|>"
+      ]
+    }
+  },
   {
     "version":1,
     "context_length":2048,

@@ -6192,6 +6385,52 @@
         ],
         "model_id": "google/gemma-2-27b-it"
       },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "f32"
+        ],
+        "model_id": "bartowski/gemma-2-9b-it-GGUF",
+        "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 27,
+        "quantizations": [
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "f32"
+        ],
+        "model_id": "bartowski/gemma-2-27b-it-GGUF",
+        "model_file_name_template": "gemma-2-27b-it-{quantization}.gguf"
+      },
       {
         "model_format": "mlx",
         "model_size_in_billions": 9,
xinference/model/llm/llm_family.py
CHANGED

@@ -554,16 +554,36 @@ def _get_cache_dir(
             quant_suffix = q
             break

-
+    # some model name includes ".", e.g. qwen1.5-chat
+    # if the model does not require trust_remote_code, it's OK
+    # because no need to import modeling_xxx.py from the path
+    # but when the model need to trust_remote_code,
+    # e.g. internlm2.5-chat, the import will fail,
+    # but before the model may have been downloaded,
+    # thus we check it first, if exist, return it,
+    # otherwise, we replace the "." with "_" in model name
+    old_cache_dir_name = (
         f"{llm_family.model_name}-{llm_spec.model_format}"
         f"-{llm_spec.model_size_in_billions}b"
     )
     if quant_suffix:
-
-
-
-
-
+        old_cache_dir_name += f"-{quant_suffix}"
+    old_cache_dir = os.path.realpath(
+        os.path.join(XINFERENCE_CACHE_DIR, old_cache_dir_name)
+    )
+    if os.path.exists(old_cache_dir):
+        return old_cache_dir
+    else:
+        cache_dir_name = (
+            f"{llm_family.model_name.replace('.', '_')}-{llm_spec.model_format}"
+            f"-{llm_spec.model_size_in_billions}b"
+        )
+        if quant_suffix:
+            cache_dir_name += f"-{quant_suffix}"
+        cache_dir = os.path.realpath(os.path.join(XINFERENCE_CACHE_DIR, cache_dir_name))
+        if create_if_not_exist and not os.path.exists(cache_dir):
+            os.makedirs(cache_dir, exist_ok=True)
+        return cache_dir


 def _get_meta_path(
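The effect of the fallback above: an existing dotted cache directory keeps being reused, while fresh downloads get a sanitized name that stays importable for trust_remote_code models. A small illustration of the two names for one spec:

    model_name = "internlm2.5-chat"
    old_name = f"{model_name}-pytorch-7b"                    # reused if already on disk
    new_name = f"{model_name.replace('.', '_')}-pytorch-7b"  # used for new downloads
    print(old_name)  # internlm2.5-chat-pytorch-7b
    print(new_name)  # internlm2_5-chat-pytorch-7b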
xinference/model/llm/llm_family_modelscope.json
CHANGED

@@ -688,6 +688,66 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "codegeex4",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "the open-source version of the latest CodeGeeX4 model series",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "ZhipuAI/codegeex4-all-9b",
+        "model_hub": "modelscope",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "IQ2_M",
+          "IQ3_M",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K_L",
+          "Q8_0"
+        ],
+        "model_file_name_template": "codegeex4-all-9b-{quantization}.gguf",
+        "model_id": "ZhipuAI/codegeex4-all-9b-GGUF",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "CHATGLM3",
+      "system_prompt": "",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "stop_token_ids": [
+        151329,
+        151336,
+        151338
+      ],
+      "stop": [
+        "<|endoftext|>",
+        "<|user|>",
+        "<|observation|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 2048,

@@ -928,6 +988,88 @@
       ]
     }
   },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "internlm2.5-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "InternLM2.5 series of the InternLM model.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "INTERNLM2",
+      "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
+      "roles": [
+        "<|im_start|>user",
+        "<|im_start|>assistant"
+      ],
+      "intra_message_sep": "<|im_end|>",
+      "stop_token_ids": [
+        2,
+        92542
+      ],
+      "stop": [
+        "</s>",
+        "<|im_end|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 262144,
+    "model_name": "internlm2.5-chat-1m",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "InternLM2.5 series of the InternLM model supports 1M long-context",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "Shanghai_AI_Laboratory/internlm2_5-7b-chat-1m",
+        "model_hub": "modelscope"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "INTERNLM2",
+      "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
+      "roles": [
+        "<|im_start|>user",
+        "<|im_start|>assistant"
+      ],
+      "intra_message_sep": "<|im_end|>",
+      "stop_token_ids": [
+        2,
+        92542
+      ],
+      "stop": [
+        "</s>",
+        "<|im_end|>"
+      ]
+    }
+  },
   {
     "version": 1,
     "context_length": 100000,

@@ -3799,6 +3941,29 @@
         ],
         "model_id": "AI-ModelScope/gemma-2-27b-it",
         "model_hub": "modelscope"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_K_L",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_K_L",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q6_K_L",
+          "Q8_0",
+          "f32"
+        ],
+        "model_id": "LLM-Research/gemma-2-9b-it-GGUF",
+        "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf",
+        "model_hub": "modelscope"
       }
     ],
     "prompt_style": {
xinference/model/llm/sglang/core.py
CHANGED

@@ -269,8 +269,13 @@ class SGLANGModel(LLM):
         )
         stream = sanitized_generate_config.pop("stream")
         stream_options = sanitized_generate_config.pop("stream_options")
-
-
+
+        include_usage = (
+            stream_options.pop("include_usage")
+            if isinstance(stream_options, dict)
+            else False
+        )
+
         request_id = str(uuid.uuid1())
         state = pipeline.run(
             question=prompt,
xinference/model/llm/vllm/core.py
CHANGED

@@ -112,6 +112,8 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "internlm-chat-8k",
     "internlm-chat-20b",
     "internlm2-chat",
+    "internlm2.5-chat",
+    "internlm2.5-chat-1m",
     "qwen-chat",
     "Yi-chat",
     "Yi-1.5-chat",

@@ -127,6 +129,7 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "chatglm3-128k",
     "glm4-chat",
     "glm4-chat-1m",
+    "codegeex4",
     "deepseek-chat",
     "deepseek-coder-instruct",
 ]
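With codegeex4 and the internlm2.5 chat models added to VLLM_SUPPORTED_CHAT_MODELS, they can be served through the vLLM backend when it is installed. A hedged launch sketch; the endpoint is an assumption and the size follows the spec above, but the call is illustrative:

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")  # assumed local endpoint
    uid = client.launch_model(
        model_name="codegeex4",
        model_format="pytorch",
        model_size_in_billions=9,
    )
    model = client.get_model(uid)
    print(model.chat("Write a binary search in Python."))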
{xinference-0.13.1.dist-info → xinference-0.13.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xinference
-Version: 0.13.1
+Version: 0.13.2
 Summary: Model Serving Made Easy
 Home-page: https://github.com/xorbitsai/inference
 Author: Qin Xuye

@@ -72,6 +72,7 @@ Requires-Dist: nemo-text-processing ; extra == 'all'
 Requires-Dist: WeTextProcessing ; extra == 'all'
 Requires-Dist: librosa ; extra == 'all'
 Requires-Dist: xxhash ; extra == 'all'
+Requires-Dist: torchaudio ; extra == 'all'
 Requires-Dist: ChatTTS >0.1 ; extra == 'all'
 Requires-Dist: boto3 <1.28.65,>=1.28.55 ; extra == 'all'
 Requires-Dist: tensorizer ~=2.9.0 ; extra == 'all'

@@ -86,6 +87,7 @@ Requires-Dist: nemo-text-processing ; extra == 'audio'
 Requires-Dist: WeTextProcessing ; extra == 'audio'
 Requires-Dist: librosa ; extra == 'audio'
 Requires-Dist: xxhash ; extra == 'audio'
+Requires-Dist: torchaudio ; extra == 'audio'
 Requires-Dist: ChatTTS >0.1 ; extra == 'audio'
 Provides-Extra: benchmark
 Requires-Dist: psutil ; extra == 'benchmark'
{xinference-0.13.1.dist-info → xinference-0.13.2.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
 xinference/__init__.py,sha256=0LgIveLP6CXxoIaSrxhlFyOh0lOqPgJBVcBe0tkWJjc,987
 xinference/_compat.py,sha256=SQAjZMGxtBIce45qtW7ob7RWzA0zhv2yB3AxT0rb0uU,1778
-xinference/_version.py,sha256=
+xinference/_version.py,sha256=npzZYwlsc_rih9EcKTaoGkkL3KLm52-9xfgm8jq_R4A,498
 xinference/conftest.py,sha256=FF-ZkqkfOxQw4hz_8G7p5aB7gFdsJlr6u2ZdFuuauAA,9744
 xinference/constants.py,sha256=_uyBB84fgZM64J3mw8_RELVJfm_dgeNRUZF9t9ZuFcM,3541
 xinference/device_utils.py,sha256=zswJiws3VyTIaNO8z-MOcsJH_UiPoePPiKK5zoNrjTA,3285

@@ -9,7 +9,7 @@ xinference/isolation.py,sha256=uhkzVyL3fSYZSuFexkG6Jm-tRTC5I607uNg000BXAnE,1949
 xinference/types.py,sha256=mN6lTFGqwFCycCMCwNELtRm2lmvuynvzD7Wwq_NEINY,14255
 xinference/utils.py,sha256=VSOJMFd9H7kce98OtJZbcDjjpfzRpHAFs8WU0xXPBM8,717
 xinference/api/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/api/restful_api.py,sha256=
+xinference/api/restful_api.py,sha256=7n77U-5t0SDzpOOad4SqbFbZx-fSIQJJdM_bLwdozus,74572
 xinference/api/oauth2/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/api/oauth2/auth_service.py,sha256=74JzB42fbbmBu4Q1dW3A9Fp_N7167KgRGB42Z0NHjAM,6119
 xinference/api/oauth2/types.py,sha256=K923sv_XySIUtM2Eozl9IG082IJcDOS5SFLrPZ5ELBg,996

@@ -18,14 +18,14 @@ xinference/client/__init__.py,sha256=Gc4HOzAy_1cic5kXlso7hahYgw89CKvZSJDicEU461k
 xinference/client/common.py,sha256=iciZRs5YjM2gYsXnwACPMaiBZp4_XpawWwfym0Iyu40,1617
 xinference/client/handlers.py,sha256=3gd9C7u4URbcVdR6Eyv8cpEZ175Ll4q_jGL07CnEIpg,648
 xinference/client/restful/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/client/restful/restful_client.py,sha256=
+xinference/client/restful/restful_client.py,sha256=fMgazFQRSMefUx0_40Q_9c3o7mf0G39D8HmlkQ9KFhs,54304
 xinference/core/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/core/cache_tracker.py,sha256=2hk8ANOYruhxAt4MPz482tYEQcvYBh_B7sq0eYd0rTU,6963
 xinference/core/chat_interface.py,sha256=7SOm6Qi-iFh1otycHpn6CpISq2wTLlJzEUngJtOwMIk,19558
 xinference/core/event.py,sha256=Lkx_-Ohwyzyt-MBbkrZy9N-7aeYs-wux0fDtZpa2SJY,1632
 xinference/core/image_interface.py,sha256=G2iK24auEN4MrLkPlu1CAA_gf-BQrGQTjazi_FYqIxE,8825
 xinference/core/metrics.py,sha256=ScmTG15Uq3h_ob72ybZSMWdnk8P4sUZFcm60f4ikSXc,2631
-xinference/core/model.py,sha256=
+xinference/core/model.py,sha256=QWz9LeUyWwwlgxzE0JQmsNqaMc1uwtU_Q3wm6H430rw,26778
 xinference/core/resource.py,sha256=FQ0aRt3T4ZQo0P6CZZf5QUKHiCsr5llBvKb1f7wfnxg,1611
 xinference/core/scheduler.py,sha256=e-fhhMeWmVdx_37sNDf2BOkvHt_17wclNcby7DcUNso,15627
 xinference/core/status_guard.py,sha256=fF5hisvfn6es9DV6Z6RRD6V_S_uLcb8lHM6PArGgb04,2820

@@ -47,7 +47,7 @@ xinference/model/__init__.py,sha256=IRC3ojiqYkVLIK_xsIxYeKypEeeTTdrovnVzK_4L4eg,
 xinference/model/core.py,sha256=5dr7y2cq2OS3aFgqIIR3uQbT1ln3xiolUsbXgu2dHGw,3999
 xinference/model/utils.py,sha256=NGIXgpkUY0dXGxnh-FsfeNq6OS9SPwBzNfASLXWCqUo,15146
 xinference/model/audio/__init__.py,sha256=QyQwELIYk7DuD5Hen2q45pLMJ4K8iAnto8zlOA9QUSY,2839
-xinference/model/audio/chattts.py,sha256=
+xinference/model/audio/chattts.py,sha256=JZA_0TR4nMGqJ-2WYqwb8DcjhsTC57D0QlkPBl4v788,3973
 xinference/model/audio/core.py,sha256=uMkZpd5IIs9WK8K0t2FWiGKagcicSjK20w4USKGSCEw,5708
 xinference/model/audio/custom.py,sha256=01NTD927pairIBWOo9At6Bjqpo1kdcIn3AVijbOdp7Y,5056
 xinference/model/audio/model_spec.json,sha256=ueOHO14d8lIzuiExJyPUgC3swYA3CfgOgMiDu5L1cOA,3205

@@ -66,19 +66,19 @@ xinference/model/flexible/utils.py,sha256=_GlEarRHKPAxT7o6N39VOd9sB580zKzdSktqjb
 xinference/model/flexible/launchers/__init__.py,sha256=x_5s73qABN_94hnf5UyrfyxUObayntD6Gh1UOtctCe8,642
 xinference/model/flexible/launchers/transformers_launcher.py,sha256=OZeeogDfopRUGhulP4PRJ4fZEJ2D9cfv7lcC2qJBoDE,2012
 xinference/model/image/__init__.py,sha256=lDtP961bpu6h5TK57kJ531Zoch2xU5DM-Eco_YQne-Y,2780
-xinference/model/image/core.py,sha256=
+xinference/model/image/core.py,sha256=zpaiym5t5cWrBOOscvFFBBUD4-YWBU_NZLsyuqeeamA,8809
 xinference/model/image/custom.py,sha256=nn1iZDTYNz68A2gWFXvUuv__Gx8EGdkz_sHvHnPnSoA,3841
-xinference/model/image/model_spec.json,sha256=
+xinference/model/image/model_spec.json,sha256=kQMWtQo-Z4tawKdgckYFJz1fvbGnXVSZGQsGwjOxa3M,3681
 xinference/model/image/model_spec_modelscope.json,sha256=vWAoR1gsexay6jn8vnObslYF3YE5SAfqMcJPkYQ-Wc4,3176
 xinference/model/image/utils.py,sha256=gxg8jJ2nYaDknzCcSC53WCy1slbB5aWU14AbJbfm6Z4,906
 xinference/model/image/stable_diffusion/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/model/image/stable_diffusion/core.py,sha256=
+xinference/model/image/stable_diffusion/core.py,sha256=e2kpID5iTUTWuMO01aF7z4uGMpAttF1g1nSnwdDNIz4,7349
 xinference/model/llm/__init__.py,sha256=D9zXjltqlzKahDiOFYyn_EcLoiw_6tO8bhj3u8wnT0A,11462
 xinference/model/llm/core.py,sha256=ZAzRGphjRZ2KAdTPADIuqSbVU9dTQrHgLvCEgNP9pOk,8088
-xinference/model/llm/llm_family.json,sha256=
-xinference/model/llm/llm_family.py,sha256=
+xinference/model/llm/llm_family.json,sha256=6ZXDEqlZddhrR9A3lnOtzmEdOMd7rfc4DexRzeJVodw,184400
+xinference/model/llm/llm_family.py,sha256=2XykGoXMIffDIOCI1hefprgPJTOvE80r7Rh6Zosb6dY,42934
 xinference/model/llm/llm_family_csghub.json,sha256=zWiMlX0mbCvuaR7gZh0qDPRPaswFJ-zKssuN6XuAQ6s,1417
-xinference/model/llm/llm_family_modelscope.json,sha256=
+xinference/model/llm/llm_family_modelscope.json,sha256=BQR99BYPXxXxq0CnFiVlAEUUeuOLXezCTBVPhdZs1Jg,116982
 xinference/model/llm/memory.py,sha256=PTD8m6TCZVU1zrwc9wepX9cUjCqAXBENj6X7tjua0to,10207
 xinference/model/llm/utils.py,sha256=3KkpM-HaI97jAFj5Pb1-Kau3BL8-8d-SypDkKCWFqPs,32655
 xinference/model/llm/ggml/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581

@@ -108,9 +108,9 @@ xinference/model/llm/pytorch/utils.py,sha256=HZhJKQG1O1P1qTpxvVzIjBp-2J8aTRxUmS9
 xinference/model/llm/pytorch/vicuna.py,sha256=avNOgt9fBjwYzahL-j6-EcQS-7km167h8ttJolnNWnE,2334
 xinference/model/llm/pytorch/yi_vl.py,sha256=MljT7tpgFIhL6n5rdoS3hmq_u0rtHRE6cxXCseujklQ,10911
 xinference/model/llm/sglang/__init__.py,sha256=-sjSIQ4K6w-TEzx49kVaWeWC443fnZqODU91GCQ_JNo,581
-xinference/model/llm/sglang/core.py,sha256=
+xinference/model/llm/sglang/core.py,sha256=9c4KgEFswu1Fx3qI4VFszv26902FwIifq9AVzMijDa4,14087
 xinference/model/llm/vllm/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/model/llm/vllm/core.py,sha256=
+xinference/model/llm/vllm/core.py,sha256=beZeuCR_wCbIjtU-WWM8q0rVPPPiPLO2VINnvNQfq8w,22165
 xinference/model/rerank/__init__.py,sha256=BXIL1uu3ZpZHX9bODhW9lxKUXudZE7-OkXFmmM5rpMU,2817
 xinference/model/rerank/core.py,sha256=qAUwOdRHomn0uCzCw6klDxJSZyIDQ4tvgz9pOPm-0GY,12150
 xinference/model/rerank/custom.py,sha256=NKk7jA7p4xkuwS5WoOs2SY2wdnoAVpyCjBTvv317bBw,3917

@@ -15428,9 +15428,9 @@ xinference/web/ui/node_modules/yargs-parser/package.json,sha256=BSwbOzgetKXMK4u0
 xinference/web/ui/node_modules/yocto-queue/package.json,sha256=6U1XHQPGXJTqsiFvT953ORihUtXTblZy4fXBWP9qxC0,725
 xinference/web/ui/node_modules/yup/package.json,sha256=xRFSROB9NKxqSWHEVFvSTsPs9Ll074uo8OS1zEw0qhA,1206
 xinference/web/ui/node_modules/yup/node_modules/type-fest/package.json,sha256=JTv2zTTVgxQ2H82m1-6qEpdMv08lHjFx4Puf_MsbB_Q,1134
-xinference-0.13.
-xinference-0.13.
-xinference-0.13.
-xinference-0.13.
-xinference-0.13.
-xinference-0.13.
+xinference-0.13.2.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+xinference-0.13.2.dist-info/METADATA,sha256=EmYaz9n8oJHqQSU8Er7kqRuuN01VWaRBLZ8lgQMCMgc,16721
+xinference-0.13.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+xinference-0.13.2.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
+xinference-0.13.2.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
+xinference-0.13.2.dist-info/RECORD,,
{xinference-0.13.1.dist-info → xinference-0.13.2.dist-info}/LICENSE
File without changes
{xinference-0.13.1.dist-info → xinference-0.13.2.dist-info}/WHEEL
File without changes
{xinference-0.13.1.dist-info → xinference-0.13.2.dist-info}/entry_points.txt
File without changes
{xinference-0.13.1.dist-info → xinference-0.13.2.dist-info}/top_level.txt
File without changes