PyPI - xinference - Versions diffs - 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl - Mend

xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic. Click here for more details.

Files changed (373) hide show

{xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.44.0)
+Generator: bdist_wheel (0.45.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

xinference/thirdparty/cosyvoice/bin/export_trt.py DELETED Viewed

@@ -1,8 +0,0 @@
-# TODO 跟export_jit一样的逻辑，完成flow部分的estimator的onnx导出。
-# tensorrt的安装方式，再这里写一下步骤提示如下，如果没有安装，那么不要执行这个脚本，提示用户先安装，不给选择
-try:
-    import tensorrt
-except ImportError:
-    print('step1, 下载\n step2. 解压，安装whl，')
-# 安装命令里tensosrt的根目录用环境变量导入，比如os.environ['tensorrt_root_dir']/bin/exetrace，然后python里subprocess里执行导出命令
-# 后面我会在run.sh里写好执行命令 tensorrt_root_dir=xxxx python cosyvoice/bin/export_trt.py --model_dir xxx

xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py DELETED Viewed

File without changes

xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py DELETED Viewed

File without changes

xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py DELETED Viewed

File without changes

xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py DELETED Viewed

File without changes

xinference/thirdparty/fish_speech/fish_speech/models/__init__.py DELETED Viewed

File without changes

xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py DELETED Viewed

File without changes

xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py DELETED Viewed

File without changes

xinference/thirdparty/fish_speech/tools/__init__.py DELETED Viewed

File without changes

xinference/thirdparty/fish_speech/tools/api.py DELETED Viewed

@@ -1,440 +0,0 @@
-import base64
-import io
-import json
-import queue
-import random
-import sys
-import traceback
-import wave
-from argparse import ArgumentParser
-from http import HTTPStatus
-from pathlib import Path
-from typing import Annotated, Any, Literal, Optional
-import numpy as np
-import ormsgpack
-# import pyrootutils
-import soundfile as sf
-import torch
-import torchaudio
-# from baize.datastructures import ContentType
-# from kui.asgi import (
-#     Body,
-#     FactoryClass,
-#     HTTPException,
-#     HttpRequest,
-#     HttpView,
-#     JSONResponse,
-#     Kui,
-#     OpenAPI,
-#     StreamResponse,
-# )
-# from kui.asgi.routing import MultimethodRoutes
-from loguru import logger
-from pydantic import BaseModel, Field, conint
-# pyrootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
-# from fish_speech.models.vqgan.lit_module import VQGAN
-from fish_speech.models.vqgan.modules.firefly import FireflyArchitecture
-from fish_speech.text.chn_text_norm.text import Text as ChnNormedText
-from fish_speech.utils import autocast_exclude_mps
-from tools.commons import ServeReferenceAudio, ServeTTSRequest
-from tools.file import AUDIO_EXTENSIONS, audio_to_bytes, list_files, read_ref_text
-from tools.llama.generate import (
-    GenerateRequest,
-    GenerateResponse,
-    WrappedGenerateResponse,
-    launch_thread_safe_queue,
-)
-from tools.vqgan.inference import load_model as load_decoder_model
-def wav_chunk_header(sample_rate=44100, bit_depth=16, channels=1):
-    buffer = io.BytesIO()
-    with wave.open(buffer, "wb") as wav_file:
-        wav_file.setnchannels(channels)
-        wav_file.setsampwidth(bit_depth // 8)
-        wav_file.setframerate(sample_rate)
-    wav_header_bytes = buffer.getvalue()
-    buffer.close()
-    return wav_header_bytes
-# Define utils for web server
-# async def http_execption_handler(exc: HTTPException):
-#     return JSONResponse(
-#         dict(
-#             statusCode=exc.status_code,
-#             message=exc.content,
-#             error=HTTPStatus(exc.status_code).phrase,
-#         ),
-#         exc.status_code,
-#         exc.headers,
-#     )
-async def other_exception_handler(exc: "Exception"):
-    traceback.print_exc()
-    status = HTTPStatus.INTERNAL_SERVER_ERROR
-    return JSONResponse(
-        dict(statusCode=status, message=str(exc), error=status.phrase),
-        status,
-    )
-def load_audio(reference_audio, sr):
-    if len(reference_audio) > 255 or not Path(reference_audio).exists():
-        audio_data = reference_audio
-        reference_audio = io.BytesIO(audio_data)
-    waveform, original_sr = torchaudio.load(
-        reference_audio, backend="sox" if sys.platform == "linux" else "soundfile"
-    )
-    if waveform.shape[0] > 1:
-        waveform = torch.mean(waveform, dim=0, keepdim=True)
-    if original_sr != sr:
-        resampler = torchaudio.transforms.Resample(orig_freq=original_sr, new_freq=sr)
-        waveform = resampler(waveform)
-    audio = waveform.squeeze().numpy()
-    return audio
-def encode_reference(*, decoder_model, reference_audio, enable_reference_audio):
-    if enable_reference_audio and reference_audio is not None:
-        # Load audios, and prepare basic info here
-        reference_audio_content = load_audio(
-            reference_audio, decoder_model.spec_transform.sample_rate
-        )
-        audios = torch.from_numpy(reference_audio_content).to(decoder_model.device)[
-            None, None, :
-        ]
-        audio_lengths = torch.tensor(
-            [audios.shape[2]], device=decoder_model.device, dtype=torch.long
-        )
-        logger.info(
-            f"Loaded audio with {audios.shape[2] / decoder_model.spec_transform.sample_rate:.2f} seconds"
-        )
-        # VQ Encoder
-        if isinstance(decoder_model, FireflyArchitecture):
-            prompt_tokens = decoder_model.encode(audios, audio_lengths)[0][0]
-        logger.info(f"Encoded prompt: {prompt_tokens.shape}")
-    else:
-        prompt_tokens = None
-        logger.info("No reference audio provided")
-    return prompt_tokens
-def decode_vq_tokens(
-    *,
-    decoder_model,
-    codes,
-):
-    feature_lengths = torch.tensor([codes.shape[1]], device=decoder_model.device)
-    logger.info(f"VQ features: {codes.shape}")
-    if isinstance(decoder_model, FireflyArchitecture):
-        # VQGAN Inference
-        return decoder_model.decode(
-            indices=codes[None],
-            feature_lengths=feature_lengths,
-        )[0].squeeze()
-    raise ValueError(f"Unknown model type: {type(decoder_model)}")
-# routes = MultimethodRoutes(base_class=HttpView)
-def get_content_type(audio_format):
-    if audio_format == "wav":
-        return "audio/wav"
-    elif audio_format == "flac":
-        return "audio/flac"
-    elif audio_format == "mp3":
-        return "audio/mpeg"
-    else:
-        return "application/octet-stream"
-@torch.inference_mode()
-def inference(req: ServeTTSRequest):
-    idstr: str | None = req.reference_id
-    if idstr is not None:
-        ref_folder = Path("references") / idstr
-        ref_folder.mkdir(parents=True, exist_ok=True)
-        ref_audios = list_files(
-            ref_folder, AUDIO_EXTENSIONS, recursive=True, sort=False
-        )
-        prompt_tokens = [
-            encode_reference(
-                decoder_model=decoder_model,
-                reference_audio=audio_to_bytes(str(ref_audio)),
-                enable_reference_audio=True,
-            )
-            for ref_audio in ref_audios
-        ]
-        prompt_texts = [
-            read_ref_text(str(ref_audio.with_suffix(".lab")))
-            for ref_audio in ref_audios
-        ]
-    else:
-        # Parse reference audio aka prompt
-        refs = req.references
-        if refs is None:
-            refs = []
-        prompt_tokens = [
-            encode_reference(
-                decoder_model=decoder_model,
-                reference_audio=ref.audio,
-                enable_reference_audio=True,
-            )
-            for ref in refs
-        ]
-        prompt_texts = [ref.text for ref in refs]
-    # LLAMA Inference
-    request = dict(
-        device=decoder_model.device,
-        max_new_tokens=req.max_new_tokens,
-        text=(
-            req.text
-            if not req.normalize
-            else ChnNormedText(raw_text=req.text).normalize()
-        ),
-        top_p=req.top_p,
-        repetition_penalty=req.repetition_penalty,
-        temperature=req.temperature,
-        compile=args.compile,
-        iterative_prompt=req.chunk_length > 0,
-        chunk_length=req.chunk_length,
-        max_length=2048,
-        prompt_tokens=prompt_tokens,
-        prompt_text=prompt_texts,
-    )
-    response_queue = queue.Queue()
-    llama_queue.put(
-        GenerateRequest(
-            request=request,
-            response_queue=response_queue,
-        )
-    )
-    if req.streaming:
-        yield wav_chunk_header()
-    segments = []
-    while True:
-        result: WrappedGenerateResponse = response_queue.get()
-        if result.status == "error":
-            raise result.response
-            break
-        result: GenerateResponse = result.response
-        if result.action == "next":
-            break
-        with autocast_exclude_mps(
-            device_type=decoder_model.device.type, dtype=args.precision
-        ):
-            fake_audios = decode_vq_tokens(
-                decoder_model=decoder_model,
-                codes=result.codes,
-            )
-        fake_audios = fake_audios.float().cpu().numpy()
-        if req.streaming:
-            yield (fake_audios * 32768).astype(np.int16).tobytes()
-        else:
-            segments.append(fake_audios)
-    if req.streaming:
-        return
-    if len(segments) == 0:
-        raise HTTPException(
-            HTTPStatus.INTERNAL_SERVER_ERROR,
-            content="No audio generated, please check the input text.",
-        )
-    fake_audios = np.concatenate(segments, axis=0)
-    yield fake_audios
-async def inference_async(req: ServeTTSRequest):
-    for chunk in inference(req):
-        yield chunk
-async def buffer_to_async_generator(buffer):
-    yield buffer
-# @routes.http.post("/v1/tts")
-# async def api_invoke_model(
-#     req: Annotated[ServeTTSRequest, Body(exclusive=True)],
-# ):
-#     """
-#     Invoke model and generate audio
-#     """
-#
-#     if args.max_text_length > 0 and len(req.text) > args.max_text_length:
-#         raise HTTPException(
-#             HTTPStatus.BAD_REQUEST,
-#             content=f"Text is too long, max length is {args.max_text_length}",
-#         )
-#
-#     if req.streaming and req.format != "wav":
-#         raise HTTPException(
-#             HTTPStatus.BAD_REQUEST,
-#             content="Streaming only supports WAV format",
-#         )
-#
-#     if req.streaming:
-#         return StreamResponse(
-#             iterable=inference_async(req),
-#             headers={
-#                 "Content-Disposition": f"attachment; filename=audio.{req.format}",
-#             },
-#             content_type=get_content_type(req.format),
-#         )
-#     else:
-#         fake_audios = next(inference(req))
-#         buffer = io.BytesIO()
-#         sf.write(
-#             buffer,
-#             fake_audios,
-#             decoder_model.spec_transform.sample_rate,
-#             format=req.format,
-#         )
-#
-#         return StreamResponse(
-#             iterable=buffer_to_async_generator(buffer.getvalue()),
-#             headers={
-#                 "Content-Disposition": f"attachment; filename=audio.{req.format}",
-#             },
-#             content_type=get_content_type(req.format),
-#         )
-#
-#
-# @routes.http.post("/v1/health")
-# async def api_health():
-#     """
-#     Health check
-#     """
-#
-#     return JSONResponse({"status": "ok"})
-def parse_args():
-    parser = ArgumentParser()
-    parser.add_argument(
-        "--llama-checkpoint-path",
-        type=str,
-        default="checkpoints/fish-speech-1.4",
-    )
-    parser.add_argument(
-        "--decoder-checkpoint-path",
-        type=str,
-        default="checkpoints/fish-speech-1.4/firefly-gan-vq-fsq-8x1024-21hz-generator.pth",
-    )
-    parser.add_argument("--decoder-config-name", type=str, default="firefly_gan_vq")
-    parser.add_argument("--device", type=str, default="cuda")
-    parser.add_argument("--half", action="store_true")
-    parser.add_argument("--compile", action="store_true")
-    parser.add_argument("--max-text-length", type=int, default=0)
-    parser.add_argument("--listen", type=str, default="127.0.0.1:8080")
-    parser.add_argument("--workers", type=int, default=1)
-    return parser.parse_args()
-# Define Kui app
-# openapi = OpenAPI(
-#     {
-#         "title": "Fish Speech API",
-#     },
-# ).routes
-#
-#
-# class MsgPackRequest(HttpRequest):
-#     async def data(self) -> Annotated[Any, ContentType("application/msgpack")]:
-#         if self.content_type == "application/msgpack":
-#             return ormsgpack.unpackb(await self.body)
-#
-#         raise HTTPException(
-#             HTTPStatus.UNSUPPORTED_MEDIA_TYPE,
-#             headers={"Accept": "application/msgpack"},
-#         )
-#
-#
-# app = Kui(
-#     routes=routes + openapi[1:],  # Remove the default route
-#     exception_handlers={
-#         HTTPException: http_execption_handler,
-#         Exception: other_exception_handler,
-#     },
-#     factory_class=FactoryClass(http=MsgPackRequest),
-#     cors_config={},
-# )
-if __name__ == "__main__":
-    import uvicorn
-    args = parse_args()
-    args.precision = torch.half if args.half else torch.bfloat16
-    logger.info("Loading Llama model...")
-    llama_queue = launch_thread_safe_queue(
-        checkpoint_path=args.llama_checkpoint_path,
-        device=args.device,
-        precision=args.precision,
-        compile=args.compile,
-    )
-    logger.info("Llama model loaded, loading VQ-GAN model...")
-    decoder_model = load_decoder_model(
-        config_name=args.decoder_config_name,
-        checkpoint_path=args.decoder_checkpoint_path,
-        device=args.device,
-    )
-    logger.info("VQ-GAN model loaded, warming up...")
-    # Dry run to check if the model is loaded correctly and avoid the first-time latency
-    list(
-        inference(
-            ServeTTSRequest(
-                text="Hello world.",
-                references=[],
-                reference_id=None,
-                max_new_tokens=1024,
-                chunk_length=200,
-                top_p=0.7,
-                repetition_penalty=1.2,
-                temperature=0.7,
-                emotion=None,
-                format="wav",
-            )
-        )
-    )
-    logger.info(f"Warming up done, starting server at http://{args.listen}")
-    host, port = args.listen.split(":")
-    uvicorn.run(app, host=host, port=int(port), workers=args.workers, log_level="info")

xinference/thirdparty/fish_speech/tools/commons.py DELETED Viewed

@@ -1,35 +0,0 @@
-from typing import Annotated, Literal, Optional
-from pydantic import BaseModel, Field, conint
-class ServeReferenceAudio(BaseModel):
-    audio: bytes
-    text: str
-class ServeTTSRequest(BaseModel):
-    text: str
-    chunk_length: Annotated[int, conint(ge=100, le=300, strict=True)] = 200
-    # Audio format
-    format: Literal["wav", "pcm", "mp3"] = "wav"
-    mp3_bitrate: Literal[64, 128, 192] = 128
-    # References audios for in-context learning
-    references: list[ServeReferenceAudio] = []
-    # Reference id
-    # For example, if you want use https://fish.audio/m/7f92f8afb8ec43bf81429cc1c9199cb1/
-    # Just pass 7f92f8afb8ec43bf81429cc1c9199cb1
-    reference_id: str | None = None
-    # Normalize text for en & zh, this increase stability for numbers
-    normalize: bool = True
-    mp3_bitrate: Optional[int] = 64
-    opus_bitrate: Optional[int] = -1000
-    # Balance mode will reduce latency to 300ms, but may decrease stability
-    latency: Literal["normal", "balanced"] = "normal"
-    # not usually used below
-    streaming: bool = False
-    emotion: Optional[str] = None
-    max_new_tokens: int = 1024
-    top_p: Annotated[float, Field(ge=0.1, le=1.0, strict=True)] = 0.7
-    repetition_penalty: Annotated[float, Field(ge=0.9, le=2.0, strict=True)] = 1.2
-    temperature: Annotated[float, Field(ge=0.1, le=1.0, strict=True)] = 0.7

xinference/thirdparty/fish_speech/tools/llama/__init__.py DELETED Viewed

File without changes

xinference/thirdparty/fish_speech/tools/msgpack_api.py DELETED Viewed

@@ -1,34 +0,0 @@
-import httpx
-import ormsgpack
-from tools.commons import ServeReferenceAudio, ServeTTSRequest
-# priority: ref_id > references
-request = ServeTTSRequest(
-    text="你说的对, 但是原神是一款由米哈游自主研发的开放世界手游.",
-    # reference_id="114514",
-    references=[
-        ServeReferenceAudio(
-            audio=open("lengyue.wav", "rb").read(),
-            text=open("lengyue.lab", "r", encoding="utf-8").read(),
-        )
-    ],
-    streaming=True,
-)
-with (
-    httpx.Client() as client,
-    open("hello.wav", "wb") as f,
-):
-    with client.stream(
-        "POST",
-        "http://127.0.0.1:8080/v1/tts",
-        content=ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC),
-        headers={
-            "authorization": "Bearer YOUR_API_KEY",
-            "content-type": "application/msgpack",
-        },
-        timeout=None,
-    ) as response:
-        for chunk in response.iter_bytes():
-            f.write(chunk)

xinference/thirdparty/fish_speech/tools/vqgan/__init__.py DELETED Viewed

File without changes

xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl

Potentially problematic release.

xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl