xinference 0.13.0__py3-none-any.whl → 0.13.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.
Files changed (66)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +50 -2
  3. xinference/client/restful/restful_client.py +49 -2
  4. xinference/core/model.py +15 -0
  5. xinference/core/supervisor.py +132 -15
  6. xinference/core/worker.py +165 -8
  7. xinference/deploy/cmdline.py +5 -0
  8. xinference/model/audio/chattts.py +6 -6
  9. xinference/model/audio/core.py +23 -15
  10. xinference/model/core.py +12 -3
  11. xinference/model/embedding/core.py +25 -16
  12. xinference/model/flexible/__init__.py +40 -0
  13. xinference/model/flexible/core.py +228 -0
  14. xinference/model/flexible/launchers/__init__.py +15 -0
  15. xinference/model/flexible/launchers/transformers_launcher.py +63 -0
  16. xinference/model/flexible/utils.py +33 -0
  17. xinference/model/image/core.py +18 -14
  18. xinference/model/image/custom.py +1 -1
  19. xinference/model/llm/__init__.py +0 -2
  20. xinference/model/llm/core.py +3 -2
  21. xinference/model/llm/ggml/llamacpp.py +1 -10
  22. xinference/model/llm/llm_family.json +52 -35
  23. xinference/model/llm/llm_family.py +71 -46
  24. xinference/model/llm/llm_family_modelscope.json +55 -27
  25. xinference/model/llm/pytorch/core.py +0 -80
  26. xinference/model/llm/utils.py +4 -2
  27. xinference/model/rerank/core.py +24 -25
  28. xinference/types.py +0 -1
  29. xinference/web/ui/build/asset-manifest.json +3 -3
  30. xinference/web/ui/build/index.html +1 -1
  31. xinference/web/ui/build/static/js/{main.0fb6f3ab.js → main.95c1d652.js} +3 -3
  32. xinference/web/ui/build/static/js/main.95c1d652.js.map +1 -0
  33. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +1 -0
  34. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +1 -0
  35. xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +1 -0
  36. xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +1 -0
  37. xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +1 -0
  38. xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +1 -0
  39. {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/METADATA +7 -11
  40. {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/RECORD +45 -54
  41. xinference/model/llm/ggml/chatglm.py +0 -457
  42. xinference/thirdparty/ChatTTS/__init__.py +0 -1
  43. xinference/thirdparty/ChatTTS/core.py +0 -200
  44. xinference/thirdparty/ChatTTS/experimental/__init__.py +0 -0
  45. xinference/thirdparty/ChatTTS/experimental/llm.py +0 -40
  46. xinference/thirdparty/ChatTTS/infer/__init__.py +0 -0
  47. xinference/thirdparty/ChatTTS/infer/api.py +0 -125
  48. xinference/thirdparty/ChatTTS/model/__init__.py +0 -0
  49. xinference/thirdparty/ChatTTS/model/dvae.py +0 -155
  50. xinference/thirdparty/ChatTTS/model/gpt.py +0 -265
  51. xinference/thirdparty/ChatTTS/utils/__init__.py +0 -0
  52. xinference/thirdparty/ChatTTS/utils/gpu_utils.py +0 -23
  53. xinference/thirdparty/ChatTTS/utils/infer_utils.py +0 -141
  54. xinference/thirdparty/ChatTTS/utils/io_utils.py +0 -14
  55. xinference/web/ui/build/static/js/main.0fb6f3ab.js.map +0 -1
  56. xinference/web/ui/node_modules/.cache/babel-loader/0f6b391abec76271137faad13a3793fe7acc1024e8cd2269c147b653ecd3a73b.json +0 -1
  57. xinference/web/ui/node_modules/.cache/babel-loader/30a0c79d8025d6441eb75b2df5bc2750a14f30119c869ef02570d294dff65c2f.json +0 -1
  58. xinference/web/ui/node_modules/.cache/babel-loader/40486e655c3c5801f087e2cf206c0b5511aaa0dfdba78046b7181bf9c17e54c5.json +0 -1
  59. xinference/web/ui/node_modules/.cache/babel-loader/b5507cd57f16a3a230aa0128e39fe103e928de139ea29e2679e4c64dcbba3b3a.json +0 -1
  60. xinference/web/ui/node_modules/.cache/babel-loader/d779b915f83f9c7b5a72515b6932fdd114f1822cef90ae01cc0d12bca59abc2d.json +0 -1
  61. xinference/web/ui/node_modules/.cache/babel-loader/d87824cb266194447a9c0c69ebab2d507bfc3e3148976173760d18c035e9dd26.json +0 -1
  62. /xinference/web/ui/build/static/js/{main.0fb6f3ab.js.LICENSE.txt → main.95c1d652.js.LICENSE.txt} +0 -0
  63. {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/LICENSE +0 -0
  64. {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/WHEEL +0 -0
  65. {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/entry_points.txt +0 -0
  66. {xinference-0.13.0.dist-info → xinference-0.13.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/ggml/chatglm.py
@@ -1,457 +0,0 @@
- # Copyright 2022-2023 XProbe Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import json
- import logging
- import os
- import time
- import uuid
- from pathlib import Path
- from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Union
-
- from ....types import (
-     SPECIAL_TOOL_PROMPT,
-     ChatCompletion,
-     ChatCompletionChunk,
-     ChatCompletionMessage,
-     ChatglmCppGenerateConfig,
-     ChatglmCppModelConfig,
-     Completion,
-     CompletionChunk,
- )
- from .. import LLMFamilyV1, LLMSpecV1
- from ..core import LLM
-
- if TYPE_CHECKING:
-     from chatglm_cpp import Pipeline
-
-
- logger = logging.getLogger(__name__)
-
-
- class ChatglmCppChatModel(LLM):
-     def __init__(
-         self,
-         model_uid: str,
-         model_family: "LLMFamilyV1",
-         model_spec: "LLMSpecV1",
-         quantization: str,
-         model_path: str,
-         model_config: Optional[ChatglmCppModelConfig] = None,
-     ):
-         super().__init__(model_uid, model_family, model_spec, quantization, model_path)
-         self._llm: Optional["Pipeline"] = None
-
-         # just a placeholder for now as the chatglm_cpp repo doesn't support model config.
-         self._model_config = model_config
-
-     @classmethod
-     def _sanitize_generate_config(
-         cls,
-         chatglmcpp_generate_config: Optional[ChatglmCppGenerateConfig],
-     ) -> ChatglmCppGenerateConfig:
-         if chatglmcpp_generate_config is None:
-             chatglmcpp_generate_config = ChatglmCppGenerateConfig()
-         chatglmcpp_generate_config.setdefault("stream", False)
-         return chatglmcpp_generate_config
-
-     def load(self):
-         try:
-             import chatglm_cpp
-         except ImportError:
-             error_message = "Failed to import module 'chatglm_cpp'"
-             installation_guide = [
-                 "Please make sure 'chatglm_cpp' is installed. ",
-                 "You can install it by running the following command in the terminal:\n",
-                 "pip install git+https://github.com/li-plus/chatglm.cpp.git@main\n\n",
-                 "Or visit the original git repo if the above command fails:\n",
-                 "https://github.com/li-plus/chatglm.cpp",
-             ]
-
-             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
-
-         model_file_path = os.path.join(
-             self.model_path,
-             self.model_spec.model_file_name_template.format(
-                 quantization=self.quantization
-             ),
-         )
-
-         # handle legacy cache.
-         legacy_model_file_path = os.path.join(self.model_path, "model.bin")
-         if os.path.exists(legacy_model_file_path):
-             model_file_path = legacy_model_file_path
-
-         self._llm = chatglm_cpp.Pipeline(Path(model_file_path))
-
-     @classmethod
-     def match(
-         cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
-     ) -> bool:
-         if llm_spec.model_format != "ggmlv3":
-             return False
-         if "chatglm" not in llm_family.model_name:
-             return False
-         if "chat" not in llm_family.model_ability:
-             return False
-         return True
-
-     @staticmethod
-     def _convert_raw_text_chunks_to_chat(
-         tokens: Iterator[Any], model_name: str, include_usage: bool, input_ids
-     ) -> Iterator[ChatCompletionChunk]:
-         request_id = str(uuid.uuid4())
-         yield {
-             "id": "chat" + f"cmpl-{request_id}",
-             "model": model_name,
-             "object": "chat.completion.chunk",
-             "created": int(time.time()),
-             "choices": [
-                 {
-                     "index": 0,
-                     "delta": {
-                         "role": "assistant",
-                     },
-                     "finish_reason": None,
-                 }
-             ],
-         }
-         prompt_tokens, completion_tokens, total_tokens = 0, 0, 0
-         for token in tokens:
-             prompt_tokens = len(input_ids)
-             completion_tokens = completion_tokens + 1
-             total_tokens = prompt_tokens + completion_tokens
-             yield {
-                 "id": "chat" + f"cmpl-{request_id}",
-                 "model": model_name,
-                 "object": "chat.completion.chunk",
-                 "created": int(time.time()),
-                 "choices": [
-                     {
-                         "index": 0,
-                         "delta": {
-                             "content": (
-                                 token if isinstance(token, str) else token.content
-                             ),
-                         },
-                         "finish_reason": None,
-                     }
-                 ],
-             }
-         # stop
-         yield {
-             "id": "chat" + f"cmpl-{request_id}",
-             "model": model_name,
-             "object": "chat.completion.chunk",
-             "created": int(time.time()),
-             "choices": [
-                 {
-                     "index": 0,
-                     "delta": {
-                         "content": "",
-                     },
-                     "finish_reason": "stop",
-                 }
-             ],
-         }
-         if include_usage:
-             yield {
-                 "id": "chat" + f"cmpl-{request_id}",
-                 "model": model_name,
-                 "object": "chat.completion.chunk",
-                 "created": int(time.time()),
-                 "choices": [],
-                 "usage": {
-                     "prompt_tokens": prompt_tokens,
-                     "completion_tokens": completion_tokens,
-                     "total_tokens": total_tokens,
-                 },
-             }
-
-     @classmethod
-     def _convert_raw_text_completion_to_chat(
-         cls, text: Any, model_name: str
-     ) -> ChatCompletion:
-         _id = str(uuid.uuid4())
-         return {
-             "id": "chat" + f"cmpl-{_id}",
-             "model": model_name,
-             "object": "chat.completion",
-             "created": int(time.time()),
-             "choices": [
-                 {
-                     "index": 0,
-                     "message": cls._message_to_json_string(_id, text),
-                     "finish_reason": cls._finish_reason_from_msg(text),
-                 }
-             ],
-             "usage": {
-                 "prompt_tokens": -1,
-                 "completion_tokens": -1,
-                 "total_tokens": -1,
-             },
-         }
-
-     @staticmethod
-     def _finish_reason_from_msg(msg):
-         if isinstance(msg, str):
-             return None
-         else:
-             return "tool_calls" if msg.tool_calls else "stop"
-
-     @staticmethod
-     def _eval_arguments(arguments):
-         def tool_call(**kwargs):
-             return kwargs
-
-         try:
-             return json.dumps(eval(arguments, dict(tool_call=tool_call)))
-         except Exception:
-             return f"Invalid arguments {arguments}"
-
-     @classmethod
-     def _message_to_json_string(cls, _id, msg) -> ChatCompletionMessage:
-         if isinstance(msg, str):
-             return {
-                 "role": "assistant",
-                 "content": msg,
-             }
-         else:
-             return {
-                 "role": msg.role,
-                 "content": msg.content,
-                 "tool_calls": [
-                     {
-                         "id": f"call_{_id}",
-                         "type": tc.type,
-                         "function": {
-                             "name": tc.function.name,
-                             "arguments": cls._eval_arguments(tc.function.arguments),
-                         },
-                     }
-                     for tc in msg.tool_calls
-                 ],
-             }
-
-     @staticmethod
-     def _handle_tools(generate_config) -> Optional[ChatCompletionMessage]:
-         """Convert openai tools to ChatGLM tools."""
-         if generate_config is None:
-             return None
-         tools = generate_config.pop("tools", None)
-         if tools is None:
-             return None
-         chatglm_tools = []
-         for elem in tools:
-             if elem.get("type") != "function" or "function" not in elem:
-                 raise ValueError("ChatGLM tools only support function type.")
-             chatglm_tools.append(elem["function"])
-         return {
-             "role": "system",
-             "content": (
-                 f"Answer the following questions as best as you can. You have access to the following tools:\n"
-                 f"{json.dumps(chatglm_tools, indent=4, ensure_ascii=False)}"
-             ),
-         }
-
-     @staticmethod
-     def _to_chatglm_chat_messages(history_list: List[Any]):
-         from chatglm_cpp import ChatMessage
-
-         return [ChatMessage(role=v["role"], content=v["content"]) for v in history_list]
-
-     def chat(
-         self,
-         prompt: str,
-         system_prompt: Optional[str] = None,
-         chat_history: Optional[List[ChatCompletionMessage]] = None,
-         generate_config: Optional[ChatglmCppGenerateConfig] = None,
-     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
-         chat_history_list = []
-         if system_prompt is not None:
-             chat_history_list.append({"role": "system", "content": system_prompt})
-         if chat_history is not None:
-             chat_history_list.extend(chat_history)  # type: ignore
-
-         tool_message = self._handle_tools(generate_config)
-         if tool_message is not None:
-             chat_history_list.insert(0, tool_message)  # type: ignore
-
-         # We drop the message which contains tool calls to walkaround the issue:
-         # https://github.com/li-plus/chatglm.cpp/issues/231
-         chat_history_list = [m for m in chat_history_list if not m.get("tool_calls")]
-         for idx, m in enumerate(chat_history_list):
-             if m.get("role") == "tool":
-                 # Reconstruct a simple tool message.
-                 chat_history_list[idx] = {
-                     "content": m["content"],
-                     "role": "observation",
-                 }
-                 break
-
-         if prompt != SPECIAL_TOOL_PROMPT:
-             chat_history_list.append({"role": "user", "content": prompt})
-         logger.debug("Full conversation history:\n%s", str(chat_history_list))
-
-         generate_config = self._sanitize_generate_config(generate_config)
-
-         params = {
-             "max_length": generate_config.get("max_tokens"),
-             "max_context_length": generate_config.get("max_tokens", 1024),
-             "top_k": generate_config.get("top_k"),
-             "top_p": generate_config.get("top_p"),
-             "temperature": generate_config.get("temperature"),
-             "stream": generate_config.get("stream", False),
-         }
-
-         # Remove None values to exclude missing keys from params
-         params = {k: v for k, v in params.items() if v is not None}
-
-         assert self._llm is not None
-         chat_history_messages = self._to_chatglm_chat_messages(chat_history_list)
-
-         stream = generate_config.get("stream")
-         stream_options = generate_config.get("stream_options", None)
-         include_usage = (
-             stream_options["include_usage"]
-             if isinstance(stream_options, dict)
-             else False
-         )
-
-         if stream:
-             it = self._llm.chat(
-                 chat_history_messages,
-                 **params,
-             )
-             assert not isinstance(it, str)
-             input_ids = self._llm.tokenizer.encode_messages(
-                 chat_history_messages, params["max_context_length"]
-             )
-             return self._convert_raw_text_chunks_to_chat(
-                 it, self.model_uid, include_usage, input_ids
-             )
-
-         else:
-             c = self._llm.chat(
-                 chat_history_messages,
-                 **params,
-             )
-             assert not isinstance(c, Iterator)
-             return self._convert_raw_text_completion_to_chat(c, self.model_uid)
-
-     @staticmethod
-     def _convert_str_to_completion(data: str, model_name: str) -> Completion:
-         return {
-             "id": "generate" + f"-{str(uuid.uuid4())}",
-             "model": model_name,
-             "object": "text_completion",
-             "created": int(time.time()),
-             "choices": [
-                 {"index": 0, "text": data, "finish_reason": None, "logprobs": None}
-             ],
-             "usage": {
-                 "prompt_tokens": -1,
-                 "completion_tokens": -1,
-                 "total_tokens": -1,
-             },
-         }
-
-     @staticmethod
-     def _convert_str_to_completion_chunk(
-         tokens: Iterator[str], model_name: str, include_usage: bool, input_ids
-     ) -> Iterator[CompletionChunk]:
-         request_id = str(uuid.uuid4())
-         prompt_tokens, completion_tokens, total_tokens = 0, 0, 0
-         for i, token in enumerate(tokens):
-             yield {
-                 "id": "generate" + f"-{request_id}",
-                 "model": model_name,
-                 "object": "text_completion",
-                 "created": int(time.time()),
-                 "choices": [
-                     {"index": 0, "text": token, "finish_reason": None, "logprobs": None}
-                 ],
-             }
-             prompt_tokens = len(input_ids)
-             completion_tokens = i
-             total_tokens = prompt_tokens + completion_tokens
-         # stop
-         yield {
-             "id": "chat" + f"cmpl-{request_id}",
-             "model": model_name,
-             "object": "text_completion",
-             "created": int(time.time()),
-             "choices": [
-                 {"index": 0, "text": "", "finish_reason": "stop", "logprobs": None}
-             ],
-         }
-         if include_usage:
-             yield {
-                 "id": "chat" + f"cmpl-{request_id}",
-                 "model": model_name,
-                 "object": "text_completion",
-                 "created": int(time.time()),
-                 "choices": [],
-                 "usage": {
-                     "prompt_tokens": prompt_tokens,
-                     "completion_tokens": completion_tokens,
-                     "total_tokens": total_tokens,
-                 },
-             }
-
-     def generate(
-         self,
-         prompt: str,
-         generate_config: Optional[ChatglmCppGenerateConfig] = None,
-     ) -> Union[Completion, Iterator[CompletionChunk]]:
-         logger.debug(f"Prompt for generate:\n{prompt}")
-
-         generate_config = self._sanitize_generate_config(generate_config)
-
-         params = {
-             "max_length": generate_config.get("max_tokens"),
-             "max_context_length": generate_config.get("max_tokens", 1024),
-             "top_k": generate_config.get("top_k"),
-             "top_p": generate_config.get("top_p"),
-             "temperature": generate_config.get("temperature"),
-             "stream": generate_config.get("stream", False),
-         }
-
-         # Remove None values to exclude missing keys from params
-         params = {k: v for k, v in params.items() if v is not None}
-
-         assert self._llm is not None
-         stream = generate_config.get("stream")
-         stream_options = generate_config.get("stream_options", None)
-         include_usage = (
-             stream_options["include_usage"]
-             if isinstance(stream_options, dict)
-             else False
-         )
-         if stream:
-             it = self._llm.generate(
-                 prompt,
-                 **params,
-             )
-             assert not isinstance(it, str)
-             input_ids = self._llm.tokenizer.encode(prompt, params["max_context_length"])
-             return self._convert_str_to_completion_chunk(
-                 it, self.model_uid, include_usage, input_ids
-             )
-         else:
-             c = self._llm.generate(
-                 prompt,
-                 **params,
-             )
-             assert not isinstance(c, Iterator)
-             return self._convert_str_to_completion(c, self.model_uid)
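The hunk above removes ChatglmCppChatModel, the adapter that served ggmlv3 ChatGLM models through the chatglm-cpp bindings and repackaged their output as OpenAI-style completion dicts. For reference, a minimal sketch of what that adapter wrapped, calling chatglm_cpp directly; the checkpoint path and sampling values here are illustrative, not taken from this release:

    from pathlib import Path

    import chatglm_cpp  # the bindings the removed load() imported lazily

    # Hypothetical local checkpoint; the removed load() derived this path from
    # model_file_name_template and the chosen quantization.
    pipeline = chatglm_cpp.Pipeline(Path("./chatglm3-ggml-q4_0.bin"))

    history = [
        chatglm_cpp.ChatMessage(role="system", content="You are a helpful assistant."),
        chatglm_cpp.ChatMessage(role="user", content="Say hello."),
    ]

    # Non-streaming call, mirroring the removed chat() else-branch: returns a
    # single ChatMessage that the adapter converted into a "chat.completion" dict.
    msg = pipeline.chat(history, max_length=512, top_p=0.8, temperature=0.7)
    print(msg.content)

    # Streaming call: stream=True yields message chunks, which the adapter
    # wrapped as "chat.completion.chunk" dicts keyed by a uuid4 request id.
    for chunk in pipeline.chat(history, max_length=512, stream=True):
        print(chunk.content, end="", flush=True)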
xinference/thirdparty/ChatTTS/__init__.py
@@ -1 +0,0 @@
- from .core import Chat
xinference/thirdparty/ChatTTS/core.py
@@ -1,200 +0,0 @@
-
- import os
- import logging
- from functools import partial
- from omegaconf import OmegaConf
-
- import torch
- from vocos import Vocos
- from .model.dvae import DVAE
- from .model.gpt import GPT_warpper
- from .utils.gpu_utils import select_device
- from .utils.infer_utils import count_invalid_characters, detect_language, apply_character_map, apply_half2full_map
- from .utils.io_utils import get_latest_modified_file
- from .infer.api import refine_text, infer_code
-
- from huggingface_hub import snapshot_download
-
- logging.basicConfig(level = logging.INFO)
-
-
- class Chat:
-     def __init__(self, ):
-         self.pretrain_models = {}
-         self.normalizer = {}
-         self.logger = logging.getLogger(__name__)
-
-     def check_model(self, level = logging.INFO, use_decoder = False):
-         not_finish = False
-         check_list = ['vocos', 'gpt', 'tokenizer']
-
-         if use_decoder:
-             check_list.append('decoder')
-         else:
-             check_list.append('dvae')
-
-         for module in check_list:
-             if module not in self.pretrain_models:
-                 self.logger.log(logging.WARNING, f'{module} not initialized.')
-                 not_finish = True
-
-         if not not_finish:
-             self.logger.log(level, f'All initialized.')
-
-         return not not_finish
-
-     def load_models(self, source='huggingface', force_redownload=False, local_path='<LOCAL_PATH>', **kwargs):
-         if source == 'huggingface':
-             hf_home = os.getenv('HF_HOME', os.path.expanduser("~/.cache/huggingface"))
-             try:
-                 download_path = get_latest_modified_file(os.path.join(hf_home, 'hub/models--2Noise--ChatTTS/snapshots'))
-             except:
-                 download_path = None
-             if download_path is None or force_redownload:
-                 self.logger.log(logging.INFO, f'Download from HF: https://huggingface.co/2Noise/ChatTTS')
-                 download_path = snapshot_download(repo_id="2Noise/ChatTTS", allow_patterns=["*.pt", "*.yaml"])
-             else:
-                 self.logger.log(logging.INFO, f'Load from cache: {download_path}')
-         elif source == 'local':
-             self.logger.log(logging.INFO, f'Load from local: {local_path}')
-             download_path = local_path
-
-         self._load(**{k: os.path.join(download_path, v) for k, v in OmegaConf.load(os.path.join(download_path, 'config', 'path.yaml')).items()}, **kwargs)
-
-     def _load(
-         self,
-         vocos_config_path: str = None,
-         vocos_ckpt_path: str = None,
-         dvae_config_path: str = None,
-         dvae_ckpt_path: str = None,
-         gpt_config_path: str = None,
-         gpt_ckpt_path: str = None,
-         decoder_config_path: str = None,
-         decoder_ckpt_path: str = None,
-         tokenizer_path: str = None,
-         device: str = None,
-         compile: bool = True,
-     ):
-         if not device:
-             device = select_device(4096)
-             self.logger.log(logging.INFO, f'use {device}')
-
-         if vocos_config_path:
-             vocos = Vocos.from_hparams(vocos_config_path).to(device).eval()
-             assert vocos_ckpt_path, 'vocos_ckpt_path should not be None'
-             vocos.load_state_dict(torch.load(vocos_ckpt_path))
-             self.pretrain_models['vocos'] = vocos
-             self.logger.log(logging.INFO, 'vocos loaded.')
-
-         if dvae_config_path:
-             cfg = OmegaConf.load(dvae_config_path)
-             dvae = DVAE(**cfg).to(device).eval()
-             assert dvae_ckpt_path, 'dvae_ckpt_path should not be None'
-             dvae.load_state_dict(torch.load(dvae_ckpt_path, map_location='cpu'))
-             self.pretrain_models['dvae'] = dvae
-             self.logger.log(logging.INFO, 'dvae loaded.')
-
-         if gpt_config_path:
-             cfg = OmegaConf.load(gpt_config_path)
-             gpt = GPT_warpper(**cfg).to(device).eval()
-             assert gpt_ckpt_path, 'gpt_ckpt_path should not be None'
-             gpt.load_state_dict(torch.load(gpt_ckpt_path, map_location='cpu'))
-             if compile and 'cuda' in str(device):
-                 gpt.gpt.forward = torch.compile(gpt.gpt.forward, backend='inductor', dynamic=True)
-             self.pretrain_models['gpt'] = gpt
-             spk_stat_path = os.path.join(os.path.dirname(gpt_ckpt_path), 'spk_stat.pt')
-             assert os.path.exists(spk_stat_path), f'Missing spk_stat.pt: {spk_stat_path}'
-             self.pretrain_models['spk_stat'] = torch.load(spk_stat_path).to(device)
-             self.logger.log(logging.INFO, 'gpt loaded.')
-
-         if decoder_config_path:
-             cfg = OmegaConf.load(decoder_config_path)
-             decoder = DVAE(**cfg).to(device).eval()
-             assert decoder_ckpt_path, 'decoder_ckpt_path should not be None'
-             decoder.load_state_dict(torch.load(decoder_ckpt_path, map_location='cpu'))
-             self.pretrain_models['decoder'] = decoder
-             self.logger.log(logging.INFO, 'decoder loaded.')
-
-         if tokenizer_path:
-             tokenizer = torch.load(tokenizer_path, map_location='cpu')
-             tokenizer.padding_side = 'left'
-             self.pretrain_models['tokenizer'] = tokenizer
-             self.logger.log(logging.INFO, 'tokenizer loaded.')
-
-         self.check_model()
-
-     def infer(
-         self,
-         text,
-         skip_refine_text=False,
-         refine_text_only=False,
-         params_refine_text={},
-         params_infer_code={'prompt':'[speed_5]'},
-         use_decoder=True,
-         do_text_normalization=True,
-         lang=None,
-     ):
-
-         assert self.check_model(use_decoder=use_decoder)
-
-         if not isinstance(text, list):
-             text = [text]
-
-         if do_text_normalization:
-             for i, t in enumerate(text):
-                 _lang = detect_language(t) if lang is None else lang
-                 self.init_normalizer(_lang)
-                 text[i] = self.normalizer[_lang](t)
-                 if _lang == 'zh':
-                     text[i] = apply_half2full_map(text[i])
-
-         for i, t in enumerate(text):
-             invalid_characters = count_invalid_characters(t)
-             if len(invalid_characters):
-                 self.logger.log(logging.WARNING, f'Invalid characters found! : {invalid_characters}')
-                 text[i] = apply_character_map(t)
-
-         if not skip_refine_text:
-             text_tokens = refine_text(self.pretrain_models, text, **params_refine_text)['ids']
-             text_tokens = [i[i < self.pretrain_models['tokenizer'].convert_tokens_to_ids('[break_0]')] for i in text_tokens]
-             text = self.pretrain_models['tokenizer'].batch_decode(text_tokens)
-             if refine_text_only:
-                 return text
-
-         text = [params_infer_code.get('prompt', '') + i for i in text]
-         params_infer_code.pop('prompt', '')
-         result = infer_code(self.pretrain_models, text, **params_infer_code, return_hidden=use_decoder)
-
-         if use_decoder:
-             mel_spec = [self.pretrain_models['decoder'](i[None].permute(0,2,1)) for i in result['hiddens']]
-         else:
-             mel_spec = [self.pretrain_models['dvae'](i[None].permute(0,2,1)) for i in result['ids']]
-
-         wav = [self.pretrain_models['vocos'].decode(i).cpu().numpy() for i in mel_spec]
-
-         return wav
-
-     def sample_random_speaker(self, ):
-
-         dim = self.pretrain_models['gpt'].gpt.layers[0].mlp.gate_proj.in_features
-         std, mean = self.pretrain_models['spk_stat'].chunk(2)
-         return torch.randn(dim, device=std.device) * std + mean
-
-     def init_normalizer(self, lang):
-
-         if lang not in self.normalizer:
-             if lang == 'zh':
-                 try:
-                     from tn.chinese.normalizer import Normalizer
-                 except:
-                     self.logger.log(logging.WARNING, f'Package WeTextProcessing not found! \
-                         Run: conda install -c conda-forge pynini=2.1.5 && pip install WeTextProcessing')
-                 self.normalizer[lang] = Normalizer().normalize
-             else:
-                 try:
-                     from nemo_text_processing.text_normalization.normalize import Normalizer
-                 except:
-                     self.logger.log(logging.WARNING, f'Package nemo_text_processing not found! \
-                         Run: conda install -c conda-forge pynini=2.1.5 && pip install nemo_text_processing')
-                 self.normalizer[lang] = partial(Normalizer(input_case='cased', lang=lang).normalize, verbose=False, punct_post_process=True)
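The hunks above delete the vendored copy of the upstream 2Noise/ChatTTS project; given the accompanying change to xinference/model/audio/chattts.py in the file list, the library is presumably consumed as an installed dependency from this release on. A hedged sketch of the same Chat API that the removed core.py defines (the output file name is illustrative, soundfile is assumed here only for writing the waveform, and the 24 kHz rate is ChatTTS's usual output rate, not stated in this diff):

    import soundfile as sf  # assumed helper for writing the waveform to disk

    import ChatTTS  # upstream package matching the deleted vendored module

    chat = ChatTTS.Chat()
    # Mirrors the removed load_models(source='huggingface') path: fetches the
    # *.pt and *.yaml checkpoints from https://huggingface.co/2Noise/ChatTTS
    # into the HF cache on first use.
    chat.load_models()

    # infer() returns one numpy waveform per input text, as in the removed infer().
    wavs = chat.infer(["Hello from ChatTTS."])
    sf.write("output.wav", wavs[0].squeeze(), 24000)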