xinference 0.15.0__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (83)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +204 -1
  3. xinference/client/restful/restful_client.py +4 -2
  4. xinference/core/image_interface.py +28 -0
  5. xinference/core/model.py +28 -0
  6. xinference/core/supervisor.py +6 -0
  7. xinference/model/audio/fish_speech.py +9 -9
  8. xinference/model/audio/model_spec.json +9 -9
  9. xinference/model/audio/whisper.py +4 -1
  10. xinference/model/image/core.py +2 -1
  11. xinference/model/image/model_spec.json +16 -4
  12. xinference/model/image/model_spec_modelscope.json +16 -4
  13. xinference/model/image/sdapi.py +136 -0
  14. xinference/model/image/stable_diffusion/core.py +148 -20
  15. xinference/model/llm/__init__.py +8 -0
  16. xinference/model/llm/llm_family.json +393 -0
  17. xinference/model/llm/llm_family.py +3 -1
  18. xinference/model/llm/llm_family_modelscope.json +408 -3
  19. xinference/model/llm/sglang/core.py +3 -0
  20. xinference/model/llm/transformers/chatglm.py +1 -1
  21. xinference/model/llm/transformers/core.py +6 -0
  22. xinference/model/llm/transformers/deepseek_v2.py +340 -0
  23. xinference/model/llm/transformers/qwen2_audio.py +168 -0
  24. xinference/model/llm/transformers/qwen2_vl.py +31 -5
  25. xinference/model/llm/utils.py +104 -84
  26. xinference/model/llm/vllm/core.py +8 -0
  27. xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml +2 -3
  28. xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml +1 -1
  29. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +1 -1
  30. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +1 -1
  31. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +1 -1
  32. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +1 -1
  33. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +1 -1
  34. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +2 -2
  35. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +0 -3
  36. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +169 -198
  37. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +4 -27
  38. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +9 -47
  39. xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +2 -2
  40. xinference/thirdparty/fish_speech/fish_speech/train.py +2 -0
  41. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +12 -10
  42. xinference/thirdparty/fish_speech/tools/api.py +79 -134
  43. xinference/thirdparty/fish_speech/tools/commons.py +35 -0
  44. xinference/thirdparty/fish_speech/tools/download_models.py +3 -3
  45. xinference/thirdparty/fish_speech/tools/file.py +17 -0
  46. xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +1 -1
  47. xinference/thirdparty/fish_speech/tools/llama/generate.py +29 -24
  48. xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +1 -1
  49. xinference/thirdparty/fish_speech/tools/llama/quantize.py +2 -2
  50. xinference/thirdparty/fish_speech/tools/msgpack_api.py +34 -0
  51. xinference/thirdparty/fish_speech/tools/post_api.py +85 -44
  52. xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +1 -1
  53. xinference/thirdparty/fish_speech/tools/smart_pad.py +16 -3
  54. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +2 -2
  55. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +4 -2
  56. xinference/thirdparty/fish_speech/tools/webui.py +12 -146
  57. xinference/types.py +7 -4
  58. xinference/web/ui/build/asset-manifest.json +6 -6
  59. xinference/web/ui/build/index.html +1 -1
  60. xinference/web/ui/build/static/css/{main.632e9148.css → main.5061c4c3.css} +2 -2
  61. xinference/web/ui/build/static/css/main.5061c4c3.css.map +1 -0
  62. xinference/web/ui/build/static/js/{main.9cfafbd6.js → main.754740c0.js} +3 -3
  63. xinference/web/ui/build/static/js/main.754740c0.js.map +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +1 -0
  65. xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +1 -0
  66. {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/METADATA +9 -3
  67. {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/RECORD +72 -74
  68. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +0 -442
  69. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +0 -44
  70. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +0 -115
  71. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +0 -225
  72. xinference/thirdparty/fish_speech/tools/auto_rerank.py +0 -159
  73. xinference/thirdparty/fish_speech/tools/gen_ref.py +0 -36
  74. xinference/thirdparty/fish_speech/tools/merge_asr_files.py +0 -55
  75. xinference/web/ui/build/static/css/main.632e9148.css.map +0 -1
  76. xinference/web/ui/build/static/js/main.9cfafbd6.js.map +0 -1
  77. xinference/web/ui/node_modules/.cache/babel-loader/01d6d198156bacbd436c51435edbd4b2cacd47a79db929105eba30f74b67d48d.json +0 -1
  78. xinference/web/ui/node_modules/.cache/babel-loader/59eb25f514afcc4fefd1b309d192b2455f1e0aec68a9de598ca4b2333fe2c774.json +0 -1
  79. /xinference/web/ui/build/static/js/{main.9cfafbd6.js.LICENSE.txt → main.754740c0.js.LICENSE.txt} +0 -0
  80. {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/LICENSE +0 -0
  81. {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/WHEEL +0 -0
  82. {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/entry_points.txt +0 -0
  83. {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/utils.py
@@ -301,99 +301,89 @@ class ChatModelMixin:
         }
 
     @staticmethod
-    def _eval_glm_chat_arguments(c):
+    def _eval_glm_chat_arguments(c) -> List[Tuple]:
+        """
+        Currently, glm4 tool call only supports one function
+        """
         try:
             if isinstance(c, dict):
-                return None, c["name"], c["arguments"]
+                return [(None, c["name"], c["arguments"])]
         except KeyError:
             logger.error("Can't parse glm output: %s", c)
-            return str(c), None, None
+            return [(str(c), None, None)]
         else:
-            return str(c), None, None
+            return [(str(c), None, None)]
 
-    @staticmethod
-    def _eval_qwen_chat_arguments(c):
+    @classmethod
+    def _handle_qwen_tool_result(cls, text: str) -> List[Tuple]:
+        text: str = text.strip()  # type: ignore
+        contents: List[str] = text.split(QWEN_TOOL_CALL_SYMBOLS[1])
+        results: List[Tuple] = []
+        for content in contents:
+            content = content.strip()
+            if content:
+                if content.startswith(QWEN_TOOL_CALL_SYMBOLS[0]):
+                    content = content[len(QWEN_TOOL_CALL_SYMBOLS[0]) :]
+                content = content.strip()
+                try:
+                    res = json.loads(content)
+                    results.append((None, res["name"], res["arguments"]))
+                except Exception as e:
+                    logger.error(
+                        "Can't parse single qwen tool call output: %s. Error: %s",
+                        content,
+                        e,
+                    )
+                    results.append((content, None, None))
+        return results
+
+    @classmethod
+    def _eval_qwen_chat_arguments(cls, c) -> List[Tuple]:
         text = c["choices"][0]["text"]
-        text: str = text.strip()
-        if text.startswith(QWEN_TOOL_CALL_SYMBOLS[0]):
-            text = text[len(QWEN_TOOL_CALL_SYMBOLS[0]) :]
-        if text.endswith(QWEN_TOOL_CALL_SYMBOLS[1]):
-            text = text[: -len(QWEN_TOOL_CALL_SYMBOLS[1])]
-        text = text.strip()
-        try:
-            content = json.loads(text)
-            return None, content["name"], content["arguments"]
-        except Exception as e:
-            logger.error("Can't parse qwen tool call output: %s. Error: %s", text, e)
-            return text, None, None
+        return cls._handle_qwen_tool_result(text)
 
     @classmethod
     def _eval_tool_arguments(cls, model_family, c):
         family = model_family.model_family or model_family.model_name
         if family in GLM4_TOOL_CALL_FAMILY:
-            content, func, args = cls._eval_glm_chat_arguments(c)
+            result = cls._eval_glm_chat_arguments(c)
         elif family in QWEN_TOOL_CALL_FAMILY:
-            content, func, args = cls._eval_qwen_chat_arguments(c)
+            result = cls._eval_qwen_chat_arguments(c)
         else:
             raise Exception(
                 f"Model {model_family.model_name} is not support tool calls."
             )
-        logger.debug("Tool call content: %s, func: %s, args: %s", content, func, args)
-        return content, func, args
-
-    @classmethod
-    def _tools_token_filter(cls, model_family):
-        """
-        Generates a filter function for Qwen series models to retain outputs after "\nFinal Answer:".
-
-        Returns:
-            A function that takes tokens (string output by the model so far) and delta (new tokens added) as input,
-            returns the part after "\nFinal Answer:" if found, else returns delta.
-        """
-        family = model_family.model_family or model_family.model_name
-        if family in QWEN_TOOL_CALL_FAMILY:
-            # Encapsulating function to reset 'found' after each call
-            found = False
-
-            def process_tokens(tokens: str, delta: str):
-                nonlocal found
-                # Once "Final Answer:" is found, future tokens are allowed.
-                if found:
-                    return delta
-                # Check if the token ends with "\nFinal Answer:" and update `found`.
-                final_answer_idx = tokens.lower().rfind("\nfinal answer:")
-                if final_answer_idx != -1:
-                    found = True
-                    return tokens[final_answer_idx + len("\nfinal answer:") :]
-                return ""
-
-            return process_tokens
-        else:
-            return lambda tokens, delta: delta
+        logger.debug(f"Tool call content: {result}")
+        return result
 
     @classmethod
     def _tool_calls_completion_chunk(cls, model_family, model_uid, c):
         _id = str(uuid.uuid4())
-        content, func, args = cls._eval_tool_arguments(model_family, c)
-        if func:
-            d = {
-                "role": "assistant",
-                "content": content,
-                "tool_calls": [
-                    {
-                        "id": f"call_{_id}",
-                        "type": "function",
-                        "function": {
-                            "name": func,
-                            "arguments": json.dumps(args, ensure_ascii=False),
-                        },
-                    }
-                ],
-            }
-            finish_reason = "tool_calls"
-        else:
-            d = {"role": "assistant", "content": content, "tool_calls": []}
-            finish_reason = "stop"
+        tool_result = cls._eval_tool_arguments(model_family, c)
+        tool_calls = []
+        failed_contents = []
+        for content, func, args in tool_result:
+            if func:
+                tool_calls.append(
+                    [
+                        {
+                            "id": f"call_{_id}",
+                            "type": "function",
+                            "function": {
+                                "name": func,
+                                "arguments": json.dumps(args, ensure_ascii=False),
+                            },
+                        }
+                    ]
+                )
+            else:
+                failed_contents.append(content)
+        finish_reason = "tool_calls" if tool_calls else "stop"
+        d = {
+            "role": "assistant",
+            "content": ". ".join(failed_contents) if failed_contents else None,
+            "tool_calls": tool_calls,
+        }
         try:
             usage = c.get("usage")
             assert "prompt_tokens" in usage
@@ -422,12 +412,13 @@ class ChatModelMixin:
     @classmethod
     def _tool_calls_completion(cls, model_family, model_uid, c):
         _id = str(uuid.uuid4())
-        content, func, args = cls._eval_tool_arguments(model_family, c)
-        if func:
-            m = {
-                "role": "assistant",
-                "content": content,
-                "tool_calls": [
+        tool_result = cls._eval_tool_arguments(model_family, c)
+
+        tool_calls = []
+        failed_contents = []
+        for content, func, args in tool_result:
+            if func:
+                tool_calls.append(
                     {
                         "id": f"call_{_id}",
                         "type": "function",
@@ -436,12 +427,15 @@ class ChatModelMixin:
                             "arguments": json.dumps(args, ensure_ascii=False),
                         },
                     }
-                ],
-            }
-            finish_reason = "tool_calls"
-        else:
-            m = {"role": "assistant", "content": content, "tool_calls": []}
-            finish_reason = "stop"
+                )
+            else:
+                failed_contents.append(content)
+        finish_reason = "tool_calls" if tool_calls else "stop"
+        m = {
+            "role": "assistant",
+            "content": ". ".join(failed_contents) if failed_contents else None,
+            "tool_calls": tool_calls,
+        }
         try:
             usage = c.get("usage")
             assert "prompt_tokens" in usage
@@ -555,6 +549,32 @@ def generate_completion_chunk(
     )
 
 
+def generate_completion(
+    model_uid: str,
+    response: str,
+    prompt_tokens=-1,
+    completion_tokens=-1,
+    total_tokens=-1,
+    finish_reason="stop",
+) -> Completion:
+    return Completion(
+        id=str(uuid.uuid1()),
+        object="text_completion",
+        created=int(time.time()),
+        model=model_uid,
+        choices=[
+            CompletionChoice(
+                text=response, index=0, logprobs=None, finish_reason=finish_reason
+            )
+        ],
+        usage=CompletionUsage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=total_tokens,
+        ),
+    )
+
+
 def generate_chat_completion(
     model_uid: str,
     response: str,
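The new generate_completion helper mirrors the existing generate_chat_completion but wraps plain text completions. A hedged usage sketch, assuming Completion and its companions are the TypedDicts from xinference/types.py:

    completion = generate_completion(
        "my-model-uid",          # hypothetical model uid
        "Hello, world!",
        prompt_tokens=5,
        completion_tokens=4,
        total_tokens=9,
    )
    # Completion is a TypedDict, so fields are plain dict keys:
    assert completion["choices"][0]["text"] == "Hello, world!"
    assert completion["usage"]["total_tokens"] == 9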
xinference/model/llm/vllm/core.py
@@ -104,6 +104,7 @@ VLLM_SUPPORTED_MODELS = [
     "code-llama-python",
     "deepseek",
     "deepseek-coder",
+    "yi-coder",
 ]
 VLLM_SUPPORTED_CHAT_MODELS = [
     "llama-2-chat",
@@ -130,6 +131,7 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "codegeex4",
     "deepseek-chat",
     "deepseek-coder-instruct",
+    "yi-coder-chat",
 ]
 if VLLM_INSTALLED and vllm.__version__ >= "0.3.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen1.5-chat")
@@ -149,6 +151,12 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.4.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen2-moe-instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("c4ai-command-r-v01")
 
+if VLLM_INSTALLED and vllm.__version__ >= "0.5.1":
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2-chat")
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2-chat-0628")
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2.5")
+
+
 if VLLM_INSTALLED and vllm.__version__ >= "0.5.3":
     VLLM_SUPPORTED_CHAT_MODELS.append("gemma-2-it")
     VLLM_SUPPORTED_CHAT_MODELS.append("mistral-nemo-instruct")
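Note that these gates compare vllm.__version__ as a plain string, which happens to hold for the versions listed but is not version-aware: lexicographically, "0.10.0" sorts below "0.5.1". A short sketch of the pitfall and a packaging-based alternative (an illustration, not what xinference ships):

    from packaging.version import Version

    # String comparison fails once a component reaches two digits:
    assert ("0.10.0" >= "0.5.1") is False   # lexicographic: '1' < '5'

    # Version-aware comparison gets it right:
    assert Version("0.10.0") >= Version("0.5.1")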
xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml
@@ -22,13 +22,12 @@ head:
   resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
   num_mels: 512
   upsample_initial_channel: 512
-  use_template: false
   pre_conv_kernel_size: 13
   post_conv_kernel_size: 13
 quantizer:
   _target_: fish_speech.models.vqgan.modules.fsq.DownsampleFiniteScalarQuantize
   input_dim: 512
-  n_groups: 4
+  n_groups: 8
   n_codebooks: 1
   levels: [8, 5, 5, 5]
-  downsample_factor: [2]
+  downsample_factor: [2, 2]
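The quantizer change doubles both the group count (4 to 8) and the temporal downsampling: assuming each downsample_factor entry is a per-stage stride, as is typical for FSQ downsampling stacks, the overall stride is their product:

    import math

    old_stride = math.prod([2])     # -> 2x temporal downsampling
    new_stride = math.prod([2, 2])  # -> 4x temporal downsampling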
xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml
@@ -4,7 +4,7 @@ defaults:
 
 project: text2semantic_finetune_dual_ar
 max_length: 4096
-pretrained_ckpt_path: checkpoints/fish-speech-1.2-sft
+pretrained_ckpt_path: checkpoints/fish-speech-1.4
 
 # Lightning Trainer
 trainer:
xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json
@@ -72,7 +72,7 @@
   "Put your text here.": "Put your text here.",
   "Reference Audio": "Reference Audio",
   "Reference Text": "Reference Text",
-  "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.",
+  "Related code and weights are released under CC BY-NC-SA 4.0 License.": "Related code and weights are released under CC BY-NC-SA 4.0 License.",
   "Remove Selected Data": "Remove Selected Data",
   "Removed path successfully!": "Removed path successfully!",
   "Repetition Penalty": "Repetition Penalty",
xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json
@@ -72,7 +72,7 @@
   "Put your text here.": "Ponga su texto aquí.",
   "Reference Audio": "Audio de Referencia",
   "Reference Text": "Texto de Referencia",
-  "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "El código relacionado se publica bajo la Licencia BSD-3-Clause, y los pesos se publican bajo la Licencia CC BY-NC-SA 4.0.",
+  "Related code and weights are released under CC BY-NC-SA 4.0 License.": "El código relacionado y los pesos se publican bajo la Licencia CC BY-NC-SA 4.0.",
   "Remove Selected Data": "Eliminar Datos Seleccionados",
   "Removed path successfully!": "¡Ruta eliminada exitosamente!",
   "Repetition Penalty": "Penalización por Repetición",
xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json
@@ -72,7 +72,7 @@
   "Put your text here.": "ここにテキストを入力してください。",
   "Reference Audio": "リファレンスオーディオ",
   "Reference Text": "リファレンステキスト",
-  "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "関連コードはBSD-3-Clauseライセンスの下でリリースされ、重みはCC BY-NC-SA 4.0ライセンスの下でリリースされます。",
+  "Related code and weights are released under CC BY-NC-SA 4.0 License.": "関連コードと重みはCC BY-NC-SA 4.0ライセンスの下でリリースされます。",
   "Remove Selected Data": "選択したデータを削除",
   "Removed path successfully!": "パスの削除に成功しました!",
   "Repetition Penalty": "反復ペナルティ",
xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json
@@ -84,7 +84,7 @@
   "Reference Text": "Texto de Referência",
   "warning": "Aviso",
   "Pre-processing begins...": "O pré-processamento começou!",
-  "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "O código relacionado é licenciado sob a Licença BSD-3-Clause, e os pesos sob a Licença CC BY-NC-SA 4.0.",
+  "Related code and weights are released under CC BY-NC-SA 4.0 License.": "O código relacionado e os pesos são licenciados sob a Licença CC BY-NC-SA 4.0.",
   "Remove Selected Data": "Remover Dados Selecionados",
   "Removed path successfully!": "Caminho removido com sucesso!",
   "Repetition Penalty": "Penalidade de Repetição",
xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json
@@ -72,7 +72,7 @@
   "Put your text here.": "在此处输入文本.",
   "Reference Audio": "参考音频",
   "Reference Text": "参考文本",
-  "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "相关代码使用 BSD-3-Clause 许可证发布,权重使用 CC BY-NC-SA 4.0 许可证发布.",
+  "Related code and weights are released under CC BY-NC-SA 4.0 License.": "相关代码和权重使用 CC BY-NC-SA 4.0 许可证发布.",
   "Remove Selected Data": "移除选中数据",
   "Removed path successfully!": "移除路径成功!",
   "Repetition Penalty": "重复惩罚",
xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py
@@ -353,7 +353,7 @@ class BaseTransformer(nn.Module):
 
         if "int8" in str(Path(path)):
             logger.info("Using int8 weight-only quantization!")
-            from ...tools.llama.quantize import WeightOnlyInt8QuantHandler
+            from tools.llama.quantize import WeightOnlyInt8QuantHandler
 
             simple_quantizer = WeightOnlyInt8QuantHandler(model)
             model = simple_quantizer.convert_for_runtime()
@@ -363,7 +363,7 @@ class BaseTransformer(nn.Module):
             path_comps = path.name.split("-")
             assert path_comps[-2].startswith("g")
             groupsize = int(path_comps[-2][1:])
-            from ...tools.llama.quantize import WeightOnlyInt4QuantHandler
+            from tools.llama.quantize import WeightOnlyInt4QuantHandler
 
             simple_quantizer = WeightOnlyInt4QuantHandler(model, groupsize)
             model = simple_quantizer.convert_for_runtime()
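Both hunks swap the relative ...tools.llama.quantize import for a top-level tools package, which only resolves when the vendored fish_speech root (the directory containing tools/) is importable. A hedged sketch of what the caller must guarantee; the path used here is hypothetical:

    import sys
    from pathlib import Path

    # Hypothetical: put the fish_speech checkout root on sys.path so that
    # `tools` is importable as a top-level package.
    fish_speech_root = Path("/path/to/xinference/thirdparty/fish_speech")
    if str(fish_speech_root) not in sys.path:
        sys.path.insert(0, str(fish_speech_root))

    from tools.llama.quantize import WeightOnlyInt8QuantHandler  # noqa: E402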
xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py
@@ -1,3 +0,0 @@
-from .lit_module import VQGAN
-
-__all__ = ["VQGAN"]