xinference 0.16.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff compares the content of publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Potentially problematic release: this version of xinference might be problematic.

Files changed (60):
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +62 -11
  3. xinference/client/restful/restful_client.py +8 -2
  4. xinference/conftest.py +0 -8
  5. xinference/constants.py +2 -0
  6. xinference/core/model.py +44 -5
  7. xinference/core/supervisor.py +13 -7
  8. xinference/core/utils.py +76 -12
  9. xinference/core/worker.py +5 -4
  10. xinference/deploy/cmdline.py +5 -0
  11. xinference/deploy/utils.py +7 -4
  12. xinference/model/audio/model_spec.json +2 -2
  13. xinference/model/image/stable_diffusion/core.py +5 -2
  14. xinference/model/llm/core.py +1 -3
  15. xinference/model/llm/llm_family.json +263 -4
  16. xinference/model/llm/llm_family_modelscope.json +302 -0
  17. xinference/model/llm/mlx/core.py +45 -2
  18. xinference/model/llm/vllm/core.py +2 -1
  19. xinference/model/rerank/core.py +11 -4
  20. xinference/thirdparty/fish_speech/fish_speech/conversation.py +254 -0
  21. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
  22. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
  23. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
  24. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
  25. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
  26. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +76 -11
  27. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
  28. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
  29. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +32 -1
  30. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
  31. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
  32. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
  33. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1 -1
  34. xinference/thirdparty/fish_speech/tools/api.py +578 -75
  35. xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
  36. xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
  37. xinference/thirdparty/fish_speech/tools/llama/generate.py +393 -9
  38. xinference/thirdparty/fish_speech/tools/msgpack_api.py +90 -29
  39. xinference/thirdparty/fish_speech/tools/post_api.py +37 -15
  40. xinference/thirdparty/fish_speech/tools/schema.py +187 -0
  41. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
  42. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
  43. xinference/thirdparty/fish_speech/tools/webui.py +138 -75
  44. {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/METADATA +26 -3
  45. {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/RECORD +49 -56
  46. {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/WHEEL +1 -1
  47. xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
  48. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  49. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  50. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  51. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  52. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  53. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  54. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  55. xinference/thirdparty/fish_speech/tools/commons.py +0 -35
  56. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  57. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  58. {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/LICENSE +0 -0
  59. {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/entry_points.txt +0 -0
  60. {xinference-0.16.2.dist-info → xinference-1.0.0.dist-info}/top_level.txt +0 -0
--- a/xinference/deploy/cmdline.py
+++ b/xinference/deploy/cmdline.py
@@ -43,6 +43,7 @@ from .utils import (
     get_log_file,
     get_timestamp_ms,
     handle_click_args_type,
+    set_envs,
 )

 try:
@@ -106,6 +107,8 @@ def start_local_cluster(
         XINFERENCE_LOG_MAX_BYTES,
     )
     logging.config.dictConfig(dict_config)  # type: ignore
+    # refer to https://huggingface.co/docs/transformers/main_classes/logging
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())

     main(
         host=host,
@@ -280,6 +283,7 @@ def supervisor(
         XINFERENCE_LOG_MAX_BYTES,
     )
     logging.config.dictConfig(dict_config)  # type: ignore
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())

     main(
         host=host,
@@ -342,6 +346,7 @@ def worker(
         XINFERENCE_LOG_MAX_BYTES,
     )
     logging.config.dictConfig(dict_config)  # type: ignore
+    set_envs("TRANSFORMERS_VERBOSITY", log_level.lower())

     endpoint = get_endpoint(endpoint)

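All three commands (start_local_cluster, supervisor, worker) now propagate xinference's --log-level to the transformers library through the TRANSFORMERS_VERBOSITY environment variable. A minimal sketch of what that variable does (assuming transformers is installed; its behavior is documented at the URL in the diff comment):

    import os

    # The variable must be set before transformers is imported; processes
    # spawned afterwards inherit it, which is why the diff sets it right
    # after logging is configured.
    os.environ["TRANSFORMERS_VERBOSITY"] = "debug"

    from transformers.utils import logging as hf_logging

    print(hf_logging.get_verbosity())  # 10, i.e. logging.DEBUG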
--- a/xinference/deploy/utils.py
+++ b/xinference/deploy/utils.py
@@ -134,10 +134,6 @@ def get_config_dict(
                 "propagate": False,
             },
         },
-        "root": {
-            "level": "WARN",
-            "handlers": ["stream_handler", "file_handler"],
-        },
     }
     return config_dict

@@ -220,3 +216,10 @@ def handle_click_args_type(arg: str) -> Any:
         pass

     return arg
+
+
+def set_envs(key: str, value: str):
+    """
+    Environment variables are set by the parent process and inherited by child processes
+    """
+    os.environ[key] = value
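The helper is one line because it leans on process semantics: os.environ mutations made in the parent are copied into the environment of every child spawned afterwards. A self-contained check of that behavior (a demo, not code from the package):

    import os
    import subprocess
    import sys

    # Parent sets the variable...
    os.environ["TRANSFORMERS_VERBOSITY"] = "info"

    # ...and a child started afterwards sees the same value.
    child = subprocess.run(
        [sys.executable, "-c", "import os; print(os.environ['TRANSFORMERS_VERBOSITY'])"],
        capture_output=True,
        text=True,
    )
    print(child.stdout.strip())  # -> info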
--- a/xinference/model/audio/model_spec.json
+++ b/xinference/model/audio/model_spec.json
@@ -127,7 +127,7 @@
         "model_name": "ChatTTS",
         "model_family": "ChatTTS",
         "model_id": "2Noise/ChatTTS",
-        "model_revision": "3b34118f6d25850440b8901cef3e71c6ef8619c8",
+        "model_revision": "1a3c04a8b0651689bd9242fbb55b1f4b5a9aef84",
         "model_ability": "text-to-audio",
         "multilingual": true
     },
@@ -159,7 +159,7 @@
         "model_name": "FishSpeech-1.4",
         "model_family": "FishAudio",
         "model_id": "fishaudio/fish-speech-1.4",
-        "model_revision": "3c49651b8e583b6b13f55e375432e0d57e1aa84d",
+        "model_revision": "069c573759936b35191d3380deb89183c0656f59",
         "model_ability": "text-to-audio",
         "multilingual": true
     }
--- a/xinference/model/image/stable_diffusion/core.py
+++ b/xinference/model/image/stable_diffusion/core.py
@@ -17,9 +17,11 @@ import gc
 import inspect
 import itertools
 import logging
+import os
 import re
 import sys
 import warnings
+from glob import glob
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

 import PIL.Image
@@ -194,8 +196,9 @@ class DiffusionModel(SDAPIDiffusionModelMixin):
         if sys.platform != "darwin" and torch_dtype is None:
             # The following params crashes on Mac M2
             self._torch_dtype = self._kwargs["torch_dtype"] = torch.float16
-            self._kwargs["variant"] = "fp16"
-            self._kwargs["use_safetensors"] = True
+            self._kwargs["use_safetensors"] = any(
+                glob(os.path.join(self._model_path, "*/*.safetensors"))
+            )
         if isinstance(torch_dtype, str):
             self._kwargs["torch_dtype"] = getattr(torch, torch_dtype)

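Instead of unconditionally requesting the fp16 variant, the loader now asks for safetensors only when at least one *.safetensors file actually exists one directory level below the model path (diffusers pipelines keep weights in per-component subfolders such as unet/ and vae/). The same check in isolation (the path is a placeholder):

    import os
    from glob import glob

    def has_safetensors(model_path: str) -> bool:
        # True as soon as any per-component subfolder (unet/, vae/,
        # text_encoder/, ...) contains a .safetensors weight file.
        return any(glob(os.path.join(model_path, "*/*.safetensors")))

    print(has_safetensors("/path/to/stable-diffusion"))  # placeholder path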
--- a/xinference/model/llm/core.py
+++ b/xinference/model/llm/core.py
@@ -52,9 +52,7 @@ class LLM(abc.ABC):
         *args,
         **kwargs,
     ):
-        self.model_uid, self.replica, self.rep_id = parse_replica_model_uid(
-            replica_model_uid
-        )
+        self.model_uid, self.rep_id = parse_replica_model_uid(replica_model_uid)
         self.model_family = model_family
         self.model_spec = model_spec
         self.quantization = quantization
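parse_replica_model_uid now returns a two-tuple; the replica count is no longer carried in the uid. The helper's body is not part of this diff, so the following is only a hypothetical sketch of the new contract (the "<model_uid>-<rep_id>" format is an assumption):

    # Hypothetical sketch, NOT the package's implementation: assumes the
    # replica uid has the form "<model_uid>-<rep_id>".
    def parse_replica_model_uid(replica_model_uid: str):
        model_uid, _, rep_id = replica_model_uid.rpartition("-")
        return model_uid, int(rep_id)

    model_uid, rep_id = parse_replica_model_uid("my-model-0")
    print(model_uid, rep_id)  # -> my-model 0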
--- a/xinference/model/llm/llm_family.json
+++ b/xinference/model/llm/llm_family.json
@@ -1312,6 +1312,93 @@
             "<|eom_id|>"
         ]
     },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "llama-3.2-vision-instruct",
+        "model_lang": [
+            "en",
+            "de",
+            "fr",
+            "it",
+            "pt",
+            "hi",
+            "es",
+            "th"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 11,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "meta-llama/Llama-3.2-11B-Vision-Instruct"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 90,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "meta-llama/Llama-3.2-90B-Vision-Instruct"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{% if loop.index0 == 0 %}{{ bos_token }}{% endif %}{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<|image|>' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{ '<|eot_id|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+        "stop_token_ids": [
+            128001,
+            128008,
+            128009
+        ],
+        "stop": [
+            "<|end_of_text|>",
+            "<|eot_id|>",
+            "<|eom_id|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "llama-3.2-vision",
+        "model_lang": [
+            "en",
+            "de",
+            "fr",
+            "it",
+            "pt",
+            "hi",
+            "es",
+            "th"
+        ],
+        "model_ability": [
+            "generate",
+            "vision"
+        ],
+        "model_description": "The Llama 3.2-Vision instruction-tuned models are optimized for visual recognition, image reasoning, captioning, and answering general questions about an image...",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 11,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "meta-llama/Meta-Llama-3.2-11B-Vision"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 90,
+                "quantizations": [
+                    "none"
+                ],
+                "model_id": "meta-llama/Meta-Llama-3.2-90B-Vision"
+            }
+        ]
+    },
     {
         "version": 1,
         "context_length": 2048,
@@ -8118,6 +8205,16 @@
         ],
         "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
         "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "0_5",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-0.5B"
+            },
             {
                 "model_format": "pytorch",
                 "model_size_in_billions": "1_5",
@@ -8126,8 +8223,17 @@
                     "8-bit",
                     "none"
                 ],
-                "model_id": "Qwen/Qwen2.5-Coder-1.5B",
-                "model_revision": "d3586cfe793730945f8e4d7ef31032a3ee50247d"
+                "model_id": "Qwen/Qwen2.5-Coder-1.5B"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "3",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-3B"
             },
             {
                 "model_format": "pytorch",
@@ -8137,8 +8243,27 @@
                     "8-bit",
                     "none"
                 ],
-                "model_id": "Qwen/Qwen2.5-Coder-7B",
-                "model_revision": "30b6a7e874a78d46b80fa1db3194ea427dd41b08"
+                "model_id": "Qwen/Qwen2.5-Coder-7B"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 14,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-14B"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-32B"
             }
         ]
     },
@@ -8156,6 +8281,16 @@
         ],
         "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
         "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "0_5",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct"
+            },
             {
                 "model_format": "pytorch",
                 "model_size_in_billions": "1_5",
@@ -8166,6 +8301,16 @@
                 ],
                 "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct"
             },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "3",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct"
+            },
             {
                 "model_format": "pytorch",
                 "model_size_in_billions": 7,
@@ -8176,6 +8321,53 @@
                 ],
                 "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
             },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 14,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": "0_5",
+                "quantizations": [
+                    "Int4",
+                    "Int8"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-{quantization}"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": "1_5",
+                "quantizations": [
+                    "Int4",
+                    "Int8"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-{quantization}"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": "3",
+                "quantizations": [
+                    "Int4",
+                    "Int8"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-{quantization}"
+            },
             {
                 "model_format": "gptq",
                 "model_size_in_billions": "7",
@@ -8185,6 +8377,73 @@
                 ],
                 "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}"
             },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": "14",
+                "quantizations": [
+                    "Int4",
+                    "Int8"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-{quantization}"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": "32",
+                "quantizations": [
+                    "Int4",
+                    "Int8"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-{quantization}"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": "0_5",
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": "1_5",
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": "3",
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-AWQ"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": "7",
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-AWQ"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": "14",
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-AWQ"
+            },
+            {
+                "model_format": "awq",
+                "model_size_in_billions": "32",
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
+            },
+
             {
                 "model_format": "ggufv2",
                 "model_size_in_billions": "1_5"