xinference 0.13.1__py3-none-any.whl → 0.13.3__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

This version of xinference has been flagged as a potentially problematic release.

Files changed (82)
  1. xinference/__init__.py +0 -1
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +99 -5
  4. xinference/client/restful/restful_client.py +98 -1
  5. xinference/core/chat_interface.py +2 -2
  6. xinference/core/model.py +85 -26
  7. xinference/core/scheduler.py +4 -4
  8. xinference/model/audio/chattts.py +40 -8
  9. xinference/model/audio/core.py +5 -2
  10. xinference/model/audio/cosyvoice.py +136 -0
  11. xinference/model/audio/model_spec.json +24 -0
  12. xinference/model/audio/model_spec_modelscope.json +27 -0
  13. xinference/model/flexible/launchers/__init__.py +1 -0
  14. xinference/model/flexible/launchers/image_process_launcher.py +70 -0
  15. xinference/model/image/core.py +3 -0
  16. xinference/model/image/model_spec.json +21 -0
  17. xinference/model/image/stable_diffusion/core.py +49 -7
  18. xinference/model/llm/llm_family.json +1065 -106
  19. xinference/model/llm/llm_family.py +26 -6
  20. xinference/model/llm/llm_family_csghub.json +39 -0
  21. xinference/model/llm/llm_family_modelscope.json +460 -47
  22. xinference/model/llm/pytorch/chatglm.py +243 -5
  23. xinference/model/llm/pytorch/cogvlm2.py +1 -1
  24. xinference/model/llm/sglang/core.py +7 -2
  25. xinference/model/llm/utils.py +78 -1
  26. xinference/model/llm/vllm/core.py +11 -0
  27. xinference/thirdparty/cosyvoice/__init__.py +0 -0
  28. xinference/thirdparty/cosyvoice/bin/__init__.py +0 -0
  29. xinference/thirdparty/cosyvoice/bin/inference.py +114 -0
  30. xinference/thirdparty/cosyvoice/bin/train.py +136 -0
  31. xinference/thirdparty/cosyvoice/cli/__init__.py +0 -0
  32. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +83 -0
  33. xinference/thirdparty/cosyvoice/cli/frontend.py +168 -0
  34. xinference/thirdparty/cosyvoice/cli/model.py +60 -0
  35. xinference/thirdparty/cosyvoice/dataset/__init__.py +0 -0
  36. xinference/thirdparty/cosyvoice/dataset/dataset.py +160 -0
  37. xinference/thirdparty/cosyvoice/dataset/processor.py +369 -0
  38. xinference/thirdparty/cosyvoice/flow/__init__.py +0 -0
  39. xinference/thirdparty/cosyvoice/flow/decoder.py +222 -0
  40. xinference/thirdparty/cosyvoice/flow/flow.py +135 -0
  41. xinference/thirdparty/cosyvoice/flow/flow_matching.py +138 -0
  42. xinference/thirdparty/cosyvoice/flow/length_regulator.py +49 -0
  43. xinference/thirdparty/cosyvoice/hifigan/__init__.py +0 -0
  44. xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +55 -0
  45. xinference/thirdparty/cosyvoice/hifigan/generator.py +391 -0
  46. xinference/thirdparty/cosyvoice/llm/__init__.py +0 -0
  47. xinference/thirdparty/cosyvoice/llm/llm.py +206 -0
  48. xinference/thirdparty/cosyvoice/transformer/__init__.py +0 -0
  49. xinference/thirdparty/cosyvoice/transformer/activation.py +84 -0
  50. xinference/thirdparty/cosyvoice/transformer/attention.py +326 -0
  51. xinference/thirdparty/cosyvoice/transformer/convolution.py +145 -0
  52. xinference/thirdparty/cosyvoice/transformer/decoder.py +396 -0
  53. xinference/thirdparty/cosyvoice/transformer/decoder_layer.py +132 -0
  54. xinference/thirdparty/cosyvoice/transformer/embedding.py +293 -0
  55. xinference/thirdparty/cosyvoice/transformer/encoder.py +472 -0
  56. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +236 -0
  57. xinference/thirdparty/cosyvoice/transformer/label_smoothing_loss.py +96 -0
  58. xinference/thirdparty/cosyvoice/transformer/positionwise_feed_forward.py +115 -0
  59. xinference/thirdparty/cosyvoice/transformer/subsampling.py +383 -0
  60. xinference/thirdparty/cosyvoice/utils/__init__.py +0 -0
  61. xinference/thirdparty/cosyvoice/utils/class_utils.py +70 -0
  62. xinference/thirdparty/cosyvoice/utils/common.py +103 -0
  63. xinference/thirdparty/cosyvoice/utils/executor.py +110 -0
  64. xinference/thirdparty/cosyvoice/utils/file_utils.py +41 -0
  65. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +125 -0
  66. xinference/thirdparty/cosyvoice/utils/mask.py +227 -0
  67. xinference/thirdparty/cosyvoice/utils/scheduler.py +739 -0
  68. xinference/thirdparty/cosyvoice/utils/train_utils.py +289 -0
  69. xinference/web/ui/build/asset-manifest.json +3 -3
  70. xinference/web/ui/build/index.html +1 -1
  71. xinference/web/ui/build/static/js/{main.95c1d652.js → main.2ef0cfaf.js} +3 -3
  72. xinference/web/ui/build/static/js/main.2ef0cfaf.js.map +1 -0
  73. xinference/web/ui/node_modules/.cache/babel-loader/b6807ecc0c231fea699533518a0eb2a2bf68a081ce00d452be40600dbffa17a7.json +1 -0
  74. {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/METADATA +18 -8
  75. {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/RECORD +80 -36
  76. xinference/web/ui/build/static/js/main.95c1d652.js.map +0 -1
  77. xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +0 -1
  78. /xinference/web/ui/build/static/js/{main.95c1d652.js.LICENSE.txt → main.2ef0cfaf.js.LICENSE.txt} +0 -0
  79. {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/LICENSE +0 -0
  80. {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/WHEEL +0 -0
  81. {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/entry_points.txt +0 -0
  82. {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/top_level.txt +0 -0
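
The headline functional addition between 0.13.1 and 0.13.3 is the CosyVoice text-to-speech backend: the vendored package under xinference/thirdparty/cosyvoice/, the new xinference/model/audio/cosyvoice.py, and new audio model specs. Below is a minimal usage sketch against the Python RESTful client. It is hedged: it assumes a local server on the default port, assumes "CosyVoice-300M-SFT" is among the spec names added in model_spec.json (the spec contents are not shown in this diff), and assumes the 0.13-era client methods launch_model/get_model/speech.

    from xinference.client import Client

    client = Client("http://127.0.0.1:9997")  # assumes a locally running xinference server

    # Audio models are launched with model_type="audio"; the spec name is an
    # assumption based on the CosyVoice entries added in model_spec.json.
    uid = client.launch_model(model_name="CosyVoice-300M-SFT", model_type="audio")
    model = client.get_model(uid)

    # speech() is assumed to return the synthesized audio as bytes,
    # as with the existing ChatTTS audio models.
    audio = model.speech("Hello from CosyVoice.")
    with open("cosyvoice-demo.mp3", "wb") as f:
        f.write(audio)

The diff hunks that follow appear to come from xinference/model/llm/llm_family.json, the LLM registry (+1065 −106).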
@@ -819,7 +819,7 @@
  "none"
  ],
  "model_id": "THUDM/glm-4-9b-chat",
- "model_revision": "b84dc74294ccd507a3d78bde8aebf628221af9bd"
+ "model_revision": "76f3474a854145aa4a9ed2612fee9bc8d4a8966b"
  },
  {
  "model_format": "ggufv2",
@@ -983,6 +983,65 @@
  ]
  }
  },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "codegeex4",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "the open-source version of the latest CodeGeeX4 model series",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "THUDM/codegeex4-all-9b",
+ "model_revision": "8c4ec1d2f2888412640825a7aa23355939a8f4c6"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "IQ2_M",
+ "IQ3_M",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K_L",
+ "Q8_0"
+ ],
+ "model_file_name_template": "codegeex4-all-9b-{quantization}.gguf",
+ "model_id": "THUDM/codegeex4-all-9b-GGUF",
+ "model_revision": "6a04071c54c943949826d4815ee00717ed8cf153"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "CHATGLM3",
+ "system_prompt": "",
+ "roles": [
+ "user",
+ "assistant"
+ ],
+ "stop_token_ids": [
+ 151329,
+ 151336,
+ 151338
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|user|>",
+ "<|observation|>"
+ ]
+ }
+ },
  {
  "version": 1,
  "context_length": 2048,
@@ -1593,6 +1652,329 @@
  "none"
  ],
  "model_id": "meta-llama/Meta-Llama-3-70B-Instruct"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-8B-Instruct-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-8B-Instruct-8bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-8B-Instruct"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-4bit-mlx"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-8bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-mlx-unquantized"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "TechxGenus/Meta-Llama-3-8B-Instruct-GPTQ"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "TechxGenus/Meta-Llama-3-70B-Instruct-GPTQ"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "LLAMA3",
+ "system_prompt": "You are a helpful assistant.",
+ "roles": [
+ "user",
+ "assistant"
+ ],
+ "intra_message_sep": "\n\n",
+ "inter_message_sep": "<|eot_id|>",
+ "stop_token_ids": [
+ 128001,
+ 128009
+ ],
+ "stop": [
+ "<|end_of_text|>",
+ "<|eot_id|>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "llama-3.1",
+ "model_lang": [
+ "en",
+ "de",
+ "fr",
+ "it",
+ "pt",
+ "hi",
+ "es",
+ "th"
+ ],
+ "model_ability": [
+ "generate"
+ ],
+ "model_description": "Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "meta-llama/Meta-Llama-3.1-8B"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "QuantFactory/Meta-Llama-3.1-8B-GGUF",
+ "model_file_name_template": "Meta-Llama-3.1-8B.{quantization}.gguf"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "meta-llama/Meta-Llama-3.1-70B"
+ }
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "llama-3.1-instruct",
+ "model_lang": [
+ "en",
+ "de",
+ "fr",
+ "it",
+ "pt",
+ "hi",
+ "es",
+ "th"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "The Llama 3.1 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
+ "model_specs": [
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Q3_K_L",
+ "IQ4_XS",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF",
+ "model_file_name_template": "Meta-Llama-3.1-8B-Instruct-{quantization}.gguf"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "IQ2_M",
+ "IQ4_XS",
+ "Q2_K",
+ "Q3_K_S",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "quantization_parts": {
+ "Q5_K_M": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "Q6_K": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "Q8_0": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ]
+ },
+ "model_id": "lmstudio-community/Meta-Llama-3.1-70B-Instruct-GGUF",
+ "model_file_name_template": "Meta-Llama-3.1-70B-Instruct-{quantization}.gguf",
+ "model_file_name_split_template": "Meta-Llama-3.1-70B-Instruct-{quantization}-{part}.gguf"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "meta-llama/Meta-Llama-3.1-70B-Instruct"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "hugging-quants/Meta-Llama-3.1-70B-Instruct-GPTQ-INT4"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-8bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-bf16"
  }
  ],
  "prompt_style": {
@@ -3732,19 +4114,219 @@
  "Q6_K",
  "Q8_0"
  ],
- "model_id": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
- "model_file_name_template": "mistral-7b-instruct-v0.2.{quantization}.gguf"
+ "model_id": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
+ "model_file_name_template": "mistral-7b-instruct-v0.2.{quantization}.gguf"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "LLAMA2",
+ "system_prompt": "[INST] ",
+ "roles": [
+ "[INST]",
+ "[/INST]"
+ ],
+ "intra_message_sep": " ",
+ "inter_message_sep": "<s>",
+ "stop_token_ids": [
+ 2
+ ],
+ "stop": [
+ "</s>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "mistral-instruct-v0.3",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "The Mistral-7B-Instruct-v0.2 Large Language Model (LLM) is an improved instruct fine-tuned version of Mistral-7B-Instruct-v0.1.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "mistralai/Mistral-7B-Instruct-v0.3",
+ "model_revision": "83e9aa141f2e28c82232fea5325f54edf17c43de"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "neuralmagic/Mistral-7B-Instruct-v0.3-GPTQ-4bit"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "solidrust/Mistral-7B-Instruct-v0.3-AWQ"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_S",
+ "Q3_K_M",
+ "Q3_K_L",
+ "Q4_K_S",
+ "Q4_K_M",
+ "Q5_K_S",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "fp16"
+ ],
+ "model_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",
+ "model_file_name_template": "Mistral-7B-Instruct-v0.3.{quantization}.gguf"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "LLAMA2",
+ "system_prompt": "[INST] ",
+ "roles": [
+ "[INST]",
+ "[/INST]"
+ ],
+ "intra_message_sep": " ",
+ "inter_message_sep": "<s>",
+ "stop_token_ids": [
+ 2
+ ],
+ "stop": [
+ "</s>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 1024000,
+ "model_name": "mistral-nemo-instruct",
+ "model_lang": [
+ "en",
+ "fr",
+ "de",
+ "es",
+ "it",
+ "pt",
+ "zh",
+ "ru",
+ "ja"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "The Mistral-Nemo-Instruct-2407 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-Nemo-Base-2407",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mistralai/Mistral-Nemo-Instruct-2407",
+ "model_revision": "05b1e4f3e189ec1b5189fb3c973d4cf3369c27af"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
+ "model_revision": "1d85adc9e0fff0b8e4479a037bd75fe1346333ca"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "afrizalha/Mistral-Nemo-Instruct-2407-bnb-8bit",
+ "model_revision": "1d2dacf18a486c745219317d1507441406bc7e25"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "ModelCloud/Mistral-Nemo-Instruct-2407-gptq-4bit"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "casperhansen/mistral-nemo-instruct-2407-awq"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_S",
+ "Q3_K_M",
+ "Q3_K_L",
+ "Q4_K_S",
+ "Q4_K_M",
+ "Q5_K_S",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "fp16"
+ ],
+ "model_id": "MaziyarPanahi/Mistral-Nemo-Instruct-2407-GGUF",
+ "model_file_name_template": "Mistral-Nemo-Instruct-2407.{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-bf16"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-8bit"
  }
  ],
  "prompt_style": {
- "style_name": "LLAMA2",
- "system_prompt": "[INST] ",
+ "style_name": "mistral-nemo",
+ "system_prompt": "",
  "roles": [
  "[INST]",
  "[/INST]"
  ],
- "intra_message_sep": " ",
- "inter_message_sep": "<s>",
+ "intra_message_sep": "",
+ "inter_message_sep": "</s>",
  "stop_token_ids": [
  2
  ],
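
The hunk above also switches the prompt formatting from the LLAMA2 style to a new "mistral-nemo" style (empty system prompt, </s> between exchanges). As a rough illustration of how these prompt_style fields drive prompt assembly (a simplified sketch, not xinference's actual implementation in model/llm/utils.py):

    # Simplified sketch using the "mistral-nemo" prompt_style fields shown above.
    def render_prompt(messages,
                      roles=("[INST]", "[/INST]"),
                      inter_message_sep="</s>"):
        user_tag, assistant_tag = roles
        parts = []
        for msg in messages:
            if msg["role"] == "user":
                # User turns are wrapped in the role tags.
                parts.append(f"{user_tag} {msg['content']}{assistant_tag}")
            else:
                # A completed assistant turn is closed with the inter-message separator.
                parts.append(f" {msg['content']}{inter_message_sep}")
        return "".join(parts)

    print(render_prompt([
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello!"},
        {"role": "user", "content": "Summarize this diff."},
    ]))
    # [INST] Hi[/INST] Hello!</s>[INST] Summarize this diff.[/INST]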
@@ -3755,72 +4337,153 @@
  },
  {
  "version": 1,
- "context_length": 32768,
- "model_name": "mistral-instruct-v0.3",
+ "context_length": 131072,
+ "model_name": "mistral-large-instruct",
  "model_lang": [
- "en"
+ "en",
+ "fr",
+ "de",
+ "es",
+ "it",
+ "pt",
+ "zh",
+ "ru",
+ "ja",
+ "ko"
  ],
  "model_ability": [
  "chat"
  ],
- "model_description": "The Mistral-7B-Instruct-v0.2 Large Language Model (LLM) is an improved instruct fine-tuned version of Mistral-7B-Instruct-v0.1.",
+ "model_description": "Mistral-Large-Instruct-2407 is an advanced dense Large Language Model (LLM) of 123B parameters with state-of-the-art reasoning, knowledge and coding capabilities.",
  "model_specs": [
  {
  "model_format": "pytorch",
- "model_size_in_billions": 7,
+ "model_size_in_billions": 123,
  "quantizations": [
- "4-bit",
- "8-bit",
  "none"
  ],
- "model_id": "mistralai/Mistral-7B-Instruct-v0.3",
- "model_revision": "83e9aa141f2e28c82232fea5325f54edf17c43de"
+ "model_id": "mistralai/Mistral-Large-Instruct-2407"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 123,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "unsloth/Mistral-Large-Instruct-2407-bnb-4bit"
  },
  {
  "model_format": "gptq",
- "model_size_in_billions": 7,
+ "model_size_in_billions": 123,
  "quantizations": [
  "Int4"
  ],
- "model_id": "neuralmagic/Mistral-7B-Instruct-v0.3-GPTQ-4bit"
+ "model_id": "ModelCloud/Mistral-Large-Instruct-2407-gptq-4bit"
  },
  {
  "model_format": "awq",
- "model_size_in_billions": 7,
+ "model_size_in_billions": 123,
  "quantizations": [
  "Int4"
  ],
- "model_id": "solidrust/Mistral-7B-Instruct-v0.3-AWQ"
+ "model_id": "TechxGenus/Mistral-Large-Instruct-2407-AWQ"
  },
  {
  "model_format": "ggufv2",
- "model_size_in_billions": 7,
+ "model_size_in_billions": 123,
  "quantizations": [
  "Q2_K",
  "Q3_K_S",
  "Q3_K_M",
  "Q3_K_L",
  "Q4_K_S",
- "Q4_K_M",
- "Q5_K_S",
- "Q5_K_M",
- "Q6_K",
- "Q8_0",
- "fp16"
+ "Q4_K_M"
  ],
- "model_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",
- "model_file_name_template": "Mistral-7B-Instruct-v0.3.{quantization}.gguf"
+ "model_id": "MaziyarPanahi/Mistral-Large-Instruct-2407-GGUF",
+ "model_file_name_template": "Mistral-Large-Instruct-2407.{quantization}.gguf",
+ "model_file_name_split_template": "Mixtral-8x22B-Instruct-v0.1.{quantization}-{part}.gguf",
+ "quantization_parts": {
+ "Q3_K_L": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ],
+ "Q3_K_M": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ],
+ "Q3_K_S": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ],
+ "Q4_K_M": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ],
+ "Q4_K_S": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ]
+ }
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 123,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Mistral-Large-Instruct-2407-bf16"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 123,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Mistral-Large-Instruct-2407-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 123,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Mistral-Large-Instruct-2407-8bit"
  }
  ],
  "prompt_style": {
- "style_name": "LLAMA2",
- "system_prompt": "[INST] ",
+ "style_name": "mistral-nemo",
+ "system_prompt": "",
  "roles": [
  "[INST]",
  "[/INST]"
  ],
- "intra_message_sep": " ",
- "inter_message_sep": "<s>",
+ "intra_message_sep": "",
+ "inter_message_sep": "</s>",
  "stop_token_ids": [
  2
  ],
@@ -3869,6 +4532,24 @@
  ],
  "model_id": "bartowski/Codestral-22B-v0.1-GGUF",
  "model_file_name_template": "Codestral-22B-v0.1-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 22,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Codestral-22B-v0.1-4bit",
+ "model_revision": "544626b38eb1c9524f0fa570ec7b29550c26b78d"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 22,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Codestral-22B-v0.1-8bit",
+ "model_revision": "0399a53970663950d57010e61a2796af524a1588"
  }
  ]
  },
@@ -4609,6 +5290,61 @@
  "model_id": "modelscope/Yi-1.5-34B-Chat-AWQ",
  "model_revision": "26234fea6ac49d456f32f8017289021fb1087a04"
  }
+ ,
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 6,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-6B-Chat-4bit",
+ "model_revision": "0177c9a12b869d6bc73f772b5a1981a7c966adb6"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 6,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-6B-Chat-8bit",
+ "model_revision": "7756e65d1bf1e2e6e97aef6bc9484307225f536b"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-9B-Chat-4bit",
+ "model_revision": "e15f886479c44e7d90f0ac13ace69b2319b71c2f"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-9B-Chat-8bit",
+ "model_revision": "c1f742fcf3683edbe2d2c2fd1ad7ac2bb6c5ca36"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 34,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-34B-Chat-4bit",
+ "model_revision": "945e3b306ef37c46ab444fdc857d1f3ea7247374"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 34,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-34B-Chat-8bit",
+ "model_revision": "3c12761a2c6663f216caab6dff84b0dd29b472ac"
+ }
  ],
  "prompt_style": {
  "style_name": "CHATML",
@@ -5766,33 +6502,168 @@
  },
  {
  "version": 1,
- "context_length": 4096,
- "model_name": "Skywork-Math",
+ "context_length": 4096,
+ "model_name": "Skywork-Math",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "generate"
+ ],
+ "model_description": "Skywork is a series of large models developed by the Kunlun Group · Skywork team.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 13,
+ "quantizations": [
+ "8-bit",
+ "none"
+ ],
+ "model_id": "skywork/Skywork-13B-Math",
+ "model_revision": "70d1740208c8ba39f9ba250b22117ec25311ab33"
+ }
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "internlm2-chat",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "The second generation of the InternLM model, InternLM2.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "internlm/internlm2-chat-7b",
+ "model_revision": "2292b86b21cb856642782cebed0a453997453b1f"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 20,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "internlm/internlm2-chat-20b",
+ "model_revision": "b666125047cd98c5a7c85ca28720b44a06aed124"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "INTERNLM2",
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
+ "roles": [
+ "<|im_start|>user",
+ "<|im_start|>assistant"
+ ],
+ "intra_message_sep": "<|im_end|>",
+ "stop_token_ids": [
+ 2,
+ 92542
+ ],
+ "stop": [
+ "</s>",
+ "<|im_end|>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "internlm2.5-chat",
  "model_lang": [
  "en",
  "zh"
  ],
  "model_ability": [
- "generate"
+ "chat"
  ],
- "model_description": "Skywork is a series of large models developed by the Kunlun Group · Skywork team.",
+ "model_description": "InternLM2.5 series of the InternLM model.",
  "model_specs": [
  {
  "model_format": "pytorch",
- "model_size_in_billions": 13,
+ "model_size_in_billions": 7,
  "quantizations": [
- "8-bit",
  "none"
  ],
- "model_id": "skywork/Skywork-13B-Math",
- "model_revision": "70d1740208c8ba39f9ba250b22117ec25311ab33"
+ "model_id": "internlm/internlm2_5-7b-chat",
+ "model_revision": "9dc8536a922ab4954726aad1b37fa199004a291a"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "ModelCloud/internlm-2.5-7b-chat-gptq-4bit",
+ "model_revision": "2e2dda735c326544921a4035bbeb6c6e316a8254"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0",
+ "fp16"
+ ],
+ "model_id": "internlm/internlm2_5-7b-chat-gguf",
+ "model_file_name_template": "internlm2_5-7b-chat-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/internlm2_5-7b-chat-4bit",
+ "model_revision": "d12097a867721978142a6048399f470a3d18beee"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/internlm2_5-7b-chat-8bit",
+ "model_revision": "0ec94d61d30ab161b49c69f9bf92ec2b9986d234"
  }
- ]
+ ],
+ "prompt_style": {
+ "style_name": "INTERNLM2",
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
+ "roles": [
+ "<|im_start|>user",
+ "<|im_start|>assistant"
+ ],
+ "intra_message_sep": "<|im_end|>",
+ "stop_token_ids": [
+ 2,
+ 92542
+ ],
+ "stop": [
+ "</s>",
+ "<|im_end|>"
+ ]
+ }
  },
  {
  "version": 1,
- "context_length": 204800,
- "model_name": "internlm2-chat",
+ "context_length": 262144,
+ "model_name": "internlm2.5-chat-1m",
  "model_lang": [
  "en",
  "zh"
@@ -5800,7 +6671,7 @@
  "model_ability": [
  "chat"
  ],
- "model_description": "The second generation of the InternLM model, InternLM2.",
+ "model_description": "InternLM2.5 series of the InternLM model supports 1M long-context",
  "model_specs": [
  {
  "model_format": "pytorch",
@@ -5808,17 +6679,34 @@
  "quantizations": [
  "none"
  ],
- "model_id": "internlm/internlm2-chat-7b",
- "model_revision": "2292b86b21cb856642782cebed0a453997453b1f"
+ "model_id": "internlm/internlm2_5-7b-chat-1m",
+ "model_revision": "8d1a709a04d71440ef3df6ebbe204672f411c8b6"
  },
  {
- "model_format": "pytorch",
- "model_size_in_billions": 20,
+ "model_format": "gptq",
+ "model_size_in_billions": 7,
  "quantizations": [
- "none"
+ "Int4"
  ],
- "model_id": "internlm/internlm2-chat-20b",
- "model_revision": "b666125047cd98c5a7c85ca28720b44a06aed124"
+ "model_id": "ModelCloud/internlm-2.5-7b-chat-1m-gptq-4bit",
+ "model_revision": "022e59cb30f03b271d56178478acb038b2b9b58c"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "q2_k",
+ "q3_k_m",
+ "q4_0",
+ "q4_k_m",
+ "q5_0",
+ "q5_k_m",
+ "q6_k",
+ "q8_0",
+ "fp16"
+ ],
+ "model_id": "internlm/internlm2_5-7b-chat-1m-gguf",
+ "model_file_name_template": "internlm2_5-7b-chat-1m-{quantization}.gguf"
  }
  ],
  "prompt_style": {
@@ -6192,6 +7080,52 @@
  ],
  "model_id": "google/gemma-2-27b-it"
  },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "f32"
+ ],
+ "model_id": "bartowski/gemma-2-9b-it-GGUF",
+ "model_file_name_template": "gemma-2-9b-it-{quantization}.gguf"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 27,
+ "quantizations": [
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_K_L",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_L",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q6_K_L",
+ "Q8_0",
+ "f32"
+ ],
+ "model_id": "bartowski/gemma-2-27b-it-GGUF",
+ "model_file_name_template": "gemma-2-27b-it-{quantization}.gguf"
+ },
  {
  "model_format": "mlx",
  "model_size_in_billions": 9,
@@ -6809,6 +7743,15 @@
  "model_id": "CohereForAI/c4ai-command-r-v01",
  "model_revision": "16881ccde1c68bbc7041280e6a66637bc46bfe88"
  },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 35,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "CohereForAI/c4ai-command-r-v01-4bit",
+ "model_revision": "f2e87936a146643c9dd143422dcafb9cb1552611"
+ },
  {
  "model_format": "ggufv2",
  "model_size_in_billions": 35,
@@ -6838,69 +7781,23 @@
  "model_id": "CohereForAI/c4ai-command-r-plus",
  "model_revision": "ba7f1d954c9d1609013677d87e4142ab95c34e62"
  },
- {
- "model_format": "gptq",
- "model_size_in_billions": 104,
- "quantizations": [
- "Int4"
- ],
- "model_id": "alpindale/c4ai-command-r-plus-GPTQ",
- "model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
- }
- ],
- "prompt_style": {
- "style_name": "c4ai-command-r",
- "system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
- "roles": [
- "<|USER_TOKEN|>",
- "<|CHATBOT_TOKEN|>"
- ],
- "intra_message_sep": "",
- "inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
- "stop_token_ids": [
- 6,
- 255001
- ]
- }
- },
- {
- "version": 1,
- "context_length": 131072,
- "model_name": "c4ai-command-r-v01-4bit",
- "model_lang": [
- "en",
- "fr",
- "de",
- "es",
- "it",
- "pt",
- "ja",
- "ko",
- "zh",
- "ar"
- ],
- "model_ability": [
- "generate"
- ],
- "model_description": "This model is 4bit quantized version of C4AI Command-R using bitsandbytes.",
- "model_specs": [
  {
  "model_format": "pytorch",
- "model_size_in_billions": 35,
+ "model_size_in_billions": 104,
  "quantizations": [
- "none"
+ "4-bit"
  ],
- "model_id": "CohereForAI/c4ai-command-r-v01-4bit",
- "model_revision": "f2e87936a146643c9dd143422dcafb9cb1552611"
+ "model_id": "CohereForAI/c4ai-command-r-plus-4bit",
+ "model_revision": "bb63b5b7005ecedb30b0cfd0d5953b02a5817f7b"
  },
  {
- "model_format": "pytorch",
+ "model_format": "gptq",
  "model_size_in_billions": 104,
  "quantizations": [
- "none"
+ "Int4"
  ],
- "model_id": "CohereForAI/c4ai-command-r-plus-4bit",
- "model_revision": "bb63b5b7005ecedb30b0cfd0d5953b02a5817f7b"
+ "model_id": "alpindale/c4ai-command-r-plus-GPTQ",
+ "model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
  }
  ],
  "prompt_style": {
@@ -7148,5 +8045,67 @@
  160132
  ]
  }
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "csg-wukong-chat-v0.1",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "csg-wukong-1B is a 1 billion-parameter small language model(SLM) pretrained on 1T tokens.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "opencsg/csg-wukong-1B-chat-v0.1",
+ "model_revision": "2443c903d46074af0856e2ba11398dcd01d35536"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K",
+ "Q3_K_S",
+ "Q3_K_M",
+ "Q3_K_L",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_S",
+ "Q4_K_M",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K_S",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "RichardErkhov/opencsg_-_csg-wukong-1B-chat-v0.1-gguf",
+ "model_file_name_template": "csg-wukong-1B-chat-v0.1.{quantization}.gguf"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "NO_COLON_TWO",
+ "system_prompt": "<|system|>\nYou are a creative super artificial intelligence assistant, possessing all the knowledge of humankind. Your name is csg-wukong, developed by OpenCSG. You need to understand and infer the true intentions of users based on the topics discussed in the chat history, and respond to user questions correctly as required. You enjoy responding to users with accurate and insightful answers. Please pay attention to the appropriate style and format when replying, try to avoid repetitive words and sentences, and keep your responses as concise and profound as possible. You carefully consider the context of the discussion when replying to users. When the user says \"continue,\" please proceed with the continuation of the previous assistant's response.</s>\n",
+ "roles": [
+ "<|user|>\n",
+ "<|assistant|>\n"
+ ],
+ "intra_message_sep": "</s>\n",
+ "inter_message_sep": "</s>\n",
+ "stop_token_ids": [
+ 2
+ ],
+ "stop": [
+ "</s>"
+ ]
+ }
  }
  ]