xinference 0.13.2__py3-none-any.whl → 0.13.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference has been flagged by the registry diff service as possibly problematic.

Files changed (103):
  1. xinference/__init__.py +0 -1
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +30 -5
  4. xinference/client/restful/restful_client.py +18 -3
  5. xinference/constants.py +0 -4
  6. xinference/core/chat_interface.py +2 -2
  7. xinference/core/image_interface.py +6 -3
  8. xinference/core/model.py +9 -4
  9. xinference/core/scheduler.py +4 -4
  10. xinference/core/supervisor.py +2 -0
  11. xinference/core/worker.py +7 -0
  12. xinference/deploy/utils.py +6 -0
  13. xinference/model/audio/core.py +9 -4
  14. xinference/model/audio/cosyvoice.py +136 -0
  15. xinference/model/audio/model_spec.json +24 -0
  16. xinference/model/audio/model_spec_modelscope.json +27 -0
  17. xinference/model/core.py +25 -4
  18. xinference/model/embedding/core.py +88 -13
  19. xinference/model/embedding/model_spec.json +8 -0
  20. xinference/model/embedding/model_spec_modelscope.json +8 -0
  21. xinference/model/flexible/core.py +8 -2
  22. xinference/model/flexible/launchers/__init__.py +1 -0
  23. xinference/model/flexible/launchers/image_process_launcher.py +70 -0
  24. xinference/model/image/core.py +8 -5
  25. xinference/model/image/model_spec.json +36 -5
  26. xinference/model/image/model_spec_modelscope.json +21 -3
  27. xinference/model/image/stable_diffusion/core.py +36 -28
  28. xinference/model/llm/core.py +6 -4
  29. xinference/model/llm/ggml/llamacpp.py +7 -5
  30. xinference/model/llm/llm_family.json +802 -82
  31. xinference/model/llm/llm_family.py +6 -6
  32. xinference/model/llm/llm_family_csghub.json +39 -0
  33. xinference/model/llm/llm_family_modelscope.json +295 -47
  34. xinference/model/llm/mlx/core.py +7 -0
  35. xinference/model/llm/pytorch/chatglm.py +246 -5
  36. xinference/model/llm/pytorch/cogvlm2.py +1 -1
  37. xinference/model/llm/pytorch/deepseek_vl.py +2 -1
  38. xinference/model/llm/pytorch/falcon.py +2 -1
  39. xinference/model/llm/pytorch/llama_2.py +4 -2
  40. xinference/model/llm/pytorch/omnilmm.py +2 -1
  41. xinference/model/llm/pytorch/qwen_vl.py +2 -1
  42. xinference/model/llm/pytorch/vicuna.py +2 -1
  43. xinference/model/llm/pytorch/yi_vl.py +2 -1
  44. xinference/model/llm/sglang/core.py +12 -6
  45. xinference/model/llm/utils.py +78 -1
  46. xinference/model/llm/vllm/core.py +9 -5
  47. xinference/model/rerank/core.py +4 -3
  48. xinference/thirdparty/cosyvoice/__init__.py +0 -0
  49. xinference/thirdparty/cosyvoice/bin/__init__.py +0 -0
  50. xinference/thirdparty/cosyvoice/bin/inference.py +114 -0
  51. xinference/thirdparty/cosyvoice/bin/train.py +136 -0
  52. xinference/thirdparty/cosyvoice/cli/__init__.py +0 -0
  53. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +83 -0
  54. xinference/thirdparty/cosyvoice/cli/frontend.py +168 -0
  55. xinference/thirdparty/cosyvoice/cli/model.py +60 -0
  56. xinference/thirdparty/cosyvoice/dataset/__init__.py +0 -0
  57. xinference/thirdparty/cosyvoice/dataset/dataset.py +160 -0
  58. xinference/thirdparty/cosyvoice/dataset/processor.py +369 -0
  59. xinference/thirdparty/cosyvoice/flow/__init__.py +0 -0
  60. xinference/thirdparty/cosyvoice/flow/decoder.py +222 -0
  61. xinference/thirdparty/cosyvoice/flow/flow.py +135 -0
  62. xinference/thirdparty/cosyvoice/flow/flow_matching.py +138 -0
  63. xinference/thirdparty/cosyvoice/flow/length_regulator.py +49 -0
  64. xinference/thirdparty/cosyvoice/hifigan/__init__.py +0 -0
  65. xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +55 -0
  66. xinference/thirdparty/cosyvoice/hifigan/generator.py +391 -0
  67. xinference/thirdparty/cosyvoice/llm/__init__.py +0 -0
  68. xinference/thirdparty/cosyvoice/llm/llm.py +206 -0
  69. xinference/thirdparty/cosyvoice/transformer/__init__.py +0 -0
  70. xinference/thirdparty/cosyvoice/transformer/activation.py +84 -0
  71. xinference/thirdparty/cosyvoice/transformer/attention.py +326 -0
  72. xinference/thirdparty/cosyvoice/transformer/convolution.py +145 -0
  73. xinference/thirdparty/cosyvoice/transformer/decoder.py +396 -0
  74. xinference/thirdparty/cosyvoice/transformer/decoder_layer.py +132 -0
  75. xinference/thirdparty/cosyvoice/transformer/embedding.py +293 -0
  76. xinference/thirdparty/cosyvoice/transformer/encoder.py +472 -0
  77. xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +236 -0
  78. xinference/thirdparty/cosyvoice/transformer/label_smoothing_loss.py +96 -0
  79. xinference/thirdparty/cosyvoice/transformer/positionwise_feed_forward.py +115 -0
  80. xinference/thirdparty/cosyvoice/transformer/subsampling.py +383 -0
  81. xinference/thirdparty/cosyvoice/utils/__init__.py +0 -0
  82. xinference/thirdparty/cosyvoice/utils/class_utils.py +70 -0
  83. xinference/thirdparty/cosyvoice/utils/common.py +103 -0
  84. xinference/thirdparty/cosyvoice/utils/executor.py +110 -0
  85. xinference/thirdparty/cosyvoice/utils/file_utils.py +41 -0
  86. xinference/thirdparty/cosyvoice/utils/frontend_utils.py +125 -0
  87. xinference/thirdparty/cosyvoice/utils/mask.py +227 -0
  88. xinference/thirdparty/cosyvoice/utils/scheduler.py +739 -0
  89. xinference/thirdparty/cosyvoice/utils/train_utils.py +289 -0
  90. xinference/web/ui/build/asset-manifest.json +3 -3
  91. xinference/web/ui/build/index.html +1 -1
  92. xinference/web/ui/build/static/js/{main.95c1d652.js → main.af906659.js} +3 -3
  93. xinference/web/ui/build/static/js/main.af906659.js.map +1 -0
  94. xinference/web/ui/node_modules/.cache/babel-loader/2cd5e4279ad7e13a1f41d486e9fca7756295bfad5bd77d90992f4ac3e10b496d.json +1 -0
  95. {xinference-0.13.2.dist-info → xinference-0.13.4.dist-info}/METADATA +39 -11
  96. {xinference-0.13.2.dist-info → xinference-0.13.4.dist-info}/RECORD +101 -57
  97. xinference/web/ui/build/static/js/main.95c1d652.js.map +0 -1
  98. xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +0 -1
  99. /xinference/web/ui/build/static/js/{main.95c1d652.js.LICENSE.txt → main.af906659.js.LICENSE.txt} +0 -0
  100. {xinference-0.13.2.dist-info → xinference-0.13.4.dist-info}/LICENSE +0 -0
  101. {xinference-0.13.2.dist-info → xinference-0.13.4.dist-info}/WHEEL +0 -0
  102. {xinference-0.13.2.dist-info → xinference-0.13.4.dist-info}/entry_points.txt +0 -0
  103. {xinference-0.13.2.dist-info → xinference-0.13.4.dist-info}/top_level.txt +0 -0
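
At a glance, this release vendors a full CosyVoice text-to-speech stack under xinference/thirdparty/cosyvoice and registers a batch of new model families in xinference/model/llm/llm_family.json (+802 −82): Llama 3/3.1 MLX and GPTQ builds, mistral-nemo-instruct, mistral-large-instruct, MLX builds of Codestral, Yi 1.5 and InternLM 2.5, a 4-bit Command R spec, and csg-wukong-chat-v0.1. Once this wheel is installed, a newly registered family can be launched through the existing RESTful client. A minimal sketch, assuming a server already running at the default endpoint; the argument names follow the public Client API, but the exact set of required arguments (notably model_engine) varies by version, so treat this as illustrative rather than verbatim:

from xinference.client import Client

client = Client("http://localhost:9997")

# llama-3.1-instruct and its MLX specs are registered by this release
# in llm_family.json (see the hunks below).
uid = client.launch_model(
    model_name="llama-3.1-instruct",
    model_engine="MLX",          # assumption: explicit engine selection may be required
    model_format="mlx",
    model_size_in_billions=8,
    quantization="4-bit",
)
model = client.get_model(uid)
print(model.chat("Summarize what changed in 0.13.4."))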
xinference/model/llm/llm_family.json
@@ -819,7 +819,7 @@
  "none"
  ],
  "model_id": "THUDM/glm-4-9b-chat",
- "model_revision": "b84dc74294ccd507a3d78bde8aebf628221af9bd"
+ "model_revision": "76f3474a854145aa4a9ed2612fee9bc8d4a8966b"
  },
  {
  "model_format": "ggufv2",
@@ -1652,6 +1652,329 @@
  "none"
  ],
  "model_id": "meta-llama/Meta-Llama-3-70B-Instruct"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-8B-Instruct-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-8B-Instruct-8bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-8B-Instruct"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-4bit-mlx"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-8bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-mlx-unquantized"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "TechxGenus/Meta-Llama-3-8B-Instruct-GPTQ"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "TechxGenus/Meta-Llama-3-70B-Instruct-GPTQ"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "LLAMA3",
+ "system_prompt": "You are a helpful assistant.",
+ "roles": [
+ "user",
+ "assistant"
+ ],
+ "intra_message_sep": "\n\n",
+ "inter_message_sep": "<|eot_id|>",
+ "stop_token_ids": [
+ 128001,
+ 128009
+ ],
+ "stop": [
+ "<|end_of_text|>",
+ "<|eot_id|>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "llama-3.1",
+ "model_lang": [
+ "en",
+ "de",
+ "fr",
+ "it",
+ "pt",
+ "hi",
+ "es",
+ "th"
+ ],
+ "model_ability": [
+ "generate"
+ ],
+ "model_description": "Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "meta-llama/Meta-Llama-3.1-8B"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "QuantFactory/Meta-Llama-3.1-8B-GGUF",
+ "model_file_name_template": "Meta-Llama-3.1-8B.{quantization}.gguf"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "4-bit",
+ "8-bit",
+ "none"
+ ],
+ "model_id": "meta-llama/Meta-Llama-3.1-70B"
+ }
+ ]
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "llama-3.1-instruct",
+ "model_lang": [
+ "en",
+ "de",
+ "fr",
+ "it",
+ "pt",
+ "hi",
+ "es",
+ "th"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "The Llama 3.1 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
+ "model_specs": [
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Q3_K_L",
+ "IQ4_XS",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF",
+ "model_file_name_template": "Meta-Llama-3.1-8B-Instruct-{quantization}.gguf"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "IQ2_M",
+ "IQ4_XS",
+ "Q2_K",
+ "Q3_K_S",
+ "Q4_K_M",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "quantization_parts": {
+ "Q5_K_M": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "Q6_K": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ],
+ "Q8_0": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ]
+ },
+ "model_id": "lmstudio-community/Meta-Llama-3.1-70B-Instruct-GGUF",
+ "model_file_name_template": "Meta-Llama-3.1-70B-Instruct-{quantization}.gguf",
+ "model_file_name_split_template": "Meta-Llama-3.1-70B-Instruct-{quantization}-{part}.gguf"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "meta-llama/Meta-Llama-3.1-70B-Instruct"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "hugging-quants/Meta-Llama-3.1-70B-Instruct-GPTQ-INT4"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 8,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-8bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 70,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-bf16"
  }
  ],
  "prompt_style": {
@@ -3836,50 +4159,331 @@
  "model_revision": "83e9aa141f2e28c82232fea5325f54edf17c43de"
  },
  {
- "model_format": "gptq",
- "model_size_in_billions": 7,
+ "model_format": "gptq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "neuralmagic/Mistral-7B-Instruct-v0.3-GPTQ-4bit"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "solidrust/Mistral-7B-Instruct-v0.3-AWQ"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_S",
+ "Q3_K_M",
+ "Q3_K_L",
+ "Q4_K_S",
+ "Q4_K_M",
+ "Q5_K_S",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "fp16"
+ ],
+ "model_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",
+ "model_file_name_template": "Mistral-7B-Instruct-v0.3.{quantization}.gguf"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "LLAMA2",
+ "system_prompt": "[INST] ",
+ "roles": [
+ "[INST]",
+ "[/INST]"
+ ],
+ "intra_message_sep": " ",
+ "inter_message_sep": "<s>",
+ "stop_token_ids": [
+ 2
+ ],
+ "stop": [
+ "</s>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 1024000,
+ "model_name": "mistral-nemo-instruct",
+ "model_lang": [
+ "en",
+ "fr",
+ "de",
+ "es",
+ "it",
+ "pt",
+ "zh",
+ "ru",
+ "ja"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "The Mistral-Nemo-Instruct-2407 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-Nemo-Base-2407",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mistralai/Mistral-Nemo-Instruct-2407",
+ "model_revision": "05b1e4f3e189ec1b5189fb3c973d4cf3369c27af"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
+ "model_revision": "1d85adc9e0fff0b8e4479a037bd75fe1346333ca"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "afrizalha/Mistral-Nemo-Instruct-2407-bnb-8bit",
+ "model_revision": "1d2dacf18a486c745219317d1507441406bc7e25"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "ModelCloud/Mistral-Nemo-Instruct-2407-gptq-4bit"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "casperhansen/mistral-nemo-instruct-2407-awq"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_S",
+ "Q3_K_M",
+ "Q3_K_L",
+ "Q4_K_S",
+ "Q4_K_M",
+ "Q5_K_S",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0",
+ "fp16"
+ ],
+ "model_id": "MaziyarPanahi/Mistral-Nemo-Instruct-2407-GGUF",
+ "model_file_name_template": "Mistral-Nemo-Instruct-2407.{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-bf16"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-4bit"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 12,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-8bit"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "mistral-nemo",
+ "system_prompt": "",
+ "roles": [
+ "[INST]",
+ "[/INST]"
+ ],
+ "intra_message_sep": "",
+ "inter_message_sep": "</s>",
+ "stop_token_ids": [
+ 2
+ ],
+ "stop": [
+ "</s>"
+ ]
+ }
+ },
+ {
+ "version": 1,
+ "context_length": 131072,
+ "model_name": "mistral-large-instruct",
+ "model_lang": [
+ "en",
+ "fr",
+ "de",
+ "es",
+ "it",
+ "pt",
+ "zh",
+ "ru",
+ "ja",
+ "ko"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Mistral-Large-Instruct-2407 is an advanced dense Large Language Model (LLM) of 123B parameters with state-of-the-art reasoning, knowledge and coding capabilities.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 123,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "mistralai/Mistral-Large-Instruct-2407"
+ },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 123,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "unsloth/Mistral-Large-Instruct-2407-bnb-4bit"
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 123,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "ModelCloud/Mistral-Large-Instruct-2407-gptq-4bit"
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 123,
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "TechxGenus/Mistral-Large-Instruct-2407-AWQ"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 123,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K_S",
+ "Q3_K_M",
+ "Q3_K_L",
+ "Q4_K_S",
+ "Q4_K_M"
+ ],
+ "model_id": "MaziyarPanahi/Mistral-Large-Instruct-2407-GGUF",
+ "model_file_name_template": "Mistral-Large-Instruct-2407.{quantization}.gguf",
+ "model_file_name_split_template": "Mixtral-8x22B-Instruct-v0.1.{quantization}-{part}.gguf",
+ "quantization_parts": {
+ "Q3_K_L": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ],
+ "Q3_K_M": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ],
+ "Q3_K_S": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ],
+ "Q4_K_M": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ],
+ "Q4_K_S": [
+ "00001-of-00007",
+ "00002-of-00007",
+ "00003-of-00007",
+ "00004-of-00007",
+ "00005-of-00007",
+ "00006-of-00007",
+ "00007-of-00007"
+ ]
+ }
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 123,
  "quantizations": [
- "Int4"
+ "none"
  ],
- "model_id": "neuralmagic/Mistral-7B-Instruct-v0.3-GPTQ-4bit"
+ "model_id": "mlx-community/Mistral-Large-Instruct-2407-bf16"
  },
  {
- "model_format": "awq",
- "model_size_in_billions": 7,
+ "model_format": "mlx",
+ "model_size_in_billions": 123,
  "quantizations": [
- "Int4"
+ "4-bit"
  ],
- "model_id": "solidrust/Mistral-7B-Instruct-v0.3-AWQ"
+ "model_id": "mlx-community/Mistral-Large-Instruct-2407-4bit"
  },
  {
- "model_format": "ggufv2",
- "model_size_in_billions": 7,
+ "model_format": "mlx",
+ "model_size_in_billions": 123,
  "quantizations": [
- "Q2_K",
- "Q3_K_S",
- "Q3_K_M",
- "Q3_K_L",
- "Q4_K_S",
- "Q4_K_M",
- "Q5_K_S",
- "Q5_K_M",
- "Q6_K",
- "Q8_0",
- "fp16"
+ "8-bit"
  ],
- "model_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",
- "model_file_name_template": "Mistral-7B-Instruct-v0.3.{quantization}.gguf"
+ "model_id": "mlx-community/Mistral-Large-Instruct-2407-8bit"
  }
  ],
  "prompt_style": {
- "style_name": "LLAMA2",
- "system_prompt": "[INST] ",
+ "style_name": "mistral-nemo",
+ "system_prompt": "",
  "roles": [
  "[INST]",
  "[/INST]"
  ],
- "intra_message_sep": " ",
- "inter_message_sep": "<s>",
+ "intra_message_sep": "",
+ "inter_message_sep": "</s>",
  "stop_token_ids": [
  2
  ],
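
Two mechanics in the specs above are worth calling out. First, split GGUF downloads: when a quantization is listed under quantization_parts, the loader can derive one file name per shard from model_file_name_split_template rather than model_file_name_template. Second, the Mistral-Large-Instruct-2407 entry ships a split template whose file stem still reads Mixtral-8x22B-Instruct-v0.1; that string is reproduced above exactly as released, not a transcription error here. A minimal sketch of the expansion, using the Meta-Llama-3.1-70B-Instruct data from the earlier hunk (plain str.format is all the {quantization}/{part} placeholders require):

# Sketch: expanding a split-GGUF spec into per-shard file names.
split_template = "Meta-Llama-3.1-70B-Instruct-{quantization}-{part}.gguf"
quantization_parts = {
    "Q5_K_M": ["00001-of-00002", "00002-of-00002"],
}

shards = [
    split_template.format(quantization="Q5_K_M", part=part)
    for part in quantization_parts["Q5_K_M"]
]
print(shards)
# ['Meta-Llama-3.1-70B-Instruct-Q5_K_M-00001-of-00002.gguf',
#  'Meta-Llama-3.1-70B-Instruct-Q5_K_M-00002-of-00002.gguf']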
@@ -3928,6 +4532,24 @@
  ],
  "model_id": "bartowski/Codestral-22B-v0.1-GGUF",
  "model_file_name_template": "Codestral-22B-v0.1-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 22,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Codestral-22B-v0.1-4bit",
+ "model_revision": "544626b38eb1c9524f0fa570ec7b29550c26b78d"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 22,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Codestral-22B-v0.1-8bit",
+ "model_revision": "0399a53970663950d57010e61a2796af524a1588"
  }
  ]
  },
@@ -4668,6 +5290,61 @@
  "model_id": "modelscope/Yi-1.5-34B-Chat-AWQ",
  "model_revision": "26234fea6ac49d456f32f8017289021fb1087a04"
  }
+ ,
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 6,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-6B-Chat-4bit",
+ "model_revision": "0177c9a12b869d6bc73f772b5a1981a7c966adb6"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 6,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-6B-Chat-8bit",
+ "model_revision": "7756e65d1bf1e2e6e97aef6bc9484307225f536b"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-9B-Chat-4bit",
+ "model_revision": "e15f886479c44e7d90f0ac13ace69b2319b71c2f"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 9,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-9B-Chat-8bit",
+ "model_revision": "c1f742fcf3683edbe2d2c2fd1ad7ac2bb6c5ca36"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 34,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-34B-Chat-4bit",
+ "model_revision": "945e3b306ef37c46ab444fdc857d1f3ea7247374"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 34,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/Yi-1.5-34B-Chat-8bit",
+ "model_revision": "3c12761a2c6663f216caab6dff84b0dd29b472ac"
+ }
  ],
  "prompt_style": {
  "style_name": "CHATML",
@@ -5945,6 +6622,24 @@
  ],
  "model_id": "internlm/internlm2_5-7b-chat-gguf",
  "model_file_name_template": "internlm2_5-7b-chat-{quantization}.gguf"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "mlx-community/internlm2_5-7b-chat-4bit",
+ "model_revision": "d12097a867721978142a6048399f470a3d18beee"
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 7,
+ "quantizations": [
+ "8-bit"
+ ],
+ "model_id": "mlx-community/internlm2_5-7b-chat-8bit",
+ "model_revision": "0ec94d61d30ab161b49c69f9bf92ec2b9986d234"
  }
  ],
  "prompt_style": {
@@ -7048,6 +7743,15 @@
  "model_id": "CohereForAI/c4ai-command-r-v01",
  "model_revision": "16881ccde1c68bbc7041280e6a66637bc46bfe88"
  },
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 35,
+ "quantizations": [
+ "4-bit"
+ ],
+ "model_id": "CohereForAI/c4ai-command-r-v01-4bit",
+ "model_revision": "f2e87936a146643c9dd143422dcafb9cb1552611"
+ },
  {
  "model_format": "ggufv2",
  "model_size_in_billions": 35,
@@ -7077,69 +7781,23 @@
  "model_id": "CohereForAI/c4ai-command-r-plus",
  "model_revision": "ba7f1d954c9d1609013677d87e4142ab95c34e62"
  },
- {
- "model_format": "gptq",
- "model_size_in_billions": 104,
- "quantizations": [
- "Int4"
- ],
- "model_id": "alpindale/c4ai-command-r-plus-GPTQ",
- "model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
- }
- ],
- "prompt_style": {
- "style_name": "c4ai-command-r",
- "system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
- "roles": [
- "<|USER_TOKEN|>",
- "<|CHATBOT_TOKEN|>"
- ],
- "intra_message_sep": "",
- "inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
- "stop_token_ids": [
- 6,
- 255001
- ]
- }
- },
- {
- "version": 1,
- "context_length": 131072,
- "model_name": "c4ai-command-r-v01-4bit",
- "model_lang": [
- "en",
- "fr",
- "de",
- "es",
- "it",
- "pt",
- "ja",
- "ko",
- "zh",
- "ar"
- ],
- "model_ability": [
- "generate"
- ],
- "model_description": "This model is 4bit quantized version of C4AI Command-R using bitsandbytes.",
- "model_specs": [
  {
  "model_format": "pytorch",
- "model_size_in_billions": 35,
+ "model_size_in_billions": 104,
  "quantizations": [
- "none"
+ "4-bit"
  ],
- "model_id": "CohereForAI/c4ai-command-r-v01-4bit",
- "model_revision": "f2e87936a146643c9dd143422dcafb9cb1552611"
+ "model_id": "CohereForAI/c4ai-command-r-plus-4bit",
+ "model_revision": "bb63b5b7005ecedb30b0cfd0d5953b02a5817f7b"
  },
  {
- "model_format": "pytorch",
+ "model_format": "gptq",
  "model_size_in_billions": 104,
  "quantizations": [
- "none"
+ "Int4"
  ],
- "model_id": "CohereForAI/c4ai-command-r-plus-4bit",
- "model_revision": "bb63b5b7005ecedb30b0cfd0d5953b02a5817f7b"
+ "model_id": "alpindale/c4ai-command-r-plus-GPTQ",
+ "model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
  }
  ],
  "prompt_style": {
@@ -7387,5 +8045,67 @@
  160132
  ]
  }
+ },
+ {
+ "version": 1,
+ "context_length": 32768,
+ "model_name": "csg-wukong-chat-v0.1",
+ "model_lang": [
+ "en"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "csg-wukong-1B is a 1 billion-parameter small language model(SLM) pretrained on 1T tokens.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "opencsg/csg-wukong-1B-chat-v0.1",
+ "model_revision": "2443c903d46074af0856e2ba11398dcd01d35536"
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 1,
+ "quantizations": [
+ "Q2_K",
+ "Q3_K",
+ "Q3_K_S",
+ "Q3_K_M",
+ "Q3_K_L",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_S",
+ "Q4_K_M",
+ "Q5_0",
+ "Q5_1",
+ "Q5_K_S",
+ "Q5_K_M",
+ "Q6_K",
+ "Q8_0"
+ ],
+ "model_id": "RichardErkhov/opencsg_-_csg-wukong-1B-chat-v0.1-gguf",
+ "model_file_name_template": "csg-wukong-1B-chat-v0.1.{quantization}.gguf"
+ }
+ ],
+ "prompt_style": {
+ "style_name": "NO_COLON_TWO",
+ "system_prompt": "<|system|>\nYou are a creative super artificial intelligence assistant, possessing all the knowledge of humankind. Your name is csg-wukong, developed by OpenCSG. You need to understand and infer the true intentions of users based on the topics discussed in the chat history, and respond to user questions correctly as required. You enjoy responding to users with accurate and insightful answers. Please pay attention to the appropriate style and format when replying, try to avoid repetitive words and sentences, and keep your responses as concise and profound as possible. You carefully consider the context of the discussion when replying to users. When the user says \"continue,\" please proceed with the continuation of the previous assistant's response.</s>\n",
+ "roles": [
+ "<|user|>\n",
+ "<|assistant|>\n"
+ ],
+ "intra_message_sep": "</s>\n",
+ "inter_message_sep": "</s>\n",
+ "stop_token_ids": [
+ 2
+ ],
+ "stop": [
+ "</s>"
+ ]
+ }
  }
  ]