xinference-0.13.2-py3-none-any.whl → xinference-0.13.3-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries; it is provided for informational purposes only.
- xinference/__init__.py +0 -1
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +26 -4
- xinference/client/restful/restful_client.py +16 -1
- xinference/core/chat_interface.py +2 -2
- xinference/core/model.py +8 -3
- xinference/core/scheduler.py +4 -4
- xinference/model/audio/core.py +5 -2
- xinference/model/audio/cosyvoice.py +136 -0
- xinference/model/audio/model_spec.json +24 -0
- xinference/model/audio/model_spec_modelscope.json +27 -0
- xinference/model/flexible/launchers/__init__.py +1 -0
- xinference/model/flexible/launchers/image_process_launcher.py +70 -0
- xinference/model/image/model_spec.json +7 -0
- xinference/model/image/stable_diffusion/core.py +6 -1
- xinference/model/llm/llm_family.json +802 -82
- xinference/model/llm/llm_family_csghub.json +39 -0
- xinference/model/llm/llm_family_modelscope.json +295 -47
- xinference/model/llm/pytorch/chatglm.py +243 -5
- xinference/model/llm/pytorch/cogvlm2.py +1 -1
- xinference/model/llm/utils.py +78 -1
- xinference/model/llm/vllm/core.py +8 -0
- xinference/thirdparty/cosyvoice/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/bin/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +114 -0
- xinference/thirdparty/cosyvoice/bin/train.py +136 -0
- xinference/thirdparty/cosyvoice/cli/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +83 -0
- xinference/thirdparty/cosyvoice/cli/frontend.py +168 -0
- xinference/thirdparty/cosyvoice/cli/model.py +60 -0
- xinference/thirdparty/cosyvoice/dataset/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/dataset/dataset.py +160 -0
- xinference/thirdparty/cosyvoice/dataset/processor.py +369 -0
- xinference/thirdparty/cosyvoice/flow/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/flow/decoder.py +222 -0
- xinference/thirdparty/cosyvoice/flow/flow.py +135 -0
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +138 -0
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +49 -0
- xinference/thirdparty/cosyvoice/hifigan/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +55 -0
- xinference/thirdparty/cosyvoice/hifigan/generator.py +391 -0
- xinference/thirdparty/cosyvoice/llm/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/llm/llm.py +206 -0
- xinference/thirdparty/cosyvoice/transformer/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/transformer/activation.py +84 -0
- xinference/thirdparty/cosyvoice/transformer/attention.py +326 -0
- xinference/thirdparty/cosyvoice/transformer/convolution.py +145 -0
- xinference/thirdparty/cosyvoice/transformer/decoder.py +396 -0
- xinference/thirdparty/cosyvoice/transformer/decoder_layer.py +132 -0
- xinference/thirdparty/cosyvoice/transformer/embedding.py +293 -0
- xinference/thirdparty/cosyvoice/transformer/encoder.py +472 -0
- xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +236 -0
- xinference/thirdparty/cosyvoice/transformer/label_smoothing_loss.py +96 -0
- xinference/thirdparty/cosyvoice/transformer/positionwise_feed_forward.py +115 -0
- xinference/thirdparty/cosyvoice/transformer/subsampling.py +383 -0
- xinference/thirdparty/cosyvoice/utils/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/utils/class_utils.py +70 -0
- xinference/thirdparty/cosyvoice/utils/common.py +103 -0
- xinference/thirdparty/cosyvoice/utils/executor.py +110 -0
- xinference/thirdparty/cosyvoice/utils/file_utils.py +41 -0
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +125 -0
- xinference/thirdparty/cosyvoice/utils/mask.py +227 -0
- xinference/thirdparty/cosyvoice/utils/scheduler.py +739 -0
- xinference/thirdparty/cosyvoice/utils/train_utils.py +289 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.95c1d652.js → main.2ef0cfaf.js} +3 -3
- xinference/web/ui/build/static/js/main.2ef0cfaf.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b6807ecc0c231fea699533518a0eb2a2bf68a081ce00d452be40600dbffa17a7.json +1 -0
- {xinference-0.13.2.dist-info → xinference-0.13.3.dist-info}/METADATA +16 -8
- {xinference-0.13.2.dist-info → xinference-0.13.3.dist-info}/RECORD +76 -32
- xinference/web/ui/build/static/js/main.95c1d652.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +0 -1
- /xinference/web/ui/build/static/js/{main.95c1d652.js.LICENSE.txt → main.2ef0cfaf.js.LICENSE.txt} +0 -0
- {xinference-0.13.2.dist-info → xinference-0.13.3.dist-info}/LICENSE +0 -0
- {xinference-0.13.2.dist-info → xinference-0.13.3.dist-info}/WHEEL +0 -0
- {xinference-0.13.2.dist-info → xinference-0.13.3.dist-info}/entry_points.txt +0 -0
- {xinference-0.13.2.dist-info → xinference-0.13.3.dist-info}/top_level.txt +0 -0
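
The bulk of the new code is the CosyVoice text-to-speech stack vendored under `xinference/thirdparty/cosyvoice/` and registered through the new audio model spec files; the hunks that follow are the expanded diff of `xinference/model/llm/llm_family.json` (+802 −82), which accounts for most of the metadata churn. A minimal client-side sketch of what the CosyVoice addition enables, assuming a server already running on localhost:9997, that the new specs register a model named `CosyVoice-300M-SFT`, and that the `restful_client.py` additions include a `speech()` handle (the model name and that method are inferred from this diff, not confirmed by it):

```python
# Sketch only: "CosyVoice-300M-SFT" and the speech() helper are assumptions
# inferred from the new audio specs and the restful_client.py additions.
from xinference.client import Client

client = Client("http://localhost:9997")
uid = client.launch_model(model_name="CosyVoice-300M-SFT", model_type="audio")
model = client.get_model(uid)
audio = model.speech("Hello from xinference 0.13.3")  # assumed TTS entry point
with open("hello.mp3", "wb") as f:
    f.write(audio)
```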
xinference/model/llm/llm_family.json
@@ -819,7 +819,7 @@
           "none"
         ],
         "model_id": "THUDM/glm-4-9b-chat",
-        "model_revision": "
+        "model_revision": "76f3474a854145aa4a9ed2612fee9bc8d4a8966b"
       },
       {
         "model_format": "ggufv2",
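
Pinning `model_revision` means downloads now resolve to this exact glm-4-9b-chat commit rather than whatever `main` happens to point at. A rough stand-alone equivalent with `huggingface_hub` (hypothetical usage for illustration, not xinference's internal code path):

```python
# Resolve the exact commit pinned above instead of the moving "main" ref.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="THUDM/glm-4-9b-chat",
    revision="76f3474a854145aa4a9ed2612fee9bc8d4a8966b",  # pinned in this diff
)
print(local_dir)
```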
@@ -1652,6 +1652,329 @@
           "none"
         ],
         "model_id": "meta-llama/Meta-Llama-3-70B-Instruct"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Meta-Llama-3-8B-Instruct-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Meta-Llama-3-8B-Instruct-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mlx-community/Meta-Llama-3-8B-Instruct"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-4bit-mlx"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-mlx-unquantized"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "TechxGenus/Meta-Llama-3-8B-Instruct-GPTQ"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "TechxGenus/Meta-Llama-3-70B-Instruct-GPTQ"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "LLAMA3",
+      "system_prompt": "You are a helpful assistant.",
+      "roles": [
+        "user",
+        "assistant"
+      ],
+      "intra_message_sep": "\n\n",
+      "inter_message_sep": "<|eot_id|>",
+      "stop_token_ids": [
+        128001,
+        128009
+      ],
+      "stop": [
+        "<|end_of_text|>",
+        "<|eot_id|>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "llama-3.1",
+    "model_lang": [
+      "en",
+      "de",
+      "fr",
+      "it",
+      "pt",
+      "hi",
+      "es",
+      "th"
+    ],
+    "model_ability": [
+      "generate"
+    ],
+    "model_description": "Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "meta-llama/Meta-Llama-3.1-8B"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_1",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "QuantFactory/Meta-Llama-3.1-8B-GGUF",
+        "model_file_name_template": "Meta-Llama-3.1-8B.{quantization}.gguf"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "meta-llama/Meta-Llama-3.1-70B"
+      }
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "llama-3.1-instruct",
+    "model_lang": [
+      "en",
+      "de",
+      "fr",
+      "it",
+      "pt",
+      "hi",
+      "es",
+      "th"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "The Llama 3.1 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
+    "model_specs": [
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Q3_K_L",
+          "IQ4_XS",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF",
+        "model_file_name_template": "Meta-Llama-3.1-8B-Instruct-{quantization}.gguf"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "IQ2_M",
+          "IQ4_XS",
+          "Q2_K",
+          "Q3_K_S",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "quantization_parts": {
+          "Q5_K_M": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "Q6_K": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ],
+          "Q8_0": [
+            "00001-of-00002",
+            "00002-of-00002"
+          ]
+        },
+        "model_id": "lmstudio-community/Meta-Llama-3.1-70B-Instruct-GGUF",
+        "model_file_name_template": "Meta-Llama-3.1-70B-Instruct-{quantization}.gguf",
+        "model_file_name_split_template": "Meta-Llama-3.1-70B-Instruct-{quantization}-{part}.gguf"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "meta-llama/Meta-Llama-3.1-70B-Instruct"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "hugging-quants/Meta-Llama-3.1-70B-Instruct-GPTQ-INT4"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-bf16"
       }
     ],
     "prompt_style": {
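
With `llama-3.1` and `llama-3.1-instruct` registered, launching them goes through the usual client path. A minimal sketch against the 0.13.x client API, assuming a running server on localhost:9997; the `model_engine` value is an assumption (any engine that serves ggufv2 weights would do):

```python
# Launch one of the llama-3.1-instruct specs added above and chat with it.
from xinference.client import Client

client = Client("http://localhost:9997")
uid = client.launch_model(
    model_name="llama-3.1-instruct",   # new entry in this hunk
    model_engine="llama.cpp",          # assumption: an engine for ggufv2 weights
    model_format="ggufv2",
    model_size_in_billions=8,
    quantization="Q4_K_M",             # one of the quantizations listed above
)
model = client.get_model(uid)
print(model.chat("Why is the sky blue?"))
```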
@@ -3836,50 +4159,331 @@
         "model_revision": "83e9aa141f2e28c82232fea5325f54edf17c43de"
       },
       {
-        "model_format": "gptq",
-        "model_size_in_billions": 7,
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "neuralmagic/Mistral-7B-Instruct-v0.3-GPTQ-4bit"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "solidrust/Mistral-7B-Instruct-v0.3-AWQ"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_S",
+          "Q3_K_M",
+          "Q3_K_L",
+          "Q4_K_S",
+          "Q4_K_M",
+          "Q5_K_S",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "fp16"
+        ],
+        "model_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",
+        "model_file_name_template": "Mistral-7B-Instruct-v0.3.{quantization}.gguf"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "LLAMA2",
+      "system_prompt": "[INST] ",
+      "roles": [
+        "[INST]",
+        "[/INST]"
+      ],
+      "intra_message_sep": " ",
+      "inter_message_sep": "<s>",
+      "stop_token_ids": [
+        2
+      ],
+      "stop": [
+        "</s>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 1024000,
+    "model_name": "mistral-nemo-instruct",
+    "model_lang": [
+      "en",
+      "fr",
+      "de",
+      "es",
+      "it",
+      "pt",
+      "zh",
+      "ru",
+      "ja"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "The Mistral-Nemo-Instruct-2407 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-Nemo-Base-2407",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mistralai/Mistral-Nemo-Instruct-2407",
+        "model_revision": "05b1e4f3e189ec1b5189fb3c973d4cf3369c27af"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
+        "model_revision": "1d85adc9e0fff0b8e4479a037bd75fe1346333ca"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "afrizalha/Mistral-Nemo-Instruct-2407-bnb-8bit",
+        "model_revision": "1d2dacf18a486c745219317d1507441406bc7e25"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "ModelCloud/Mistral-Nemo-Instruct-2407-gptq-4bit"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "casperhansen/mistral-nemo-instruct-2407-awq"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_S",
+          "Q3_K_M",
+          "Q3_K_L",
+          "Q4_K_S",
+          "Q4_K_M",
+          "Q5_K_S",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "fp16"
+        ],
+        "model_id": "MaziyarPanahi/Mistral-Nemo-Instruct-2407-GGUF",
+        "model_file_name_template": "Mistral-Nemo-Instruct-2407.{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-bf16"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 12,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-8bit"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "mistral-nemo",
+      "system_prompt": "",
+      "roles": [
+        "[INST]",
+        "[/INST]"
+      ],
+      "intra_message_sep": "",
+      "inter_message_sep": "</s>",
+      "stop_token_ids": [
+        2
+      ],
+      "stop": [
+        "</s>"
+      ]
+    }
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "mistral-large-instruct",
+    "model_lang": [
+      "en",
+      "fr",
+      "de",
+      "es",
+      "it",
+      "pt",
+      "zh",
+      "ru",
+      "ja",
+      "ko"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Mistral-Large-Instruct-2407 is an advanced dense Large Language Model (LLM) of 123B parameters with state-of-the-art reasoning, knowledge and coding capabilities.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 123,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mistralai/Mistral-Large-Instruct-2407"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 123,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "unsloth/Mistral-Large-Instruct-2407-bnb-4bit"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 123,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "ModelCloud/Mistral-Large-Instruct-2407-gptq-4bit"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 123,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "TechxGenus/Mistral-Large-Instruct-2407-AWQ"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 123,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_S",
+          "Q3_K_M",
+          "Q3_K_L",
+          "Q4_K_S",
+          "Q4_K_M"
+        ],
+        "model_id": "MaziyarPanahi/Mistral-Large-Instruct-2407-GGUF",
+        "model_file_name_template": "Mistral-Large-Instruct-2407.{quantization}.gguf",
+        "model_file_name_split_template": "Mixtral-8x22B-Instruct-v0.1.{quantization}-{part}.gguf",
+        "quantization_parts": {
+          "Q3_K_L": [
+            "00001-of-00007",
+            "00002-of-00007",
+            "00003-of-00007",
+            "00004-of-00007",
+            "00005-of-00007",
+            "00006-of-00007",
+            "00007-of-00007"
+          ],
+          "Q3_K_M": [
+            "00001-of-00007",
+            "00002-of-00007",
+            "00003-of-00007",
+            "00004-of-00007",
+            "00005-of-00007",
+            "00006-of-00007",
+            "00007-of-00007"
+          ],
+          "Q3_K_S": [
+            "00001-of-00007",
+            "00002-of-00007",
+            "00003-of-00007",
+            "00004-of-00007",
+            "00005-of-00007",
+            "00006-of-00007",
+            "00007-of-00007"
+          ],
+          "Q4_K_M": [
+            "00001-of-00007",
+            "00002-of-00007",
+            "00003-of-00007",
+            "00004-of-00007",
+            "00005-of-00007",
+            "00006-of-00007",
+            "00007-of-00007"
+          ],
+          "Q4_K_S": [
+            "00001-of-00007",
+            "00002-of-00007",
+            "00003-of-00007",
+            "00004-of-00007",
+            "00005-of-00007",
+            "00006-of-00007",
+            "00007-of-00007"
+          ]
+        }
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 123,
         "quantizations": [
-          "
+          "none"
         ],
-        "model_id": "
+        "model_id": "mlx-community/Mistral-Large-Instruct-2407-bf16"
       },
       {
-        "model_format": "
-        "model_size_in_billions":
+        "model_format": "mlx",
+        "model_size_in_billions": 123,
         "quantizations": [
-          "
+          "4-bit"
         ],
-        "model_id": "
+        "model_id": "mlx-community/Mistral-Large-Instruct-2407-4bit"
       },
       {
-        "model_format": "
-        "model_size_in_billions":
+        "model_format": "mlx",
+        "model_size_in_billions": 123,
         "quantizations": [
-          "
-          "Q3_K_S",
-          "Q3_K_M",
-          "Q3_K_L",
-          "Q4_K_S",
-          "Q4_K_M",
-          "Q5_K_S",
-          "Q5_K_M",
-          "Q6_K",
-          "Q8_0",
-          "fp16"
+          "8-bit"
         ],
-        "model_id": "
-        "model_file_name_template": "Mistral-7B-Instruct-v0.3.{quantization}.gguf"
+        "model_id": "mlx-community/Mistral-Large-Instruct-2407-8bit"
       }
     ],
     "prompt_style": {
-      "style_name": "
-      "system_prompt": "
+      "style_name": "mistral-nemo",
+      "system_prompt": "",
       "roles": [
         "[INST]",
         "[/INST]"
       ],
-      "intra_message_sep": "
-      "inter_message_sep": "
+      "intra_message_sep": "",
+      "inter_message_sep": "</s>",
       "stop_token_ids": [
         2
       ],
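
Several of the new GGUF specs ship multi-file weights, pairing `quantization_parts` with a `model_file_name_split_template`; note the Mistral-Large spec above still carries a Mixtral-8x22B file stem in its split template, which looks like an upstream copy-paste. Either way, the expansion is plain string substitution; a self-contained sketch using values copied verbatim from the Meta-Llama-3.1-70B-Instruct spec:

```python
# Expand a split-GGUF spec into its per-part file names.
template = "Meta-Llama-3.1-70B-Instruct-{quantization}-{part}.gguf"
quantization_parts = {
    "Q5_K_M": ["00001-of-00002", "00002-of-00002"],
}

for quantization, parts in quantization_parts.items():
    for part in parts:
        print(template.format(quantization=quantization, part=part))
# Meta-Llama-3.1-70B-Instruct-Q5_K_M-00001-of-00002.gguf
# Meta-Llama-3.1-70B-Instruct-Q5_K_M-00002-of-00002.gguf
```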
@@ -3928,6 +4532,24 @@
         ],
         "model_id": "bartowski/Codestral-22B-v0.1-GGUF",
         "model_file_name_template": "Codestral-22B-v0.1-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 22,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Codestral-22B-v0.1-4bit",
+        "model_revision": "544626b38eb1c9524f0fa570ec7b29550c26b78d"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 22,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Codestral-22B-v0.1-8bit",
+        "model_revision": "0399a53970663950d57010e61a2796af524a1588"
       }
     ]
   },
@@ -4668,6 +5290,61 @@
         "model_id": "modelscope/Yi-1.5-34B-Chat-AWQ",
         "model_revision": "26234fea6ac49d456f32f8017289021fb1087a04"
       }
+      ,
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 6,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Yi-1.5-6B-Chat-4bit",
+        "model_revision": "0177c9a12b869d6bc73f772b5a1981a7c966adb6"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 6,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Yi-1.5-6B-Chat-8bit",
+        "model_revision": "7756e65d1bf1e2e6e97aef6bc9484307225f536b"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Yi-1.5-9B-Chat-4bit",
+        "model_revision": "e15f886479c44e7d90f0ac13ace69b2319b71c2f"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 9,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Yi-1.5-9B-Chat-8bit",
+        "model_revision": "c1f742fcf3683edbe2d2c2fd1ad7ac2bb6c5ca36"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Yi-1.5-34B-Chat-4bit",
+        "model_revision": "945e3b306ef37c46ab444fdc857d1f3ea7247374"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 34,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Yi-1.5-34B-Chat-8bit",
+        "model_revision": "3c12761a2c6663f216caab6dff84b0dd29b472ac"
+      }
     ],
     "prompt_style": {
       "style_name": "CHATML",
@@ -5945,6 +6622,24 @@
         ],
         "model_id": "internlm/internlm2_5-7b-chat-gguf",
         "model_file_name_template": "internlm2_5-7b-chat-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/internlm2_5-7b-chat-4bit",
+        "model_revision": "d12097a867721978142a6048399f470a3d18beee"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/internlm2_5-7b-chat-8bit",
+        "model_revision": "0ec94d61d30ab161b49c69f9bf92ec2b9986d234"
       }
     ],
     "prompt_style": {
@@ -7048,6 +7743,15 @@
         "model_id": "CohereForAI/c4ai-command-r-v01",
         "model_revision": "16881ccde1c68bbc7041280e6a66637bc46bfe88"
       },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 35,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "CohereForAI/c4ai-command-r-v01-4bit",
+        "model_revision": "f2e87936a146643c9dd143422dcafb9cb1552611"
+      },
       {
         "model_format": "ggufv2",
         "model_size_in_billions": 35,
@@ -7077,69 +7781,23 @@
         "model_id": "CohereForAI/c4ai-command-r-plus",
         "model_revision": "ba7f1d954c9d1609013677d87e4142ab95c34e62"
       },
-      {
-        "model_format": "gptq",
-        "model_size_in_billions": 104,
-        "quantizations": [
-          "Int4"
-        ],
-        "model_id": "alpindale/c4ai-command-r-plus-GPTQ",
-        "model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
-      }
-    ],
-    "prompt_style": {
-      "style_name": "c4ai-command-r",
-      "system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
-      "roles": [
-        "<|USER_TOKEN|>",
-        "<|CHATBOT_TOKEN|>"
-      ],
-      "intra_message_sep": "",
-      "inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
-      "stop_token_ids": [
-        6,
-        255001
-      ]
-    }
-  },
-  {
-    "version": 1,
-    "context_length": 131072,
-    "model_name": "c4ai-command-r-v01-4bit",
-    "model_lang": [
-      "en",
-      "fr",
-      "de",
-      "es",
-      "it",
-      "pt",
-      "ja",
-      "ko",
-      "zh",
-      "ar"
-    ],
-    "model_ability": [
-      "generate"
-    ],
-    "model_description": "This model is 4bit quantized version of C4AI Command-R using bitsandbytes.",
-    "model_specs": [
       {
         "model_format": "pytorch",
-        "model_size_in_billions":
+        "model_size_in_billions": 104,
         "quantizations": [
-          "
+          "4-bit"
        ],
-        "model_id": "CohereForAI/c4ai-command-r-
-        "model_revision": "
+        "model_id": "CohereForAI/c4ai-command-r-plus-4bit",
+        "model_revision": "bb63b5b7005ecedb30b0cfd0d5953b02a5817f7b"
       },
       {
-        "model_format": "
+        "model_format": "gptq",
         "model_size_in_billions": 104,
         "quantizations": [
-          "
+          "Int4"
         ],
-        "model_id": "
-        "model_revision": "
+        "model_id": "alpindale/c4ai-command-r-plus-GPTQ",
+        "model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
       }
     ],
     "prompt_style": {
@@ -7387,5 +8045,67 @@
         160132
       ]
     }
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "csg-wukong-chat-v0.1",
+    "model_lang": [
+      "en"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "csg-wukong-1B is a 1 billion-parameter small language model(SLM) pretrained on 1T tokens.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 1,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "opencsg/csg-wukong-1B-chat-v0.1",
+        "model_revision": "2443c903d46074af0856e2ba11398dcd01d35536"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 1,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K",
+          "Q3_K_S",
+          "Q3_K_M",
+          "Q3_K_L",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_S",
+          "Q4_K_M",
+          "Q5_0",
+          "Q5_1",
+          "Q5_K_S",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "RichardErkhov/opencsg_-_csg-wukong-1B-chat-v0.1-gguf",
+        "model_file_name_template": "csg-wukong-1B-chat-v0.1.{quantization}.gguf"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "NO_COLON_TWO",
+      "system_prompt": "<|system|>\nYou are a creative super artificial intelligence assistant, possessing all the knowledge of humankind. Your name is csg-wukong, developed by OpenCSG. You need to understand and infer the true intentions of users based on the topics discussed in the chat history, and respond to user questions correctly as required. You enjoy responding to users with accurate and insightful answers. Please pay attention to the appropriate style and format when replying, try to avoid repetitive words and sentences, and keep your responses as concise and profound as possible. You carefully consider the context of the discussion when replying to users. When the user says \"continue,\" please proceed with the continuation of the previous assistant's response.</s>\n",
+      "roles": [
+        "<|user|>\n",
+        "<|assistant|>\n"
+      ],
+      "intra_message_sep": "</s>\n",
+      "inter_message_sep": "</s>\n",
+      "stop_token_ids": [
+        2
+      ],
+      "stop": [
+        "</s>"
+      ]
+    }
   }
 ]