xinference 0.12.3__py3-none-any.whl → 0.13.1__py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.

This version of xinference has been flagged as potentially problematic.
Files changed (101)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +56 -8
  3. xinference/client/restful/restful_client.py +49 -4
  4. xinference/core/model.py +36 -4
  5. xinference/core/scheduler.py +2 -0
  6. xinference/core/supervisor.py +132 -15
  7. xinference/core/worker.py +239 -53
  8. xinference/deploy/cmdline.py +5 -0
  9. xinference/deploy/utils.py +33 -2
  10. xinference/model/audio/chattts.py +6 -6
  11. xinference/model/audio/core.py +23 -15
  12. xinference/model/core.py +12 -3
  13. xinference/model/embedding/core.py +25 -16
  14. xinference/model/flexible/__init__.py +40 -0
  15. xinference/model/flexible/core.py +228 -0
  16. xinference/model/flexible/launchers/__init__.py +15 -0
  17. xinference/model/flexible/launchers/transformers_launcher.py +63 -0
  18. xinference/model/flexible/utils.py +33 -0
  19. xinference/model/image/core.py +18 -14
  20. xinference/model/image/custom.py +1 -1
  21. xinference/model/llm/__init__.py +5 -2
  22. xinference/model/llm/core.py +3 -2
  23. xinference/model/llm/ggml/llamacpp.py +1 -10
  24. xinference/model/llm/llm_family.json +292 -36
  25. xinference/model/llm/llm_family.py +102 -53
  26. xinference/model/llm/llm_family_modelscope.json +247 -27
  27. xinference/model/llm/mlx/__init__.py +13 -0
  28. xinference/model/llm/mlx/core.py +408 -0
  29. xinference/model/llm/pytorch/chatglm.py +2 -9
  30. xinference/model/llm/pytorch/cogvlm2.py +206 -21
  31. xinference/model/llm/pytorch/core.py +213 -120
  32. xinference/model/llm/pytorch/glm4v.py +171 -15
  33. xinference/model/llm/pytorch/qwen_vl.py +168 -7
  34. xinference/model/llm/pytorch/utils.py +53 -62
  35. xinference/model/llm/utils.py +28 -7
  36. xinference/model/rerank/core.py +29 -25
  37. xinference/thirdparty/deepseek_vl/serve/__init__.py +13 -0
  38. xinference/thirdparty/deepseek_vl/serve/app_deepseek.py +510 -0
  39. xinference/thirdparty/deepseek_vl/serve/app_modules/__init__.py +13 -0
  40. xinference/thirdparty/deepseek_vl/serve/app_modules/gradio_utils.py +94 -0
  41. xinference/thirdparty/deepseek_vl/serve/app_modules/overwrites.py +81 -0
  42. xinference/thirdparty/deepseek_vl/serve/app_modules/presets.py +96 -0
  43. xinference/thirdparty/deepseek_vl/serve/app_modules/utils.py +229 -0
  44. xinference/thirdparty/deepseek_vl/serve/inference.py +170 -0
  45. xinference/types.py +0 -1
  46. xinference/web/ui/build/asset-manifest.json +3 -3
  47. xinference/web/ui/build/index.html +1 -1
  48. xinference/web/ui/build/static/js/main.95c1d652.js +3 -0
  49. xinference/web/ui/build/static/js/main.95c1d652.js.map +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/2c63090c842376cdd368c3ded88a333ef40d94785747651343040a6f7872a223.json +1 -0
  54. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +1 -0
  59. xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +1 -0
  60. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +1 -0
  61. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +1 -0
  65. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/METADATA +10 -11
  66. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/RECORD +71 -69
  67. xinference/model/llm/ggml/chatglm.py +0 -457
  68. xinference/thirdparty/ChatTTS/__init__.py +0 -1
  69. xinference/thirdparty/ChatTTS/core.py +0 -200
  70. xinference/thirdparty/ChatTTS/experimental/__init__.py +0 -0
  71. xinference/thirdparty/ChatTTS/experimental/llm.py +0 -40
  72. xinference/thirdparty/ChatTTS/infer/__init__.py +0 -0
  73. xinference/thirdparty/ChatTTS/infer/api.py +0 -125
  74. xinference/thirdparty/ChatTTS/model/__init__.py +0 -0
  75. xinference/thirdparty/ChatTTS/model/dvae.py +0 -155
  76. xinference/thirdparty/ChatTTS/model/gpt.py +0 -265
  77. xinference/thirdparty/ChatTTS/utils/__init__.py +0 -0
  78. xinference/thirdparty/ChatTTS/utils/gpu_utils.py +0 -23
  79. xinference/thirdparty/ChatTTS/utils/infer_utils.py +0 -141
  80. xinference/thirdparty/ChatTTS/utils/io_utils.py +0 -14
  81. xinference/web/ui/build/static/js/main.77dd47c3.js +0 -3
  82. xinference/web/ui/build/static/js/main.77dd47c3.js.map +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/0cd591866aa345566e0b63fb51ff2043e163a770af6fdc2f3bad395d046353e2.json +0 -1
  84. xinference/web/ui/node_modules/.cache/babel-loader/37c1476717199863bbba1530e3513a9368f8f73001b75b4a85c2075956308027.json +0 -1
  85. xinference/web/ui/node_modules/.cache/babel-loader/3da7d55e87882a4af923e187b1351160e34ca102f589086439c15131a227fb6e.json +0 -1
  86. xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +0 -1
  87. xinference/web/ui/node_modules/.cache/babel-loader/46edc1fe657dfedb2e673148332bb442c6eb98f09f2592c389209e376510afa5.json +0 -1
  88. xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +0 -1
  89. xinference/web/ui/node_modules/.cache/babel-loader/72bcecc71c5267250edeb89608859d449b586f13ff9923a5e70e7172976ec403.json +0 -1
  90. xinference/web/ui/node_modules/.cache/babel-loader/82db357f3fd5b32215d747ee593f69ff06c95ad6cde37f71a96c8290aaab64c0.json +0 -1
  91. xinference/web/ui/node_modules/.cache/babel-loader/935efd2867664c58230378fdf2ff1ea85e58d853b7214014e20dfbca8dab7b05.json +0 -1
  92. xinference/web/ui/node_modules/.cache/babel-loader/bc6da27195ec4607bb472bf61f97c928ad4966fa64e4c2247661bedb7400abba.json +0 -1
  93. xinference/web/ui/node_modules/.cache/babel-loader/c2abe75f04ad82fba68f35ed9cbe2e287762c876684fddccccfa73f739489b65.json +0 -1
  94. xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +0 -1
  95. xinference/web/ui/node_modules/.cache/babel-loader/f118f99c22b713c678c1209c4e1dd43fe86e3f6e801a4c0c35d3bbf41fd05fe6.json +0 -1
  96. xinference/web/ui/node_modules/.cache/babel-loader/f51bf63ddaa7afd125ef2254a105789333eecc1c94fdf5157a9b88ef7ad0a5bd.json +0 -1
  97. /xinference/web/ui/build/static/js/{main.77dd47c3.js.LICENSE.txt → main.95c1d652.js.LICENSE.txt} +0 -0
  98. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/LICENSE +0 -0
  99. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/WHEEL +0 -0
  100. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/entry_points.txt +0 -0
  101. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/top_level.txt +0 -0
xinference/thirdparty/deepseek_vl/serve/app_modules/presets.py ADDED
@@ -0,0 +1,96 @@
+ # Copyright (c) 2023-2024 DeepSeek.
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a copy of
+ # this software and associated documentation files (the "Software"), to deal in
+ # the Software without restriction, including without limitation the rights to
+ # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ # the Software, and to permit persons to whom the Software is furnished to do so,
+ # subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice shall be included in all
+ # copies or substantial portions of the Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ # -*- coding:utf-8 -*-
+ import gradio as gr
+
+ title = """<h1 align="left" style="min-width:200px; margin-top:0;">Chat with DeepSeek-VL </h1>"""
+ description_top = """"""
+ description = """"""
+ CONCURRENT_COUNT = 10
+
+
+ ALREADY_CONVERTED_MARK = "<!-- ALREADY CONVERTED BY PARSER. -->"
+
+ small_and_beautiful_theme = gr.themes.Soft(
+     primary_hue=gr.themes.Color(
+         c50="#EBFAF2",
+         c100="#CFF3E1",
+         c200="#A8EAC8",
+         c300="#77DEA9",
+         c400="#3FD086",
+         c500="#02C160",
+         c600="#06AE56",
+         c700="#05974E",
+         c800="#057F45",
+         c900="#04673D",
+         c950="#2E5541",
+         name="small_and_beautiful",
+     ),
+     secondary_hue=gr.themes.Color(
+         c50="#576b95",
+         c100="#576b95",
+         c200="#576b95",
+         c300="#576b95",
+         c400="#576b95",
+         c500="#576b95",
+         c600="#576b95",
+         c700="#576b95",
+         c800="#576b95",
+         c900="#576b95",
+         c950="#576b95",
+     ),
+     neutral_hue=gr.themes.Color(
+         name="gray",
+         c50="#f6f7f8",
+         # c100="#f3f4f6",
+         c100="#F2F2F2",
+         c200="#e5e7eb",
+         c300="#d1d5db",
+         c400="#B2B2B2",
+         c500="#808080",
+         c600="#636363",
+         c700="#515151",
+         c800="#393939",
+         # c900="#272727",
+         c900="#2B2B2B",
+         c950="#171717",
+     ),
+     radius_size=gr.themes.sizes.radius_sm,
+ ).set(
+     # button_primary_background_fill="*primary_500",
+     button_primary_background_fill_dark="*primary_600",
+     # button_primary_background_fill_hover="*primary_400",
+     # button_primary_border_color="*primary_500",
+     button_primary_border_color_dark="*primary_600",
+     button_primary_text_color="white",
+     button_primary_text_color_dark="white",
+     button_secondary_background_fill="*neutral_100",
+     button_secondary_background_fill_hover="*neutral_50",
+     button_secondary_background_fill_dark="*neutral_900",
+     button_secondary_text_color="*neutral_800",
+     button_secondary_text_color_dark="white",
+     # background_fill_primary="#F7F7F7",
+     # background_fill_primary_dark="#1F1F1F",
+     # block_title_text_color="*primary_500",
+     block_title_background_fill_dark="*primary_900",
+     block_label_background_fill_dark="*primary_900",
+     input_background_fill="#F6F6F6",
+     # chatbot_code_background_color_dark="*neutral_950",
+ )
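For orientation, here is a minimal sketch of how the theme and constants defined above might be consumed by a Gradio app; the layout below is illustrative and is not part of this diff (the actual wiring lives in app_deepseek.py, listed among the changed files).

```python
# Hypothetical consumer of presets.py; the Blocks layout is illustrative only.
import gradio as gr

from xinference.thirdparty.deepseek_vl.serve.app_modules.presets import (
    small_and_beautiful_theme,
    title,
)

with gr.Blocks(theme=small_and_beautiful_theme) as demo:
    gr.HTML(title)           # renders the "Chat with DeepSeek-VL" heading
    chatbot = gr.Chatbot()   # chat history; replies are converted to HTML upstream

if __name__ == "__main__":
    demo.queue().launch()    # CONCURRENT_COUNT would typically cap this queue
```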
xinference/thirdparty/deepseek_vl/serve/app_modules/utils.py ADDED
@@ -0,0 +1,229 @@
+ # Copyright (c) 2023-2024 DeepSeek.
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a copy of
+ # this software and associated documentation files (the "Software"), to deal in
+ # the Software without restriction, including without limitation the rights to
+ # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ # the Software, and to permit persons to whom the Software is furnished to do so,
+ # subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice shall be included in all
+ # copies or substantial portions of the Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ # -*- coding:utf-8 -*-
+ from __future__ import annotations
+
+ import html
+ import logging
+ import os
+ import re
+ import time
+
+ import mdtex2html
+ from markdown import markdown
+ from pygments import highlight
+ from pygments.formatters import HtmlFormatter
+ from pygments.lexers import ClassNotFound, get_lexer_by_name, guess_lexer
+
+ from .presets import ALREADY_CONVERTED_MARK
+
+ logger = logging.getLogger("gradio_logger")
+
+
+ def configure_logger():
+     logger = logging.getLogger("gradio_logger")
+     logger.setLevel(logging.DEBUG)
+
+     timestr = time.strftime("%Y%m%d-%H%M%S")
+     os.makedirs("deepseek_vl/serve/logs", exist_ok=True)
+     file_handler = logging.FileHandler(
+         f"deepseek_vl/serve/logs/{timestr}_gradio_log.log"
+     )
+     console_handler = logging.StreamHandler()
+
+     formatter = logging.Formatter(
+         "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+     )
+     console_handler.setFormatter(formatter)
+     file_handler.setFormatter(formatter)
+
+     console_handler.setLevel(logging.INFO)
+     file_handler.setLevel(logging.INFO)
+
+     logger.addHandler(console_handler)
+     logger.addHandler(file_handler)
+
+     return logger
+
+
+ def strip_stop_words(x, stop_words):
+     for w in stop_words:
+         if w in x:
+             return x[: x.index(w)].strip()
+     return x.strip()
+
+
+ def format_output(history, text, x):
+     updated_history = history + [[text, x]]
+     a = [[y[0], convert_to_markdown(y[1])] for y in updated_history]
+     return a, updated_history
+
+
+ def markdown_to_html_with_syntax_highlight(md_str):  # deprecated
+     def replacer(match):
+         lang = match.group(1) or "text"
+         code = match.group(2)
+
+         try:
+             lexer = get_lexer_by_name(lang, stripall=True)
+         except ValueError:
+             lexer = get_lexer_by_name("text", stripall=True)
+
+         formatter = HtmlFormatter()
+         highlighted_code = highlight(code, lexer, formatter)
+
+         return f'<pre><code class="{lang}">{highlighted_code}</code></pre>'
+
+     code_block_pattern = r"```(\w+)?\n([\s\S]+?)\n```"
+     md_str = re.sub(code_block_pattern, replacer, md_str, flags=re.MULTILINE)
+
+     html_str = markdown(md_str)
+     return html_str
+
+
+ def normalize_markdown(md_text: str) -> str:  # deprecated
+     lines = md_text.split("\n")
+     normalized_lines = []
+     inside_list = False
+
+     for i, line in enumerate(lines):
+         if re.match(r"^(\d+\.|-|\*|\+)\s", line.strip()):
+             if not inside_list and i > 0 and lines[i - 1].strip() != "":
+                 normalized_lines.append("")
+             inside_list = True
+             normalized_lines.append(line)
+         elif inside_list and line.strip() == "":
+             if i < len(lines) - 1 and not re.match(
+                 r"^(\d+\.|-|\*|\+)\s", lines[i + 1].strip()
+             ):
+                 normalized_lines.append(line)
+             continue
+         else:
+             inside_list = False
+             normalized_lines.append(line)
+
+     return "\n".join(normalized_lines)
+
+
+ def convert_mdtext(md_text):
+     code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
+     inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
+     code_blocks = code_block_pattern.findall(md_text)
+     non_code_parts = code_block_pattern.split(md_text)[::2]
+
+     result = []
+     for non_code, code in zip(non_code_parts, code_blocks + [""]):
+         if non_code.strip():
+             non_code = normalize_markdown(non_code)
+             if inline_code_pattern.search(non_code):
+                 result.append(markdown(non_code, extensions=["tables"]))
+             else:
+                 result.append(mdtex2html.convert(non_code, extensions=["tables"]))
+         if code.strip():
+             code = f"\n```{code}\n\n```"
+             code = markdown_to_html_with_syntax_highlight(code)
+             result.append(code)
+     result = "".join(result)
+     result += ALREADY_CONVERTED_MARK
+     return result
+
+
+ def convert_asis(userinput):
+     return f'<p style="white-space:pre-wrap;">{html.escape(userinput)}</p>{ALREADY_CONVERTED_MARK}'
+
+
+ def is_stop_word_or_prefix(s: str, stop_words: list) -> bool:
+     return any(s.endswith(stop_word) for stop_word in stop_words)
+
+
+ def detect_converted_mark(userinput):
+     return bool(userinput.endswith(ALREADY_CONVERTED_MARK))
+
+
+ def detect_language(code):
+     first_line = "" if code.startswith("\n") else code.strip().split("\n", 1)[0]
+     language = first_line.lower() if first_line else ""
+     code_without_language = code[len(first_line) :].lstrip() if first_line else code
+     return language, code_without_language
+
+
+ def convert_to_markdown(text):
+     text = text.replace("$", "&#36;")
+     text = text.replace("\r\n", "\n")
+
+     def replace_leading_tabs_and_spaces(line):
+         new_line = []
+
+         for char in line:
+             if char == "\t":
+                 new_line.append("&#9;")
+             elif char == " ":
+                 new_line.append("&nbsp;")
+             else:
+                 break
+         return "".join(new_line) + line[len(new_line) :]
+
+     markdown_text = ""
+     lines = text.split("\n")
+     in_code_block = False
+
+     for line in lines:
+         if in_code_block is False and line.startswith("```"):
+             in_code_block = True
+             markdown_text += f"{line}\n"
+         elif in_code_block is True and line.startswith("```"):
+             in_code_block = False
+             markdown_text += f"{line}\n"
+         elif in_code_block:
+             markdown_text += f"{line}\n"
+         else:
+             line = replace_leading_tabs_and_spaces(line)
+             line = re.sub(r"^(#)", r"\\\1", line)
+             markdown_text += f"{line} \n"
+
+     return markdown_text
+
+
+ def add_language_tag(text):
+     def detect_language(code_block):
+         try:
+             lexer = guess_lexer(code_block)
+             return lexer.name.lower()
+         except ClassNotFound:
+             return ""
+
+     code_block_pattern = re.compile(r"(```)(\w*\n[^`]+```)", re.MULTILINE)
+
+     def replacement(match):
+         code_block = match.group(2)
+         if match.group(2).startswith("\n"):
+             language = detect_language(code_block)
+             return (
+                 f"```{language}{code_block}```" if language else f"```\n{code_block}```"
+             )
+         else:
+             return match.group(1) + code_block + "```"
+
+     text2 = code_block_pattern.sub(replacement, text)
+     return text2
+
+
+ def is_variable_assigned(var_name: str) -> bool:
+     return var_name in locals()
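As a quick illustration of two of the helpers above: strip_stop_words truncates generated text at the first stop word, and convert_to_markdown escapes a raw model reply for the chat UI. The sample strings and stop token below are made up and are not taken from the diff.

```python
# Illustrative only; "<|eot|>" is a placeholder stop token, not a real one.
from xinference.thirdparty.deepseek_vl.serve.app_modules.utils import (
    convert_to_markdown,
    strip_stop_words,
)

reply = "The image shows a cat on a sofa.<|eot|>ignored trailing text"
clean = strip_stop_words(reply, stop_words=["<|eot|>"])
print(clean)  # -> "The image shows a cat on a sofa."

# Leading '#' is escaped so headings in model output don't break the chat layout.
print(convert_to_markdown("# not a heading\nplain line"))
```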
xinference/thirdparty/deepseek_vl/serve/inference.py ADDED
@@ -0,0 +1,170 @@
+ # Copyright (c) 2023-2024 DeepSeek.
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining a copy of
+ # this software and associated documentation files (the "Software"), to deal in
+ # the Software without restriction, including without limitation the rights to
+ # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ # the Software, and to permit persons to whom the Software is furnished to do so,
+ # subject to the following conditions:
+ #
+ # The above copyright notice and this permission notice shall be included in all
+ # copies or substantial portions of the Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ from threading import Thread
+ from typing import List
+
+ import torch
+ import transformers
+ from transformers import (
+     AutoModelForCausalLM,
+     StoppingCriteria,
+     StoppingCriteriaList,
+     TextIteratorStreamer,
+ )
+
+ from ..models import MultiModalityCausalLM, VLChatProcessor
+ from ..utils.conversation import Conversation
+
+
+ def load_model(model_path):
+     vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
+     tokenizer = vl_chat_processor.tokenizer
+     vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
+         model_path, trust_remote_code=True
+     )
+     vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()
+     return tokenizer, vl_gpt, vl_chat_processor
+
+
+ def convert_conversation_to_prompts(conversation: Conversation):
+     prompts = []
+     messages = conversation.messages
+
+     for i in range(0, len(messages), 2):
+         prompt = {
+             "role": messages[i][0],
+             "content": (
+                 messages[i][1][0]
+                 if isinstance(messages[i][1], tuple)
+                 else messages[i][1]
+             ),
+             "images": [messages[i][1][1]] if isinstance(messages[i][1], tuple) else [],
+         }
+         response = {"role": messages[i + 1][0], "content": messages[i + 1][1]}
+         prompts.extend([prompt, response])
+
+     return prompts
+
+
+ class StoppingCriteriaSub(StoppingCriteria):
+     def __init__(self, stops=[], encounters=1):
+         super().__init__()
+         self.stops = [stop.to("cuda") for stop in stops]
+
+     def __call__(
+         self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
+     ):
+         for stop in self.stops:
+             if input_ids.shape[-1] < len(stop):
+                 continue
+             if torch.all((stop == input_ids[0][-len(stop) :])).item():
+                 return True
+
+         return False
+
+
+ @torch.inference_mode()
+ def deepseek_generate(
+     prompts: list,
+     vl_gpt: torch.nn.Module,
+     vl_chat_processor,
+     tokenizer: transformers.PreTrainedTokenizer,
+     stop_words: list,
+     max_length: int = 256,
+     temperature: float = 1.0,
+     top_p: float = 1.0,
+     repetition_penalty=1.1,
+ ):
+     prompts = prompts
+     pil_images = list()
+     for message in prompts:
+         if "images" not in message:
+             continue
+         for pil_img in message["images"]:
+             pil_images.append(pil_img)
+
+     prepare_inputs = vl_chat_processor(
+         conversations=prompts, images=pil_images, force_batchify=True
+     ).to(vl_gpt.device)
+
+     return generate(
+         vl_gpt,
+         tokenizer,
+         prepare_inputs,
+         max_length,
+         temperature,
+         repetition_penalty,
+         top_p,
+         stop_words,
+     )
+
+
+ @torch.inference_mode()
+ def generate(
+     vl_gpt,
+     tokenizer,
+     prepare_inputs,
+     max_gen_len: int = 256,
+     temperature: float = 0,
+     repetition_penalty=1.1,
+     top_p: float = 0.95,
+     stop_words: List[str] = [],
+ ):
+     """Stream the text output from the multimodality model with prompt and image inputs."""
+     inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)
+
+     streamer = TextIteratorStreamer(tokenizer)
+
+     stop_words_ids = [
+         torch.tensor(tokenizer.encode(stop_word)) for stop_word in stop_words
+     ]
+     stopping_criteria = StoppingCriteriaList(
+         [StoppingCriteriaSub(stops=stop_words_ids)]
+     )
+
+     generation_config = dict(
+         inputs_embeds=inputs_embeds,
+         attention_mask=prepare_inputs.attention_mask,
+         pad_token_id=tokenizer.eos_token_id,
+         bos_token_id=tokenizer.bos_token_id,
+         eos_token_id=tokenizer.eos_token_id,
+         max_new_tokens=max_gen_len,
+         do_sample=True,
+         use_cache=True,
+         streamer=streamer,
+         stopping_criteria=stopping_criteria,
+     )
+
+     if temperature > 0:
+         generation_config.update(
+             {
+                 "do_sample": True,
+                 "top_p": top_p,
+                 "temperature": temperature,
+                 "repetition_penalty": repetition_penalty,
+             }
+         )
+     else:
+         generation_config["do_sample"] = False
+
+     thread = Thread(target=vl_gpt.language_model.generate, kwargs=generation_config)
+     thread.start()
+
+     yield from streamer
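To make the control flow above concrete, here is a minimal sketch of how deepseek_generate might be driven end to end: load_model is called once, the conversation is expressed as the prompt dictionaries the processor expects, and the TextIteratorStreamer is consumed as an iterator. The model path, role names, and stop words are placeholders, and a CUDA device is assumed (load_model calls .cuda()); none of this is part of the diff.

```python
# Minimal usage sketch; paths, roles, and stop words are placeholders.
from xinference.thirdparty.deepseek_vl.serve.inference import deepseek_generate, load_model

tokenizer, vl_gpt, vl_chat_processor = load_model("deepseek-ai/deepseek-vl-1.3b-chat")

prompts = [
    {"role": "User", "content": "Describe the image in one sentence.", "images": []},
    {"role": "Assistant", "content": ""},
]

answer = ""
for piece in deepseek_generate(
    prompts=prompts,
    vl_gpt=vl_gpt,
    vl_chat_processor=vl_chat_processor,
    tokenizer=tokenizer,
    stop_words=["User:"],  # placeholder stop word
    max_length=128,
):
    answer += piece  # the streamer yields decoded text chunks as they are generated
print(answer)
```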
xinference/types.py CHANGED
@@ -285,7 +285,6 @@ class LlamaCppModelConfig(TypedDict, total=False):
     vocab_only: bool
     use_mmap: bool
     use_mlock: bool
-    embedding: bool
     n_threads: Optional[int]
     n_batch: int
     last_n_tokens_size: int
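The only change to xinference/types.py is the removal of the embedding field from LlamaCppModelConfig. Because the TypedDict is declared with total=False, callers can simply stop passing the key; a sketch of a post-change config, using only fields visible in the hunk above (values are illustrative):

```python
from xinference.types import LlamaCppModelConfig

model_config: LlamaCppModelConfig = {
    "use_mmap": True,
    "use_mlock": False,
    "n_batch": 512,
    # "embedding": True,  # removed from LlamaCppModelConfig in 0.13.1
}
```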
xinference/web/ui/build/asset-manifest.json CHANGED
@@ -1,14 +1,14 @@
  {
    "files": {
      "main.css": "./static/css/main.4bafd904.css",
-     "main.js": "./static/js/main.77dd47c3.js",
+     "main.js": "./static/js/main.95c1d652.js",
      "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
      "index.html": "./index.html",
      "main.4bafd904.css.map": "./static/css/main.4bafd904.css.map",
-     "main.77dd47c3.js.map": "./static/js/main.77dd47c3.js.map"
+     "main.95c1d652.js.map": "./static/js/main.95c1d652.js.map"
    },
    "entrypoints": [
      "static/css/main.4bafd904.css",
-     "static/js/main.77dd47c3.js"
+     "static/js/main.95c1d652.js"
    ]
  }
xinference/web/ui/build/index.html CHANGED
@@ -1 +1 @@
- <!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.77dd47c3.js"></script><link href="./static/css/main.4bafd904.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+ <!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.95c1d652.js"></script><link href="./static/css/main.4bafd904.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>