xinference-0.12.3-py3-none-any.whl → xinference-0.13.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference might be problematic.

Files changed (101)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +56 -8
  3. xinference/client/restful/restful_client.py +49 -4
  4. xinference/core/model.py +36 -4
  5. xinference/core/scheduler.py +2 -0
  6. xinference/core/supervisor.py +132 -15
  7. xinference/core/worker.py +239 -53
  8. xinference/deploy/cmdline.py +5 -0
  9. xinference/deploy/utils.py +33 -2
  10. xinference/model/audio/chattts.py +6 -6
  11. xinference/model/audio/core.py +23 -15
  12. xinference/model/core.py +12 -3
  13. xinference/model/embedding/core.py +25 -16
  14. xinference/model/flexible/__init__.py +40 -0
  15. xinference/model/flexible/core.py +228 -0
  16. xinference/model/flexible/launchers/__init__.py +15 -0
  17. xinference/model/flexible/launchers/transformers_launcher.py +63 -0
  18. xinference/model/flexible/utils.py +33 -0
  19. xinference/model/image/core.py +18 -14
  20. xinference/model/image/custom.py +1 -1
  21. xinference/model/llm/__init__.py +5 -2
  22. xinference/model/llm/core.py +3 -2
  23. xinference/model/llm/ggml/llamacpp.py +1 -10
  24. xinference/model/llm/llm_family.json +292 -36
  25. xinference/model/llm/llm_family.py +102 -53
  26. xinference/model/llm/llm_family_modelscope.json +247 -27
  27. xinference/model/llm/mlx/__init__.py +13 -0
  28. xinference/model/llm/mlx/core.py +408 -0
  29. xinference/model/llm/pytorch/chatglm.py +2 -9
  30. xinference/model/llm/pytorch/cogvlm2.py +206 -21
  31. xinference/model/llm/pytorch/core.py +213 -120
  32. xinference/model/llm/pytorch/glm4v.py +171 -15
  33. xinference/model/llm/pytorch/qwen_vl.py +168 -7
  34. xinference/model/llm/pytorch/utils.py +53 -62
  35. xinference/model/llm/utils.py +28 -7
  36. xinference/model/rerank/core.py +29 -25
  37. xinference/thirdparty/deepseek_vl/serve/__init__.py +13 -0
  38. xinference/thirdparty/deepseek_vl/serve/app_deepseek.py +510 -0
  39. xinference/thirdparty/deepseek_vl/serve/app_modules/__init__.py +13 -0
  40. xinference/thirdparty/deepseek_vl/serve/app_modules/gradio_utils.py +94 -0
  41. xinference/thirdparty/deepseek_vl/serve/app_modules/overwrites.py +81 -0
  42. xinference/thirdparty/deepseek_vl/serve/app_modules/presets.py +96 -0
  43. xinference/thirdparty/deepseek_vl/serve/app_modules/utils.py +229 -0
  44. xinference/thirdparty/deepseek_vl/serve/inference.py +170 -0
  45. xinference/types.py +0 -1
  46. xinference/web/ui/build/asset-manifest.json +3 -3
  47. xinference/web/ui/build/index.html +1 -1
  48. xinference/web/ui/build/static/js/main.95c1d652.js +3 -0
  49. xinference/web/ui/build/static/js/main.95c1d652.js.map +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +1 -0
  52. xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +1 -0
  53. xinference/web/ui/node_modules/.cache/babel-loader/2c63090c842376cdd368c3ded88a333ef40d94785747651343040a6f7872a223.json +1 -0
  54. xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +1 -0
  55. xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +1 -0
  56. xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +1 -0
  57. xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +1 -0
  58. xinference/web/ui/node_modules/.cache/babel-loader/70fa8c07463a5fe57c68bf92502910105a8f647371836fe8c3a7408246ca7ba0.json +1 -0
  59. xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +1 -0
  60. xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +1 -0
  61. xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +1 -0
  62. xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +1 -0
  63. xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +1 -0
  64. xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +1 -0
  65. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/METADATA +10 -11
  66. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/RECORD +71 -69
  67. xinference/model/llm/ggml/chatglm.py +0 -457
  68. xinference/thirdparty/ChatTTS/__init__.py +0 -1
  69. xinference/thirdparty/ChatTTS/core.py +0 -200
  70. xinference/thirdparty/ChatTTS/experimental/__init__.py +0 -0
  71. xinference/thirdparty/ChatTTS/experimental/llm.py +0 -40
  72. xinference/thirdparty/ChatTTS/infer/__init__.py +0 -0
  73. xinference/thirdparty/ChatTTS/infer/api.py +0 -125
  74. xinference/thirdparty/ChatTTS/model/__init__.py +0 -0
  75. xinference/thirdparty/ChatTTS/model/dvae.py +0 -155
  76. xinference/thirdparty/ChatTTS/model/gpt.py +0 -265
  77. xinference/thirdparty/ChatTTS/utils/__init__.py +0 -0
  78. xinference/thirdparty/ChatTTS/utils/gpu_utils.py +0 -23
  79. xinference/thirdparty/ChatTTS/utils/infer_utils.py +0 -141
  80. xinference/thirdparty/ChatTTS/utils/io_utils.py +0 -14
  81. xinference/web/ui/build/static/js/main.77dd47c3.js +0 -3
  82. xinference/web/ui/build/static/js/main.77dd47c3.js.map +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/0cd591866aa345566e0b63fb51ff2043e163a770af6fdc2f3bad395d046353e2.json +0 -1
  84. xinference/web/ui/node_modules/.cache/babel-loader/37c1476717199863bbba1530e3513a9368f8f73001b75b4a85c2075956308027.json +0 -1
  85. xinference/web/ui/node_modules/.cache/babel-loader/3da7d55e87882a4af923e187b1351160e34ca102f589086439c15131a227fb6e.json +0 -1
  86. xinference/web/ui/node_modules/.cache/babel-loader/3fa1f69162f9c6dc0f6a6e21b64d49d6b8e6fa8dfa59a82cf829931c5f97d99f.json +0 -1
  87. xinference/web/ui/node_modules/.cache/babel-loader/46edc1fe657dfedb2e673148332bb442c6eb98f09f2592c389209e376510afa5.json +0 -1
  88. xinference/web/ui/node_modules/.cache/babel-loader/62e257ed9016471035fa1a7da57c9e2a4250974ed566b4d1295873d747c68eb2.json +0 -1
  89. xinference/web/ui/node_modules/.cache/babel-loader/72bcecc71c5267250edeb89608859d449b586f13ff9923a5e70e7172976ec403.json +0 -1
  90. xinference/web/ui/node_modules/.cache/babel-loader/82db357f3fd5b32215d747ee593f69ff06c95ad6cde37f71a96c8290aaab64c0.json +0 -1
  91. xinference/web/ui/node_modules/.cache/babel-loader/935efd2867664c58230378fdf2ff1ea85e58d853b7214014e20dfbca8dab7b05.json +0 -1
  92. xinference/web/ui/node_modules/.cache/babel-loader/bc6da27195ec4607bb472bf61f97c928ad4966fa64e4c2247661bedb7400abba.json +0 -1
  93. xinference/web/ui/node_modules/.cache/babel-loader/c2abe75f04ad82fba68f35ed9cbe2e287762c876684fddccccfa73f739489b65.json +0 -1
  94. xinference/web/ui/node_modules/.cache/babel-loader/e606671420d2937102c3c34b4b04056c11736408c1d3347b8cf42dfe61fb394b.json +0 -1
  95. xinference/web/ui/node_modules/.cache/babel-loader/f118f99c22b713c678c1209c4e1dd43fe86e3f6e801a4c0c35d3bbf41fd05fe6.json +0 -1
  96. xinference/web/ui/node_modules/.cache/babel-loader/f51bf63ddaa7afd125ef2254a105789333eecc1c94fdf5157a9b88ef7ad0a5bd.json +0 -1
  97. /xinference/web/ui/build/static/js/{main.77dd47c3.js.LICENSE.txt → main.95c1d652.js.LICENSE.txt} +0 -0
  98. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/LICENSE +0 -0
  99. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/WHEEL +0 -0
  100. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/entry_points.txt +0 -0
  101. {xinference-0.12.3.dist-info → xinference-0.13.1.dist-info}/top_level.txt +0 -0
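
The bulk of the new code is the vendored DeepSeek-VL Gradio serving app, the new flexible-model framework, and the MLX backend. For readers who want to sanity-check a listing like this locally, the sketch below compares two downloaded wheels and prints per-file line counts. It is an illustration only: the wheel filenames are assumed from the versions above (fetch them first, e.g. with `pip download xinference==0.12.3 --no-deps`), counts from difflib.ndiff may differ slightly from the registry's own differ, and binary members are skipped.

    # Minimal sketch: reproduce per-file +/- line counts between two wheels.
    import difflib
    import zipfile

    def wheel_lines(path):
        """Map each text member of the wheel archive to its list of lines."""
        members = {}
        with zipfile.ZipFile(path) as zf:
            for name in zf.namelist():
                try:
                    members[name] = zf.read(name).decode("utf-8").splitlines()
                except UnicodeDecodeError:
                    pass  # binary member (images, compiled assets); skipped here
        return members

    old = wheel_lines("xinference-0.12.3-py3-none-any.whl")
    new = wheel_lines("xinference-0.13.1-py3-none-any.whl")

    for name in sorted(set(old) | set(new)):
        a, b = old.get(name, []), new.get(name, [])
        if a == b:
            continue
        diff = list(difflib.ndiff(a, b))
        added = sum(line.startswith("+ ") for line in diff)
        removed = sum(line.startswith("- ") for line in diff)
        print(f"{name} +{added} -{removed}")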
xinference/thirdparty/deepseek_vl/serve/app_deepseek.py
@@ -0,0 +1,510 @@
+# Copyright (c) 2023-2024 DeepSeek.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+# -*- coding:utf-8 -*-
+
+import base64
+from io import BytesIO
+
+import gradio as gr
+import torch
+from app_modules.gradio_utils import (
+    cancel_outputing,
+    delete_last_conversation,
+    reset_state,
+    reset_textbox,
+    transfer_input,
+    wrap_gen_fn,
+)
+from app_modules.overwrites import reload_javascript
+from app_modules.presets import CONCURRENT_COUNT, description, description_top, title
+from app_modules.utils import configure_logger, is_variable_assigned, strip_stop_words
+
+from ..utils.conversation import SeparatorStyle
+from .inference import convert_conversation_to_prompts, deepseek_generate, load_model
+
+
+def load_models():
+    models = {
+        "DeepSeek-VL 7B": "deepseek-ai/deepseek-vl-7b-chat",
+    }
+
+    for model_name in models:
+        models[model_name] = load_model(models[model_name])
+
+    return models
+
+
+logger = configure_logger()
+models = load_models()
+MODELS = sorted(list(models.keys()))
+
+
+def generate_prompt_with_history(
+    text, image, history, vl_chat_processor, tokenizer, max_length=2048
+):
+    """
+    Generate a prompt with history for the deepseek application.
+
+    Args:
+        text (str): The text prompt.
+        image (str): The image prompt.
+        history (list): List of previous conversation messages.
+        tokenizer: The tokenizer used for encoding the prompt.
+        max_length (int): The maximum length of the prompt.
+
+    Returns:
+        tuple: A tuple containing the generated prompt, image list, conversation, and conversation copy. If the prompt could not be generated within the max_length limit, returns None.
+    """
+
+    sft_format = "deepseek"
+    user_role_ind = 0
+    bot_role_ind = 1
+
+    # Initialize conversation
+    conversation = vl_chat_processor.new_chat_template()
+
+    if history:
+        conversation.messages = history
+
+    if image is not None:
+        if "<image_placeholder>" not in text:
+            text = (
+                "<image_placeholder>" + "\n" + text
+            )  # append the <image_placeholder> in a new line after the text prompt
+        text = (text, image)
+
+    conversation.append_message(conversation.roles[user_role_ind], text)
+    conversation.append_message(conversation.roles[bot_role_ind], "")
+
+    # Create a copy of the conversation to avoid history truncation in the UI
+    conversation_copy = conversation.copy()
+    logger.info("=" * 80)
+    logger.info(get_prompt(conversation))
+
+    rounds = len(conversation.messages) // 2
+
+    for _ in range(rounds):
+        current_prompt = get_prompt(conversation)
+        current_prompt = (
+            current_prompt.replace("</s>", "")
+            if sft_format == "deepseek"
+            else current_prompt
+        )
+
+        if torch.tensor(tokenizer.encode(current_prompt)).size(-1) <= max_length:
+            return conversation_copy
+
+        if len(conversation.messages) % 2 != 0:
+            gr.Error("The messages between user and assistant are not paired.")
+            return
+
+        try:
+            for _ in range(2):  # pop out two messages in a row
+                conversation.messages.pop(0)
+        except IndexError:
+            gr.Error("Input text processing failed, unable to respond in this round.")
+            return None
+
+    gr.Error("Prompt could not be generated within max_length limit.")
+    return None
+
+
+def to_gradio_chatbot(conv):
+    """Convert the conversation to gradio chatbot format."""
+    ret = []
+    for i, (role, msg) in enumerate(conv.messages[conv.offset :]):
+        if i % 2 == 0:
+            if type(msg) is tuple:
+                msg, image = msg
+                if isinstance(image, str):
+                    with open(image, "rb") as f:
+                        data = f.read()
+                    img_b64_str = base64.b64encode(data).decode()
+                    image_str = f'<video src="data:video/mp4;base64,{img_b64_str}" controls width="426" height="240"></video>'
+                    msg = msg.replace("\n".join(["<image_placeholder>"] * 4), image_str)
+                else:
+                    max_hw, min_hw = max(image.size), min(image.size)
+                    aspect_ratio = max_hw / min_hw
+                    max_len, min_len = 800, 400
+                    shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw))
+                    longest_edge = int(shortest_edge * aspect_ratio)
+                    W, H = image.size
+                    if H > W:
+                        H, W = longest_edge, shortest_edge
+                    else:
+                        H, W = shortest_edge, longest_edge
+                    image = image.resize((W, H))
+                    buffered = BytesIO()
+                    image.save(buffered, format="JPEG")
+                    img_b64_str = base64.b64encode(buffered.getvalue()).decode()
+                    img_str = f'<img src="data:image/png;base64,{img_b64_str}" alt="user upload image" />'
+                    msg = msg.replace("<image_placeholder>", img_str)
+            ret.append([msg, None])
+        else:
+            ret[-1][-1] = msg
+    return ret
+
+
+def to_gradio_history(conv):
+    """Convert the conversation to gradio history state."""
+    return conv.messages[conv.offset :]
+
+
+def get_prompt(conv) -> str:
+    """Get the prompt for generation."""
+    system_prompt = conv.system_template.format(system_message=conv.system_message)
+    if conv.sep_style == SeparatorStyle.DeepSeek:
+        seps = [conv.sep, conv.sep2]
+        if system_prompt == "" or system_prompt is None:
+            ret = ""
+        else:
+            ret = system_prompt + seps[0]
+        for i, (role, message) in enumerate(conv.messages):
+            if message:
+                if type(message) is tuple:  # multimodal message
+                    message, _ = message
+                ret += role + ": " + message + seps[i % 2]
+            else:
+                ret += role + ":"
+        return ret
+    else:
+        return conv.get_prompt
+
+
+@wrap_gen_fn
+def predict(
+    text,
+    image,
+    chatbot,
+    history,
+    top_p,
+    temperature,
+    repetition_penalty,
+    max_length_tokens,
+    max_context_length_tokens,
+    model_select_dropdown,
+):
+    """
+    Function to predict the response based on the user's input and selected model.
+
+    Parameters:
+        user_text (str): The input text from the user.
+        user_image (str): The input image from the user.
+        chatbot (str): The chatbot's name.
+        history (str): The history of the chat.
+        top_p (float): The top-p parameter for the model.
+        temperature (float): The temperature parameter for the model.
+        max_length_tokens (int): The maximum length of tokens for the model.
+        max_context_length_tokens (int): The maximum length of context tokens for the model.
+        model_select_dropdown (str): The selected model from the dropdown.
+
+    Returns:
+        generator: A generator that yields the chatbot outputs, history, and status.
+    """
+    print("running the prediction function")
+    try:
+        tokenizer, vl_gpt, vl_chat_processor = models[model_select_dropdown]
+
+        if text == "":
+            yield chatbot, history, "Empty context."
+            return
+    except KeyError:
+        yield [[text, "No Model Found"]], [], "No Model Found"
+        return
+
+    conversation = generate_prompt_with_history(
+        text,
+        image,
+        history,
+        vl_chat_processor,
+        tokenizer,
+        max_length=max_context_length_tokens,
+    )
+    prompts = convert_conversation_to_prompts(conversation)
+
+    stop_words = conversation.stop_str
+    gradio_chatbot_output = to_gradio_chatbot(conversation)
+
+    full_response = ""
+    with torch.no_grad():
+        for x in deepseek_generate(
+            prompts=prompts,
+            vl_gpt=vl_gpt,
+            vl_chat_processor=vl_chat_processor,
+            tokenizer=tokenizer,
+            stop_words=stop_words,
+            max_length=max_length_tokens,
+            temperature=temperature,
+            repetition_penalty=repetition_penalty,
+            top_p=top_p,
+        ):
+            full_response += x
+            response = strip_stop_words(full_response, stop_words)
+            conversation.update_last_message(response)
+            gradio_chatbot_output[-1][1] = response
+            yield gradio_chatbot_output, to_gradio_history(
+                conversation
+            ), "Generating..."
+
+    print("flushed result to gradio")
+    torch.cuda.empty_cache()
+
+    if is_variable_assigned("x"):
+        print(f"{model_select_dropdown}:\n{text}\n{'-' * 80}\n{x}\n{'=' * 80}")
+        print(
+            f"temperature: {temperature}, top_p: {top_p}, repetition_penalty: {repetition_penalty}, max_length_tokens: {max_length_tokens}"
+        )
+
+    yield gradio_chatbot_output, to_gradio_history(conversation), "Generate: Success"
+
+
+def retry(
+    text,
+    image,
+    chatbot,
+    history,
+    top_p,
+    temperature,
+    repetition_penalty,
+    max_length_tokens,
+    max_context_length_tokens,
+    model_select_dropdown,
+):
+    if len(history) == 0:
+        yield (chatbot, history, "Empty context")
+        return
+
+    chatbot.pop()
+    history.pop()
+    text = history.pop()[-1]
+    if type(text) is tuple:
+        text, image = text
+
+    yield from predict(
+        text,
+        image,
+        chatbot,
+        history,
+        top_p,
+        temperature,
+        repetition_penalty,
+        max_length_tokens,
+        max_context_length_tokens,
+        model_select_dropdown,
+    )
+
+
+def build_demo(MODELS):
+    with open("deepseek_vl/serve/assets/custom.css", "r", encoding="utf-8") as f:
+        customCSS = f.read()
+
+    with gr.Blocks(theme=gr.themes.Soft()) as demo:
+        history = gr.State([])
+        input_text = gr.State()
+        input_image = gr.State()
+
+        with gr.Row():
+            gr.HTML(title)
+            status_display = gr.Markdown("Success", elem_id="status_display")
+        gr.Markdown(description_top)
+
+        with gr.Row(equal_height=True):
+            with gr.Column(scale=4):
+                with gr.Row():
+                    chatbot = gr.Chatbot(
+                        elem_id="deepseek_chatbot",
+                        show_share_button=True,
+                        likeable=True,
+                        bubble_full_width=False,
+                        height=600,
+                    )
+                with gr.Row():
+                    with gr.Column(scale=4):
+                        text_box = gr.Textbox(
+                            show_label=False, placeholder="Enter text", container=False
+                        )
+                    with gr.Column(
+                        min_width=70,
+                    ):
+                        submitBtn = gr.Button("Send")
+                    with gr.Column(
+                        min_width=70,
+                    ):
+                        cancelBtn = gr.Button("Stop")
+                with gr.Row():
+                    emptyBtn = gr.Button(
+                        "🧹 New Conversation",
+                    )
+                    retryBtn = gr.Button("🔄 Regenerate")
+                    delLastBtn = gr.Button("🗑️ Remove Last Turn")
+
+            with gr.Column():
+                image_box = gr.Image(type="pil")
+
+                with gr.Tab(label="Parameter Setting") as parameter_row:
+                    top_p = gr.Slider(
+                        minimum=-0,
+                        maximum=1.0,
+                        value=0.95,
+                        step=0.05,
+                        interactive=True,
+                        label="Top-p",
+                    )
+                    temperature = gr.Slider(
+                        minimum=0,
+                        maximum=1.0,
+                        value=0.1,
+                        step=0.1,
+                        interactive=True,
+                        label="Temperature",
+                    )
+                    repetition_penalty = gr.Slider(
+                        minimum=0.0,
+                        maximum=2.0,
+                        value=1.1,
+                        step=0.1,
+                        interactive=True,
+                        label="Repetition penalty",
+                    )
+                    max_length_tokens = gr.Slider(
+                        minimum=0,
+                        maximum=4096,
+                        value=2048,
+                        step=8,
+                        interactive=True,
+                        label="Max Generation Tokens",
+                    )
+                    max_context_length_tokens = gr.Slider(
+                        minimum=0,
+                        maximum=4096,
+                        value=4096,
+                        step=128,
+                        interactive=True,
+                        label="Max History Tokens",
+                    )
+                    model_select_dropdown = gr.Dropdown(
+                        label="Select Models",
+                        choices=MODELS,
+                        multiselect=False,
+                        value=MODELS[0],
+                        interactive=True,
+                    )
+
+        examples_list = [
+            [
+                "deepseek_vl/serve/examples/rap.jpeg",
+                "Can you write me a master rap song that rhymes very well based on this image?",
+            ],
+            [
+                "deepseek_vl/serve/examples/app.png",
+                "What is this app about?",
+            ],
+            [
+                "deepseek_vl/serve/examples/pipeline.png",
+                "Help me write a python code based on the image.",
+            ],
+            [
+                "deepseek_vl/serve/examples/chart.png",
+                "Could you help me to re-draw this picture with python codes?",
+            ],
+            [
+                "deepseek_vl/serve/examples/mirror.png",
+                "How many people are there in the image. Why?",
+            ],
+            [
+                "deepseek_vl/serve/examples/puzzle.png",
+                "Can this 2 pieces combine together?",
+            ],
+        ]
+        gr.Examples(examples=examples_list, inputs=[image_box, text_box])
+        gr.Markdown(description)
+
+        input_widgets = [
+            input_text,
+            input_image,
+            chatbot,
+            history,
+            top_p,
+            temperature,
+            repetition_penalty,
+            max_length_tokens,
+            max_context_length_tokens,
+            model_select_dropdown,
+        ]
+        output_widgets = [chatbot, history, status_display]
+
+        transfer_input_args = dict(
+            fn=transfer_input,
+            inputs=[text_box, image_box],
+            outputs=[input_text, input_image, text_box, image_box, submitBtn],
+            show_progress=True,
+        )
+
+        predict_args = dict(
+            fn=predict,
+            inputs=input_widgets,
+            outputs=output_widgets,
+            show_progress=True,
+        )
+
+        retry_args = dict(
+            fn=retry,
+            inputs=input_widgets,
+            outputs=output_widgets,
+            show_progress=True,
+        )
+
+        reset_args = dict(
+            fn=reset_textbox, inputs=[], outputs=[text_box, status_display]
+        )
+
+        predict_events = [
+            text_box.submit(**transfer_input_args).then(**predict_args),
+            submitBtn.click(**transfer_input_args).then(**predict_args),
+        ]
+
+        emptyBtn.click(reset_state, outputs=output_widgets, show_progress=True)
+        emptyBtn.click(**reset_args)
+        retryBtn.click(**retry_args)
+
+        delLastBtn.click(
+            delete_last_conversation,
+            [chatbot, history],
+            output_widgets,
+            show_progress=True,
+        )
+
+        cancelBtn.click(cancel_outputing, [], [status_display], cancels=predict_events)
+
+    return demo
+
+
+if __name__ == "__main__":
+    demo = build_demo(MODELS)
+    demo.title = "DeepSeek-VL Chatbot"
+
+    reload_javascript()
+    demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
+        share=False,
+        favicon_path="deepseek_vl/serve/assets/favicon.ico",
+        inbrowser=False,
+        server_name="0.0.0.0",
+        server_port=8122,
+    )
xinference/thirdparty/deepseek_vl/serve/app_modules/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2022-2023 XProbe Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
xinference/thirdparty/deepseek_vl/serve/app_modules/gradio_utils.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2023-2024 DeepSeek.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+from functools import wraps
+
+import gradio as gr
+
+
+def wrap_gen_fn(gen_fn):
+    @wraps(gen_fn)
+    def wrapped_gen_fn(prompt, *args, **kwargs):
+        try:
+            yield from gen_fn(prompt, *args, **kwargs)
+        except gr.Error as g_err:
+            raise g_err
+        except Exception as e:
+            raise gr.Error(f"Failed to generate text: {e}") from e
+
+    return wrapped_gen_fn
+
+
+def delete_last_conversation(chatbot, history):
+    if len(history) % 2 != 0:
+        gr.Error("history length is not even")
+        return (
+            chatbot,
+            history,
+            "Delete Done",
+        )
+
+    if len(chatbot) > 0:
+        chatbot.pop()
+
+    if len(history) > 0 and len(history) % 2 == 0:
+        history.pop()
+        history.pop()
+
+    return (
+        chatbot,
+        history,
+        "Delete Done",
+    )
+
+
+def reset_state():
+    return [], [], None, "Reset Done"
+
+
+def reset_textbox():
+    return gr.update(value=""), ""
+
+
+def cancel_outputing():
+    return "Stop Done"
+
+
+def transfer_input(input_text, input_image):
+    print("transferring input text and input image")
+    return (
+        input_text,
+        input_image,
+        gr.update(value=""),
+        gr.update(value=None),
+        gr.Button(visible=True),
+    )
+
+
+class State:
+    interrupted = False
+
+    def interrupt(self):
+        self.interrupted = True
+
+    def recover(self):
+        self.interrupted = False
+
+
+shared_state = State()
xinference/thirdparty/deepseek_vl/serve/app_modules/overwrites.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2023-2024 DeepSeek.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+# the Software, and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+from __future__ import annotations
+
+import logging
+from typing import List, Tuple
+
+from .presets import gr
+from .utils import convert_asis, convert_mdtext, detect_converted_mark
+
+
+def compact_text_chunks(self, prompt, text_chunks: List[str]) -> List[str]:
+    logging.debug("Compacting text chunks...🚀🚀🚀")
+    combined_str = [c.strip() for c in text_chunks if c.strip()]
+    combined_str = [f"[{index+1}] {c}" for index, c in enumerate(combined_str)]
+    combined_str = "\n\n".join(combined_str)
+    # resplit based on self.max_chunk_overlap
+    text_splitter = self.get_text_splitter_given_prompt(prompt, 1, padding=1)
+    return text_splitter.split_text(combined_str)
+
+
+def postprocess(
+    self, y: List[Tuple[str | None, str | None]]
+) -> List[Tuple[str | None, str | None]]:
+    """
+    Parameters:
+        y: List of tuples representing the message and response pairs. Each message and response should be a string, which may be in Markdown format.
+    Returns:
+        List of tuples representing the message and response. Each message and response will be a string of HTML.
+    """
+    if y is None or y == []:
+        return []
+    temp = []
+    for x in y:
+        user, bot = x
+        if not detect_converted_mark(user):
+            user = convert_asis(user)
+        if not detect_converted_mark(bot):
+            bot = convert_mdtext(bot)
+        temp.append((user, bot))
+    return temp
+
+
+with open("deepseek_vl/serve/assets/custom.js", "r", encoding="utf-8") as f, open(
+    "deepseek_vl/serve/assets/Kelpy-Codos.js", "r", encoding="utf-8"
+) as f2:
+    customJS = f.read()
+    kelpyCodos = f2.read()
+
+
+def reload_javascript():
+    print("Reloading javascript...")
+    js = f"<script>{customJS}</script><script>{kelpyCodos}</script>"
+
+    def template_response(*args, **kwargs):
+        res = GradioTemplateResponseOriginal(*args, **kwargs)
+        res.body = res.body.replace(b"</html>", f"{js}</html>".encode("utf8"))
+        res.init_headers()
+        return res
+
+    gr.routes.templates.TemplateResponse = template_response
+
+
+GradioTemplateResponseOriginal = gr.routes.templates.TemplateResponse