xinference 1.5.0.post2__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +107 -11
- xinference/client/restful/restful_client.py +51 -11
- xinference/constants.py +5 -1
- xinference/core/media_interface.py +758 -0
- xinference/core/model.py +49 -9
- xinference/core/supervisor.py +1 -1
- xinference/core/utils.py +1 -1
- xinference/core/worker.py +33 -39
- xinference/deploy/cmdline.py +17 -0
- xinference/deploy/utils.py +0 -3
- xinference/model/audio/__init__.py +16 -27
- xinference/model/audio/core.py +2 -1
- xinference/model/audio/cosyvoice.py +4 -2
- xinference/model/audio/model_spec.json +63 -46
- xinference/model/audio/model_spec_modelscope.json +31 -14
- xinference/model/embedding/__init__.py +16 -24
- xinference/model/image/__init__.py +15 -25
- xinference/model/llm/__init__.py +40 -115
- xinference/model/llm/core.py +29 -6
- xinference/model/llm/llama_cpp/core.py +30 -347
- xinference/model/llm/llm_family.json +1674 -2203
- xinference/model/llm/llm_family.py +71 -7
- xinference/model/llm/llm_family_csghub.json +0 -32
- xinference/model/llm/llm_family_modelscope.json +1838 -2016
- xinference/model/llm/llm_family_openmind_hub.json +19 -325
- xinference/model/llm/lmdeploy/core.py +7 -2
- xinference/model/llm/mlx/core.py +23 -7
- xinference/model/llm/reasoning_parser.py +281 -5
- xinference/model/llm/sglang/core.py +39 -11
- xinference/model/llm/transformers/chatglm.py +9 -2
- xinference/model/llm/transformers/cogagent.py +10 -12
- xinference/model/llm/transformers/cogvlm2.py +6 -3
- xinference/model/llm/transformers/cogvlm2_video.py +3 -6
- xinference/model/llm/transformers/core.py +58 -60
- xinference/model/llm/transformers/deepseek_v2.py +4 -2
- xinference/model/llm/transformers/deepseek_vl.py +10 -4
- xinference/model/llm/transformers/deepseek_vl2.py +9 -4
- xinference/model/llm/transformers/gemma3.py +4 -5
- xinference/model/llm/transformers/glm4v.py +3 -21
- xinference/model/llm/transformers/glm_edge_v.py +3 -20
- xinference/model/llm/transformers/intern_vl.py +3 -6
- xinference/model/llm/transformers/internlm2.py +1 -1
- xinference/model/llm/transformers/minicpmv25.py +4 -2
- xinference/model/llm/transformers/minicpmv26.py +5 -3
- xinference/model/llm/transformers/omnilmm.py +1 -1
- xinference/model/llm/transformers/opt.py +1 -1
- xinference/model/llm/transformers/ovis2.py +302 -0
- xinference/model/llm/transformers/qwen-omni.py +8 -1
- xinference/model/llm/transformers/qwen2_audio.py +3 -1
- xinference/model/llm/transformers/qwen2_vl.py +5 -1
- xinference/model/llm/transformers/qwen_vl.py +5 -2
- xinference/model/llm/utils.py +96 -45
- xinference/model/llm/vllm/core.py +108 -24
- xinference/model/llm/vllm/distributed_executor.py +8 -7
- xinference/model/llm/vllm/xavier/allocator.py +1 -1
- xinference/model/llm/vllm/xavier/block_manager.py +1 -1
- xinference/model/llm/vllm/xavier/block_tracker.py +3 -3
- xinference/model/llm/vllm/xavier/executor.py +1 -1
- xinference/model/llm/vllm/xavier/test/test_xavier.py +2 -11
- xinference/model/rerank/__init__.py +13 -24
- xinference/model/video/__init__.py +15 -25
- xinference/model/video/core.py +3 -3
- xinference/model/video/diffusers.py +157 -13
- xinference/model/video/model_spec.json +100 -0
- xinference/model/video/model_spec_modelscope.json +104 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
- xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
- xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
- xinference/thirdparty/cosyvoice/bin/train.py +7 -2
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
- xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
- xinference/thirdparty/cosyvoice/cli/model.py +140 -155
- xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
- xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
- xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
- xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
- xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
- xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
- xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
- xinference/thirdparty/cosyvoice/utils/common.py +1 -1
- xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
- xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
- xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
- xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
- xinference/types.py +2 -71
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.0f6523be.css → main.337afe76.css} +2 -2
- xinference/web/ui/build/static/css/main.337afe76.css.map +1 -0
- xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
- xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6798e126f3bc5f95a4c16a9c2ad52ffe77970c62406d83e20604dfda7ffd2247.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b617f7d21a95045fc57b26a9373551740f1978a826134cbf705c3a1bf8714a93.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c1506cb142151366074975f30fa1ff9cd6e5e978b62a4b074dfc16fe08d70d75.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c5c7c2cd1b863ce41adff2c4737bba06eef3a1acf28288cb83d992060f6b8923.json +1 -0
- xinference/web/ui/src/locales/en.json +7 -4
- xinference/web/ui/src/locales/zh.json +7 -4
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/METADATA +56 -36
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/RECORD +120 -121
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/WHEEL +1 -1
- xinference/core/image_interface.py +0 -377
- xinference/model/llm/transformers/compression.py +0 -258
- xinference/model/llm/transformers/yi_vl.py +0 -239
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
- xinference/web/ui/build/static/css/main.0f6523be.css.map +0 -1
- xinference/web/ui/build/static/js/main.4b67a723.js +0 -3
- xinference/web/ui/build/static/js/main.4b67a723.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/51709f5d3e53bcf19e613662ef9b91fb9174942c5518987a248348dd4e1e0e02.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/b8551e9775a01b28ae674125c688febe763732ea969ae344512e64ea01bf632e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e4ba658c6b3b0490910acdae0c535a892257efb61539a24adf8038fc653bd22f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f199e8173f6409a5802ed44acb95f218388131136504b2e9132129e150c92f9a.json +0 -1
- /xinference/web/ui/build/static/js/{main.4b67a723.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.5.0.post2.dist-info → xinference-1.6.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/utils.py
CHANGED

@@ -42,6 +42,7 @@ from ...types import (
     ChatCompletion,
     ChatCompletionChoice,
     ChatCompletionChunk,
+    ChatCompletionChunkChoice,
     ChatCompletionChunkDelta,
     ChatCompletionMessage,
     Completion,
@@ -68,6 +69,11 @@ QWEN_TOOL_CALL_FAMILY = [
     "qwen2-moe-instruct",
     "qwen2.5-instruct",
     "qwen2.5-coder-instruct",
+    "XiYanSQL-QwenCoder-2504",
+    "QwQ-32B",
+    "qwen3",
+    "HuatuoGPT-o1-Qwen2.5",
+    "DianJin-R1",
 ]
 
 GLM4_TOOL_CALL_FAMILY = [
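The families above gate tool-call handling: when `tools` accompany a chat request for a model in QWEN_TOOL_CALL_FAMILY (which now also covers qwen3, QwQ-32B and the domain-tuned Qwen variants), the Qwen-style tool-call prompt and parser are used. A minimal sketch of exercising this through the OpenAI-compatible endpoint follows; the endpoint URL and model name are placeholder assumptions rather than values from this diff.

    # Assumes a running Xinference server with a QWEN_TOOL_CALL_FAMILY model
    # already launched; "http://localhost:9997/v1" and "qwen3" are placeholders.
    import openai

    client = openai.OpenAI(api_key="not-used", base_url="http://localhost:9997/v1")
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Look up the current weather for a city.",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
    ]
    resp = client.chat.completions.create(
        model="qwen3",
        messages=[{"role": "user", "content": "What is the weather in Berlin?"}],
        tools=tools,
    )
    print(resp.choices[0].message.tool_calls)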
@@ -77,6 +83,7 @@ GLM4_TOOL_CALL_FAMILY = [
 
 LLAMA3_TOOL_CALL_FAMILY = [
     "llama-3.1-instruct",
+    "HuatuoGPT-o1-LLaMA-3.1",
 ]
 
 DEEPSEEK_TOOL_CALL_FAMILY = [
@@ -143,6 +150,7 @@ class ChatModelMixin:
                 add_generation_prompt=True,
                 **kwargs,
             )
+            logger.debug("Prompt: %s", full_context)
             return full_context
         except Exception as e:
             logger.warning(
@@ -154,6 +162,36 @@ class ChatModelMixin:
         # Compilation function uses a cache to avoid recompiling the same template
         return self._build_from_raw_template(messages, chat_template, **kwargs)
 
+    @staticmethod
+    def _get_chat_template_kwargs_from_generate_config(
+        generate_config: Optional[Union[dict, Any]],
+        reasoning_parser: Optional[ReasoningParser] = None,
+    ) -> Optional[dict]:
+        if reasoning_parser and not reasoning_parser.enable_thinking:
+            # hybrid model like qwen3,
+            # disabled thinking
+            return {"enable_thinking": False}
+        if not generate_config:
+            return None
+        if "chat_template_kwargs" in generate_config:
+            kwargs = generate_config["chat_template_kwargs"]
+            if isinstance(kwargs, str):
+                try:
+                    return json.loads(kwargs)
+                except json.JSONDecodeError:
+                    raise TypeError(
+                        f"`chat_template_kwargs` should be json parsable, "
+                        f"got: {kwargs}"
+                    )
+            elif isinstance(kwargs, dict):
+                return kwargs
+            else:
+                raise TypeError(
+                    f"`chat_template_kwargs` but be a JSON parsable str "
+                    f"or dict, got: {kwargs}"
+                )
+        return None
+
     @staticmethod
     def convert_messages_with_content_list_to_str_conversion(
         messages: List[Dict],
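The new `_get_chat_template_kwargs_from_generate_config` helper lets callers steer the chat template from `generate_config`: a `chat_template_kwargs` entry may be a dict or a JSON string (anything else raises TypeError), and when a hybrid reasoning model has thinking disabled the helper forces `{"enable_thinking": False}`. A rough sketch of how such a request could be issued from the Python client; the endpoint and model UID below are placeholders.

    # Assumes a qwen3-style hybrid model is already launched; placeholder endpoint/UID.
    from xinference.client import RESTfulClient

    client = RESTfulClient("http://localhost:9997")
    model = client.get_model("qwen3")

    # chat_template_kwargs passed as a dict ...
    model.chat(
        messages=[{"role": "user", "content": "Summarize what a KV cache is."}],
        generate_config={"chat_template_kwargs": {"enable_thinking": False}},
    )

    # ... or as a JSON string; both forms are accepted by the helper above.
    model.chat(
        messages=[{"role": "user", "content": "Summarize what a KV cache is."}],
        generate_config={"chat_template_kwargs": '{"enable_thinking": true}'},
    )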
@@ -257,7 +295,7 @@
             and "delta" in choices[0]
         ):
             if choices[0]["finish_reason"] is None:
-                if reasoning_parser
+                if reasoning_parser and reasoning_parser.check_content_parser():
                     # process parsing reasoning content
                     assert previous_texts is not None
                     delta = choices[0]["delta"]  # type: ignore
@@ -274,7 +312,7 @@
                 delta = choices[0]["delta"]  # type: ignore
                 if "content" not in delta:
                     delta["content"] = ""  # type: ignore
-                if reasoning_parser
+                if reasoning_parser and reasoning_parser.check_content_parser():
                     delta["reasoning_content"] = None  # type: ignore
             # Already a ChatCompletionChunk, we don't need to convert chunk.
             return cast(ChatCompletionChunk, chunk)
@@ -283,7 +321,7 @@
         for i, choice in enumerate(choices):  # type: ignore
             delta = ChatCompletionChunkDelta()
             if "text" in choice and choice["finish_reason"] is None:
-                if reasoning_parser
+                if not reasoning_parser or not reasoning_parser.check_content_parser():
                     delta["content"] = choice["text"]
                 else:
                     assert previous_texts is not None
@@ -296,7 +334,7 @@
                     previous_texts[-1] = current_text
             elif "text" in choice and choice["finish_reason"] is not None:
                 delta["content"] = choice["text"]
-                if reasoning_parser
+                if reasoning_parser and reasoning_parser.check_content_parser():
                     delta["reasoning_content"] = None
             elif "tool_calls" in choice:
                 delta["tool_calls"] = choice["tool_calls"]
@@ -310,7 +348,9 @@
         assert choices is not None
         usage = (
             chunk["usage"]
-            if choices[0]["finish_reason"] is not None
+            if choices[0]["finish_reason"] is not None
+            and reasoning_parser
+            and reasoning_parser.check_content_parser()
             else None
         )
         chat_chunk = {
@@ -328,28 +368,32 @@
         cls,
         chunk: CompletionChunk,
         reasoning_parser: Optional[ReasoningParser] = None,
-    ) -> ChatCompletionChunk:
-        choices_list = []
+    ) -> List[ChatCompletionChunk]:
+        choices_list: List[ChatCompletionChunkChoice] = []
+        chunks: List[ChatCompletionChunk] = []
         for i, choice in enumerate(chunk["choices"]):
             delta = ChatCompletionChunkDelta(role="assistant", content="")
-            if reasoning_parser
+            if reasoning_parser and reasoning_parser.check_content_parser():
                 delta["content"] = None
                 delta["reasoning_content"] = ""
             choices_list.append(
-
-
-
-
-
+                ChatCompletionChunkChoice(
+                    index=i,
+                    delta=delta,
+                    finish_reason=None,
+                )
             )
-        chat_chunk =
-
-
-
-
-
-
-
+        chat_chunk = ChatCompletionChunk(
+            id="chat" + chunk["id"],
+            model=chunk["model"],
+            created=chunk["created"],
+            object="chat.completion.chunk",
+            choices=choices_list,
+        )
+        chunks.append(chat_chunk)
+        if reasoning_parser:
+            chunks.extend(reasoning_parser.prepare_first_reasoning_content_chunk(chunk))
+        return chunks
 
     @classmethod
     def _get_final_chat_completion_chunk(
@@ -374,6 +418,8 @@
         reasoning_parse: Optional[ReasoningParser] = None,
     ) -> Iterator[ChatCompletionChunk]:
         previous_texts = [""]
+        if reasoning_parse:
+            chunks = reasoning_parse.prepare_reasoning_content_sync(chunks)
         for _, chunk in enumerate(chunks):
             # usage
             choices = chunk.get("choices")
@@ -421,6 +467,9 @@
         reasoning_parser: Optional[ReasoningParser] = None,
     ) -> AsyncGenerator[ChatCompletionChunk, None]:
         previous_texts = [""]
+        # Process chunks
+        if reasoning_parser:
+            chunks = reasoning_parser.prepare_reasoning_content_streaming(chunks)
         async for chunk in chunks:
             choices = chunk.get("choices")
             if not choices:
@@ -436,19 +485,25 @@
     def _to_chat_completion(
         completion: Completion, reasoning_parser: Optional[ReasoningParser] = None
     ) -> ChatCompletion:
+        # prepare reasoning content
+        if reasoning_parser:
+            completion = reasoning_parser.prepare_reasoning_content(completion)
+
         if completion.get("object") == "chat.completion" and completion.get("choices"):
             # Already a ChatCompletion
-
-
-
-
+            for choice in completion["choices"]:
+                message = choice["message"]  # type: ignore
+                text = message["content"]  # Original content from the message
+
+                if reasoning_parser and reasoning_parser.check_content_parser():
+                    # Parse into reasoning and content parts
                     (
-
-
+                        reasoning_val,
+                        content_val,
                     ) = reasoning_parser.extract_reasoning_content(text)
-                    message["content"] =
-                    if
-                    message["reasoning_content"] =
+                    message["content"] = content_val
+                    if reasoning_val is not None:
+                        message["reasoning_content"] = reasoning_val
             return cast(ChatCompletion, completion)
 
         choices = []
|
|
|
456
511
|
content = choice["text"]
|
|
457
512
|
reasoning_content = None
|
|
458
513
|
|
|
459
|
-
if reasoning_parser
|
|
514
|
+
if reasoning_parser and reasoning_parser.check_content_parser():
|
|
460
515
|
reasoning_content, content = reasoning_parser.extract_reasoning_content( # type: ignore
|
|
461
516
|
choice
|
|
462
517
|
)
|
|
@@ -653,20 +708,12 @@ class ChatModelMixin:
|
|
|
653
708
|
failed_contents.append(content)
|
|
654
709
|
finish_reason = "tool_calls" if tool_calls else "stop"
|
|
655
710
|
|
|
656
|
-
reasoning_content = None
|
|
657
711
|
content = ". ".join(failed_contents) if failed_contents else None
|
|
658
|
-
if reasoning_parser is not None:
|
|
659
|
-
reasoning_content, content = reasoning_parser.extract_reasoning_content( # type: ignore
|
|
660
|
-
content
|
|
661
|
-
)
|
|
662
712
|
d = {
|
|
663
713
|
"role": "assistant",
|
|
664
714
|
"content": content,
|
|
665
715
|
"tool_calls": tool_calls,
|
|
666
716
|
}
|
|
667
|
-
# add only reasoning_content is None
|
|
668
|
-
if reasoning_content is not None:
|
|
669
|
-
d["reasoning_content"] = reasoning_content
|
|
670
717
|
|
|
671
718
|
try:
|
|
672
719
|
usage = c.get("usage")
|
|
@@ -701,7 +748,17 @@ class ChatModelMixin:
|
|
|
701
748
|
c,
|
|
702
749
|
reasoning_parser: Optional[ReasoningParser] = None,
|
|
703
750
|
):
|
|
751
|
+
if reasoning_parser:
|
|
752
|
+
c = reasoning_parser.prepare_reasoning_content(c)
|
|
704
753
|
_id = str(uuid.uuid4())
|
|
754
|
+
reasoning_content = None
|
|
755
|
+
if reasoning_parser and reasoning_parser.check_content_parser():
|
|
756
|
+
text = c["choices"][0]["text"]
|
|
757
|
+
reasoning_content, content = reasoning_parser.extract_reasoning_content(
|
|
758
|
+
text
|
|
759
|
+
)
|
|
760
|
+
c["choices"][0]["text"] = content
|
|
761
|
+
|
|
705
762
|
tool_result = cls._eval_tool_arguments(model_family, c)
|
|
706
763
|
|
|
707
764
|
tool_calls = []
|
|
@@ -722,12 +779,6 @@ class ChatModelMixin:
|
|
|
722
779
|
failed_contents.append(content)
|
|
723
780
|
finish_reason = "tool_calls" if tool_calls else "stop"
|
|
724
781
|
|
|
725
|
-
reasoning_content = None
|
|
726
|
-
content = ". ".join(failed_contents) if failed_contents else None
|
|
727
|
-
if reasoning_parser is not None:
|
|
728
|
-
reasoning_content, content = reasoning_parser.extract_reasoning_content( # type: ignore
|
|
729
|
-
content
|
|
730
|
-
)
|
|
731
782
|
m = {
|
|
732
783
|
"role": "assistant",
|
|
733
784
|
"content": content,
|
|

xinference/model/llm/vllm/core.py
CHANGED

@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import asyncio
+import importlib.util
 import itertools
 import json
 import logging
@@ -50,7 +51,7 @@ from ....types import (
     LoRA,
 )
 from .. import LLM, LLMFamilyV1, LLMSpecV1
-from ..llm_family import CustomLLMFamilyV1
+from ..llm_family import CustomLLMFamilyV1, cache_model_tokenizer_and_config
 from ..utils import (
     DEEPSEEK_TOOL_CALL_FAMILY,
     QWEN_TOOL_CALL_FAMILY,
@@ -169,6 +170,7 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.3.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen2.5-instruct")
     VLLM_SUPPORTED_MODELS.append("qwen2.5-coder")
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen2.5-coder-instruct")
+    VLLM_SUPPORTED_CHAT_MODELS.append("XiYanSQL-QwenCoder-2504")
     VLLM_SUPPORTED_CHAT_MODELS.append("QwQ-32B-Preview")
     VLLM_SUPPORTED_CHAT_MODELS.append("QwQ-32B")
     VLLM_SUPPORTED_CHAT_MODELS.append("marco-o1")
@@ -176,6 +178,9 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.3.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("fin-r1")
     VLLM_SUPPORTED_CHAT_MODELS.append("seallms-v3")
     VLLM_SUPPORTED_CHAT_MODELS.append("skywork-or1-preview")
+    VLLM_SUPPORTED_CHAT_MODELS.append("skywork-or1")
+    VLLM_SUPPORTED_CHAT_MODELS.append("HuatuoGPT-o1-Qwen2.5")
+    VLLM_SUPPORTED_CHAT_MODELS.append("DianJin-R1")
 
 if VLLM_INSTALLED and vllm.__version__ >= "0.3.2":
     VLLM_SUPPORTED_CHAT_MODELS.append("gemma-it")
@@ -206,6 +211,7 @@ if VLLM_INSTALLED and vllm.__version__ > "0.5.3":
     VLLM_SUPPORTED_CHAT_MODELS.append("llama-3.1-instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("llama-3.3-instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-r1-distill-llama")
+    VLLM_SUPPORTED_CHAT_MODELS.append("HuatuoGPT-o1-LLaMA-3.1")
 
 if VLLM_INSTALLED and vllm.__version__ >= "0.6.1":
     VLLM_SUPPORTED_VISION_MODEL_LIST.append("internvl2")
@@ -239,6 +245,9 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.8.0":
 if VLLM_INSTALLED and vllm.__version__ >= "0.8.4":
     VLLM_SUPPORTED_CHAT_MODELS.append("glm4-0414")
 
+if VLLM_INSTALLED and vllm.__version__ >= "0.8.5":
+    VLLM_SUPPORTED_CHAT_MODELS.append("qwen3")
+
 
 class VLLMModel(LLM):
     def __init__(
@@ -330,8 +339,10 @@ class VLLMModel(LLM):
 
             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
 
-
-
+        from ..llm_family import LlamaCppLLMSpecV1
+
+        if "0.3.1" <= vllm.__version__ <= "0.3.3":
+            # from vllm v0.3.1 to v0.3.3, it uses cupy as NCCL backend
             # in which cupy will fork a process
             # only for xoscar >= 0.3.0, new process is allowed in subpool
             # besides, xinference set start method as forkserver for unix
@@ -341,8 +352,17 @@
         self._device_count = self._get_cuda_count()
         self._model_config = self._sanitize_model_config(self._model_config)
         reasoning_content = self._model_config.pop("reasoning_content")
+        enable_thinking = self._model_config.pop("enable_thinking", False)
+        self.prepare_parse_reasoning_content(
+            reasoning_content, enable_thinking=enable_thinking
+        )
 
-
+        if (
+            isinstance(self.model_spec, LlamaCppLLMSpecV1)
+            and self.model_spec.model_format == "ggufv2"
+        ):
+            # gguf
+            self._preprocess_load_gguf()
 
         if self.lora_modules is None:
             self.lora_requests = []
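The vLLM engine now pops both `reasoning_content` and `enable_thinking` out of the model config before building the engine, so both toggles travel with the launch-time configuration. A hedged sketch, assuming the keys are forwarded as extra launch kwargs exactly as the pops above read them; the endpoint, model name and size are placeholders.

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://localhost:9997")
    model_uid = client.launch_model(
        model_name="qwen3",        # placeholder model choice
        model_engine="vllm",
        model_size_in_billions=8,
        reasoning_content=True,    # parse <think> sections into reasoning_content
        enable_thinking=False,     # hybrid model: turn thinking off in the template
    )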
@@ -482,6 +502,45 @@
             _, err, tb = self._loading_error
             raise err.with_traceback(tb)
 
+    def _preprocess_load_gguf(self):
+        # check if it is multi gguf files
+        if (
+            not os.path.isfile(self.model_path)
+            and self.model_spec.quantization_parts
+            and self.quantization in self.model_spec.quantization_parts
+        ):
+            raise RuntimeError(
+                "vllm does not support multiple gguf files, please merge them first and "
+                "provide `model_path` with merged file"
+            )
+
+        if "tokenizer" not in self._model_config:
+            # find pytorch format without quantization
+            non_quant_spec = next(
+                spec
+                for spec in self.model_family.model_specs
+                if spec.model_format == "pytorch"
+                and "none" in spec.quantizations
+                and spec.model_size_in_billions
+                == self.model_spec.model_size_in_billions
+            )
+
+            path = cache_model_tokenizer_and_config(self.model_family, non_quant_spec)
+            # other than gguf file, vllm requires to provide tokenizer and hf_config_path
+            self._model_config["tokenizer"] = self._model_config[
+                "hf_config_path"
+            ] = path
+
+        if not os.path.isfile(self.model_path):
+            self.model_path = os.path.realpath(
+                os.path.join(
+                    self.model_path,
+                    self.model_spec.model_file_name_template.format(
+                        quantization=self.quantization
+                    ),
+                )
+            )
+
     def stop(self):
         # though the vLLM engine will shutdown when deleted,
         # but some issue e.g. GH#1682 reported
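`_preprocess_load_gguf` refuses multi-part GGUF downloads, points vLLM's `tokenizer`/`hf_config_path` at the tokenizer and config cached from the unquantized PyTorch spec, and resolves `model_path` down to the single `.gguf` file. A sketch of launching such a build on the vLLM engine (accepted for vllm >= 0.8.2 per the `match_json` change further down); the model name and quantization label are assumptions.

    from xinference.client import RESTfulClient

    client = RESTfulClient("http://localhost:9997")
    # Multi-part GGUF files must be merged into a single file beforehand.
    model_uid = client.launch_model(
        model_name="qwen3",
        model_engine="vllm",
        model_format="ggufv2",
        model_size_in_billions=8,
        quantization="Q4_K_M",    # placeholder quantization label
    )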
@@ -642,7 +701,11 @@
         return sanitized
 
     @classmethod
-    def
+    def check_lib(cls) -> bool:
+        return importlib.util.find_spec("vllm") is not None
+
+    @classmethod
+    def match_json(
         cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
     ) -> bool:
         if not cls._has_cuda_device():
@@ -755,10 +818,6 @@
             raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
 
         sanitized_generate_config = self._sanitize_generate_config(generate_config)
-        if self.reasoning_parser:
-            # For reasoning model, the </think> we be split into multiple words,
-            # if `stop` param is passed, so we pop it from config.
-            sanitized_generate_config.pop("stop")
         logger.debug(
             "Enter generate, prompt: %s, generate config: %s", prompt, generate_config
         )
@@ -935,10 +994,10 @@
 
 class VLLMChatModel(VLLMModel, ChatModelMixin):
     @classmethod
-    def
+    def match_json(
         cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
     ) -> bool:
-        if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8"]:
+        if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "ggufv2"]:
             return False
         if llm_spec.model_format == "pytorch":
             if quantization != "none" and not (quantization is None):
@@ -954,6 +1013,9 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
             else:
                 if "4" not in quantization:
                     return False
+        if llm_spec.model_format == "ggufv2":
+            if not (VLLM_INSTALLED and vllm.__version__ >= "0.8.2"):
+                return False
         if isinstance(llm_family, CustomLLMFamilyV1):
             if llm_family.model_family not in VLLM_SUPPORTED_CHAT_MODELS:
                 return False
@@ -970,13 +1032,19 @@
     ) -> Dict:
         if not generate_config:
             generate_config = {}
-        if
-            generate_config
-
-
-            and self.model_family.
-
-
+        if "reasoning" in getattr(self.model_family, "model_ability", []):
+            generate_config.pop("stop", None)
+            generate_config.pop("stop_token_ids", None)
+        else:
+            if not generate_config.get("stop") and self.model_family.stop:
+                generate_config["stop"] = self.model_family.stop.copy()
+            if (
+                not generate_config.get("stop_token_ids")
+                and self.model_family.stop_token_ids
+            ):
+                generate_config[
+                    "stop_token_ids"
+                ] = self.model_family.stop_token_ids.copy()
         return generate_config
 
     @staticmethod
@@ -988,11 +1056,15 @@
         chunks: AsyncGenerator[CompletionChunk, None],
     ) -> AsyncGenerator[ChatCompletionChunk, None]:
         i = 0
+        previous_texts = [""]
+        if self.reasoning_parser:
+            chunks = self.reasoning_parser.prepare_reasoning_content(chunks)
         async for chunk in chunks:
             if i == 0:
-
+                for first_chunk in self._get_first_chat_completion_chunk(
                     chunk, self.reasoning_parser
-                )
+                ):
+                    yield first_chunk
             # usage
             choices = chunk.get("choices")
             if not choices:
@@ -1006,7 +1078,9 @@
                     reasoning_parser=self.reasoning_parser,
                 )
             else:
-                yield self._to_chat_completion_chunk(
+                yield self._to_chat_completion_chunk(
+                    chunk, self.reasoning_parser, previous_texts
+                )
             i += 1
 
     @vllm_check
@@ -1018,7 +1092,12 @@
     ) -> Union[ChatCompletion, AsyncGenerator[ChatCompletionChunk, None]]:
         tools = generate_config.pop("tools", []) if generate_config else None
         model_family = self.model_family.model_family or self.model_family.model_name
-        full_context_kwargs =
+        full_context_kwargs = (
+            self._get_chat_template_kwargs_from_generate_config(
+                generate_config, self.reasoning_parser
+            )
+            or {}
+        )
         if tools:
             if (
                 model_family in QWEN_TOOL_CALL_FAMILY
@@ -1055,7 +1134,7 @@
 
 class VLLMVisionModel(VLLMModel, ChatModelMixin):
     @classmethod
-    def
+    def match_json(
         cls, llm_family: "LLMFamilyV1", llm_spec: "LLMSpecV1", quantization: str
     ) -> bool:
         if not cls._has_cuda_device():
@@ -1136,7 +1215,12 @@
         if "internvl2" not in model_family.lower():
             from qwen_vl_utils import process_vision_info
 
-            full_context_kwargs =
+            full_context_kwargs = (
+                self._get_chat_template_kwargs_from_generate_config(
+                    generate_config, self.reasoning_parser
+                )
+                or {}
+            )
             if tools and model_family in QWEN_TOOL_CALL_FAMILY:
                 full_context_kwargs["tools"] = tools
             assert self.model_family.chat_template is not None

xinference/model/llm/vllm/distributed_executor.py
CHANGED

@@ -54,13 +54,14 @@ class WorkerActor(xo.StatelessActor):
         return f"VllmWorker_{rank}"
 
     def execute_method(self, method: Union[str, Callable], *args, **kwargs):
-
-
-
-
-
-
-
+        # NOTE: too many logs, but useful for debug
+        # logger.debug(
+        #     "Calling method %s in vllm worker %s, args: %s, kwargs: %s",
+        #     method,
+        #     self.uid,
+        #     args,
+        #     kwargs,
+        # )
         if isinstance(method, str):
             return getattr(self._worker, method)(*args, **kwargs)
         else:

xinference/model/llm/vllm/xavier/allocator.py
CHANGED

@@ -24,7 +24,7 @@ from .block import XavierPrefixCachingBlockAllocator
 class XavierCpuGpuBlockAllocator(CpuGpuBlockAllocator):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self._xavier_config: Optional[Dict[str, Any]] = None
+        self._xavier_config: Optional[Dict[str, Any]] = None  # type: ignore
 
     @property
     def xavier_config(self):

xinference/model/llm/vllm/xavier/block_manager.py
CHANGED

@@ -30,7 +30,7 @@ class XavierBlockManager(SelfAttnBlockSpaceManager):
         # Monkey patch
         CpuGpuBlockAllocator.create = XavierCpuGpuBlockAllocator.create
         super().__init__(*args, **kwargs)
-        self._xavier_config: Optional[Dict[str, Any]] = None
+        self._xavier_config: Optional[Dict[str, Any]] = None  # type: ignore
         logger.debug("Init xavier block manager done.")
 
     @property

xinference/model/llm/vllm/xavier/block_tracker.py
CHANGED

@@ -25,10 +25,10 @@ class VLLMBlockTracker(xo.StatelessActor):
     def __init__(self):
         super().__init__()
         # engine -> hash -> (rank, block_id)
-        self._hash_to_rank_and_block_id: Dict[int, Dict[int, Set[Tuple[int, int]]]] = {}
+        self._hash_to_rank_and_block_id: Dict[int, Dict[int, Set[Tuple[int, int]]]] = {}  # type: ignore
         # engine -> rank -> (hash, block_id)
-        self._rank_to_hash_and_block_id: Dict[int, Dict[int, Set[Tuple[int, int]]]] = {}
-        self._unavailable_ranks: Set[int] = set()
+        self._rank_to_hash_and_block_id: Dict[int, Dict[int, Set[Tuple[int, int]]]] = {}  # type: ignore
+        self._unavailable_ranks: Set[int] = set()  # type: ignore
 
     def register_blocks(
         self, virtual_engine: int, block_infos: List[Tuple[int, int]], rank: int

xinference/model/llm/vllm/xavier/executor.py
CHANGED

@@ -38,7 +38,7 @@ class XavierExecutor(MultiprocessingDistributedExecutor):
         Retrieve the necessary transmission information from the `cache_engine`.
         """
         transfer_ref = await self._get_transfer_ref()
-        ref_cache_engine: CacheEngine = self.driver_worker.cache_engine[0]
+        ref_cache_engine: CacheEngine = self.driver_worker.cache_engine[0]  # type: ignore
         buffer_dtype = ref_cache_engine.dtype
         buffer_device = "cpu"
         buffer_pin_memory = is_pin_memory_available()

xinference/model/llm/vllm/xavier/test/test_xavier.py
CHANGED

@@ -11,8 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import os
-import sys
 
 import pytest
 import xoscar as xo
@@ -30,14 +28,7 @@ class ExtendedBlockTracker(VLLMBlockTracker):
 
 @pytest.fixture
 async def actor_pool_context():
-
-        os.environ.get("POOL_START_METHOD", "forkserver")
-        if sys.platform != "win32"
-        else None
-    )
-    pool = await xo.create_actor_pool(
-        "127.0.0.1", n_process=2, subprocess_start_method=start_method
-    )
+    pool = await xo.create_actor_pool("127.0.0.1", n_process=2)
     async with pool:
         yield pool
 
@@ -46,7 +37,7 @@ async def actor_pool_context():
 async def test_block_tracker(actor_pool_context):
     actor_pool = actor_pool_context
     addr = actor_pool.external_address
-    tracker_ref: xo.ActorRefType[ExtendedBlockTracker] = await xo.create_actor(
+    tracker_ref: xo.ActorRefType[ExtendedBlockTracker] = await xo.create_actor(  # type: ignore
         ExtendedBlockTracker,
         address=addr,
         uid=VLLMBlockTracker.default_uid(),