xinference 1.3.0__py3-none-any.whl → 1.3.0.post2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of xinference might be problematic. See the release's advisory page for more details.

xinference/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-02-21T22:33:35+0800",
11
+ "date": "2025-02-22T23:10:02+0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "bb778404b248ea91f4210e739e2b92b8a1be1455",
15
- "version": "1.3.0"
14
+ "full-revisionid": "378a47adad8506a13105b063322ccd7a04f7ea5d",
15
+ "version": "1.3.0.post2"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -821,12 +821,6 @@ def remove_cache(
821
821
  type=bool,
822
822
  help="Whether or not to allow for custom models defined on the Hub in their own modeling files.",
823
823
  )
824
- @click.option(
825
- "--reasoning-content",
826
- default=False,
827
- type=bool,
828
- help="Whether or not to enable reasoning content in model responses.",
829
- )
830
824
  @click.option(
831
825
  "--api-key",
832
826
  "-ak",
@@ -855,7 +849,6 @@ def model_launch(
855
849
  worker_ip: Optional[str],
856
850
  gpu_idx: Optional[str],
857
851
  trust_remote_code: bool,
858
- reasoning_content: bool,
859
852
  api_key: Optional[str],
860
853
  model_path: Optional[str],
861
854
  ):
@@ -948,7 +941,6 @@ def model_launch(
948
941
  gpu_idx=_gpu_idx,
949
942
  trust_remote_code=trust_remote_code,
950
943
  model_path=model_path,
951
- reasoning_content=reasoning_content,
952
944
  **kwargs,
953
945
  )
954
946
 
@@ -1,7 +1,7 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from typing import Dict, Optional, Tuple, Type, Union
3
3
 
4
- from ....types import ChatCompletionChunkDelta, CompletionChoice, CompletionChunk
4
+ from ....types import ChatCompletionChunkDelta, CompletionChoice
5
5
 
6
6
 
7
7
  class ReasoningParser(ABC):
@@ -26,7 +26,7 @@ class ReasoningParser(ABC):
26
26
  self,
27
27
  previous_text: str,
28
28
  current_text: str,
29
- delta: Union[str, CompletionChunk],
29
+ delta: ChatCompletionChunkDelta,
30
30
  ) -> ChatCompletionChunkDelta:
31
31
  """Extract reasoning content from model output in a streaming fashion.
32
32
 
@@ -24,7 +24,7 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
24
24
  previous_text: str,
25
25
  current_text: str,
26
26
  delta: ChatCompletionChunkDelta,
27
- ) -> Optional[ChatCompletionChunkDelta]:
27
+ ) -> ChatCompletionChunkDelta:
28
28
  """Extract reasoning content from DeepSeek-R1 model output in a streaming fashion.
29
29
 
30
30
  Args:
@@ -122,7 +122,7 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
122
122
  # Thus we assume the reasoning content is always at the start.
123
123
  # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
124
124
  if self.reasoning_end_tag not in model_output:
125
- return model_output, None
125
+ return model_output, ""
126
126
  else:
127
127
  # Add a start token if it's missing to keep compatibility.
128
128
  if self.reasoning_start_tag not in model_output:
@@ -136,5 +136,5 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
136
136
  final_output = model_output[end_index:]
137
137
 
138
138
  if len(final_output) == 0:
139
- return reasoning_content, None
139
+ return reasoning_content, ""
140
140
  return reasoning_content, final_output
@@ -374,19 +374,21 @@ class ChatModelMixin:
374
374
  current_text = ""
375
375
  async for chunk in chunks:
376
376
  if i == 0:
377
- chunk = cls._get_first_chat_completion_chunk(chunk)
377
+ chat_chunk = cls._get_first_chat_completion_chunk(chunk)
378
378
  elif not chunk.get("choices"):
379
379
  # usage
380
- chunk = cls._get_final_chat_completion_chunk(chunk)
380
+ chat_chunk = cls._get_final_chat_completion_chunk(chunk)
381
381
  else:
382
- chunk = cls._to_chat_completion_chunk(chunk)
382
+ chat_chunk = cls._to_chat_completion_chunk(chunk)
383
383
  if reasoning_parser is not None:
384
- choices = chunk.get("choices")
384
+ choices = chat_chunk.get("choices")
385
+ if choices is None:
386
+ continue
385
387
  for choice in choices:
386
388
  delta = choice.get("delta")
387
389
  if not delta:
388
390
  continue
389
- current_text = previous_text + delta.get("content")
391
+ current_text = previous_text + delta.get("content", "")
390
392
  choice[
391
393
  "delta"
392
394
  ] = reasoning_parser.extract_reasoning_content_streaming(
@@ -395,7 +397,7 @@ class ChatModelMixin:
395
397
  delta=delta,
396
398
  )
397
399
  previous_text = current_text
398
- yield chunk
400
+ yield chat_chunk
399
401
  i += 1
400
402
 
401
403
  @staticmethod
@@ -408,7 +410,7 @@ class ChatModelMixin:
408
410
  reasoning_content = None
409
411
 
410
412
  if reasoning_parser is not None:
411
- reasoning_content, content = reasoning_parser.extract_reasoning_content(
413
+ reasoning_content, content = reasoning_parser.extract_reasoning_content( # type: ignore
412
414
  choice
413
415
  )
414
416
 
@@ -429,7 +431,8 @@ class ChatModelMixin:
429
431
  "id": "chat" + completion["id"],
430
432
  "object": "chat.completion",
431
433
  "created": completion["created"],
432
- "model": choices,
434
+ "model": completion["model"],
435
+ "choices": choices, # type: ignore
433
436
  "usage": completion["usage"],
434
437
  }
435
438
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: xinference
3
- Version: 1.3.0
3
+ Version: 1.3.0.post2
4
4
  Summary: Model Serving Made Easy
5
5
  Home-page: https://github.com/xorbitsai/inference
6
6
  Author: Qin Xuye
@@ -1,6 +1,6 @@
1
1
  xinference/__init__.py,sha256=nmTTrYbIpj964ZF6ojtgOM7E85JBOj1EyQbmYjbj1jw,915
2
2
  xinference/_compat.py,sha256=URSJQLXrcsTO9B_4x0wVDPijYQDhuVJmZ95npID560w,4197
3
- xinference/_version.py,sha256=WWhsA6HLqEmACoxpxfySEDjEnZMcVGekSJb9gJu4UIk,497
3
+ xinference/_version.py,sha256=4EtXtCeHp6nyWCpJ7_0hRACaUqdifs7HuNYZPw5GSHM,503
4
4
  xinference/conftest.py,sha256=vETDpRBVIlWbWi7OTwf7og89U25KyYGyI7yPIB3O8N8,9564
5
5
  xinference/constants.py,sha256=mEW4HDzjXtDXN61Mt6TtJrJ4ljbB6VUkh97e3oDbNx4,3905
6
6
  xinference/device_utils.py,sha256=ELsqvnjvz9wYthTyQFzKSV4mZsaASz6hj_IsfMmfMWc,4447
@@ -34,7 +34,7 @@ xinference/core/supervisor.py,sha256=JgfEY7LkP9m48abukMXKBumEqeV9Q-IImSAJjk6YslE
34
34
  xinference/core/utils.py,sha256=TpUHNFcA4pEXq084Quz-ilIccuDMUAsdLrEJxj0Zn6I,10414
35
35
  xinference/core/worker.py,sha256=VpX9Mwa27FyNcgoXdFRe414iL4KySZNpMfPIOhrcVPE,51737
36
36
  xinference/deploy/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
37
- xinference/deploy/cmdline.py,sha256=-RKCZaM2SlL8ZXfp_LQJ7tndn7ip9c9F-BKq0qINGkg,49565
37
+ xinference/deploy/cmdline.py,sha256=fOuD4HixyI6VuuVO7XJ-0_C3JutDe1sv4MwVjZPluS8,49338
38
38
  xinference/deploy/local.py,sha256=sO3BcpEH9oCF87CxWVA4AXAYcfHGnrcop40ew5NOA2g,3979
39
39
  xinference/deploy/supervisor.py,sha256=68rB2Ey5KFeF6zto9YGbw3P8QLZmF_KSh1NwH_pNP4w,2986
40
40
  xinference/deploy/utils.py,sha256=jdL7i2WV6u_BZ8IiE1d3YktvCARcB3ntzMQ5rHGD5DM,6756
@@ -94,7 +94,7 @@ xinference/model/llm/llm_family_csghub.json,sha256=zMKWbihsxQNVB1u5iKJbZUkbOfQ4I
94
94
  xinference/model/llm/llm_family_modelscope.json,sha256=8RsMiGrbVdLQohnmdbVWIZgk4yYcFpnYZpkcqz-P8yE,289997
95
95
  xinference/model/llm/llm_family_openmind_hub.json,sha256=jl9pfbe5DztoxgEwKBxDk1Wd7TziTiJ48_Ie_lJdYjA,67872
96
96
  xinference/model/llm/memory.py,sha256=GLNmXBI-AtMbuaJfEf50fnhN4rdbOZjLyT6L_Vjqa5g,10206
97
- xinference/model/llm/utils.py,sha256=PMhIVa_KI6SBqPG1Usd3iMSUoIEMnN9U9lnW_iA82m0,29995
97
+ xinference/model/llm/utils.py,sha256=ZzhXldHn2MHuMgyJkpwxFZlwS6mbdIL2_UMqKWKpkz4,30165
98
98
  xinference/model/llm/llama_cpp/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
99
99
  xinference/model/llm/llama_cpp/core.py,sha256=3GSGk42c8Oy_jTqRv4nLC482V2tUis3V0LlohQy_I1U,11312
100
100
  xinference/model/llm/lmdeploy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -102,8 +102,8 @@ xinference/model/llm/lmdeploy/core.py,sha256=WvSP3x6t-HBv6hKh1qWZatFAzlcZCyyKqvc
102
102
  xinference/model/llm/mlx/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
103
103
  xinference/model/llm/mlx/core.py,sha256=WQN2iURiWSL_MY5hR0GkCYa08qr5wtOFx522_c2vW30,23130
104
104
  xinference/model/llm/reasoning_parsers/__init__.py,sha256=-sjSIQ4K6w-TEzx49kVaWeWC443fnZqODU91GCQ_JNo,581
105
- xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py,sha256=7jISylZEQDCw2-5kR4MsXkZH5xhkOlrBrJBAIAsnSZI,3040
106
- xinference/model/llm/reasoning_parsers/deepseek_r1_reasoning_parser.py,sha256=IpeXAJSApXAdcUj8lYC7jkBRkJbURCt9pBVa7fZHNKM,6359
105
+ xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py,sha256=NWwx7Uj2K6SR1TZCfHeF1Q9Kpm9o6PNV8I16nyQNTh4,3020
106
+ xinference/model/llm/reasoning_parsers/deepseek_r1_reasoning_parser.py,sha256=m_olMdPLJrozADlRuH_nLRrzqLgM2W1QY9G2fa7qzes,6345
107
107
  xinference/model/llm/sglang/__init__.py,sha256=-sjSIQ4K6w-TEzx49kVaWeWC443fnZqODU91GCQ_JNo,581
108
108
  xinference/model/llm/sglang/core.py,sha256=tR8PtdC_RBG00sBQp8lF-ow5xq9QkPlRbFPoHzzCDZk,20321
109
109
  xinference/model/llm/transformers/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
@@ -15725,9 +15725,9 @@ xinference/web/ui/node_modules/yup/package.json,sha256=xRFSROB9NKxqSWHEVFvSTsPs9
15725
15725
  xinference/web/ui/node_modules/yup/node_modules/type-fest/package.json,sha256=JTv2zTTVgxQ2H82m1-6qEpdMv08lHjFx4Puf_MsbB_Q,1134
15726
15726
  xinference/web/ui/src/locales/en.json,sha256=OS25toIZHXcGYPtl5rZRQfJEDuAyrwBIYM6GGxOVe4Y,8641
15727
15727
  xinference/web/ui/src/locales/zh.json,sha256=mBhTne0sIYLe_xh5qyH45W_D50Hjp-DKdwcifYEHFnw,8386
15728
- xinference-1.3.0.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
15729
- xinference-1.3.0.dist-info/METADATA,sha256=9Ad2j0ziQ8Xm7bxxRjr7URv5kS4k3m6xH2IcJSN345U,24121
15730
- xinference-1.3.0.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
15731
- xinference-1.3.0.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
15732
- xinference-1.3.0.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
15733
- xinference-1.3.0.dist-info/RECORD,,
15728
+ xinference-1.3.0.post2.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
15729
+ xinference-1.3.0.post2.dist-info/METADATA,sha256=kFcBcCj6HdKTIrJASMPOHD69Lfa-TIliWblCYV6_1Bk,24127
15730
+ xinference-1.3.0.post2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
15731
+ xinference-1.3.0.post2.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
15732
+ xinference-1.3.0.post2.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
15733
+ xinference-1.3.0.post2.dist-info/RECORD,,