xinference 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (35)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +35 -1
  3. xinference/client/oscar/actor_client.py +2 -2
  4. xinference/client/restful/restful_client.py +2 -2
  5. xinference/conftest.py +5 -1
  6. xinference/core/metrics.py +83 -0
  7. xinference/core/model.py +148 -8
  8. xinference/core/status_guard.py +86 -0
  9. xinference/core/supervisor.py +57 -7
  10. xinference/core/worker.py +132 -13
  11. xinference/deploy/cmdline.py +57 -4
  12. xinference/deploy/local.py +32 -6
  13. xinference/deploy/worker.py +33 -5
  14. xinference/fields.py +4 -1
  15. xinference/model/llm/__init__.py +7 -0
  16. xinference/model/llm/ggml/llamacpp.py +3 -2
  17. xinference/model/llm/llm_family.json +70 -3
  18. xinference/model/llm/llm_family.py +11 -1
  19. xinference/model/llm/llm_family_modelscope.json +72 -3
  20. xinference/model/llm/pytorch/chatglm.py +70 -28
  21. xinference/model/llm/pytorch/core.py +11 -30
  22. xinference/model/llm/pytorch/internlm2.py +155 -0
  23. xinference/model/llm/pytorch/utils.py +0 -153
  24. xinference/model/llm/utils.py +37 -8
  25. xinference/model/llm/vllm/core.py +15 -3
  26. xinference/model/multimodal/__init__.py +15 -8
  27. xinference/model/multimodal/model_spec_modelscope.json +45 -0
  28. xinference/model/utils.py +7 -2
  29. xinference/types.py +2 -0
  30. {xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/METADATA +2 -1
  31. {xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/RECORD +35 -31
  32. {xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/LICENSE +0 -0
  33. {xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/WHEEL +0 -0
  34. {xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/entry_points.txt +0 -0
  35. {xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/pytorch/utils.py CHANGED
@@ -14,7 +14,6 @@

 import gc
 import logging
-import re
 import time
 import uuid
 from threading import Thread
@@ -23,7 +22,6 @@ from typing import Iterable, Iterator, Tuple
 import torch
 from transformers import GenerationConfig, TextIteratorStreamer
 from transformers.generation.logits_process import (
-    LogitsProcessor,
     LogitsProcessorList,
     RepetitionPenaltyLogitsProcessor,
     TemperatureLogitsWarper,
@@ -480,154 +478,3 @@ def generate_stream_falcon(
     # clean
     gc.collect()
     torch.cuda.empty_cache()
-
-
-class InvalidScoreLogitsProcessor(LogitsProcessor):
-    def __call__(
-        self, input_ids: torch.LongTensor, scores: torch.FloatTensor
-    ) -> torch.FloatTensor:
-        if torch.isnan(scores).any() or torch.isinf(scores).any():
-            scores.zero_()
-            scores[..., 5] = 5e4
-        return scores
-
-
-invalid_score_processor = InvalidScoreLogitsProcessor()
-
-
-def process_response(response):
-    response = response.strip()
-    response = response.replace("[[训练时间]]", "2023年")
-    punkts = [
-        [",", ","],
-        ["!", "!"],
-        [":", ":"],
-        [";", ";"],
-        ["\\?", "?"],
-    ]
-    for item in punkts:
-        response = re.sub(r"([\u4e00-\u9fff])%s" % item[0], r"\1%s" % item[1], response)
-        response = re.sub(r"%s([\u4e00-\u9fff])" % item[0], r"%s\1" % item[1], response)
-    return response
-
-
-@torch.inference_mode()
-def generate_stream_chatglm(
-    model_uid,
-    model,
-    tokenizer,
-    prompt,
-    device,
-    generate_config,
-    judge_sent_end=False,
-):
-    stream = generate_config.get("stream", False)
-    temperature = float(generate_config.get("temperature", 1.0))
-    repetition_penalty = float(generate_config.get("repetition_penalty", 1.0))
-    top_p = float(generate_config.get("top_p", 1.0))
-    max_new_tokens = int(generate_config.get("max_tokens", 256))
-    echo = generate_config.get("echo", False)
-    stop_str = generate_config.get("stop", None)
-    eos_token_id = generate_config.get("stop_token_ids", [])
-    eos_token_id.append(tokenizer.eos_token_id)
-
-    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
-    input_echo_len = len(inputs["input_ids"][0])
-    gen_kwargs = {
-        "max_length": max_new_tokens + input_echo_len,
-        "do_sample": True if temperature > 1e-5 else False,
-        "top_p": top_p,
-        "repetition_penalty": repetition_penalty,
-        "logits_processor": [invalid_score_processor],
-    }
-    if temperature > 1e-5:
-        gen_kwargs["temperature"] = temperature
-
-    total_len = 0
-    last_response_length = 0
-    for total_ids in model.stream_generate(
-        **inputs, eos_token_id=eos_token_id, **gen_kwargs
-    ):
-        total_ids = total_ids.tolist()[0]
-        total_len = len(total_ids)
-        if echo:
-            output_ids = total_ids
-        else:
-            output_ids = total_ids[input_echo_len:]
-        response = tokenizer.decode(output_ids)
-        response = process_response(response)
-
-        partially_stopped = False
-        stopped = False
-        if stop_str:
-            if isinstance(stop_str, str):
-                pos = response.rfind(stop_str, 0)
-                if pos != -1:
-                    response = response[:pos]
-                    stopped = True
-                else:
-                    partially_stopped = is_partial_stop(response, stop_str)
-            elif isinstance(stop_str, Iterable):
-                for each_stop in stop_str:
-                    pos = response.rfind(each_stop, 0)
-                    if pos != -1:
-                        response = response[:pos]
-                        stopped = True
-                        break
-                    else:
-                        partially_stopped = is_partial_stop(response, each_stop)
-                        if partially_stopped:
-                            break
-            else:
-                raise ValueError("Invalid stop field type.")
-
-        if stream:
-            response = response.strip("�")
-            tmp_response_length = len(response)
-            response = response[last_response_length:]
-            last_response_length = tmp_response_length
-
-        if not partially_stopped:
-            completion_choice = CompletionChoice(
-                text=response, index=0, logprobs=None, finish_reason=None
-            )
-            completion_chunk = CompletionChunk(
-                id=str(uuid.uuid1()),
-                object="text_completion",
-                created=int(time.time()),
-                model=model_uid,
-                choices=[completion_choice],
-            )
-            completion_usage = CompletionUsage(
-                prompt_tokens=input_echo_len,
-                completion_tokens=(total_len - input_echo_len),
-                total_tokens=total_len,
-            )
-
-            yield completion_chunk, completion_usage
-
-        if stopped:
-            break
-
-    if total_len - input_echo_len == max_new_tokens - 1:
-        finish_reason = "length"
-    else:
-        finish_reason = "stop"
-
-    completion_choice = CompletionChoice(
-        text=response, index=0, logprobs=None, finish_reason=finish_reason
-    )
-    completion_chunk = CompletionChunk(
-        id=str(uuid.uuid1()),
-        object="text_completion",
-        created=int(time.time()),
-        model=model_uid,
-        choices=[completion_choice],
-    )
-    completion_usage = CompletionUsage(
-        prompt_tokens=input_echo_len,
-        completion_tokens=(total_len - input_echo_len),
-        total_tokens=total_len,
-    )
-
-    yield completion_chunk, completion_usage
xinference/model/llm/utils.py CHANGED
@@ -16,7 +16,7 @@ import json
 import logging
 import time
 import uuid
-from typing import AsyncGenerator, Dict, Iterator, List, Optional
+from typing import AsyncGenerator, Dict, Iterator, List, Optional, cast

 from xinference.model.llm.llm_family import PromptStyleV1

@@ -299,6 +299,24 @@ Begin!"""
             )
             ret += chat_history[-1]["role"] + ":"
             return ret
+        elif prompt_style.style_name == "INTERNLM2":
+            ret = (
+                "<s>"
+                if prompt_style.system_prompt == ""
+                else "<s>[UNUSED_TOKEN_146]system\n"
+                + prompt_style.system_prompt
+                + prompt_style.intra_message_sep
+                + "\n"
+            )
+            for message in chat_history:
+                role = message["role"]
+                content = message["content"]
+
+                if content:
+                    ret += role + "\n" + content + prompt_style.intra_message_sep + "\n"
+                else:
+                    ret += role + "\n"
+            return ret
         elif prompt_style.style_name == "ADD_COLON_SINGLE_COT":
             ret = prompt_style.system_prompt + prompt_style.intra_message_sep
             for message in chat_history:
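
The INTERNLM2 branch above appends each role header, then the content followed by a separator, and leaves a bare role header for the turn the model should fill in. A standalone sketch for illustration; SEP and the role strings are assumed placeholder values (in xinference they would come from prompt_style.intra_message_sep and prompt_style.roles in llm_family.json), not part of this diff:

    SEP = "[UNUSED_TOKEN_145]"  # assumed separator, stand-in for prompt_style.intra_message_sep

    def render_internlm2(system_prompt, chat_history):
        # Mirrors the INTERNLM2 branch added in the hunk above.
        ret = (
            "<s>"
            if system_prompt == ""
            else "<s>[UNUSED_TOKEN_146]system\n" + system_prompt + SEP + "\n"
        )
        for message in chat_history:
            role, content = message["role"], message["content"]
            if content:
                ret += role + "\n" + content + SEP + "\n"
            else:
                ret += role + "\n"  # bare role header cues the model to reply
        return ret

    # -> "<s>user\nHello[UNUSED_TOKEN_145]\nassistant\n"
    print(render_internlm2("", [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": ""},
    ]))
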
@@ -360,7 +378,7 @@ Begin!"""

     @classmethod
     def _to_chat_completion_chunk(cls, chunk: CompletionChunk) -> ChatCompletionChunk:
-        return {
+        chat_chunk = {
             "id": "chat" + chunk["id"],
             "model": chunk["model"],
             "created": chunk["created"],
@@ -376,12 +394,16 @@ Begin!"""
                 for i, choice in enumerate(chunk["choices"])
             ],
         }
+        usage = chunk.get("usage")
+        if usage is not None:
+            chat_chunk["usage"] = usage
+        return cast(ChatCompletionChunk, chat_chunk)

     @classmethod
     def _get_first_chat_completion_chunk(
         cls, chunk: CompletionChunk
     ) -> ChatCompletionChunk:
-        return {
+        chat_chunk = {
             "id": "chat" + chunk["id"],
             "model": chunk["model"],
             "created": chunk["created"],
@@ -397,6 +419,10 @@ Begin!"""
                 for i, choice in enumerate(chunk["choices"])
             ],
         }
+        usage = chunk.get("usage")
+        if usage is not None:
+            chat_chunk["usage"] = usage
+        return cast(ChatCompletionChunk, chat_chunk)

     @classmethod
     def _to_chat_completion_chunks(
@@ -494,16 +520,19 @@ Begin!"""
         return text, None, None

     @classmethod
-    def _tool_calls_completion(cls, model_name, model_uid, c, tools):
+    def _tool_calls_completion(cls, model_family, model_uid, c, tools):
         _id = str(uuid.uuid4())
-        if model_name == "gorilla-openfunctions-v1":
+        family = model_family.model_family or model_family.model_name
+        if "gorilla-openfunctions-v1" == family:
             content, func, args = cls._eval_gorilla_openfunctions_arguments(c, tools)
-        elif model_name == "chatglm3":
+        elif "chatglm3" == family:
             content, func, args = cls._eval_chatglm3_arguments(c, tools)
-        elif model_name == "qwen-chat":
+        elif "qwen-chat" == family:
             content, func, args = cls._eval_qwen_chat_arguments(c, tools)
         else:
-            raise Exception(f"Model {model_name} is not support tool calls.")
+            raise Exception(
+                f"Model {model_family.model_name} is not support tool calls."
+            )
         logger.debug("Tool call content: %s, func: %s, args: %s", content, func, args)

         if content:
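
Passing the whole family object instead of the bare model name means tool-call dispatch now honors the model_family fallback, so a custom model registered against a built-in family inherits its tool-call parser. A hypothetical illustration (FakeFamily is a stand-in for the real family object, not xinference API):

    class FakeFamily:
        def __init__(self, model_name, model_family=None):
            self.model_name = model_name
            self.model_family = model_family

    builtin = FakeFamily("qwen-chat")
    custom = FakeFamily("my-finetuned-qwen", model_family="qwen-chat")
    for f in (builtin, custom):
        # Same fallback expression as _tool_calls_completion above.
        assert (f.model_family or f.model_name) == "qwen-chat"
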
xinference/model/llm/vllm/core.py CHANGED
@@ -94,6 +94,7 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "code-llama-python",
     "code-llama-instruct",
     "mistral-instruct-v0.1",
+    "mistral-instruct-v0.2",
     "chatglm3",
 ]

@@ -170,7 +171,7 @@ class VLLMModel(LLM):
         )
         sanitized.setdefault("temperature", generate_config.get("temperature", 1.0))
         sanitized.setdefault("top_p", generate_config.get("top_p", 1.0))
-        sanitized.setdefault("max_tokens", generate_config.get("max_tokens", 16))
+        sanitized.setdefault("max_tokens", generate_config.get("max_tokens", 1024))
         sanitized.setdefault("stop", generate_config.get("stop", None))
         sanitized.setdefault(
             "stop_token_ids", generate_config.get("stop_token_ids", None)
@@ -303,6 +304,16 @@ class VLLMModel(LLM):
                 delta = choice["text"][len(previous_texts[i]) :]
                 previous_texts[i] = choice["text"]
                 choice["text"] = delta
+            prompt_tokens = len(_request_output.prompt_token_ids)
+            completion_tokens = sum(
+                len(output.token_ids) for output in _request_output.outputs
+            )
+            total_tokens = prompt_tokens + completion_tokens
+            chunk["usage"] = CompletionUsage(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+            )
             yield chunk

         if stream:
@@ -379,7 +390,8 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):

         generate_config = self._sanitize_chat_config(generate_config)
         # TODO(codingl2k1): qwen hacky to set stop for function call.
-        if tools and self.model_family.model_name == "qwen-chat":
+        model_family = self.model_family.model_family or self.model_family.model_name
+        if tools and "qwen-chat" == model_family:
             stop = generate_config.get("stop")
             if isinstance(stop, str):
                 generate_config["stop"] = [stop, "Observation:"]
@@ -400,6 +412,6 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         assert not isinstance(c, AsyncGenerator)
         if tools:
             return self._tool_calls_completion(
-                self.model_family.model_name, self.model_uid, c, tools
+                self.model_family, self.model_uid, c, tools
             )
         return self._to_chat_completion(c)
xinference/model/multimodal/__init__.py CHANGED
@@ -30,16 +30,23 @@ MODEL_CLASSES.append(QwenVLChat)


 def _install():
-    json_path = os.path.join(
+    json_path_huggingface = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "model_spec.json"
     )
-    for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
-        model_family = LVLMFamilyV1.parse_obj(json_obj)
-        BUILTIN_LVLM_FAMILIES.append(model_family)
-        for model_spec in model_family.model_specs:
-            MODEL_NAME_TO_REVISION[model_family.model_name].append(
-                model_spec.model_revision
-            )
+    json_path_modelscope = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "model_spec_modelscope.json"
+    )
+    for builtin_family, json_path in [
+        (BUILTIN_LVLM_FAMILIES, json_path_huggingface),
+        (BUILTIN_MODELSCOPE_LVLM_FAMILIES, json_path_modelscope),
+    ]:
+        for json_obj in json.load(codecs.open(json_path, "r", encoding="utf-8")):
+            model_family = LVLMFamilyV1.parse_obj(json_obj)
+            builtin_family.append(model_family)
+            for model_spec in model_family.model_specs:
+                MODEL_NAME_TO_REVISION[model_family.model_name].append(
+                    model_spec.model_revision
+                )


 _install()
xinference/model/multimodal/model_spec_modelscope.json ADDED
@@ -0,0 +1,45 @@
+[
+  {
+    "version": 1,
+    "context_length": 4096,
+    "model_name": "qwen-vl-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "Qwen-VL-Chat supports more flexible interaction, such as multiple image inputs, multi-round question answering, and creative capabilities.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "Qwen/Qwen-VL-Chat",
+        "model_revision": "master"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_hub": "modelscope",
+        "model_id": "Qwen/Qwen-VL-Chat-{quantization}",
+        "model_revision": "master"
+      }
+    ],
+    "prompt_style": {
+      "style_name": "QWEN",
+      "system_prompt": "You are a helpful assistant.",
+      "roles": [
+        "user",
+        "assistant"
+      ]
+    }
+  }
+]
xinference/model/utils.py CHANGED
@@ -153,8 +153,13 @@ def is_model_cached(model_spec: Any, name_to_revisions_mapping: Dict):


 def is_valid_model_name(model_name: str) -> bool:
-    model_name = model_name.strip()
-    return 0 < len(model_name) <= 100
+    import re
+
+    if len(model_name) == 0:
+        return False
+
+    # check if contains +/?%#&=\s
+    return re.match(r"^[^+\/?%#&=\s]*$", model_name) is not None


 def parse_uri(uri: str) -> Tuple[str, str]:
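
The old check only required a non-empty name of at most 100 characters; the new rule instead rejects any name containing +, /, ?, %, #, &, = or whitespace, characters that are special in URLs. A quick sanity check mirroring the diff above:

    import re

    def is_valid_model_name(model_name: str) -> bool:
        if len(model_name) == 0:
            return False
        return re.match(r"^[^+\/?%#&=\s]*$", model_name) is not None

    assert is_valid_model_name("my-llama-2")    # hyphens are fine
    assert not is_valid_model_name("my model")  # whitespace rejected
    assert not is_valid_model_name("foo/bar")   # slash rejected
    assert not is_valid_model_name("")          # empty rejected

Note that the 100-character upper bound from the old version is dropped.
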
xinference/types.py CHANGED
@@ -110,6 +110,7 @@ class CompletionChunk(TypedDict):
     created: int
     model: str
     choices: List[CompletionChoice]
+    usage: NotRequired[CompletionUsage]


 class Completion(TypedDict):
@@ -160,6 +161,7 @@ class ChatCompletionChunk(TypedDict):
     object: Literal["chat.completion.chunk"]
     created: int
     choices: List[ChatCompletionChunkChoice]
+    usage: NotRequired[CompletionUsage]


 class ChatglmCppModelConfig(TypedDict, total=False):
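
Because the new usage key is declared NotRequired, it may be absent on any given chunk, so consumers should read it defensively. A minimal sketch (the function name is illustrative, not part of the package):

    def final_usage(chunks):
        # Return the last per-chunk usage seen in a stream, or None.
        usage = None
        for chunk in chunks:
            usage = chunk.get("usage", usage)
        return usage
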
{xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: xinference
-Version: 0.8.0
+Version: 0.8.1
 Summary: Model Serving Made Easy
 Home-page: https://github.com/xorbitsai/inference
 Author: Qin Xuye
@@ -39,6 +39,7 @@ Requires-Dist: sse-starlette >=1.6.5
 Requires-Dist: openai >1
 Requires-Dist: python-jose[cryptography]
 Requires-Dist: passlib[bcrypt]
+Requires-Dist: aioprometheus[starlette]
 Provides-Extra: all
 Requires-Dist: chatglm-cpp >=0.3.0 ; extra == 'all'
 Requires-Dist: ctransformers ; extra == 'all'
{xinference-0.8.0.dist-info → xinference-0.8.1.dist-info}/RECORD CHANGED
@@ -1,13 +1,13 @@
 xinference/__init__.py,sha256=jv7PR7ali6n5TpvGjB3hKugwB9Tq-eSTyc_xl2gFnZ0,910
-xinference/_version.py,sha256=-j5l4f1CftQly1S8ZlZ0g31e2b3mIt2Y60zsMqIQZEw,497
-xinference/conftest.py,sha256=gClOWkt5nNZ01DgZch1I2JRDT6YPY5_GKfNCQkEeWFs,9210
+xinference/_version.py,sha256=6jc3tqkig6J9BPzwIKDs3G9C6KyubOgPgzkt_BI8zeg,497
+xinference/conftest.py,sha256=mGhwGBCmu4SNXx-akGw6rzYO2wfB9gTwjewC--geSSw,9315
 xinference/constants.py,sha256=JfcCKl28iyuDAnfue9FrvK34KEfg0dKyoo1-2hzQTJ4,2343
-xinference/fields.py,sha256=k_hhu-7LRqZGmFqMhhPZPaK5GG6thfTZS_hsTpo8yFM,5061
+xinference/fields.py,sha256=xRpDiZXVORKoC9rG3eqwxT-BFuAojhJlxJTsAQHzJ24,5075
 xinference/isolation.py,sha256=NstVRcO3dG4umHExICXAHlzVKwH8ch8MBwKwE-KFkE0,1826
-xinference/types.py,sha256=CVgf1AB2AT2LQXd5__BrImZW2C7ZDq-Z5IaKyHsA-vA,11624
+xinference/types.py,sha256=dhWPyZR-YX6AGaPhPTxxem_Q0fWYN9dHQILeWFvW4yI,11704
 xinference/utils.py,sha256=Z6PPDGmX4EW8OD3OfA2Wa37ZM9OdRTnR00ITMDTu4qE,716
 xinference/api/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/api/restful_api.py,sha256=ZqfQZLK4yiVXrqaw8P5LSizpZh9sgi3yCcejGMpzJYk,41358
+xinference/api/restful_api.py,sha256=gCx4tRD124-x61zNnFHLQMBotDcp8en0y34kYE7T2co,42684
 xinference/api/oauth2/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/api/oauth2/common.py,sha256=YqEUHgyUnsL3SJbTMuS8Prd7l_AI0y9HHhSEdfJ-iSI,613
 xinference/api/oauth2/core.py,sha256=1m8IDdZ9FH5SZvlurSKPKGbrucG47_04Re0PmOJRTg8,3121
@@ -17,22 +17,24 @@ xinference/client/__init__.py,sha256=Gc4HOzAy_1cic5kXlso7hahYgw89CKvZSJDicEU461k
 xinference/client/common.py,sha256=wk-3j1tJPNa60tHO8YZ5z7iBIeI6cBEqomKIatyMQx8,1554
 xinference/client/handlers.py,sha256=nFQQMRHiu_c2nsqVWMA792-QfXXq6wIW3dPR5Q7H-f0,545
 xinference/client/oscar/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/client/oscar/actor_client.py,sha256=D7yp9Km3xVGwhmLaJSIfmq72R3f5bcVrwKjBAYUYxE0,21563
+xinference/client/oscar/actor_client.py,sha256=fWGbqCaJHp33CSgtznPosryTD88KWSjQLQebHkGvQCI,21545
 xinference/client/restful/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/client/restful/restful_client.py,sha256=HpOYx6Z1CkFQ3asevluxDa1dB__cY4BqHbbO0YmiV-0,37721
+xinference/client/restful/restful_client.py,sha256=jGWMPJNtI1m-rZ3GmYwDDkZqWpyOXMiaSX-PnaF13vI,37703
 xinference/core/__init__.py,sha256=Fe5tYCHDbYJ7PhxJhQ68VbfgKgOsAuslNPr4wPhFMJM,612
 xinference/core/chat_interface.py,sha256=ec9fVV-5m88PoXrfvr4C-m74L_p35nSVHTf-fupW1jc,17114
-xinference/core/model.py,sha256=MYLrNVwQKJJbfZy4KJBa8yUeMAkg_xWqltx_52pxIc8,10799
+xinference/core/metrics.py,sha256=ScmTG15Uq3h_ob72ybZSMWdnk8P4sUZFcm60f4ikSXc,2631
+xinference/core/model.py,sha256=vQ_Q65GEFb5y9RuhhqNJNN5fUy7Mou3pO_lpvAwZzD0,16258
 xinference/core/resource.py,sha256=oG44ZpXQL7uz4pbowtHGdpHarNJ52CK6KBLJRMcySpk,1516
-xinference/core/supervisor.py,sha256=pHa2Y8fOV8lrtWew19axopPFVI0U_RCa4gIONspHHAA,26998
+xinference/core/status_guard.py,sha256=YtQUU_I_9fn74e4xg2xqaLdmww6F4Jx0N96jHsfjR88,2644
+xinference/core/supervisor.py,sha256=afrGeR0yKUjJZCb4gSBWS1EAcc9DDQp0vwV_IdtZjEU,28970
 xinference/core/utils.py,sha256=oVM_bIbbI0w4n3lFIytQ7wMmlIBrxPstrUu7Neahvyw,3528
-xinference/core/worker.py,sha256=kSzzHzCVdBAXDrXnscLzWbGkeErpJ1PYjinoYArS2Qc,17930
+xinference/core/worker.py,sha256=SvHPX66YSPo1WrNSpTMMhTfNC-fmXQzLFcQrNriBjdo,23029
 xinference/deploy/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/deploy/cmdline.py,sha256=WID5tuIHQB24Oin6vO2w3eShS1WKclYPD9MbXU3wXgM,26957
-xinference/deploy/local.py,sha256=-xAKTBEJ8HRZD3bfPOMItwAYTdvpcynU32ruKFCX8W8,3108
+xinference/deploy/cmdline.py,sha256=FcxjI24_8-0CzHZlN-lGvFxGsfN76y0lgDDzuGEpF_0,28376
+xinference/deploy/local.py,sha256=99xpzEx9yFj6xPyplzseF64ZCcVOp3oDKZRTBnJF8Io,3933
 xinference/deploy/supervisor.py,sha256=N2EnjN0_lNT92ygZ6qiip6BeodK7dazT2sWWj_wRU_4,2965
 xinference/deploy/utils.py,sha256=wR8dUZud1k7gSmd4M7l6Rq2rtrhTn5qLGmM1XB1IHr4,4941
-xinference/deploy/worker.py,sha256=tiMdZxLPZd8rVpoyKXZbZzqfyii_jzQWjba2IBwuezo,2242
+xinference/deploy/worker.py,sha256=IzmENJE-g0bujAReC5I_iZE6epdgRwaobALKPIk7csA,2915
 xinference/deploy/test/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/deploy/test/test_cmdline.py,sha256=bEQ4-y2IG-ntOA6tP3_EP22WQelMAiJ9BQkuBmRzZAs,6736
 xinference/locale/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
@@ -40,7 +42,7 @@ xinference/locale/utils.py,sha256=w-G1DAJGw1UUQVVtq6khOZn7ZjobUmTw6qwHMm2eWIs,13
 xinference/locale/zh_CN.json,sha256=YA55G9s1p05Bt5RBoDo5SV12dd-CMJI0ABap6RpCp4M,1097
 xinference/model/__init__.py,sha256=IRC3ojiqYkVLIK_xsIxYeKypEeeTTdrovnVzK_4L4eg,663
 xinference/model/core.py,sha256=D1EJ9ZJ8VkD0I8NcHcUBA4U9cbm-rcaHkmHL-tq8INY,3041
-xinference/model/utils.py,sha256=rJjz2WWZ3FROf2GIUoiS9KveLB6mQVYkcEeG4HpIl9g,9636
+xinference/model/utils.py,sha256=V2Ei0QKkQS3hW_TpfEmHsl1nAIdBrOZH7sMTTWHp7Mg,9727
 xinference/model/embedding/__init__.py,sha256=uU_fNnMbEpl6LxgzN_FC9U3DisjQwCwIvKv0gfjOFkI,2119
 xinference/model/embedding/core.py,sha256=ANZCSv6rCYpzaPcqcgmzwgSrilinXoBLAAPbzpsyh2A,16138
 xinference/model/embedding/custom.py,sha256=iE3-iWVzxarXdeTdw5e6rxv6HQRXVbPHp65wwhT2IL8,3919
@@ -51,36 +53,38 @@ xinference/model/image/core.py,sha256=ectdISnWjTvxxbj-ty9fgqNzLCBo7uX1q_iA6CbHs1
 xinference/model/image/model_spec.json,sha256=VBo3jTq93UtwD9fB1oqrpIJVaZbyYNiougZuY81pt8g,2965
 xinference/model/image/stable_diffusion/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/model/image/stable_diffusion/core.py,sha256=BdslgN_T7f2nzmxtRz0fT2S0P6tTN9zJ_UE6XO2bLLs,5152
-xinference/model/llm/__init__.py,sha256=wMT4RAiNhQwSptHTKtBcF74ks6Bnzp8JZN95e_QAbJE,4846
+xinference/model/llm/__init__.py,sha256=x_f2h_D6aUWtGbkmVSZyzvDA-DcKT3mts3EXxFjWlr4,5244
 xinference/model/llm/core.py,sha256=YDUB-MGEu9CtHxtUKFlu9gNitFO-JvLb6xhaqOYdbZM,7761
-xinference/model/llm/llm_family.json,sha256=ut0jN3-g9YR1MDyKm62Os-90giX3bpttmmgvgUMkaj8,79065
-xinference/model/llm/llm_family.py,sha256=H5TYhzoZ5TwGWiQGmAwIz73yyJzTPqUX7bin2vdQFOQ,30496
-xinference/model/llm/llm_family_modelscope.json,sha256=-8gPXjh7625x_i8ms6Y8WbXnkESEfC_j6hyCWqKQLF4,43904
-xinference/model/llm/utils.py,sha256=kq3Ese0ItleT6J7VkXlkBT5DBKeq8RVIh_m6XZzC2nU,21933
+xinference/model/llm/llm_family.json,sha256=b4PAz9EbD5RjIbC3CR--FaPmDuupViK_ecJeDDJTpjg,80701
+xinference/model/llm/llm_family.py,sha256=6cqVSQsilvZIITwB0kUYIrkj1h8orH1uV1rO7XSYCf0,30989
+xinference/model/llm/llm_family_modelscope.json,sha256=JkaOILdduYXaAS_knca2lO075mUwx4kaOvYN9XCR250,45570
+xinference/model/llm/utils.py,sha256=QyJLOcnibkCNvJwxZhxbAJPWB_F3T9-90OQaOBDT39I,23028
 xinference/model/llm/ggml/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/model/llm/ggml/chatglm.py,sha256=XJ6lyr4Kl6-u65ZxFtEjPbIcd0tTdudKXYoEWFoNXig,12555
 xinference/model/llm/ggml/ctransformers.py,sha256=n8dTItZe97cF79NkmsVPirqpBcrZiGAQfd2GRpz9-3I,9917
 xinference/model/llm/ggml/ctransformers_util.py,sha256=WozFJgJZlbuEDPQLhy31YmwGp-oJoUYsnd9HjuGraIE,5271
-xinference/model/llm/ggml/llamacpp.py,sha256=WozE6OCFc7OmAbFlqj6pwh10dfZePCRnNjL-QQ8IRpU,12182
+xinference/model/llm/ggml/llamacpp.py,sha256=aK4EU25ryGgCbY9HRFfAdFusg385MB_UA1OT-p8fFQs,12241
 xinference/model/llm/ggml/tools/__init__.py,sha256=6a6P2VPKE06xKxJ-dTqp4TRO2IEDWvtcVP6gHutAR0M,624
 xinference/model/llm/ggml/tools/convert_ggml_to_gguf.py,sha256=ULvaoAKGH-L6RuRLFOtAOVitKLVPdpd5QyXrLL14gG0,17959
 xinference/model/llm/ggml/tools/gguf.py,sha256=Hv2haR-UN7NdB1N8YId32hFoEPd-JX6_aUNWRJhyJZc,30277
 xinference/model/llm/pytorch/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
 xinference/model/llm/pytorch/baichuan.py,sha256=6d9UQw5Ox1FFdPG-cO3aKPcUmio5bQ4j1X5B0eCg5TU,2703
-xinference/model/llm/pytorch/chatglm.py,sha256=NfqT5bPyulO5xKoZs1oOe4f-zLtaMblUA1arkZXTDls,5294
+xinference/model/llm/pytorch/chatglm.py,sha256=2YCZL86ABFOjjoVXGxBwK9V80j71gSTfHAYEUveskyY,6991
 xinference/model/llm/pytorch/compression.py,sha256=rN8z0yt2xV9ASLv1muYStyBdvZjGCQL-OYaEga4FwIQ,8081
-xinference/model/llm/pytorch/core.py,sha256=3_hn0VpQ0Mhv-mVFd-4eKdvhLmI6dWCvlgd7AEZRlco,18447
+xinference/model/llm/pytorch/core.py,sha256=fE2Faags4qg3dwuDHAYShPYNeB-DyH6KbHmlDDdrF0A,17920
 xinference/model/llm/pytorch/falcon.py,sha256=wC3_ILUtoimtOWox2_Sr94OcEkMYoB7Mf84R4LYHMGI,4298
+xinference/model/llm/pytorch/internlm2.py,sha256=J9byaXpgPQ28zp8EEqz2hggX2d7bdmY5-e2Mp6SIeTU,5569
 xinference/model/llm/pytorch/llama_2.py,sha256=JQI9R_ZrNhTXg_MbS7el6P-ou49iMKq5vnr9QlQ_o70,3509
 xinference/model/llm/pytorch/spec_decoding_utils.py,sha256=t059oJ0kvcXMA1pKizN1HDzs0LMf5Jpb5MM7aMNKnzQ,18750
 xinference/model/llm/pytorch/spec_model.py,sha256=2rmSYaliu0nwOQzc3rnYFtBLFBayKZ9Xe2hkJUbe7mo,6810
-xinference/model/llm/pytorch/utils.py,sha256=EuWHjaB-Yh6yfJpH31Kj_D3UGgPgoumqg2SPd73nSbI,21948
+xinference/model/llm/pytorch/utils.py,sha256=zKpyeF7JV0sMv4jmwiYQGCxpk6VsbXtgOmnyksHUs8M,16870
 xinference/model/llm/pytorch/vicuna.py,sha256=eJ2HVHg-HbhVHRdA5g4N9I2_cCti4K9y3Y4WjMMzAIs,2217
 xinference/model/llm/vllm/__init__.py,sha256=h_JgzSqV5lP6vQ6XX_17kE4IY4BRnvKta_7VLQAL1ms,581
-xinference/model/llm/vllm/core.py,sha256=Is7ECVzuzPQdZaHzw_pczwkON4jhUGSrfoM-8bxdD3k,14381
-xinference/model/multimodal/__init__.py,sha256=dVujhfm0TgUqfjuLzfJ42SOBELGx7Th_SAS9DecOpI4,1358
+xinference/model/llm/vllm/core.py,sha256=GwQ9eBlEV0NHsrxERx7j4usR3qZEElFkPoAgBIL1VnU,14966
+xinference/model/multimodal/__init__.py,sha256=EjP1dK0s-N5GyePkX4vauris7CfwsoT7Sb2E8OFoysM,1687
 xinference/model/multimodal/core.py,sha256=OupAMjRRc4UjhxWapVaXpymSl7gz1Ss4YTL_lTBO3wI,15429
 xinference/model/multimodal/model_spec.json,sha256=jPDB7Yhn13-e4h2lBm2yF0xQXIgv1cI6Jpto3AjCnto,1071
+xinference/model/multimodal/model_spec_modelscope.json,sha256=yf6naWPy28mVWanW8BMWlwL8gBmzFsMYCVLg2XhzYh8,1073
 xinference/model/multimodal/qwen_vl.py,sha256=DCk_AzDiM5eX7gSRCv1Y3IBRDCwAXEMcM-1jq_HUEEE,5490
 xinference/model/rerank/__init__.py,sha256=FEFQSLCNihIgUb28EAMxutVvpDTPmaf9o7Ey97ryAyA,2135
 xinference/model/rerank/core.py,sha256=M01V1Im2SGb0x8cff8LcB6sbM1mdwcfjxQtGXuHuH6E,11110
@@ -15291,9 +15295,9 @@ xinference/web/ui/node_modules/yargs-parser/package.json,sha256=BSwbOzgetKXMK4u0
 xinference/web/ui/node_modules/yocto-queue/package.json,sha256=6U1XHQPGXJTqsiFvT953ORihUtXTblZy4fXBWP9qxC0,725
 xinference/web/ui/node_modules/yup/package.json,sha256=xRFSROB9NKxqSWHEVFvSTsPs9Ll074uo8OS1zEw0qhA,1206
 xinference/web/ui/node_modules/yup/node_modules/type-fest/package.json,sha256=JTv2zTTVgxQ2H82m1-6qEpdMv08lHjFx4Puf_MsbB_Q,1134
-xinference-0.8.0.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-xinference-0.8.0.dist-info/METADATA,sha256=dohxoMBSP0AWK6Tf1UDkxBaVFkbv5nC3VZkIltCTUNM,13380
-xinference-0.8.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-xinference-0.8.0.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
-xinference-0.8.0.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
-xinference-0.8.0.dist-info/RECORD,,
+xinference-0.8.1.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+xinference-0.8.1.dist-info/METADATA,sha256=ArHMcySJgMVpoXrhiu5nfdcLvV0uscZcC7kq7c_Ky9c,13420
+xinference-0.8.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+xinference-0.8.1.dist-info/entry_points.txt,sha256=-lDyyzqWMFQF0Rgm7VxBNz0V-bMBMQLRR3pvQ-Y8XTY,226
+xinference-0.8.1.dist-info/top_level.txt,sha256=L1rQt7pl6m8tmKXpWVHzP-GtmzAxp663rXxGE7qnK00,11
+xinference-0.8.1.dist-info/RECORD,,