xinference-1.6.0.post1-py3-none-any.whl → xinference-1.6.1-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
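
Since a wheel is an ordinary zip archive, a comparison like this one can be reproduced locally. The Python sketch below is illustrative only, not the registry's own tooling; it assumes both wheel files have already been downloaded into the working directory (for example with `pip download xinference==1.6.1 --no-deps`) and that the chosen member path exists in both archives.

import difflib
import zipfile

def wheel_lines(wheel_path: str, member: str) -> list:
    """Read one file out of a .whl (a zip archive) as a list of lines."""
    with zipfile.ZipFile(wheel_path) as whl:
        text = whl.read(member).decode("utf-8", errors="replace")
    return text.splitlines(keepends=True)

old = "xinference-1.6.0.post1-py3-none-any.whl"
new = "xinference-1.6.1-py3-none-any.whl"
member = "xinference/_version.py"  # any file present in both wheels
diff = difflib.unified_diff(
    wheel_lines(old, member),
    wheel_lines(new, member),
    fromfile=f"a/{member}",
    tofile=f"b/{member}",
)
print("".join(diff))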

Potentially problematic release: the registry has flagged this version of xinference; further details are available on the package's registry page.

Files changed (87)
  1. xinference/_version.py +3 -3
  2. xinference/client/restful/restful_client.py +1 -1
  3. xinference/conftest.py +0 -7
  4. xinference/core/media_interface.py +9 -8
  5. xinference/core/model.py +13 -6
  6. xinference/core/scheduler.py +1 -10
  7. xinference/core/worker.py +0 -10
  8. xinference/model/audio/model_spec.json +53 -1
  9. xinference/model/audio/model_spec_modelscope.json +57 -1
  10. xinference/model/embedding/core.py +19 -11
  11. xinference/model/image/model_spec.json +10 -1
  12. xinference/model/image/model_spec_modelscope.json +20 -0
  13. xinference/model/llm/__init__.py +6 -54
  14. xinference/model/llm/core.py +19 -5
  15. xinference/model/llm/llama_cpp/core.py +59 -3
  16. xinference/model/llm/llama_cpp/memory.py +455 -0
  17. xinference/model/llm/llm_family.json +185 -397
  18. xinference/model/llm/llm_family.py +88 -16
  19. xinference/model/llm/llm_family_modelscope.json +199 -421
  20. xinference/model/llm/llm_family_openmind_hub.json +0 -34
  21. xinference/model/llm/sglang/core.py +4 -0
  22. xinference/model/llm/transformers/__init__.py +27 -6
  23. xinference/model/llm/transformers/chatglm.py +4 -2
  24. xinference/model/llm/transformers/core.py +49 -28
  25. xinference/model/llm/transformers/deepseek_v2.py +6 -49
  26. xinference/model/llm/transformers/gemma3.py +119 -164
  27. xinference/{thirdparty/omnilmm/train → model/llm/transformers/multimodal}/__init__.py +1 -1
  28. xinference/model/llm/transformers/{cogagent.py → multimodal/cogagent.py} +58 -95
  29. xinference/model/llm/transformers/multimodal/core.py +205 -0
  30. xinference/model/llm/transformers/{deepseek_vl2.py → multimodal/deepseek_vl2.py} +59 -120
  31. xinference/model/llm/transformers/multimodal/gemma3.py +117 -0
  32. xinference/model/llm/transformers/{glm4v.py → multimodal/glm4v.py} +57 -93
  33. xinference/model/llm/transformers/multimodal/intern_vl.py +412 -0
  34. xinference/model/llm/transformers/{minicpmv26.py → multimodal/minicpmv26.py} +55 -102
  35. xinference/model/llm/transformers/{ovis2.py → multimodal/ovis2.py} +114 -175
  36. xinference/model/llm/transformers/{qwen-omni.py → multimodal/qwen-omni.py} +82 -167
  37. xinference/model/llm/transformers/multimodal/qwen2_audio.py +131 -0
  38. xinference/model/llm/transformers/{qwen2_vl.py → multimodal/qwen2_vl.py} +224 -256
  39. xinference/model/llm/transformers/opt.py +4 -2
  40. xinference/model/llm/transformers/utils.py +6 -37
  41. xinference/model/llm/vllm/core.py +4 -0
  42. xinference/model/rerank/core.py +7 -1
  43. xinference/model/rerank/utils.py +17 -0
  44. xinference/web/ui/build/asset-manifest.json +3 -3
  45. xinference/web/ui/build/index.html +1 -1
  46. xinference/web/ui/build/static/js/main.ddf9eaee.js +3 -0
  47. xinference/web/ui/build/static/js/main.ddf9eaee.js.map +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/12e637ed5fa9ca6491b03892b6949c03afd4960fe36ac25744488e7e1982aa19.json +1 -0
  49. xinference/web/ui/node_modules/.cache/babel-loader/567e49df411efb24425d289bb484758cb57067ca54f8b5c67fe4505f698deb96.json +1 -0
  50. xinference/web/ui/node_modules/.cache/babel-loader/77ac2665a784e99501ae95d32ef5937837a0439a47e965d291b38e99cb619f5b.json +1 -0
  51. xinference/web/ui/node_modules/.cache/babel-loader/d4ed4e82bfe69915999ec83f5feaa4301c75ecc6bdf1c78f2d03e4671ecbefc8.json +1 -0
  52. xinference/web/ui/src/locales/en.json +3 -1
  53. xinference/web/ui/src/locales/zh.json +3 -1
  54. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/METADATA +6 -4
  55. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/RECORD +60 -76
  56. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/WHEEL +1 -1
  57. xinference/model/llm/transformers/cogvlm2.py +0 -442
  58. xinference/model/llm/transformers/cogvlm2_video.py +0 -333
  59. xinference/model/llm/transformers/deepseek_vl.py +0 -280
  60. xinference/model/llm/transformers/glm_edge_v.py +0 -213
  61. xinference/model/llm/transformers/intern_vl.py +0 -526
  62. xinference/model/llm/transformers/internlm2.py +0 -94
  63. xinference/model/llm/transformers/minicpmv25.py +0 -193
  64. xinference/model/llm/transformers/omnilmm.py +0 -132
  65. xinference/model/llm/transformers/qwen2_audio.py +0 -179
  66. xinference/model/llm/transformers/qwen_vl.py +0 -360
  67. xinference/thirdparty/omnilmm/LICENSE +0 -201
  68. xinference/thirdparty/omnilmm/__init__.py +0 -0
  69. xinference/thirdparty/omnilmm/chat.py +0 -218
  70. xinference/thirdparty/omnilmm/constants.py +0 -4
  71. xinference/thirdparty/omnilmm/conversation.py +0 -332
  72. xinference/thirdparty/omnilmm/model/__init__.py +0 -1
  73. xinference/thirdparty/omnilmm/model/omnilmm.py +0 -595
  74. xinference/thirdparty/omnilmm/model/resampler.py +0 -166
  75. xinference/thirdparty/omnilmm/model/utils.py +0 -578
  76. xinference/thirdparty/omnilmm/train/train_utils.py +0 -150
  77. xinference/thirdparty/omnilmm/utils.py +0 -134
  78. xinference/web/ui/build/static/js/main.ae579a97.js +0 -3
  79. xinference/web/ui/build/static/js/main.ae579a97.js.map +0 -1
  80. xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +0 -1
  81. xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +0 -1
  82. xinference/web/ui/node_modules/.cache/babel-loader/5c08e2cd07809ed3e41486b16652253404cbb63a3ff8d0366ee50f57e2413cea.json +0 -1
  83. xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +0 -1
  84. /xinference/web/ui/build/static/js/{main.ae579a97.js.LICENSE.txt → main.ddf9eaee.js.LICENSE.txt} +0 -0
  85. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/entry_points.txt +0 -0
  86. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/licenses/LICENSE +0 -0
  87. {xinference-1.6.0.post1.dist-info → xinference-1.6.1.dist-info}/top_level.txt +0 -0
--- a/xinference/thirdparty/omnilmm/train/train_utils.py
+++ /dev/null
@@ -1,150 +0,0 @@
-import copy
-import warnings
-from typing import Dict, Sequence
-
-import numpy as np
-import transformers
-
-IGNORE_INDEX = -100
-DEFAULT_IMAGE_TOKEN = "<image>"
-DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>"
-DEFAULT_IM_START_TOKEN = "<im_start>"
-DEFAULT_IM_END_TOKEN = "<im_end>"
-
-
-def _tokenize_fn(
-    strings: Sequence[str], tokenizer: transformers.PreTrainedTokenizer
-) -> Dict:
-    """Tokenize a list of strings."""
-    tokenized_list = [
-        tokenizer(
-            text,
-            return_tensors="pt",
-            padding="longest",
-            max_length=tokenizer.model_max_length,
-            truncation=True,
-        )
-        for text in strings
-    ]
-    input_ids = labels = [tokenized.input_ids[0] for tokenized in tokenized_list]
-    input_ids_lens = labels_lens = [
-        tokenized.input_ids.ne(tokenizer.pad_token_id).sum().item()
-        for tokenized in tokenized_list
-    ]
-    return dict(
-        input_ids=input_ids,
-        labels=labels,
-        input_ids_lens=input_ids_lens,
-        labels_lens=labels_lens,
-    )
-
-
-def omni_preprocess(
-    sources, tokenizer: transformers.PreTrainedTokenizer, generation=False
-):
-    system_content = "You are an artificial intelligence assistant, which gives helpful, detailed, and polite answers to the human's questions."
-    ignore_index = -100
-
-    response_template = "\n<|assistant|>\n"
-    instruction_template = "\n<|user|>\n"
-    response_token_ids = tokenizer.encode(response_template, add_special_tokens=False)
-    instruction_token_ids = tokenizer.encode(
-        instruction_template, add_special_tokens=False
-    )
-
-    batch_input_ids = []
-    batch_labels = []
-    for i in range(len(sources)):
-        new_source = []
-        prev_role = "unexpect"
-        for conv_turn in sources[i]:
-            role = conv_turn["from"] if "from" in conv_turn else conv_turn["role"]
-            content = (
-                conv_turn["value"] if "value" in conv_turn else conv_turn["content"]
-            )
-
-            role = "user" if role == "human" else role
-            role = "assistant" if role == "gpt" else role
-
-            assert role in ["user", "assistant"]
-            assert role != prev_role, f"role={role}, prev_role={prev_role}"
-            prev_role = role
-
-            new_turn = {"role": role, "content": content}
-            new_source.append(new_turn)
-        if new_source[0]["role"] != "system":
-            new_source.insert(0, {"role": "system", "content": system_content})
-
-        # TODO: this automatically add '\n' to the end
-        res_text = tokenizer.apply_chat_template(
-            new_source, tokenize=False, add_generation_prompt=generation
-        )
-        if not generation:
-            res_text = res_text.strip()
-
-        conversations_tokenized = _tokenize_fn([res_text], tokenizer)
-        res_input_ids = conversations_tokenized["input_ids"][0]
-
-        # since labels and input_ids are reference towards the same object
-        res_labels = copy.deepcopy(conversations_tokenized["labels"][0])
-
-        response_token_ids_idxs = []
-        human_token_ids_idxs = []
-
-        for assistant_idx in np.where(res_labels == response_token_ids[0])[0]:
-            # find the indexes of the start of a response.
-            if (
-                response_token_ids
-                == res_labels[
-                    assistant_idx : assistant_idx + len(response_token_ids)
-                ].tolist()
-            ):
-                response_token_ids_idxs.append(assistant_idx + len(response_token_ids))
-
-        if len(response_token_ids_idxs) == 0:
-            warnings.warn(
-                f"Could not find response key `{response_template}` in the "
-                f"following instance: @===>{tokenizer.decode(res_input_ids)}<===@ "
-                f"Raw text is @===>{res_text}<===@"
-                f"Raw source is @===>{new_source}<===@"
-                f"This instance will be ignored in loss calculation. "
-                f"Note, if this happens often, consider increasing the `max_seq_length`."
-            )
-            res_labels[:] = ignore_index
-
-        human_token_ids = instruction_token_ids
-        for human_idx in np.where(res_labels == human_token_ids[0])[0]:
-            # find the indexes of the start of a human answer.
-            if (
-                human_token_ids
-                == res_labels[human_idx : human_idx + len(human_token_ids)].tolist()
-            ):
-                human_token_ids_idxs.append(human_idx)
-
-        if len(human_token_ids_idxs) == 0:
-            warnings.warn(
-                f"Could not find instruction key `{instruction_template}` in the "
-                f"following instance: @===>{tokenizer.decode(res_input_ids)}<===@ "
-                f"Raw text is @===>{res_text}<===@"
-                f"Raw source is @===>{new_source}<===@"
-                f"This instance will be ignored in loss calculation. "
-                f"Note, if this happens often, consider increasing the `max_seq_length`."
-            )
-            res_labels[:] = ignore_index
-
-        for idx, (start, end) in enumerate(
-            zip(human_token_ids_idxs, response_token_ids_idxs)
-        ):
-            # Make pytorch loss function ignore all non response tokens
-            if idx != 0:
-                res_labels[start:end] = ignore_index
-            else:
-                res_labels[:end] = ignore_index
-
-        if len(response_token_ids_idxs) < len(human_token_ids_idxs):
-            res_labels[human_token_ids_idxs[-1] :] = ignore_index
-
-        batch_input_ids.append(res_input_ids)
-        batch_labels.append(res_labels)
-
-    return dict(input_ids=batch_input_ids, labels=batch_labels)
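
The removed omni_preprocess implements a common supervised fine-tuning pattern: tokenize the whole chat transcript, then set every label that is not part of an assistant response to -100 so PyTorch's cross-entropy loss ignores those positions. The standalone sketch below reproduces just that masking idea; mask_non_response_labels, find_sublist, and the toy token IDs are illustrative, not code from this package.

import numpy as np

IGNORE_INDEX = -100  # PyTorch CrossEntropyLoss skips targets with this value


def find_sublist(haystack: np.ndarray, needle: list) -> list:
    """Return the start index of every occurrence of `needle` in `haystack`."""
    hits = []
    for start in np.where(haystack == needle[0])[0]:
        if haystack[start : start + len(needle)].tolist() == needle:
            hits.append(int(start))
    return hits


def mask_non_response_labels(input_ids, response_ids, instruction_ids):
    """Copy input_ids to labels, then ignore every token outside an
    assistant response (the same idea as omni_preprocess above)."""
    labels = np.array(input_ids, dtype=np.int64)
    # A response begins right AFTER its template tokens.
    response_starts = [s + len(response_ids) for s in find_sublist(labels, response_ids)]
    instruction_starts = find_sublist(labels, instruction_ids)
    for idx, (instr, resp) in enumerate(zip(instruction_starts, response_starts)):
        # Mask from the previous instruction up to the start of the response;
        # the first mask also covers the system prompt at position 0.
        labels[(instr if idx else 0) : resp] = IGNORE_INDEX
    if len(response_starts) < len(instruction_starts):
        # Trailing user turn with no answer yet: ignore it entirely.
        labels[instruction_starts[-1] :] = IGNORE_INDEX
    return labels


# Toy example: 9 stands for "<|user|>", 8 for "<|assistant|>", the rest is content.
ids = [1, 2, 9, 5, 5, 8, 6, 6, 9, 7]
print(mask_non_response_labels(ids, response_ids=[8], instruction_ids=[9]))
# -> [-100 -100 -100 -100 -100 -100    6    6 -100 -100]

Only the tokens of the assistant's answer (6, 6) survive; everything before the assistant marker and the trailing unanswered user turn are excluded from the loss.
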
--- a/xinference/thirdparty/omnilmm/utils.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import logging
-import logging.handlers
-import os
-import sys
-
-import requests
-
-from .constants import LOGDIR
-
-server_error_msg = (
-    "**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**"
-)
-moderation_msg = (
-    "YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES. PLEASE TRY AGAIN."
-)
-
-handler = None
-
-
-def build_logger(logger_name, logger_filename):
-    global handler
-
-    formatter = logging.Formatter(
-        fmt="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
-        datefmt="%Y-%m-%d %H:%M:%S",
-    )
-
-    # Set the format of root handlers
-    if not logging.getLogger().handlers:
-        logging.basicConfig(level=logging.INFO)
-    logging.getLogger().handlers[0].setFormatter(formatter)
-
-    # Redirect stdout and stderr to loggers
-    stdout_logger = logging.getLogger("stdout")
-    stdout_logger.setLevel(logging.INFO)
-    sl = StreamToLogger(stdout_logger, logging.INFO)
-    sys.stdout = sl
-
-    stderr_logger = logging.getLogger("stderr")
-    stderr_logger.setLevel(logging.ERROR)
-    sl = StreamToLogger(stderr_logger, logging.ERROR)
-    sys.stderr = sl
-
-    # Get logger
-    logger = logging.getLogger(logger_name)
-    logger.setLevel(logging.INFO)
-
-    # Add a file handler for all loggers
-    if handler is None:
-        os.makedirs(LOGDIR, exist_ok=True)
-        filename = os.path.join(LOGDIR, logger_filename)
-        handler = logging.handlers.TimedRotatingFileHandler(
-            filename, when="D", utc=True
-        )
-        handler.setFormatter(formatter)
-
-        for name, item in logging.root.manager.loggerDict.items():
-            if isinstance(item, logging.Logger):
-                item.addHandler(handler)
-
-    return logger
-
-
-class StreamToLogger(object):
-    """
-    Fake file-like stream object that redirects writes to a logger instance.
-    """
-
-    def __init__(self, logger, log_level=logging.INFO):
-        self.terminal = sys.stdout
-        self.logger = logger
-        self.log_level = log_level
-        self.linebuf = ""
-
-    def __getattr__(self, attr):
-        return getattr(self.terminal, attr)
-
-    def write(self, buf):
-        temp_linebuf = self.linebuf + buf
-        self.linebuf = ""
-        for line in temp_linebuf.splitlines(True):
-            # From the io.TextIOWrapper docs:
-            # On output, if newline is None, any '\n' characters written
-            # are translated to the system default line separator.
-            # By default sys.stdout.write() expects '\n' newlines and then
-            # translates them so this is still cross platform.
-            if line[-1] == "\n":
-                self.logger.log(self.log_level, line.rstrip())
-            else:
-                self.linebuf += line
-
-    def flush(self):
-        if self.linebuf != "":
-            self.logger.log(self.log_level, self.linebuf.rstrip())
-        self.linebuf = ""
-
-
-def disable_torch_init():
-    """
-    Disable the redundant torch default initialization to accelerate model creation.
-    """
-    import torch
-
-    setattr(torch.nn.Linear, "reset_parameters", lambda self: None)
-    setattr(torch.nn.LayerNorm, "reset_parameters", lambda self: None)
-
-
-def violates_moderation(text):
-    """
-    Check whether the text violates OpenAI moderation API.
-    """
-    url = "https://api.openai.com/v1/moderations"
-    headers = {
-        "Content-Type": "application/json",
-        "Authorization": "Bearer " + os.environ["OPENAI_API_KEY"],
-    }
-    text = text.replace("\n", "")
-    data = "{" + '"input": ' + f'"{text}"' + "}"
-    data = data.encode("utf-8")
-    try:
-        ret = requests.post(url, headers=headers, data=data, timeout=5)
-        flagged = ret.json()["results"][0]["flagged"]
-    except requests.exceptions.RequestException:
-        flagged = False
-    except KeyError:
-        flagged = False
-
-    return flagged
-
-
-def pretty_print_semaphore(semaphore):
-    if semaphore is None:
-        return "None"
-    return f"Semaphore(value={semaphore._value}, locked={semaphore.locked()})"
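
The deleted StreamToLogger is a generic stdout/stderr-to-logger shim: it buffers partial writes and emits one log record per completed line. A condensed, self-contained sketch of the same pattern follows; the names (LoggerWriter) are hypothetical and the code is independent of this package.

import logging
import sys


class LoggerWriter:
    """File-like shim: buffers partial writes, logs one record per full line."""

    def __init__(self, logger: logging.Logger, level: int = logging.INFO):
        self.logger = logger
        self.level = level
        self.buf = ""

    def write(self, text: str) -> None:
        self.buf += text
        # Emit every completed line; keep any unterminated tail buffered
        # so fragmented writes ("foo", "bar\n") become one record.
        while "\n" in self.buf:
            line, self.buf = self.buf.split("\n", 1)
            self.logger.log(self.level, line)

    def flush(self) -> None:
        if self.buf:
            self.logger.log(self.level, self.buf)
            self.buf = ""


logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
)
sys.stdout = LoggerWriter(logging.getLogger("stdout"))
print("hello", "world")  # emitted on stderr as "... | INFO | stdout | hello world"

Buffering matters because print issues separate write calls for the message and the trailing newline; logging on every call would split one printed line into several records. Using basicConfig's default handler (which writes to sys.stderr) also avoids recursing through the replaced sys.stdout.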