xinference 0.10.2.post1__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference was flagged as potentially problematic in the registry.
- xinference/_version.py +3 -3
- xinference/api/oauth2/auth_service.py +1 -1
- xinference/api/restful_api.py +53 -61
- xinference/client/restful/restful_client.py +52 -57
- xinference/conftest.py +1 -1
- xinference/core/cache_tracker.py +1 -1
- xinference/core/event.py +1 -1
- xinference/core/model.py +15 -4
- xinference/core/status_guard.py +1 -1
- xinference/core/supervisor.py +58 -72
- xinference/core/worker.py +73 -102
- xinference/deploy/cmdline.py +175 -6
- xinference/deploy/test/test_cmdline.py +2 -0
- xinference/deploy/utils.py +1 -1
- xinference/device_utils.py +29 -3
- xinference/fields.py +5 -1
- xinference/model/audio/model_spec.json +8 -1
- xinference/model/audio/whisper.py +88 -12
- xinference/model/core.py +2 -2
- xinference/model/embedding/core.py +13 -0
- xinference/model/image/__init__.py +29 -0
- xinference/model/image/core.py +6 -0
- xinference/model/image/custom.py +109 -0
- xinference/model/llm/__init__.py +92 -32
- xinference/model/llm/core.py +57 -102
- xinference/model/llm/ggml/tools/convert_ggml_to_gguf.py +2 -2
- xinference/model/llm/llm_family.json +446 -2
- xinference/model/llm/llm_family.py +45 -41
- xinference/model/llm/llm_family_modelscope.json +208 -1
- xinference/model/llm/pytorch/deepseek_vl.py +89 -33
- xinference/model/llm/pytorch/qwen_vl.py +67 -12
- xinference/model/llm/pytorch/yi_vl.py +62 -45
- xinference/model/llm/utils.py +45 -15
- xinference/model/llm/vllm/core.py +21 -4
- xinference/model/rerank/core.py +48 -20
- xinference/thirdparty/omnilmm/chat.py +2 -1
- xinference/thirdparty/omnilmm/model/omnilmm.py +2 -1
- xinference/types.py +2 -0
- xinference/web/ui/build/asset-manifest.json +6 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.54bca460.css +2 -0
- xinference/web/ui/build/static/css/main.54bca460.css.map +1 -0
- xinference/web/ui/build/static/js/main.8e44da4b.js +3 -0
- xinference/web/ui/build/static/js/{main.26fdbfbe.js.LICENSE.txt → main.8e44da4b.js.LICENSE.txt} +7 -0
- xinference/web/ui/build/static/js/main.8e44da4b.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0b11a5339468c13b2d31ac085e7effe4303259b2071abd46a0a8eb8529233a5e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/29dda700ab913cf7f2cfabe450ddabfb283e96adfa3ec9d315b2fa6c63cd375c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2c63e940b945fd5817157e08a42b889b30d668ea4c91332f48ef2b1b9d26f520.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4135fe8745434cbce6438d1ebfa47422e0c77d884db4edc75c8bf32ea1d50621.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/46b6dd1f6d1109cd0e2455a0ea0be3e9bda1097cd4ebec9c4040070372671cfc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4de0a71074f9cbe1e7862750dcdd08cbc1bae7d9d9849a78b1783ca670017b3c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/53f6c0c0afb51265cd8fb940daeb65523501879ac2a8c03a1ead22b9793c5041.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8ccbb839002bc5bc03e0a0e7612362bf92f6ae64f87e094f8682d6a6fe4619bb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/97ed30d6e22cf76f0733651e2c18364689a01665d0b5fe811c1b7ca3eb713c82.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9c0c70f1838913aaa792a0d2260f17f90fd177b95698ed46b7bc3050eb712c1c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9cfd33238ca43e5bf9fc7e442690e8cc6027c73553db36de87e3597ed524ee4b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ada71518a429f821a9b1dea38bc951447f03c8db509887e0980b893acac938f3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b6c9558d28b5972bb8b2691c5a76a2c8814a815eb3443126da9f49f7d6a0c118.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bb0f721c084a4d85c09201c984f02ee8437d3b6c5c38a57cb4a101f653daef1b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ddaec68b88e5eff792df1e39a4b4b8b737bfc832293c015660c3c69334e3cf5c.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +33 -0
- xinference/web/ui/node_modules/clipboard/.babelrc.json +11 -0
- xinference/web/ui/node_modules/clipboard/.eslintrc.json +24 -0
- xinference/web/ui/node_modules/clipboard/.prettierrc.json +9 -0
- xinference/web/ui/node_modules/clipboard/bower.json +18 -0
- xinference/web/ui/node_modules/clipboard/composer.json +25 -0
- xinference/web/ui/node_modules/clipboard/package.json +63 -0
- xinference/web/ui/node_modules/delegate/package.json +31 -0
- xinference/web/ui/node_modules/good-listener/bower.json +11 -0
- xinference/web/ui/node_modules/good-listener/package.json +35 -0
- xinference/web/ui/node_modules/select/bower.json +13 -0
- xinference/web/ui/node_modules/select/package.json +29 -0
- xinference/web/ui/node_modules/tiny-emitter/package.json +53 -0
- xinference/web/ui/package-lock.json +34 -0
- xinference/web/ui/package.json +1 -0
- {xinference-0.10.2.post1.dist-info → xinference-0.11.0.dist-info}/METADATA +14 -13
- {xinference-0.10.2.post1.dist-info → xinference-0.11.0.dist-info}/RECORD +81 -60
- xinference/client/oscar/__init__.py +0 -13
- xinference/client/oscar/actor_client.py +0 -611
- xinference/model/llm/pytorch/spec_decoding_utils.py +0 -531
- xinference/model/llm/pytorch/spec_model.py +0 -186
- xinference/web/ui/build/static/js/main.26fdbfbe.js +0 -3
- xinference/web/ui/build/static/js/main.26fdbfbe.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/63a4c48f0326d071c7772c46598215c006ae41fd3d4ff3577fe717de66ad6e89.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/de0299226173b0662b573f49e3992220f6611947073bd66ac079728a8bc8837d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e9b52d171223bb59fb918316297a051cdfd42dd453e8260fd918e90bc0a4ebdf.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f4d5d1a41892a754c1ee0237450d804b20612d1b657945b59e564161ea47aa7a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/fad4cd70de36ef6e6d5f8fd74a10ded58d964a8a91ef7681693fbb8376552da7.json +0 -1
- {xinference-0.10.2.post1.dist-info → xinference-0.11.0.dist-info}/LICENSE +0 -0
- {xinference-0.10.2.post1.dist-info → xinference-0.11.0.dist-info}/WHEEL +0 -0
- {xinference-0.10.2.post1.dist-info → xinference-0.11.0.dist-info}/entry_points.txt +0 -0
- {xinference-0.10.2.post1.dist-info → xinference-0.11.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/utils.py
CHANGED
@@ -114,6 +114,22 @@ class ChatModelMixin:
                 else:
                     ret += role
             return ret
+        elif prompt_style.style_name == "LLAMA3":
+            ret = (
+                f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>"
+                f"{prompt_style.intra_message_sep}{prompt_style.system_prompt}{prompt_style.inter_message_sep}"
+            )
+            for i, message in enumerate(chat_history):
+                role = get_role(message["role"])
+                content = message["content"]
+                if content:
+                    ret += (
+                        f"<|start_header_id|>{role}<|end_header_id|>"
+                        f"{prompt_style.intra_message_sep}{content}{prompt_style.inter_message_sep}"
+                    )
+                else:
+                    ret += f"<|start_header_id|>{role}<|end_header_id|>{prompt_style.intra_message_sep}"
+            return ret
         elif prompt_style.style_name == "FALCON":
             ret = prompt_style.system_prompt
             for message in chat_history:
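The new LLAMA3 branch assembles the Llama-3 chat wire format: each turn is wrapped in <|start_header_id|>…<|end_header_id|> markers and closed with an end-of-turn separator. A minimal standalone sketch of what the branch produces, with hypothetical separator values standing in for the ones the model family's prompt style actually defines:

# Standalone sketch of the LLAMA3 branch above. The separator values
# ("\n\n" and "<|eot_id|>") are illustrative stand-ins for
# prompt_style.intra_message_sep / inter_message_sep.
system_prompt = "You are a helpful assistant."
intra_sep, inter_sep = "\n\n", "<|eot_id|>"
chat_history = [
    {"role": "user", "content": "Hello!"},
    {"role": "assistant", "content": ""},  # empty content opens the assistant turn
]

ret = (
    "<|begin_of_text|><|start_header_id|>system<|end_header_id|>"
    f"{intra_sep}{system_prompt}{inter_sep}"
)
for message in chat_history:
    role, content = message["role"], message["content"]
    if content:
        ret += f"<|start_header_id|>{role}<|end_header_id|>{intra_sep}{content}{inter_sep}"
    else:
        ret += f"<|start_header_id|>{role}<|end_header_id|>{intra_sep}"
print(ret)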
@@ -212,16 +228,14 @@ Begin!"""
         tools_name_text = []
         for func_info in tools:
             parameters = []
-
-
-
-
-
-
-
-
-                    param["required"] = True
-                parameters.append(param)
+            fp = func_info["function"].get("parameters", {})
+            if fp:
+                required_parameters = fp.get("required", [])
+                for name, p in fp["properties"].items():
+                    param = dict({"name": name}, **p)
+                    if name in required_parameters:
+                        param["required"] = True
+                    parameters.append(param)

             name = func_info["function"]["name"]
             desc = func_info["function"]["description"]
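The rewritten tool-formatting loop now reads the OpenAI-style `parameters` schema defensively: a tool that omits `parameters` entirely is tolerated, and each entry's `required` flag is folded into the flattened parameter dict. A runnable sketch with a hypothetical tool spec, showing the resulting `parameters` list:

# Hypothetical OpenAI-style tool spec, to illustrate the flattening above.
func_info = {
    "function": {
        "name": "get_weather",
        "description": "Look up current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {"type": "string", "description": "City name"},
                "unit": {"type": "string", "enum": ["C", "F"]},
            },
            "required": ["city"],
        },
    }
}

parameters = []
fp = func_info["function"].get("parameters", {})
if fp:
    required_parameters = fp.get("required", [])
    for name, p in fp["properties"].items():
        param = dict({"name": name}, **p)
        if name in required_parameters:
            param["required"] = True
        parameters.append(param)

print(parameters)
# [{'name': 'city', 'type': 'string', 'description': 'City name', 'required': True},
#  {'name': 'unit', 'type': 'string', 'enum': ['C', 'F']}]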
@@ -431,6 +445,17 @@ Begin!"""
                 else:
                     ret += "<AI>" + content.strip()
             return ret
+        elif prompt_style.style_name == "PHI3":
+            ret = f"<|system|>{prompt_style.intra_message_sep}{prompt_style.system_prompt}{prompt_style.inter_message_sep}"
+            for message in chat_history:
+                content = message["content"] or ""
+                role = get_role(message["role"])
+                if content:
+                    ret += f"<|{role}|>{prompt_style.intra_message_sep}{content}{prompt_style.inter_message_sep}"
+                else:
+                    ret += f"<|{role}|>{prompt_style.intra_message_sep}"
+            ret += "<|assistant|>\n"
+            return ret
         else:
             raise ValueError(f"Invalid prompt style: {prompt_style.style_name}")

@@ -664,6 +689,15 @@ Begin!"""
         else:
             m = {"role": "assistant", "content": content, "tool_calls": []}
             finish_reason = "stop"
+        try:
+            usage = c.get("usage")
+            assert "prompt_tokens" in usage
+        except Exception:
+            usage = {
+                "prompt_tokens": -1,
+                "completion_tokens": -1,
+                "total_tokens": -1,
+            }
         return {
             "id": "chat" + f"cmpl-{_id}",
             "model": model_uid,
@@ -676,11 +710,7 @@ Begin!"""
                     "finish_reason": finish_reason,
                 }
             ],
-            "usage": {
-                "prompt_tokens": -1,
-                "completion_tokens": -1,
-                "total_tokens": -1,
-            },
+            "usage": usage,
         }


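With this change, real token counts from the underlying completion chunk are passed through when the backend provides them, and the old -1 sentinels remain only as a fallback. A standalone sketch of the probe (the helper name is hypothetical):

def usage_or_fallback(c: dict) -> dict:
    # Mirrors the try/assert above: a missing or malformed "usage" entry
    # (None, or lacking "prompt_tokens") falls back to -1 sentinels.
    try:
        usage = c.get("usage")
        assert "prompt_tokens" in usage
    except Exception:
        usage = {"prompt_tokens": -1, "completion_tokens": -1, "total_tokens": -1}
    return usage

print(usage_or_fallback({"usage": {"prompt_tokens": 5, "completion_tokens": 7, "total_tokens": 12}}))
print(usage_or_fallback({}))  # -> the sentinel dict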
xinference/model/llm/vllm/core.py
CHANGED

@@ -85,6 +85,7 @@ except ImportError:

 VLLM_SUPPORTED_MODELS = [
     "llama-2",
+    "llama-3",
     "baichuan",
     "internlm-16k",
     "mistral-v0.1",
@@ -94,6 +95,7 @@ VLLM_SUPPORTED_MODELS = [
 ]
 VLLM_SUPPORTED_CHAT_MODELS = [
     "llama-2-chat",
+    "llama-3-instruct",
     "vicuna-v1.3",
     "vicuna-v1.5",
     "baichuan-chat",
@@ -108,6 +110,7 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "mistral-instruct-v0.1",
     "mistral-instruct-v0.2",
     "mixtral-instruct-v0.1",
+    "mixtral-8x22B-instruct-v0.1",
     "chatglm3",
     "chatglm3-32k",
     "chatglm3-128k",
@@ -237,10 +240,17 @@ class VLLMModel(LLM):
         if llm_spec.model_format == "pytorch":
             if quantization != "none" and not (quantization is None):
                 return False
-        if llm_spec.model_format
-            # Currently, only 4-bit weight quantization is supported for
+        if llm_spec.model_format == "awq":
+            # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
             if "4" not in quantization:
                 return False
+        if llm_spec.model_format == "gptq":
+            if VLLM_INSTALLED and vllm.__version__ >= "0.3.3":
+                if not any(q in quantization for q in ("3", "4", "8")):
+                    return False
+            else:
+                if "4" not in quantization:
+                    return False
         if isinstance(llm_family, CustomLLMFamilyV1):
             if llm_family.model_family not in VLLM_SUPPORTED_MODELS:
                 return False
@@ -414,10 +424,17 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
         if llm_spec.model_format == "pytorch":
             if quantization != "none" and not (quantization is None):
                 return False
-        if llm_spec.model_format
-            # Currently, only 4-bit weight quantization is supported for
+        if llm_spec.model_format == "awq":
+            # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
             if "4" not in quantization:
                 return False
+        if llm_spec.model_format == "gptq":
+            if VLLM_INSTALLED and vllm.__version__ >= "0.3.3":
+                if not any(q in quantization for q in ("3", "4", "8")):
+                    return False
+            else:
+                if "4" not in quantization:
+                    return False
         if isinstance(llm_family, CustomLLMFamilyV1):
             if llm_family.model_family not in VLLM_SUPPORTED_CHAT_MODELS:
                 return False
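The quantization gate now distinguishes AWQ (still 4-bit only) from GPTQ, where vLLM >= 0.3.3 accepts 3-, 4-, and 8-bit weights. A standalone sketch of the GPTQ branch, keeping the source's plain string comparison on the version:

# Sketch of the new GPTQ gate. VLLM_INSTALLED and the version string are
# stubbed here; the real code inspects the installed vllm module.
VLLM_INSTALLED = True
VLLM_VERSION = "0.4.0"

def gptq_quantization_supported(quantization: str) -> bool:
    if VLLM_INSTALLED and VLLM_VERSION >= "0.3.3":  # lexicographic, as in the source
        return any(q in quantization for q in ("3", "4", "8"))
    return "4" in quantization  # older vLLM: 4-bit GPTQ only

print(gptq_quantization_supported("Int4"))  # True
print(gptq_quantization_supported("Int8"))  # True on vLLM >= 0.3.3, False otherwise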
xinference/model/rerank/core.py
CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import gc
 import logging
 import os
 import uuid
@@ -21,6 +22,7 @@ from typing import Dict, List, Optional, Tuple
 import numpy as np

 from ...constants import XINFERENCE_CACHE_DIR
+from ...device_utils import empty_cache
 from ...types import Document, DocumentObj, Rerank
 from ..core import CacheableModelSpec, ModelDescription
 from ..utils import is_model_cached
@@ -31,6 +33,8 @@ logger = logging.getLogger(__name__)
 # Init when registering all the builtin models.
 MODEL_NAME_TO_REVISION: Dict[str, List[str]] = defaultdict(list)
 RERANK_MODEL_DESCRIPTIONS: Dict[str, List[Dict]] = defaultdict(list)
+RERANK_EMPTY_CACHE_COUNT = int(os.getenv("XINFERENCE_RERANK_EMPTY_CACHE_COUNT", "10"))
+assert RERANK_EMPTY_CACHE_COUNT > 0


 def get_rerank_model_descriptions():
@@ -113,28 +117,44 @@ class RerankModel:
         self._model_config = model_config or dict()
         self._use_fp16 = use_fp16
         self._model = None
+        self._counter = 0

     def load(self):
-
-
-        from
-
-
-
-
-
-
-        )
-
-
-
-
-
-
+        if self._model_spec.type == "normal":
+            try:
+                from sentence_transformers.cross_encoder import CrossEncoder
+            except ImportError:
+                error_message = "Failed to import module 'sentence-transformers'"
+                installation_guide = [
+                    "Please make sure 'sentence-transformers' is installed. ",
+                    "You can install it by `pip install sentence-transformers`\n",
+                ]
+
+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+            self._model = CrossEncoder(
+                self._model_path, device=self._device, **self._model_config
+            )
+            if self._use_fp16:
+                self._model.model.half()
+        else:
+            try:
+                if self._model_spec.type == "LLM-based":
+                    from FlagEmbedding import FlagLLMReranker as FlagReranker
+                elif self._model_spec.type == "LLM-based layerwise":
+                    from FlagEmbedding import LayerWiseFlagLLMReranker as FlagReranker
+                else:
+                    raise RuntimeError(
+                        f"Unsupported Rank model type: {self._model_spec.type}"
+                    )
+            except ImportError:
+                error_message = "Failed to import module 'FlagEmbedding'"
+                installation_guide = [
+                    "Please make sure 'FlagEmbedding' is installed. ",
+                    "You can install it by `pip install FlagEmbedding`\n",
+                ]

-
-
+                raise ImportError(f"{error_message}\n\n{''.join(installation_guide)}")
+            self._model = FlagReranker(self._model_path, use_fp16=self._use_fp16)

     def rerank(
         self,
@@ -145,13 +165,21 @@
         return_documents: Optional[bool],
         **kwargs,
     ) -> Rerank:
+        self._counter += 1
+        if self._counter % RERANK_EMPTY_CACHE_COUNT == 0:
+            logger.debug("Empty rerank cache.")
+            gc.collect()
+            empty_cache()
         assert self._model is not None
         if kwargs:
             raise ValueError("rerank hasn't support extra parameter.")
         if max_chunks_per_doc is not None:
             raise ValueError("rerank hasn't support `max_chunks_per_doc` parameter.")
         sentence_combinations = [[query, doc] for doc in documents]
-
+        if self._model_spec.type == "normal":
+            similarity_scores = self._model.predict(sentence_combinations)
+        else:
+            similarity_scores = self._model.compute_score(sentence_combinations)
         sim_scores_argsort = list(reversed(np.argsort(similarity_scores)))
         if top_n is not None:
             sim_scores_argsort = sim_scores_argsort[:top_n]
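Two behavioral notes fall out of these hunks: `rerank()` now dispatches to `predict` for normal cross-encoders and to `compute_score` for the FlagEmbedding LLM-based rerankers, and every N calls it forces a garbage-collection pass plus a device cache purge, where N is read once at import time from `XINFERENCE_RERANK_EMPTY_CACHE_COUNT` (default 10). A minimal sketch of the counter pattern, with a hypothetical free function standing in for the per-model method:

import gc
import os

# Read once at import time, as in the module above; must be positive.
RERANK_EMPTY_CACHE_COUNT = int(os.getenv("XINFERENCE_RERANK_EMPTY_CACHE_COUNT", "10"))
assert RERANK_EMPTY_CACHE_COUNT > 0

_counter = 0

def maybe_empty_cache():
    # Every Nth call: collect garbage, then release cached device memory.
    # In xinference, device_utils.empty_cache() dispatches to the torch
    # backend's cache purge (e.g. torch.cuda.empty_cache()); gc.collect()
    # stands in for the full sequence in this sketch.
    global _counter
    _counter += 1
    if _counter % RERANK_EMPTY_CACHE_COUNT == 0:
        gc.collect()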
xinference/thirdparty/omnilmm/chat.py
CHANGED

@@ -4,7 +4,6 @@ import json
 import os

 import torch
-from accelerate import init_empty_weights, load_checkpoint_and_dispatch
 from PIL import Image
 from transformers import AutoModel, AutoTokenizer

@@ -20,6 +19,8 @@ DEFAULT_IM_END_TOKEN = "<im_end>"


 def init_omni_lmm(model_path, device_map):
+    from accelerate import init_empty_weights, load_checkpoint_and_dispatch
+
     torch.backends.cuda.matmul.allow_tf32 = True
     disable_torch_init()
     model_name = os.path.expanduser(model_path)
xinference/thirdparty/omnilmm/model/omnilmm.py
CHANGED

@@ -2,7 +2,6 @@ import gc
 import math
 from typing import List, Optional, Tuple, Union

-import timm
 import torch
 import torch.nn as nn
 from torch import Tensor
@@ -37,6 +36,8 @@ class Identity(torch.nn.Identity):


 def create_vision_module(config):
+    import timm
+
     vision_tower = timm.create_model(
         "eva02_enormous_patch14_clip_224.laion2b_plus",
         pretrained=False,
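Both omnilmm edits apply the same fix: the heavy optional dependencies (accelerate, timm) move from module level into the functions that use them, so importing the module no longer fails when they are absent. The pattern in miniature (a sketch, not the vendored code):

def create_vision_module_sketch():
    # timm is resolved on first call rather than at import time, so the
    # enclosing module can be imported without timm installed.
    import timm

    return timm.create_model(
        "eva02_enormous_patch14_clip_224.laion2b_plus", pretrained=False
    )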
xinference/types.py
CHANGED
@@ -33,6 +33,7 @@ from .fields import (
     stop_field,
     stream_field,
     stream_interval_field,
+    stream_option_field,
     temperature_field,
     top_k_field,
     top_p_field,
@@ -392,6 +393,7 @@ class _CreateCompletionOpenAIFallback(BaseModel):
     seed: Optional[int] = none_field
     stop: Optional[Union[str, List[str]]] = stop_field
     stream: bool = stream_field
+    stream_options: Optional[dict] = stream_option_field
     suffix: Optional[str] = none_field
     temperature: float = temperature_field
     top_p: float = top_p_field
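`_CreateCompletionOpenAIFallback` models the request body Xinference forwards to OpenAI-compatible backends, so accepting `stream_options` lets clients request usage statistics on streamed responses. A hypothetical client-side call against a local Xinference endpoint (the model uid is illustrative; `include_usage` is the standard OpenAI stream option):

import openai

client = openai.OpenAI(base_url="http://127.0.0.1:9997/v1", api_key="not-used")
stream = client.completions.create(
    model="my-llama-3-instruct",  # hypothetical model uid
    prompt="Say hello.",
    stream=True,
    stream_options={"include_usage": True},  # usage arrives on the final chunk
)
for chunk in stream:
    print(chunk)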
xinference/web/ui/build/asset-manifest.json
CHANGED

@@ -1,11 +1,14 @@
 {
   "files": {
-    "main.js": "./static/js/main.26fdbfbe.js",
+    "main.css": "./static/css/main.54bca460.css",
+    "main.js": "./static/js/main.8e44da4b.js",
     "static/media/icon.webp": "./static/media/icon.4603d52c63041e5dfbfd.webp",
     "index.html": "./index.html",
-    "main.26fdbfbe.js.map": "./static/js/main.26fdbfbe.js.map"
+    "main.54bca460.css.map": "./static/css/main.54bca460.css.map",
+    "main.8e44da4b.js.map": "./static/js/main.8e44da4b.js.map"
   },
   "entrypoints": [
-    "static/js/main.26fdbfbe.js"
+    "static/css/main.54bca460.css",
+    "static/js/main.8e44da4b.js"
   ]
 }
xinference/web/ui/build/index.html
CHANGED

@@ -1 +1 @@
-<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.26fdbfbe.js"></script></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
+<!doctype html><html lang="en"><head><meta charset="utf-8"/><link rel="icon" href="./favicon.svg"/><meta name="viewport" content="width=device-width,initial-scale=1"/><meta name="theme-color" content="#000000"/><meta name="description" content="Web site created using create-react-app"/><link rel="apple-touch-icon" href="./logo192.png"/><link rel="manifest" href="./manifest.json"/><title>Xinference</title><script defer="defer" src="./static/js/main.8e44da4b.js"></script><link href="./static/css/main.54bca460.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div></body></html>
xinference/web/ui/build/static/css/main.54bca460.css
ADDED

@@ -0,0 +1,2 @@
+.formBox{max-height:80vh;max-width:50vw;min-width:50vw;overflow:auto;padding:40px 20px 0 0;position:relative;transition:all .4s ease-in-out}.broaden{max-width:100%;min-width:100%;padding-right:0}.show-json{align-items:center;color:#444;display:flex;position:fixed;right:60px;top:90px}.icon{cursor:pointer;margin-left:20px;position:absolute;right:-40px}.icon:hover{color:#1976d2}.arrow{font-size:24px!important}.jsonBox{min-height:80vh;position:relative;transition:all .4s ease-in-out;width:100%}.hide{overflow:hidden;-webkit-transform:translate(30vw);transform:translate(30vw);width:0}.jsonBox-header{font-weight:700;line-height:40px}.textarea{border:1px solid #ddd;border-radius:5px;color:#444;height:calc(100% - 40px);padding:5px 10px;resize:none;width:100%}.copyIcon{color:#555;cursor:pointer;font-size:16px!important;position:absolute;right:5px;top:13px}.copyIcon:hover{color:#1976d2}.addBtn{margin-left:20px!important}.item{background-color:#eee;border-radius:10px;margin:10px 50px 0;overflow:hidden;padding:20px;position:relative}.item:hover .deleteBtn{-webkit-transform:translateX(-50px);transform:translateX(-50px)}.deleteBtn{background-color:#1976d2;border-radius:25px;height:50px;line-height:70px;position:absolute;right:20px;text-align:center;top:calc(50% - 25px);-webkit-transform:translateX(80px);transform:translateX(80px);transition:all .3s ease-in-out;width:50px}.deleteBtn:hover{box-shadow:0 0 10px #aaa;cursor:pointer}.deleteIcon{color:#fff;font-size:28px!important}
+/*# sourceMappingURL=main.54bca460.css.map*/
xinference/web/ui/build/static/css/main.54bca460.css.map
ADDED

@@ -0,0 +1 @@
+{"version":3,"file":"static/css/main.54bca460.css","mappings":"AAAA,SAIE,eAAgB,CAFhB,cAAe,CACf,cAAe,CAEf,aAAc,CACd,qBAAsB,CALtB,iBAAkB,CAMlB,8BACF,CAEA,SACE,cAAe,CACf,cAAe,CACf,eACF,CAEA,WAEE,kBAAmB,CAInB,UAAW,CALX,YAAa,CAEb,cAAe,CAEf,UAAW,CADX,QAGF,CAEA,MAGE,cAAe,CACf,gBAAiB,CAHjB,iBAAkB,CAClB,WAGF,CAEA,YACE,aACF,CAEA,OACE,wBACF,CAEA,SAEE,eAAgB,CADhB,iBAAkB,CAGlB,8BAAgC,CADhC,UAEF,CAEA,MAGE,eAAgB,CADhB,iCAA6B,CAA7B,yBAA6B,CAD7B,OAGF,CAEA,gBAEE,eAAgB,CADhB,gBAEF,CAEA,UAIE,qBAAsB,CACtB,iBAAkB,CAElB,UAAW,CALX,wBAAyB,CACzB,gBAAiB,CAGjB,WAAY,CALZ,UAOF,CAEA,UAME,UAAW,CALX,cAAe,CAIf,wBAA0B,CAH1B,iBAAkB,CAElB,SAAU,CADV,QAIF,CAEA,gBACE,aACF,CAEA,QACE,0BACF,CAEA,MAEE,qBAAsB,CAGtB,kBAAmB,CAFnB,kBAAmB,CAGnB,eAAgB,CAFhB,YAAa,CAHb,iBAMF,CAEA,uBACE,mCAA4B,CAA5B,2BACF,CAEA,WAUE,wBAAyB,CADzB,kBAAmB,CAJnB,WAAY,CAGZ,gBAAiB,CAPjB,iBAAkB,CAClB,UAAW,CAKX,iBAAkB,CAJlB,oBAAqB,CAGrB,kCAA2B,CAA3B,0BAA2B,CAK3B,8BAAgC,CAPhC,UAQF,CAEA,iBAEE,wBAAyB,CADzB,cAEF,CAEA,YAEE,UAAW,CADX,wBAEF","sources":["scenes/register_model/styles/registerModelStyle.css"],"sourcesContent":[".formBox {\n  position: relative;\n  max-width: 50vw;\n  min-width: 50vw;\n  max-height: 80vh;\n  overflow: auto;\n  padding: 40px 20px 0 0;\n  transition: all 0.4s ease-in-out;\n}\n\n.broaden {\n  max-width: 100%;\n  min-width: 100%;\n  padding-right: 0;\n}\n\n.show-json {\n  display: flex;\n  align-items: center;\n  position: fixed;\n  top: 90px;\n  right: 60px;\n  color: #444;\n}\n\n.icon {\n  position: absolute;\n  right: -40px;\n  cursor: pointer;\n  margin-left: 20px;\n}\n\n.icon:hover {\n  color: #1976d2;\n}\n\n.arrow {\n  font-size: 24px !important;\n}\n\n.jsonBox {\n  position: relative;\n  min-height: 80vh;\n  width: 100%;\n  transition: all 0.4s ease-in-out;\n}\n\n.hide {\n  width: 0;\n  transform: translate(30vw, 0);\n  overflow: hidden;\n}\n\n.jsonBox-header {\n  line-height: 40px;\n  font-weight: 700;\n}\n\n.textarea {\n  width: 100%;\n  height: calc(100% - 40px);\n  padding: 5px 10px;\n  border: 1px solid #ddd;\n  border-radius: 5px;\n  resize: none;\n  color: #444;\n}\n\n.copyIcon {\n  cursor: pointer;\n  position: absolute;\n  top: 13px;\n  right: 5px;\n  font-size: 16px !important;\n  color: #555;\n}\n\n.copyIcon:hover {\n  color: #1976d2;\n}\n\n.addBtn {\n  margin-left: 20px !important;\n}\n\n.item {\n  position: relative;\n  background-color: #eee;\n  margin: 10px 50px 0;\n  padding: 20px;\n  border-radius: 10px;\n  overflow: hidden;\n}\n\n.item:hover .deleteBtn {\n  transform: translateX(-50px);\n}\n\n.deleteBtn {\n  position: absolute;\n  right: 20px;\n  top: calc(50% - 25px);\n  width: 50px;\n  height: 50px;\n  transform: translateX(80px);\n  text-align: center;\n  line-height: 70px;\n  border-radius: 25px;\n  background-color: #1976d2;\n  transition: all 0.3s ease-in-out;\n}\n\n.deleteBtn:hover {\n  cursor: pointer;\n  box-shadow: 0 0 10px #aaa;\n}\n\n.deleteIcon {\n  font-size: 28px !important;\n  color: #fff;\n}\n"],"names":[],"sourceRoot":""}