xinference 0.15.0__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of xinference might be problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +204 -1
- xinference/client/restful/restful_client.py +4 -2
- xinference/core/image_interface.py +28 -0
- xinference/core/model.py +28 -0
- xinference/core/supervisor.py +6 -0
- xinference/model/audio/fish_speech.py +9 -9
- xinference/model/audio/model_spec.json +9 -9
- xinference/model/audio/whisper.py +4 -1
- xinference/model/image/core.py +2 -1
- xinference/model/image/model_spec.json +16 -4
- xinference/model/image/model_spec_modelscope.json +16 -4
- xinference/model/image/sdapi.py +136 -0
- xinference/model/image/stable_diffusion/core.py +148 -20
- xinference/model/llm/__init__.py +8 -0
- xinference/model/llm/llm_family.json +393 -0
- xinference/model/llm/llm_family.py +3 -1
- xinference/model/llm/llm_family_modelscope.json +408 -3
- xinference/model/llm/sglang/core.py +3 -0
- xinference/model/llm/transformers/chatglm.py +1 -1
- xinference/model/llm/transformers/core.py +6 -0
- xinference/model/llm/transformers/deepseek_v2.py +340 -0
- xinference/model/llm/transformers/qwen2_audio.py +168 -0
- xinference/model/llm/transformers/qwen2_vl.py +31 -5
- xinference/model/llm/utils.py +104 -84
- xinference/model/llm/vllm/core.py +8 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml +2 -3
- xinference/thirdparty/fish_speech/fish_speech/configs/text2semantic_finetune.yaml +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +1 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/__init__.py +0 -3
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +169 -198
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +4 -27
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +9 -47
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +12 -10
- xinference/thirdparty/fish_speech/tools/api.py +79 -134
- xinference/thirdparty/fish_speech/tools/commons.py +35 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +3 -3
- xinference/thirdparty/fish_speech/tools/file.py +17 -0
- xinference/thirdparty/fish_speech/tools/llama/build_dataset.py +1 -1
- xinference/thirdparty/fish_speech/tools/llama/generate.py +29 -24
- xinference/thirdparty/fish_speech/tools/llama/merge_lora.py +1 -1
- xinference/thirdparty/fish_speech/tools/llama/quantize.py +2 -2
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +34 -0
- xinference/thirdparty/fish_speech/tools/post_api.py +85 -44
- xinference/thirdparty/fish_speech/tools/sensevoice/fun_asr.py +1 -1
- xinference/thirdparty/fish_speech/tools/smart_pad.py +16 -3
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +2 -2
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +4 -2
- xinference/thirdparty/fish_speech/tools/webui.py +12 -146
- xinference/types.py +7 -4
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/{main.632e9148.css → main.5061c4c3.css} +2 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +1 -0
- xinference/web/ui/build/static/js/{main.9cfafbd6.js → main.754740c0.js} +3 -3
- xinference/web/ui/build/static/js/main.754740c0.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/cd90b08d177025dfe84209596fc51878f8a86bcaa6a240848a3d2e5fd4c7ff24.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +1 -0
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/METADATA +9 -3
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/RECORD +72 -74
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/lit_module.py +0 -442
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/discriminator.py +0 -44
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/reference.py +0 -115
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/wavenet.py +0 -225
- xinference/thirdparty/fish_speech/tools/auto_rerank.py +0 -159
- xinference/thirdparty/fish_speech/tools/gen_ref.py +0 -36
- xinference/thirdparty/fish_speech/tools/merge_asr_files.py +0 -55
- xinference/web/ui/build/static/css/main.632e9148.css.map +0 -1
- xinference/web/ui/build/static/js/main.9cfafbd6.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/01d6d198156bacbd436c51435edbd4b2cacd47a79db929105eba30f74b67d48d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/59eb25f514afcc4fefd1b309d192b2455f1e0aec68a9de598ca4b2333fe2c774.json +0 -1
- /xinference/web/ui/build/static/js/{main.9cfafbd6.js.LICENSE.txt → main.754740c0.js.LICENSE.txt} +0 -0
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/LICENSE +0 -0
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/WHEEL +0 -0
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.15.0.dist-info → xinference-0.15.1.dist-info}/top_level.txt +0 -0
xinference/model/llm/utils.py
CHANGED
@@ -301,99 +301,89 @@ class ChatModelMixin:
         }
 
     @staticmethod
-    def _eval_glm_chat_arguments(c):
+    def _eval_glm_chat_arguments(c) -> List[Tuple]:
+        """
+        Currently, glm4 tool call only supports one function
+        """
         try:
             if isinstance(c, dict):
-                return None, c["name"], c["arguments"]
+                return [(None, c["name"], c["arguments"])]
         except KeyError:
             logger.error("Can't parse glm output: %s", c)
-            return str(c), None, None
+            return [(str(c), None, None)]
         else:
-            return str(c), None, None
+            return [(str(c), None, None)]
 
-    @classmethod
-    def _eval_qwen_chat_arguments(cls, c):
+    @classmethod
+    def _handle_qwen_tool_result(cls, text: str) -> List[Tuple]:
+        text: str = text.strip()  # type: ignore
+        contents: List[str] = text.split(QWEN_TOOL_CALL_SYMBOLS[1])
+        results: List[Tuple] = []
+        for content in contents:
+            content = content.strip()
+            if content:
+                if content.startswith(QWEN_TOOL_CALL_SYMBOLS[0]):
+                    content = content[len(QWEN_TOOL_CALL_SYMBOLS[0]) :]
+                content = content.strip()
+                try:
+                    res = json.loads(content)
+                    results.append((None, res["name"], res["arguments"]))
+                except Exception as e:
+                    logger.error(
+                        "Can't parse single qwen tool call output: %s. Error: %s",
+                        content,
+                        e,
+                    )
+                    results.append((content, None, None))
+        return results
+
+    @classmethod
+    def _eval_qwen_chat_arguments(cls, c) -> List[Tuple]:
         text = c["choices"][0]["text"]
-
-        if text.startswith(QWEN_TOOL_CALL_SYMBOLS[0]):
-            text = text[len(QWEN_TOOL_CALL_SYMBOLS[0]) :]
-        if text.endswith(QWEN_TOOL_CALL_SYMBOLS[1]):
-            text = text[: -len(QWEN_TOOL_CALL_SYMBOLS[1])]
-        text = text.strip()
-        try:
-            content = json.loads(text)
-            return None, content["name"], content["arguments"]
-        except Exception as e:
-            logger.error("Can't parse qwen tool call output: %s. Error: %s", text, e)
-            return text, None, None
+        return cls._handle_qwen_tool_result(text)
 
     @classmethod
     def _eval_tool_arguments(cls, model_family, c):
         family = model_family.model_family or model_family.model_name
         if family in GLM4_TOOL_CALL_FAMILY:
-            …
+            result = cls._eval_glm_chat_arguments(c)
         elif family in QWEN_TOOL_CALL_FAMILY:
-            …
+            result = cls._eval_qwen_chat_arguments(c)
         else:
             raise Exception(
                 f"Model {model_family.model_name} is not support tool calls."
             )
-        logger.debug("Tool call content: …
-        return …
-
-    @classmethod
-    def _tools_token_filter(cls, model_family):
-        """
-        Generates a filter function for Qwen series models to retain outputs after "\nFinal Answer:".
-
-        Returns:
-            A function that takes tokens (string output by the model so far) and delta (new tokens added) as input,
-            returns the part after "\nFinal Answer:" if found, else returns delta.
-        """
-        family = model_family.model_family or model_family.model_name
-        if family in QWEN_TOOL_CALL_FAMILY:
-            # Encapsulating function to reset 'found' after each call
-            found = False
-
-            def process_tokens(tokens: str, delta: str):
-                nonlocal found
-                # Once "Final Answer:" is found, future tokens are allowed.
-                if found:
-                    return delta
-                # Check if the token ends with "\nFinal Answer:" and update `found`.
-                final_answer_idx = tokens.lower().rfind("\nfinal answer:")
-                if final_answer_idx != -1:
-                    found = True
-                    return tokens[final_answer_idx + len("\nfinal answer:") :]
-                return ""
-
-            return process_tokens
-        else:
-            return lambda tokens, delta: delta
+        logger.debug(f"Tool call content: {result}")
+        return result
 
     @classmethod
     def _tool_calls_completion_chunk(cls, model_family, model_uid, c):
         _id = str(uuid.uuid4())
-        …
+        tool_result = cls._eval_tool_arguments(model_family, c)
+        tool_calls = []
+        failed_contents = []
+        for content, func, args in tool_result:
+            if func:
+                tool_calls.append(
+                    [
+                        {
+                            "id": f"call_{_id}",
+                            "type": "function",
+                            "function": {
+                                "name": func,
+                                "arguments": json.dumps(args, ensure_ascii=False),
+                            },
+                        }
+                    ]
+                )
+            else:
+                failed_contents.append(content)
+        finish_reason = "tool_calls" if tool_calls else "stop"
+        d = {
+            "role": "assistant",
+            "content": ". ".join(failed_contents) if failed_contents else None,
+            "tool_calls": tool_calls,
+        }
         try:
             usage = c.get("usage")
             assert "prompt_tokens" in usage
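The refactor above replaces the single-call Qwen parser with _handle_qwen_tool_result, which splits the raw model text on the closing tool-call tag so that several tool calls in one response each yield their own (content, name, arguments) tuple instead of one parse failure. A minimal standalone sketch of that behavior, assuming QWEN_TOOL_CALL_SYMBOLS holds the <tool_call>/</tool_call> tag pair (an assumption; the real constant lives in xinference's llm utils module):

import json
from typing import List, Tuple

# Assumed tag pair; in xinference this constant is defined alongside ChatModelMixin.
QWEN_TOOL_CALL_SYMBOLS = ["<tool_call>", "</tool_call>"]

def handle_qwen_tool_result(text: str) -> List[Tuple]:
    results: List[Tuple] = []
    # Splitting on the closing tag isolates each tool-call block.
    for content in text.strip().split(QWEN_TOOL_CALL_SYMBOLS[1]):
        content = content.strip()
        if not content:
            continue
        if content.startswith(QWEN_TOOL_CALL_SYMBOLS[0]):
            content = content[len(QWEN_TOOL_CALL_SYMBOLS[0]):].strip()
        try:
            res = json.loads(content)
            results.append((None, res["name"], res["arguments"]))
        except Exception:
            # Unparsable text is kept as plain content instead of failing the response.
            results.append((content, None, None))
    return results

sample = (
    '<tool_call>{"name": "get_weather", "arguments": {"city": "Paris"}}</tool_call>'
    '<tool_call>{"name": "get_time", "arguments": {}}</tool_call>'
)
print(handle_qwen_tool_result(sample))
# [(None, 'get_weather', {'city': 'Paris'}), (None, 'get_time', {})]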
@@ -422,12 +412,13 @@ class ChatModelMixin:
     @classmethod
     def _tool_calls_completion(cls, model_family, model_uid, c):
         _id = str(uuid.uuid4())
-        …
+        tool_result = cls._eval_tool_arguments(model_family, c)
+
+        tool_calls = []
+        failed_contents = []
+        for content, func, args in tool_result:
+            if func:
+                tool_calls.append(
                     {
                         "id": f"call_{_id}",
                         "type": "function",
@@ -436,12 +427,15 @@ class ChatModelMixin:
                         "arguments": json.dumps(args, ensure_ascii=False),
                     },
                 }
-        …
-        else …
-        …
+                )
+            else:
+                failed_contents.append(content)
+        finish_reason = "tool_calls" if tool_calls else "stop"
+        m = {
+            "role": "assistant",
+            "content": ". ".join(failed_contents) if failed_contents else None,
+            "tool_calls": tool_calls,
+        }
         try:
             usage = c.get("usage")
             assert "prompt_tokens" in usage
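Both _tool_calls_completion_chunk and _tool_calls_completion now consume the parsed tuples the same way (the chunk variant additionally wraps each entry in a list for delta streaming): successful parses become OpenAI-style tool_calls entries, failures are joined into the assistant content, and finish_reason reflects whether any call parsed. A simplified sketch of that assembly step, with names mirroring the diff rather than any public API:

import json
import uuid

def build_tool_call_message(tool_result):
    _id = str(uuid.uuid4())
    tool_calls, failed_contents = [], []
    for content, func, args in tool_result:
        if func:
            tool_calls.append({
                "id": f"call_{_id}",
                "type": "function",
                "function": {
                    "name": func,
                    "arguments": json.dumps(args, ensure_ascii=False),
                },
            })
        else:
            # Text that did not parse as a tool call is kept as plain content.
            failed_contents.append(content)
    finish_reason = "tool_calls" if tool_calls else "stop"
    message = {
        "role": "assistant",
        "content": ". ".join(failed_contents) if failed_contents else None,
        "tool_calls": tool_calls,
    }
    return message, finish_reason

msg, reason = build_tool_call_message([(None, "get_weather", {"city": "Paris"})])
print(reason)  # tool_calls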
@@ -555,6 +549,32 @@ def generate_completion_chunk(
     )
 
 
+def generate_completion(
+    model_uid: str,
+    response: str,
+    prompt_tokens=-1,
+    completion_tokens=-1,
+    total_tokens=-1,
+    finish_reason="stop",
+) -> Completion:
+    return Completion(
+        id=str(uuid.uuid1()),
+        object="text_completion",
+        created=int(time.time()),
+        model=model_uid,
+        choices=[
+            CompletionChoice(
+                text=response, index=0, logprobs=None, finish_reason=finish_reason
+            )
+        ],
+        usage=CompletionUsage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=total_tokens,
+        ),
+    )
+
+
 def generate_chat_completion(
     model_uid: str,
     response: str,
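The new module-level generate_completion mirrors the existing chunk/chat helpers: it wraps a plain string into an OpenAI-compatible text_completion payload, with -1 as the sentinel when token counts are unknown. A hedged usage sketch, assuming the 0.15.1 module layout shown above (Completion and friends are TypedDicts from xinference.types, so the result behaves like a dict):

from xinference.model.llm.utils import generate_completion

completion = generate_completion(
    "my-model-uid",
    "Hello!",
    prompt_tokens=5,
    completion_tokens=2,
    total_tokens=7,
)
print(completion["object"])              # text_completion
print(completion["choices"][0]["text"])  # Hello!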
xinference/model/llm/vllm/core.py
CHANGED

@@ -104,6 +104,7 @@ VLLM_SUPPORTED_MODELS = [
     "code-llama-python",
     "deepseek",
     "deepseek-coder",
+    "yi-coder",
 ]
 VLLM_SUPPORTED_CHAT_MODELS = [
     "llama-2-chat",
@@ -130,6 +131,7 @@ VLLM_SUPPORTED_CHAT_MODELS = [
     "codegeex4",
     "deepseek-chat",
     "deepseek-coder-instruct",
+    "yi-coder-chat",
 ]
 if VLLM_INSTALLED and vllm.__version__ >= "0.3.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen1.5-chat")
@@ -149,6 +151,12 @@ if VLLM_INSTALLED and vllm.__version__ >= "0.4.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("qwen2-moe-instruct")
     VLLM_SUPPORTED_CHAT_MODELS.append("c4ai-command-r-v01")
 
+if VLLM_INSTALLED and vllm.__version__ >= "0.5.1":
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2-chat")
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2-chat-0628")
+    VLLM_SUPPORTED_CHAT_MODELS.append("deepseek-v2.5")
+
+
 if VLLM_INSTALLED and vllm.__version__ >= "0.5.3":
     VLLM_SUPPORTED_CHAT_MODELS.append("gemma-2-it")
     VLLM_SUPPORTED_CHAT_MODELS.append("mistral-nemo-instruct")
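The deepseek-v2 entries follow the file's existing pattern of gating model registration on the installed vLLM version at import time. A self-contained sketch of that pattern (simplified; not xinference's exact module layout). Note the gate is a plain lexicographic string comparison, which holds for the versions listed here but can mis-order two-digit minors ("0.10.0" < "0.5.1" as strings):

# Registration is skipped entirely when vLLM is absent or too old.
try:
    import vllm
    VLLM_INSTALLED = True
except ImportError:
    vllm = None
    VLLM_INSTALLED = False

VLLM_SUPPORTED_CHAT_MODELS = ["llama-2-chat", "deepseek-chat"]

if VLLM_INSTALLED and vllm.__version__ >= "0.5.1":
    VLLM_SUPPORTED_CHAT_MODELS += [
        "deepseek-v2-chat",
        "deepseek-v2-chat-0628",
        "deepseek-v2.5",
    ]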
xinference/thirdparty/fish_speech/fish_speech/configs/firefly_gan_vq.yaml
CHANGED

@@ -22,13 +22,12 @@ head:
     resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
     num_mels: 512
     upsample_initial_channel: 512
-    use_template: false
     pre_conv_kernel_size: 13
     post_conv_kernel_size: 13
 quantizer:
   _target_: fish_speech.models.vqgan.modules.fsq.DownsampleFiniteScalarQuantize
   input_dim: 512
-  n_groups: …
+  n_groups: 8
   n_codebooks: 1
   levels: [8, 5, 5, 5]
-  downsample_factor: [2]
+  downsample_factor: [2, 2]
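The _target_ key in this config follows Hydra's instantiate convention: the section is materialized as an object of the named class, with the remaining keys passed as constructor arguments. A sketch of how the updated quantizer section would be built, assuming hydra-core is installed and the bundled fish_speech package is importable:

from hydra.utils import instantiate
from omegaconf import OmegaConf

quantizer_cfg = OmegaConf.create({
    "_target_": "fish_speech.models.vqgan.modules.fsq.DownsampleFiniteScalarQuantize",
    "input_dim": 512,
    "n_groups": 8,
    "n_codebooks": 1,
    "levels": [8, 5, 5, 5],
    "downsample_factor": [2, 2],  # two 2x downsampling stages after this change
})
# Equivalent to DownsampleFiniteScalarQuantize(input_dim=512, n_groups=8, ...)
quantizer = instantiate(quantizer_cfg)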
xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json
CHANGED

@@ -72,7 +72,7 @@
     "Put your text here.": "Put your text here.",
     "Reference Audio": "Reference Audio",
     "Reference Text": "Reference Text",
-    "Related code …
+    "Related code and weights are released under CC BY-NC-SA 4.0 License.": "Related code and weights are released under CC BY-NC-SA 4.0 License.",
     "Remove Selected Data": "Remove Selected Data",
     "Removed path successfully!": "Removed path successfully!",
     "Repetition Penalty": "Repetition Penalty",

xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json
CHANGED

@@ -72,7 +72,7 @@
     "Put your text here.": "Ponga su texto aquí.",
     "Reference Audio": "Audio de Referencia",
     "Reference Text": "Texto de Referencia",
-    "Related code …
+    "Related code and weights are released under CC BY-NC-SA 4.0 License.": "El código relacionado y los pesos se publican bajo la Licencia CC BY-NC-SA 4.0.",
     "Remove Selected Data": "Eliminar Datos Seleccionados",
     "Removed path successfully!": "¡Ruta eliminada exitosamente!",
     "Repetition Penalty": "Penalización por Repetición",

xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json
CHANGED

@@ -72,7 +72,7 @@
     "Put your text here.": "ここにテキストを入力してください。",
     "Reference Audio": "リファレンスオーディオ",
     "Reference Text": "リファレンステキスト",
-    "Related code …
+    "Related code and weights are released under CC BY-NC-SA 4.0 License.": "関連コードと重みはCC BY-NC-SA 4.0ライセンスの下でリリースされます。",
     "Remove Selected Data": "選択したデータを削除",
     "Removed path successfully!": "パスの削除に成功しました!",
     "Repetition Penalty": "反復ペナルティ",

xinference/thirdparty/fish_speech/fish_speech/i18n/locale/pt_BR.json
CHANGED

@@ -84,7 +84,7 @@
     "Reference Text": "Texto de Referência",
     "warning": "Aviso",
     "Pre-processing begins...": "O pré-processamento começou!",
-    "Related code …
+    "Related code and weights are released under CC BY-NC-SA 4.0 License.": "O código relacionado e os pesos são licenciados sob a Licença CC BY-NC-SA 4.0.",
     "Remove Selected Data": "Remover Dados Selecionados",
     "Removed path successfully!": "Caminho removido com sucesso!",
     "Repetition Penalty": "Penalidade de Repetição",

xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json
CHANGED

@@ -72,7 +72,7 @@
     "Put your text here.": "在此处输入文本.",
     "Reference Audio": "参考音频",
     "Reference Text": "参考文本",
-    "Related code …
+    "Related code and weights are released under CC BY-NC-SA 4.0 License.": "相关代码和权重使用 CC BY-NC-SA 4.0 许可证发布.",
     "Remove Selected Data": "移除选中数据",
     "Removed path successfully!": "移除路径成功!",
     "Repetition Penalty": "重复惩罚",
xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py
CHANGED

@@ -353,7 +353,7 @@ class BaseTransformer(nn.Module):
 
         if "int8" in str(Path(path)):
             logger.info("Using int8 weight-only quantization!")
-            from …
+            from tools.llama.quantize import WeightOnlyInt8QuantHandler
 
             simple_quantizer = WeightOnlyInt8QuantHandler(model)
             model = simple_quantizer.convert_for_runtime()
@@ -363,7 +363,7 @@ class BaseTransformer(nn.Module):
         path_comps = path.name.split("-")
         assert path_comps[-2].startswith("g")
         groupsize = int(path_comps[-2][1:])
-        from …
+        from tools.llama.quantize import WeightOnlyInt4QuantHandler
 
         simple_quantizer = WeightOnlyInt4QuantHandler(model, groupsize)
         model = simple_quantizer.convert_for_runtime()
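The corrected imports pull the weight-only quantization handlers from tools.llama.quantize. For orientation, a toy illustration of what int8 weight-only quantization does to a single weight matrix, using per-output-channel symmetric scales; this simplification is only a sketch of the idea, not the fish_speech implementation:

import torch

def quantize_int8(w: torch.Tensor):
    # One symmetric scale per output channel; weights map to [-128, 127].
    scales = w.abs().amax(dim=1, keepdim=True) / 127.0
    q = torch.clamp((w / scales).round(), min=-128, max=127).to(torch.int8)
    return q, scales

def dequantize(q: torch.Tensor, scales: torch.Tensor) -> torch.Tensor:
    return q.float() * scales

w = torch.randn(16, 32)
q, s = quantize_int8(w)
# Reconstruction error is bounded by roughly half a quantization step per channel.
print((w - dequantize(q, s)).abs().max())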