xinference 0.14.1.post1__py3-none-any.whl → 0.14.2__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published in that registry.
Potentially problematic release: this version of xinference has been flagged as potentially problematic.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +15 -34
- xinference/client/restful/restful_client.py +2 -2
- xinference/core/chat_interface.py +44 -9
- xinference/core/model.py +4 -4
- xinference/core/scheduler.py +1 -2
- xinference/core/worker.py +1 -1
- xinference/deploy/cmdline.py +2 -2
- xinference/deploy/test/test_cmdline.py +7 -7
- xinference/model/llm/__init__.py +20 -27
- xinference/model/llm/{ggml/llamacpp.py → llama_cpp/core.py} +2 -35
- xinference/model/llm/llm_family.json +448 -1153
- xinference/model/llm/llm_family.py +14 -139
- xinference/model/llm/llm_family_modelscope.json +230 -313
- xinference/model/llm/memory.py +9 -9
- xinference/model/llm/sglang/core.py +2 -2
- xinference/model/llm/{pytorch → transformers}/chatglm.py +6 -13
- xinference/model/llm/{pytorch → transformers}/core.py +2 -10
- xinference/model/llm/transformers/intern_vl.py +457 -0
- xinference/model/llm/{pytorch → transformers}/internlm2.py +4 -8
- xinference/model/llm/{pytorch → transformers}/minicpmv26.py +67 -22
- xinference/model/llm/{pytorch → transformers}/utils.py +1 -2
- xinference/model/llm/utils.py +76 -70
- xinference/model/llm/vllm/core.py +110 -11
- xinference/model/utils.py +1 -95
- xinference/thirdparty/internvl/__init__.py +0 -0
- xinference/thirdparty/internvl/conversation.py +393 -0
- xinference/thirdparty/omnilmm/model/utils.py +16 -1
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.ffc26121.js +3 -0
- xinference/web/ui/build/static/js/main.ffc26121.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/213b5913e164773c2b0567455377765715f5f07225fbac77ad8e1e9dc9648a47.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5391543180fead1eeef5364300301498d58a7d91d62de3841a32768b67f4552f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5c26a23b5eacf5b752a08531577ae3840bb247745ef9a39583dc2d05ba93a82a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/714c37ce0ec5b5c591033f02be2f3f491fdd70da3ef568ee4a4f94689a3d5ca2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/978b57d1a04a701bc3fcfebc511f5f274eed6ed7eade67f6fb76c27d5fd9ecc8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a797831de0dc74897f4b50b3426555d748f328b4c2cc391de709eadaf6a5f3e3.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e91938976f229ce986b2907e51e1f00540b584ced0a315d498c172d13220739d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +1 -0
- {xinference-0.14.1.post1.dist-info → xinference-0.14.2.dist-info}/METADATA +5 -8
- {xinference-0.14.1.post1.dist-info → xinference-0.14.2.dist-info}/RECORD +63 -70
- xinference/locale/utils.py +0 -39
- xinference/locale/zh_CN.json +0 -26
- xinference/model/llm/ggml/tools/__init__.py +0 -15
- xinference/model/llm/ggml/tools/convert_ggml_to_gguf.py +0 -498
- xinference/model/llm/ggml/tools/gguf.py +0 -884
- xinference/model/llm/pytorch/__init__.py +0 -13
- xinference/model/llm/pytorch/baichuan.py +0 -81
- xinference/model/llm/pytorch/falcon.py +0 -138
- xinference/model/llm/pytorch/intern_vl.py +0 -352
- xinference/model/llm/pytorch/vicuna.py +0 -69
- xinference/web/ui/build/static/js/main.17ca0398.js +0 -3
- xinference/web/ui/build/static/js/main.17ca0398.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1444c41a4d04494f1cbc2d8c1537df107b451cb569cb2c1fbf5159f3a4841a5f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/44774c783428f952d8e2e4ad0998a9c5bc16a57cd9c68b7c5ff18aaa5a41d65c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5262556baf9207738bf6a8ba141ec6599d0a636345c245d61fdf88d3171998cb.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/6450605fac003812485f6251b9f0caafbf2e5bfc3bbe2f000050d9e2fdb8dcd3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/71684495d995c7e266eecc6a0ad8ea0284cc785f80abddf863789c57a6134969.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/80acd1edf31542ab1dcccfad02cb4b38f3325cff847a781fcce97500cfd6f878.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8a9742ddd8ba8546ef42dc14caca443f2b4524fabed7bf269e0eff3b7b64ee7d.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d06a96a3c9c32e42689094aa3aaad41c8125894e956b8f84a70fadce6e3f65b3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d93730e2b5d7e8c957b4d0965d2ed1dac9045a649adbd47c220d11f255d4b1e0.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e656dc00b4d8b387f0a81ba8fc558767df1601c66369e2eb86a5ef27cf080572.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f28b83886159d83b84f099b05d607a822dca4dd7f2d8aa6d56fe08bab0b5b086.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f3e02274cb1964e99b1fe69cbb6db233d3d8d7dd05d50ebcdb8e66d50b224b7b.json +0 -1
- /xinference/{locale → model/llm/llama_cpp}/__init__.py +0 -0
- /xinference/model/llm/{ggml → transformers}/__init__.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/cogvlm2.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/compression.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/deepseek_vl.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/glm4v.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/llama_2.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/minicpmv25.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/omnilmm.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/qwen_vl.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/tensorizer_utils.py +0 -0
- /xinference/model/llm/{pytorch → transformers}/yi_vl.py +0 -0
- /xinference/web/ui/build/static/js/{main.17ca0398.js.LICENSE.txt → main.ffc26121.js.LICENSE.txt} +0 -0
- {xinference-0.14.1.post1.dist-info → xinference-0.14.2.dist-info}/LICENSE +0 -0
- {xinference-0.14.1.post1.dist-info → xinference-0.14.2.dist-info}/WHEEL +0 -0
- {xinference-0.14.1.post1.dist-info → xinference-0.14.2.dist-info}/entry_points.txt +0 -0
- {xinference-0.14.1.post1.dist-info → xinference-0.14.2.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.py

@@ -14,7 +14,6 @@
 
 import logging
 import os
-import shutil
 from threading import Lock
 from typing import Any, Dict, List, Optional, Set, Tuple, Type, Union
 
@@ -59,8 +58,8 @@ BUILTIN_LLM_MODEL_GENERATE_FAMILIES: Set[str] = set()
 BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES: Set[str] = set()
 
 
-class GgmlLLMSpecV1(BaseModel):
-    model_format: Literal["…
+class LlamaCppLLMSpecV1(BaseModel):
+    model_format: Literal["ggufv2"]
     # Must in order that `str` first, then `int`
     model_size_in_billions: Union[str, int]
     quantizations: List[str]
@@ -85,7 +84,7 @@ class GgmlLLMSpecV1(BaseModel):
 
 
 class PytorchLLMSpecV1(BaseModel):
-    model_format: Literal["pytorch", "gptq", "awq"]
+    model_format: Literal["pytorch", "gptq", "awq", "fp8"]
     # Must in order that `str` first, then `int`
     model_size_in_billions: Union[str, int]
     quantizations: List[str]
@@ -247,7 +246,7 @@ class CustomLLMFamilyV1(LLMFamilyV1):
 
 
 LLMSpecV1 = Annotated[
-    Union[…
+    Union[LlamaCppLLMSpecV1, PytorchLLMSpecV1, MLXLLMSpecV1],
     Field(discriminator="model_format"),
 ]
 
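Note on the spec hunks above: `LLMSpecV1` is a pydantic discriminated union keyed on the `model_format` literal, so replacing the old GGML spec with `LlamaCppLLMSpecV1` and adding `"fp8"` to `PytorchLLMSpecV1` is all that is needed for spec dicts to be routed to the right class during parsing. Below is a minimal, self-contained sketch of that pattern; the class bodies are simplified stand-ins (and `MLXLLMSpecV1` is omitted), not the actual xinference definitions.

```python
from typing import Annotated, List, Literal, Union

from pydantic import BaseModel, Field


class LlamaCppLLMSpecV1(BaseModel):
    model_format: Literal["ggufv2"]
    model_size_in_billions: Union[str, int]
    quantizations: List[str]


class PytorchLLMSpecV1(BaseModel):
    model_format: Literal["pytorch", "gptq", "awq", "fp8"]
    model_size_in_billions: Union[str, int]
    quantizations: List[str]


# Pydantic selects the concrete spec class from the `model_format` value.
LLMSpecV1 = Annotated[
    Union[LlamaCppLLMSpecV1, PytorchLLMSpecV1],
    Field(discriminator="model_format"),
]


class LLMFamily(BaseModel):
    model_name: str
    model_specs: List[LLMSpecV1]


family = LLMFamily(
    model_name="example-llm",
    model_specs=[
        {"model_format": "ggufv2", "model_size_in_billions": 7, "quantizations": ["Q4_K_M"]},
        {"model_format": "fp8", "model_size_in_billions": 7, "quantizations": ["fp8"]},
    ],
)
# Prints: LlamaCppLLMSpecV1 PytorchLLMSpecV1
print(type(family.model_specs[0]).__name__, type(family.model_specs[1]).__name__)
```

Because the union is discriminated, pydantic does not have to try every member in order, which is why each spec class narrows `model_format` to exactly the formats it supports.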
@@ -308,13 +307,10 @@ def cache(
     if os.path.exists(legacy_cache_path):
         logger.info("Legacy cache path exists: %s", legacy_cache_path)
         return os.path.dirname(legacy_cache_path)
-    elif download_from_self_hosted_storage() and is_self_hosted(llm_family, llm_spec):
-        logger.info(f"Caching from self-hosted storage")
-        return cache_from_self_hosted_storage(llm_family, llm_spec, quantization)
     else:
         if llm_spec.model_uri is not None:
             logger.info(f"Caching from URI: {llm_spec.model_uri}")
-            return cache_from_uri(llm_family, llm_spec…
+            return cache_from_uri(llm_family, llm_spec)
         else:
             if llm_spec.model_hub == "huggingface":
                 logger.info(f"Caching from Hugging Face: {llm_spec.model_id}")
@@ -329,68 +325,10 @@ def cache(
                 raise ValueError(f"Unknown model hub: {llm_spec.model_hub}")
 
 
-SUPPORTED_SCHEMES = ["s3"]
-
-
-class AWSRegion:
-    def __init__(self, region: str):
-        self.region = region
-        self.original_aws_default_region = None
-
-    def __enter__(self):
-        if "AWS_DEFAULT_REGION" in os.environ:
-            self.original_aws_default_region = os.environ["AWS_DEFAULT_REGION"]
-        os.environ["AWS_DEFAULT_REGION"] = self.region
-
-    def __exit__(self, exc_type, exc_value, traceback):
-        if self.original_aws_default_region:
-            os.environ["AWS_DEFAULT_REGION"] = self.original_aws_default_region
-        else:
-            del os.environ["AWS_DEFAULT_REGION"]
-
-
-def is_self_hosted(
-    llm_family: LLMFamilyV1,
-    llm_spec: "LLMSpecV1",
-):
-    from fsspec import AbstractFileSystem, filesystem
-
-    with AWSRegion("cn-northwest-1"):
-        src_fs: AbstractFileSystem = filesystem("s3", anon=True)
-        model_dir = (
-            f"/xinference-models/llm/"
-            f"{llm_family.model_name}-{llm_spec.model_format}-{llm_spec.model_size_in_billions}b"
-        )
-        return src_fs.exists(model_dir)
-
-
-def cache_from_self_hosted_storage(
-    llm_family: LLMFamilyV1,
-    llm_spec: "LLMSpecV1",
-    quantization: Optional[str] = None,
-) -> str:
-    with AWSRegion("cn-northwest-1"):
-        llm_spec = llm_spec.copy()
-        llm_spec.model_uri = (
-            f"s3://xinference-models/llm/"
-            f"{llm_family.model_name}-{llm_spec.model_format}-{llm_spec.model_size_in_billions}b"
-        )
-
-        return cache_from_uri(
-            llm_family, llm_spec, quantization, self_hosted_storage=True
-        )
-
-
 def cache_from_uri(
     llm_family: LLMFamilyV1,
     llm_spec: "LLMSpecV1",
-    quantization: Optional[str] = None,
-    self_hosted_storage: bool = False,
 ) -> str:
-    from fsspec import AbstractFileSystem, filesystem
-
-    from ..utils import copy_from_src_to_dst
-
     cache_dir_name = (
         f"{llm_family.model_name}-{llm_spec.model_format}"
         f"-{llm_spec.model_size_in_billions}b"
@@ -415,69 +353,6 @@ def cache_from_uri(
         else:
             os.symlink(src_root, cache_dir, target_is_directory=True)
             return cache_dir
-    elif src_scheme in SUPPORTED_SCHEMES:
-        # use anonymous connection for self-hosted storage.
-        src_fs: AbstractFileSystem = filesystem(src_scheme, anon=self_hosted_storage)
-        local_fs: AbstractFileSystem = filesystem("file")
-
-        files_to_download = []
-        if llm_spec.model_format == "pytorch":
-            if os.path.exists(cache_dir):
-                logger.info(f"Cache {cache_dir} exists")
-                return cache_dir
-            else:
-                os.makedirs(cache_dir, exist_ok=True)
-
-            for path, _, files in src_fs.walk(llm_spec.model_uri):
-                for file in files:
-                    src_path = f"{path}/{file}"
-                    local_path = src_path.replace(src_root, cache_dir)
-                    files_to_download.append((src_path, local_path))
-        elif llm_spec.model_format == "ggmlv3":
-            file = llm_spec.model_file_name_template.format(quantization=quantization)
-            if os.path.exists(os.path.join(cache_dir, file)):
-                logger.info(f"Cache {os.path.join(cache_dir, file)} exists")
-                return cache_dir
-            else:
-                os.makedirs(cache_dir, exist_ok=True)
-
-            src_path = f"{src_root}/{file}"
-            local_path = f"{cache_dir}/{file}"
-            files_to_download.append((src_path, local_path))
-        else:
-            raise ValueError(f"Unsupported model format: {llm_spec.model_format}")
-
-        from concurrent.futures import ThreadPoolExecutor
-
-        failed = False
-        with ThreadPoolExecutor(max_workers=min(len(files_to_download), 4)) as executor:
-            futures = [
-                (
-                    src_path,
-                    executor.submit(
-                        copy_from_src_to_dst, src_fs, src_path, local_fs, local_path
-                    ),
-                )
-                for src_path, local_path in files_to_download
-            ]
-            for src_path, future in futures:
-                if failed:
-                    future.cancel()
-                else:
-                    try:
-                        future.result()
-                    except:
-                        logger.error(f"Download {src_path} failed", exc_info=True)
-                        failed = True
-
-        if failed:
-            logger.warning(f"Removing cache directory: {cache_dir}")
-            shutil.rmtree(cache_dir, ignore_errors=True)
-            raise RuntimeError(
-                f"Failed to download model '{llm_family.model_name}' "
-                f"(size: {llm_spec.model_size_in_billions}, format: {llm_spec.model_format})"
-            )
-        return cache_dir
     else:
         raise ValueError(f"Unsupported URL scheme: {src_scheme}")
 
@@ -597,7 +472,7 @@ def _get_meta_path(
             return os.path.join(cache_dir, "__valid_download")
         else:
             return os.path.join(cache_dir, f"__valid_download_{model_hub}")
-    elif model_format in ["…
+    elif model_format in ["ggufv2", "gptq", "awq", "fp8", "mlx"]:
         assert quantization is not None
         if model_hub == "huggingface":
             return os.path.join(cache_dir, f"__valid_download_{quantization}")
@@ -636,7 +511,7 @@ def _skip_download(
                     logger.warning(f"Cache {cache_dir} exists, but it was from {hub}")
                     return True
            return False
-    elif model_format in ["…
+    elif model_format in ["ggufv2", "gptq", "awq", "fp8", "mlx"]:
         assert quantization is not None
         return os.path.exists(
             _get_meta_path(cache_dir, model_format, model_hub, quantization)
@@ -731,7 +606,7 @@ def cache_from_csghub(
     ):
         return cache_dir
 
-    if llm_spec.model_format in ["pytorch", "gptq", "awq", "mlx"]:
+    if llm_spec.model_format in ["pytorch", "gptq", "awq", "fp8", "mlx"]:
         download_dir = retry_download(
             snapshot_download,
             llm_family.model_name,
@@ -745,7 +620,7 @@ def cache_from_csghub(
         )
         create_symlink(download_dir, cache_dir)
 
-    elif llm_spec.model_format in ["…
+    elif llm_spec.model_format in ["ggufv2"]:
         file_names, final_file_name, need_merge = _generate_model_file_names(
             llm_spec, quantization
         )
@@ -799,7 +674,7 @@ def cache_from_modelscope(
     ):
         return cache_dir
 
-    if llm_spec.model_format in ["pytorch", "gptq", "awq", "mlx"]:
+    if llm_spec.model_format in ["pytorch", "gptq", "awq", "fp8", "mlx"]:
         download_dir = retry_download(
             snapshot_download,
             llm_family.model_name,
@@ -812,7 +687,7 @@ def cache_from_modelscope(
         )
         create_symlink(download_dir, cache_dir)
 
-    elif llm_spec.model_format in ["…
+    elif llm_spec.model_format in ["ggufv2"]:
         file_names, final_file_name, need_merge = _generate_model_file_names(
             llm_spec, quantization
         )
@@ -868,7 +743,7 @@ def cache_from_huggingface(
     if not IS_NEW_HUGGINGFACE_HUB:
         use_symlinks = {"local_dir_use_symlinks": True, "local_dir": cache_dir}
 
-    if llm_spec.model_format in ["pytorch", "gptq", "awq", "mlx"]:
+    if llm_spec.model_format in ["pytorch", "gptq", "awq", "fp8", "mlx"]:
         assert isinstance(llm_spec, (PytorchLLMSpecV1, MLXLLMSpecV1))
         download_dir = retry_download(
             huggingface_hub.snapshot_download,
@@ -884,8 +759,8 @@ def cache_from_huggingface(
         if IS_NEW_HUGGINGFACE_HUB:
             create_symlink(download_dir, cache_dir)
 
-    elif llm_spec.model_format in ["…
-        assert isinstance(llm_spec, …
+    elif llm_spec.model_format in ["ggufv2"]:
+        assert isinstance(llm_spec, LlamaCppLLMSpecV1)
         file_names, final_file_name, need_merge = _generate_model_file_names(
             llm_spec, quantization
         )
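Note on the `_get_meta_path` / `_skip_download` hunks: for the quantized formats (`ggufv2`, `gptq`, `awq`, `fp8`, `mlx`) the completed-download marker encodes the quantization, so each quantization of a model is cached and validated independently. Below is a minimal sketch of that marker path for the Hugging Face hub branch shown above; the helper name and the example directory are hypothetical, not xinference APIs.

```python
import os


# Hypothetical stand-in for the Hugging Face branch of _get_meta_path above.
def quantized_meta_path(cache_dir: str, quantization: str) -> str:
    # e.g. <cache_dir>/__valid_download_Q4_K_M
    return os.path.join(cache_dir, f"__valid_download_{quantization}")


# Cache directories follow the <model_name>-<model_format>-<size>b pattern used
# by cache_from_uri above; the concrete values here are made up.
print(quantized_meta_path("/tmp/example-llm-ggufv2-7b", "Q4_K_M"))
# -> /tmp/example-llm-ggufv2-7b/__valid_download_Q4_K_M
```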