vllm-npu 0.4.2__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vllm/__init__.py +23 -0
- vllm/_custom_ops.py +251 -0
- vllm/attention/__init__.py +13 -0
- vllm/attention/backends/__init__.py +0 -0
- vllm/attention/backends/abstract.py +127 -0
- vllm/attention/backends/flash_attn.py +271 -0
- vllm/attention/backends/flashinfer.py +220 -0
- vllm/attention/backends/rocm_flash_attn.py +374 -0
- vllm/attention/backends/torch_sdpa.py +250 -0
- vllm/attention/backends/xformers.py +393 -0
- vllm/attention/layer.py +56 -0
- vllm/attention/ops/__init__.py +0 -0
- vllm/attention/ops/paged_attn.py +216 -0
- vllm/attention/ops/prefix_prefill.py +792 -0
- vllm/attention/ops/triton_flash_attention.py +810 -0
- vllm/attention/selector.py +91 -0
- vllm/block.py +84 -0
- vllm/config.py +1225 -0
- vllm/core/__init__.py +0 -0
- vllm/core/block/__init__.py +0 -0
- vllm/core/block/block_table.py +295 -0
- vllm/core/block/common.py +199 -0
- vllm/core/block/cpu_gpu_block_allocator.py +228 -0
- vllm/core/block/interfaces.py +205 -0
- vllm/core/block/naive_block.py +318 -0
- vllm/core/block/prefix_caching_block.py +606 -0
- vllm/core/block_manager_v1.py +625 -0
- vllm/core/block_manager_v2.py +258 -0
- vllm/core/evictor_v1.py +105 -0
- vllm/core/evictor_v2.py +127 -0
- vllm/core/interfaces.py +113 -0
- vllm/core/policy.py +45 -0
- vllm/core/scheduler.py +1163 -0
- vllm/distributed/__init__.py +3 -0
- vllm/distributed/communication_op.py +237 -0
- vllm/distributed/device_communicators/__init__.py +0 -0
- vllm/distributed/device_communicators/custom_all_reduce.py +274 -0
- vllm/distributed/device_communicators/pynccl.py +287 -0
- vllm/distributed/device_communicators/pynccl_utils.py +66 -0
- vllm/distributed/parallel_state.py +339 -0
- vllm/distributed/utils.py +136 -0
- vllm/engine/__init__.py +0 -0
- vllm/engine/arg_utils.py +649 -0
- vllm/engine/async_llm_engine.py +737 -0
- vllm/engine/llm_engine.py +784 -0
- vllm/engine/metrics.py +368 -0
- vllm/engine/output_processor/__init__.py +0 -0
- vllm/engine/output_processor/interfaces.py +76 -0
- vllm/engine/output_processor/multi_step.py +142 -0
- vllm/engine/output_processor/single_step.py +284 -0
- vllm/engine/output_processor/stop_checker.py +101 -0
- vllm/engine/output_processor/util.py +19 -0
- vllm/entrypoints/__init__.py +0 -0
- vllm/entrypoints/api_server.py +119 -0
- vllm/entrypoints/llm.py +259 -0
- vllm/entrypoints/openai/__init__.py +0 -0
- vllm/entrypoints/openai/api_server.py +186 -0
- vllm/entrypoints/openai/cli_args.py +115 -0
- vllm/entrypoints/openai/protocol.py +460 -0
- vllm/entrypoints/openai/serving_chat.py +392 -0
- vllm/entrypoints/openai/serving_completion.py +347 -0
- vllm/entrypoints/openai/serving_engine.py +234 -0
- vllm/envs.py +217 -0
- vllm/executor/__init__.py +0 -0
- vllm/executor/cpu_executor.py +152 -0
- vllm/executor/distributed_gpu_executor.py +115 -0
- vllm/executor/executor_base.py +115 -0
- vllm/executor/gpu_executor.py +150 -0
- vllm/executor/multiproc_worker_utils.py +263 -0
- vllm/executor/neuron_executor.py +91 -0
- vllm/executor/ray_gpu_executor.py +327 -0
- vllm/executor/ray_utils.py +119 -0
- vllm/logger.py +153 -0
- vllm/logging/__init__.py +5 -0
- vllm/logging/formatter.py +15 -0
- vllm/lora/__init__.py +0 -0
- vllm/lora/fully_sharded_layers.py +262 -0
- vllm/lora/layers.py +1181 -0
- vllm/lora/lora.py +167 -0
- vllm/lora/models.py +645 -0
- vllm/lora/punica.py +213 -0
- vllm/lora/request.py +32 -0
- vllm/lora/utils.py +98 -0
- vllm/lora/worker_manager.py +251 -0
- vllm/model_executor/__init__.py +7 -0
- vllm/model_executor/guided_decoding/__init__.py +25 -0
- vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py +70 -0
- vllm/model_executor/guided_decoding/outlines_decoding.py +130 -0
- vllm/model_executor/guided_decoding/outlines_logits_processors.py +184 -0
- vllm/model_executor/layers/__init__.py +0 -0
- vllm/model_executor/layers/activation.py +173 -0
- vllm/model_executor/layers/fused_moe/__init__.py +7 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json +140 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/fused_moe.py +479 -0
- vllm/model_executor/layers/layernorm.py +71 -0
- vllm/model_executor/layers/linear.py +709 -0
- vllm/model_executor/layers/logits_processor.py +115 -0
- vllm/model_executor/layers/ops/__init__.py +0 -0
- vllm/model_executor/layers/ops/rand.py +157 -0
- vllm/model_executor/layers/ops/sample.py +406 -0
- vllm/model_executor/layers/quantization/__init__.py +35 -0
- vllm/model_executor/layers/quantization/aqlm.py +376 -0
- vllm/model_executor/layers/quantization/awq.py +175 -0
- vllm/model_executor/layers/quantization/base_config.py +97 -0
- vllm/model_executor/layers/quantization/fp8.py +265 -0
- vllm/model_executor/layers/quantization/gptq.py +224 -0
- vllm/model_executor/layers/quantization/gptq_marlin.py +438 -0
- vllm/model_executor/layers/quantization/marlin.py +227 -0
- vllm/model_executor/layers/quantization/schema.py +84 -0
- vllm/model_executor/layers/quantization/squeezellm.py +137 -0
- vllm/model_executor/layers/rejection_sampler.py +405 -0
- vllm/model_executor/layers/rotary_embedding.py +525 -0
- vllm/model_executor/layers/sampler.py +1051 -0
- vllm/model_executor/layers/vocab_parallel_embedding.py +155 -0
- vllm/model_executor/model_loader/__init__.py +30 -0
- vllm/model_executor/model_loader/loader.py +362 -0
- vllm/model_executor/model_loader/neuron.py +136 -0
- vllm/model_executor/model_loader/tensorizer.py +368 -0
- vllm/model_executor/model_loader/utils.py +41 -0
- vllm/model_executor/model_loader/weight_utils.py +372 -0
- vllm/model_executor/models/__init__.py +119 -0
- vllm/model_executor/models/baichuan.py +410 -0
- vllm/model_executor/models/bloom.py +327 -0
- vllm/model_executor/models/chatglm.py +386 -0
- vllm/model_executor/models/commandr.py +373 -0
- vllm/model_executor/models/dbrx.py +413 -0
- vllm/model_executor/models/decilm.py +122 -0
- vllm/model_executor/models/deepseek.py +438 -0
- vllm/model_executor/models/falcon.py +444 -0
- vllm/model_executor/models/gemma.py +393 -0
- vllm/model_executor/models/gpt2.py +266 -0
- vllm/model_executor/models/gpt_bigcode.py +274 -0
- vllm/model_executor/models/gpt_j.py +281 -0
- vllm/model_executor/models/gpt_neox.py +295 -0
- vllm/model_executor/models/internlm2.py +323 -0
- vllm/model_executor/models/jais.py +333 -0
- vllm/model_executor/models/llama.py +442 -0
- vllm/model_executor/models/llava.py +239 -0
- vllm/model_executor/models/minicpm.py +531 -0
- vllm/model_executor/models/mixtral.py +583 -0
- vllm/model_executor/models/mixtral_quant.py +404 -0
- vllm/model_executor/models/mpt.py +295 -0
- vllm/model_executor/models/olmo.py +356 -0
- vllm/model_executor/models/opt.py +349 -0
- vllm/model_executor/models/orion.py +319 -0
- vllm/model_executor/models/phi.py +300 -0
- vllm/model_executor/models/qwen.py +284 -0
- vllm/model_executor/models/qwen2.py +367 -0
- vllm/model_executor/models/qwen2_moe.py +447 -0
- vllm/model_executor/models/stablelm.py +301 -0
- vllm/model_executor/models/starcoder2.py +302 -0
- vllm/model_executor/models/xverse.py +366 -0
- vllm/model_executor/sampling_metadata.py +588 -0
- vllm/model_executor/utils.py +35 -0
- vllm/outputs.py +150 -0
- vllm/py.typed +2 -0
- vllm/sampling_params.py +340 -0
- vllm/sequence.py +766 -0
- vllm/spec_decode/__init__.py +0 -0
- vllm/spec_decode/batch_expansion.py +397 -0
- vllm/spec_decode/interfaces.py +73 -0
- vllm/spec_decode/metrics.py +191 -0
- vllm/spec_decode/multi_step_worker.py +203 -0
- vllm/spec_decode/ngram_worker.py +176 -0
- vllm/spec_decode/spec_decode_worker.py +472 -0
- vllm/spec_decode/top1_proposer.py +200 -0
- vllm/spec_decode/util.py +228 -0
- vllm/test_utils.py +41 -0
- vllm/transformers_utils/__init__.py +0 -0
- vllm/transformers_utils/config.py +58 -0
- vllm/transformers_utils/configs/__init__.py +16 -0
- vllm/transformers_utils/configs/chatglm.py +68 -0
- vllm/transformers_utils/configs/dbrx.py +278 -0
- vllm/transformers_utils/configs/falcon.py +87 -0
- vllm/transformers_utils/configs/jais.py +236 -0
- vllm/transformers_utils/configs/mpt.py +178 -0
- vllm/transformers_utils/detokenizer.py +313 -0
- vllm/transformers_utils/tokenizer.py +149 -0
- vllm/transformers_utils/tokenizer_group/__init__.py +33 -0
- vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py +55 -0
- vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py +169 -0
- vllm/transformers_utils/tokenizer_group/tokenizer_group.py +78 -0
- vllm/transformers_utils/tokenizers/__init__.py +5 -0
- vllm/transformers_utils/tokenizers/baichuan.py +255 -0
- vllm/usage/__init__.py +0 -0
- vllm/usage/usage_lib.py +209 -0
- vllm/utils.py +677 -0
- vllm/worker/__init__.py +0 -0
- vllm/worker/cache_engine.py +105 -0
- vllm/worker/cpu_model_runner.py +346 -0
- vllm/worker/cpu_worker.py +321 -0
- vllm/worker/model_runner.py +1168 -0
- vllm/worker/neuron_model_runner.py +196 -0
- vllm/worker/neuron_worker.py +98 -0
- vllm/worker/worker.py +345 -0
- vllm/worker/worker_base.py +146 -0
- vllm_npu-0.4.2.dist-info/LICENSE +201 -0
- vllm_npu-0.4.2.dist-info/METADATA +173 -0
- vllm_npu-0.4.2.dist-info/RECORD +219 -0
- vllm_npu-0.4.2.dist-info/WHEEL +5 -0
- vllm_npu-0.4.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,219 @@
|
|
1
|
+
vllm/__init__.py,sha256=WDv-6PmvjJBIjijeZ8nEpkhoXyp7ATE9HDvqmX6iXVw,755
|
2
|
+
vllm/_custom_ops.py,sha256=lSuo3wu07v5l2dIob7fm4JqjoQz0wreN7ETvK1ZxAlY,8597
|
3
|
+
vllm/block.py,sha256=5E1AqlE1QW44EWv_mE8RJTUkQb65VrFwtMBzak71JvM,2374
|
4
|
+
vllm/config.py,sha256=4PxokzC9V7qe52tN6mYHkNgW-LSk2qnuwofIBzv4Sik,52193
|
5
|
+
vllm/envs.py,sha256=E92xZi-FzxcBnWF_g4f3GBnu9djorPuUIYNYI_fU4tc,8097
|
6
|
+
vllm/logger.py,sha256=qOmmhvvLbsK-SYmlsmsqUr7s3Xqbhx3AGrkDw-_BuSE,5431
|
7
|
+
vllm/outputs.py,sha256=uoUaF8Zb4VUky1T_wzGvKtAyyHtMtYqZFo73dMWaBVM,6038
|
8
|
+
vllm/py.typed,sha256=F5LUrt0voM87SNuuOky2X9veCVDqJUgRg_VohYqDigY,65
|
9
|
+
vllm/sampling_params.py,sha256=bPyMw0PIvlvww1VHzdceJAemTaRsQvPDC-JbhpiSC2w,16393
|
10
|
+
vllm/sequence.py,sha256=KP335mzkmMEZh-ZMFDeYqQVKH3F2eFOtF-XH1zirf7g,28327
|
11
|
+
vllm/test_utils.py,sha256=enPEjb6BzDCuYfZbJcgm5tnUfYpeVZkMw6SyVO_Qvho,1248
|
12
|
+
vllm/utils.py,sha256=uvXW8_oPVMYmFehPIeO6Odgauwh9sN46YiOFHVG3vT0,21823
|
13
|
+
vllm/attention/__init__.py,sha256=0ju2zaeUU8vjGMrTMZLa3LbkLguApbGK2XhqkUUb8Sk,436
|
14
|
+
vllm/attention/layer.py,sha256=RsVeQdzyTgNH7J5F75Mx-K5zJByj5btghDDGHqWS7Uc,1925
|
15
|
+
vllm/attention/selector.py,sha256=LoAwQ2Svx57Cq2KEc7WmajseIzDu3Ek-q2yDlv26Z1o,3171
|
16
|
+
vllm/attention/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
+
vllm/attention/backends/abstract.py,sha256=abCWm9W_wfZmzwvZxmhWwSayX1a29FH55ET-gnFaRIo,3799
|
18
|
+
vllm/attention/backends/flash_attn.py,sha256=8OLyOMgYF6nM-9Ooz8hswbgdkmzNLmDxONVqN0_SFfk,11173
|
19
|
+
vllm/attention/backends/flashinfer.py,sha256=JQzArJCgtJsHq_F1ca-24fMe__hYk6GIZ5OOvEkZYkw,8381
|
20
|
+
vllm/attention/backends/rocm_flash_attn.py,sha256=b_5sfJkmlFhH2MW0Jx9kJc8RdlOL6Xbz7-ygwuxKpcY,14756
|
21
|
+
vllm/attention/backends/torch_sdpa.py,sha256=EeZ336wk_a4UTzyvO4SUhHRbcjEBXZCVQw8xvUzPn-I,9602
|
22
|
+
vllm/attention/backends/xformers.py,sha256=z8JBitn3CipOIf8ACaUH7zMfCxr89gbRMunP8CWAl-U,16140
|
23
|
+
vllm/attention/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
24
|
+
vllm/attention/ops/paged_attn.py,sha256=9_8Fll5TYlS_8xmpzayalfpFoPJEIGyU9Eo7ucFz-nQ,6905
|
25
|
+
vllm/attention/ops/prefix_prefill.py,sha256=a7uNKb7Ah15HYUrxJuVLJW80dg1PpeRN7eBqV69lhCI,28421
|
26
|
+
vllm/attention/ops/triton_flash_attention.py,sha256=EIS7Do1um2V0tSVYDkCpT4cf_41_jEwpOsUYMIny8RE,27247
|
27
|
+
vllm/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
28
|
+
vllm/core/block_manager_v1.py,sha256=WxM_ufCprJllst5dC-8GKxnaQuKblHNAsVeTBqZNgcg,25115
|
29
|
+
vllm/core/block_manager_v2.py,sha256=86a-0WeON6yuKDn_7nJeJboUfeLd5o7oopjfMo88dRU,10603
|
30
|
+
vllm/core/evictor_v1.py,sha256=9cTsGhNJd_21AtTKCbLcSUruzKTphRo5va5gW5LTFOc,3548
|
31
|
+
vllm/core/evictor_v2.py,sha256=DResScCENepgT3if4_bYk0WYp4r-wRkZ5E5XfyCdBV0,4479
|
32
|
+
vllm/core/interfaces.py,sha256=xqjJMHb889v1p4ib2D40R4tIkcronrTqZOY857ZBJTM,2878
|
33
|
+
vllm/core/policy.py,sha256=_5v6DNIkNmXMhzy0CdBVokBIcmQDb4zGov1lXKU6qpQ,958
|
34
|
+
vllm/core/scheduler.py,sha256=N47ej7mwiTtAbwdFJAyYPp2v6ecKt5CCiMZj5mP41z0,50666
|
35
|
+
vllm/core/block/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
36
|
+
vllm/core/block/block_table.py,sha256=NWfP5y9h9ayswrrcz8xRnOUcr8_ZXTlvw0eepTLlL4g,11752
|
37
|
+
vllm/core/block/common.py,sha256=gjmJad3pC5Aacdlf98SmI0J547eWc-_i_ET035PR6ME,6706
|
38
|
+
vllm/core/block/cpu_gpu_block_allocator.py,sha256=M0L8BDQ3w8eGDh2ecnzTzRkdRtpmFEVC9zfD-rspqS4,8826
|
39
|
+
vllm/core/block/interfaces.py,sha256=q1iltFvcJZMzxlueq7a3cOg6TyOooe5HlOoadXJXohU,4891
|
40
|
+
vllm/core/block/naive_block.py,sha256=4lflehRq8MhrjYc5q9AeV5Ge7RfZbobxdZ92GOhi9as,10904
|
41
|
+
vllm/core/block/prefix_caching_block.py,sha256=0ehWEzOBpbGCFs9-w3ZkDF2wLK5z3fU-T8A99JaUGE4,22564
|
42
|
+
vllm/distributed/__init__.py,sha256=__tl9Frrf3PFrSyNYcn5i-y2rL-J4-Qn6RJwrsZ4xgc,83
|
43
|
+
vllm/distributed/communication_op.py,sha256=v1FockR0Z1rOfv4zU2vZjZdFAViOgTvIpYXxrG9rYuU,9657
|
44
|
+
vllm/distributed/parallel_state.py,sha256=2GxhY8-r4rzaJw0xNP07-0kn15s_n0XcW1PgdnWwcn0,13336
|
45
|
+
vllm/distributed/utils.py,sha256=rOoR2Z9GFqwCV8IxttRp4bCkwWzNCNMaOMckAB7IPKY,5386
|
46
|
+
vllm/distributed/device_communicators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
47
|
+
vllm/distributed/device_communicators/custom_all_reduce.py,sha256=DzkdwfP_aHYqizRV5s_D3J9b31VxS5FPI6HgGvZnr54,9793
|
48
|
+
vllm/distributed/device_communicators/pynccl.py,sha256=oz5HX0PJqC9vXZQMCzRuML9qUVWMR0hdP_MsaSJH5aM,10509
|
49
|
+
vllm/distributed/device_communicators/pynccl_utils.py,sha256=XsNryyBFTWqrWxKk9xfNohkBoodE9uJO0OsEx6Q86eI,1761
|
50
|
+
vllm/engine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
51
|
+
vllm/engine/arg_utils.py,sha256=sQzs9rHfHI3GYjyMrs1xsVe-OknYlHaVESEUyc55ZV8,30211
|
52
|
+
vllm/engine/async_llm_engine.py,sha256=BdI5Hhp5ub0t-rPH_iaKJ8xhuGjS-oadRPPW9lhW0ok,29166
|
53
|
+
vllm/engine/llm_engine.py,sha256=CMs8y0w68L_VSjjV5aC4ynS9beQHnovilvq6feMFxAI,33511
|
54
|
+
vllm/engine/metrics.py,sha256=i8t6mXtBTLjd2zafcWrpUKWz9KNeeVoZSpbrNynnBsI,15068
|
55
|
+
vllm/engine/output_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
56
|
+
vllm/engine/output_processor/interfaces.py,sha256=PJN0J7a_rOlLDpDOL1SQJg23_skpnQl62DB13zK0LG0,2908
|
57
|
+
vllm/engine/output_processor/multi_step.py,sha256=CtendtQTWP_bzt5td1eHzaqqwDpMm0u0ND7jkOeHjKw,6047
|
58
|
+
vllm/engine/output_processor/single_step.py,sha256=z40gvds0IqShrqHGOj0kSQIycEX6HjTEP1F8v4PLIPA,13876
|
59
|
+
vllm/engine/output_processor/stop_checker.py,sha256=BUqrcnkdDR4JmfyNbQIaMRVcRXmK-V-P3JJ79L5mJwk,4011
|
60
|
+
vllm/engine/output_processor/util.py,sha256=2OR7yCEdNjcrOc6LgpKZprjDh6n_sbsqCi8DEirQ77A,666
|
61
|
+
vllm/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
62
|
+
vllm/entrypoints/api_server.py,sha256=SqsJDlh6Ou_P6m7BiECDZD7PRfUmn0MWgTDS5UCJemc,4228
|
63
|
+
vllm/entrypoints/llm.py,sha256=Sbr5m3pV4k3B_XsCT_rFTGgmcvVT7NUqQmxelcLd9jQ,12071
|
64
|
+
vllm/entrypoints/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
65
|
+
vllm/entrypoints/openai/api_server.py,sha256=5CoGq4q2rraRXikLmj-KkVkEQQB0f8y4GDCXnrfFU6Y,6614
|
66
|
+
vllm/entrypoints/openai/cli_args.py,sha256=Y61Mrl4W8L_H2_PtvrlGqo9a2pNzE_eePBrYLkm59fw,4454
|
67
|
+
vllm/entrypoints/openai/protocol.py,sha256=SLpbiupGu3X_ZtcjdY_WWXiSP5W7gA_VX5RqLSO_VwM,17060
|
68
|
+
vllm/entrypoints/openai/serving_chat.py,sha256=zf6SOHE3ny8zGxI7EhG1k_c8kGQCvm9C6m8jmAlJ85o,17162
|
69
|
+
vllm/entrypoints/openai/serving_completion.py,sha256=S-HzFgyC15x_3t7jtbkXHbgllt06fdsUsv--60ayiyk,15451
|
70
|
+
vllm/entrypoints/openai/serving_engine.py,sha256=7pusNV5tSQQIAicfkIZccY3VeyMbTOoE4KpLnjQeKEA,9389
|
71
|
+
vllm/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
72
|
+
vllm/executor/cpu_executor.py,sha256=Q6S2-D8bmMdEbH-VV6Os5moVT3_CoPGkJ6wq3p0EwvY,5727
|
73
|
+
vllm/executor/distributed_gpu_executor.py,sha256=aqJGL3iOS4rtyZENUwUCOGJ81dF5e5IT2r193FYFfWk,4211
|
74
|
+
vllm/executor/executor_base.py,sha256=_ojmnDL5vcFyk2_wU4aDijlu3rNy-kIdZDIcCAg2ZI0,3915
|
75
|
+
vllm/executor/gpu_executor.py,sha256=NDLQg1394sW538fjrx9SZ5sqkJnCecqRa2sggal7RAI,5662
|
76
|
+
vllm/executor/multiproc_worker_utils.py,sha256=5Wte7EL1hF2iQJh5hb7MYrhQnSiwyNuF-Ry2tuQrTzQ,8546
|
77
|
+
vllm/executor/neuron_executor.py,sha256=KOijQt4IlDp__imqqBdXfFf-zmnU_GPsQr6qDr4bApo,3201
|
78
|
+
vllm/executor/ray_gpu_executor.py,sha256=s3_nVNFLyDv_C3A2uGv3qdcaVlh3quXWSWLlmFlesEw,12930
|
79
|
+
vllm/executor/ray_utils.py,sha256=ZoXVE7E2zfEVzjymDMW56tjF9xRuW191J79ErrPCqD4,4476
|
80
|
+
vllm/logging/__init__.py,sha256=FItnlpEZIYll1KuZC2T0mANn3gtSZ00PetYfOpQ9lEo,91
|
81
|
+
vllm/logging/formatter.py,sha256=TEUmKj3xHiLzHBnFqAujcxH0t2hBQ04sUaho2RyORnk,486
|
82
|
+
vllm/lora/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
83
|
+
vllm/lora/fully_sharded_layers.py,sha256=YAWCdC07ttiEQ_MOP0PM8ozdWcR3qNBMckdLvp085M4,10193
|
84
|
+
vllm/lora/layers.py,sha256=5b8KimN12u5wFDHrHFhba9rmGJH6Nl5snHGEeFkre1s,43117
|
85
|
+
vllm/lora/lora.py,sha256=csRLSX1slDtWKx4WJfnIT2WymU3kOwKhfPmADkqQLB4,5124
|
86
|
+
vllm/lora/models.py,sha256=sKvKUqICe_xKan-SvrpouALi8Tj6EanGl6TpOlaYkXA,27413
|
87
|
+
vllm/lora/punica.py,sha256=joaqjpytzJKr8A5c9vWD-oWv-u5Ra6EblJaIem1pMmM,6652
|
88
|
+
vllm/lora/request.py,sha256=rWnxR9E8TJ_kU7n-COg8UsXlQkb5WkmKQiraXA1DDO8,910
|
89
|
+
vllm/lora/utils.py,sha256=T6wA-06z1160_m3yKMUBO61KUOiyNnB0M3lCvRyOd6M,3991
|
90
|
+
vllm/lora/worker_manager.py,sha256=8cxDEVA99ieAZiflryWAZW9rklR258QDn0kKT_GwcMg,9372
|
91
|
+
vllm/model_executor/__init__.py,sha256=Vs_cFguNcvqmfjS0PlP5ffw8cpIi-u1Fpaytq1q5reI,183
|
92
|
+
vllm/model_executor/sampling_metadata.py,sha256=QzEeViRclMi_JXGt304xfzRiCu5CD29WP4pE-F4uHZs,23977
|
93
|
+
vllm/model_executor/utils.py,sha256=FUmv40dHOE_LtbXNiaq-Wlh0tS3NwGuPmLHxR2TSczI,928
|
94
|
+
vllm/model_executor/guided_decoding/__init__.py,sha256=uvHmXKzXs_QKq01CZz3dCeHZV5HPj2SDy5E4o9x_kf8,1191
|
95
|
+
vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py,sha256=GncmTw7ma2-DV_BidfQIge1xx29vkHlXwuZ4e_gNqXE,2979
|
96
|
+
vllm/model_executor/guided_decoding/outlines_decoding.py,sha256=O7AKM2YSKShrgEUslKuctF6S0KovAD0eNAHa2f60pXA,4636
|
97
|
+
vllm/model_executor/guided_decoding/outlines_logits_processors.py,sha256=7vZRAVHptaKE9wsMBpDOVe5dTN7zfSymrJGGyzWp1AM,6297
|
98
|
+
vllm/model_executor/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
99
|
+
vllm/model_executor/layers/activation.py,sha256=IIGnP9_v0rgjEAgEc3zB39FhHgC-13HLuoO8ZNQ1324,6045
|
100
|
+
vllm/model_executor/layers/layernorm.py,sha256=-cC5Klj64Kf6fvdfFa0yFHT-jPrAiJtemjLs4S2w7VU,1986
|
101
|
+
vllm/model_executor/layers/linear.py,sha256=XraA6dfQ8z6gd6OPWJYuKa7pgfQoYohJ4OgF9Lx05DA,30448
|
102
|
+
vllm/model_executor/layers/logits_processor.py,sha256=2huFdZxVjbptV2YJTbCBMjsnTMDh0Tx3pRh1HjLjLHU,4111
|
103
|
+
vllm/model_executor/layers/rejection_sampler.py,sha256=xDScFQwbwYwsblhjdPt5PStQxqE97Odo1CxH4GPb6cg,16541
|
104
|
+
vllm/model_executor/layers/rotary_embedding.py,sha256=MLBqXt1u-f-qYSn4CvvLBEpc1Nw3CwEGC-jvWKfesyA,20752
|
105
|
+
vllm/model_executor/layers/sampler.py,sha256=pLCsEyxpzFbL1LmBDt7JJqkOmXL6SnbPofBDGYSCaXQ,45125
|
106
|
+
vllm/model_executor/layers/vocab_parallel_embedding.py,sha256=dRwUFcJVhX2jtHhQpyq1AklP8iz9hGuM8RcXDdYyHL0,6289
|
107
|
+
vllm/model_executor/layers/fused_moe/__init__.py,sha256=8DwT9uqwo4eBxFtYuZEjZIIVC90m2tZNcUtwA9cUsL8,158
|
108
|
+
vllm/model_executor/layers/fused_moe/fused_moe.py,sha256=onmK2RVOSgPu4Sg-r4f4DrSALYZnZz0xZEkcs2m3IxE,19188
|
109
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=pCCKkdUzzuBVtljyk7AEIAbeDf12DUiieXaODZXzm5E,3254
|
110
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=trX2-c4N6hTTD6zFNi6A2bT3FkhxKjkM2rPl-o1K9ss,3250
|
111
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=I4d56uD7E1JMXD9RAxq3FebdPquDsnNEkVaIY9Ctm9w,3246
|
112
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=G4PKqWxh0MlBhg7QHKj0m--_fP3Ll0gs7VJaeg-NIDM,3254
|
113
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=bKX9AvcxN6k-i3RUmHSchZZ3rjoYRYb4iBqhCI4L3MY,3257
|
114
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=AffDc0_51ML8HiA3757zbD10TZJdUsUDIYIqO4g0yUw,3250
|
115
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=IEYBNjt9HGnzoOVSWvL0A0jUqq926QD0_BvVYR4RA1Y,3252
|
116
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=Ns9Y12aZbJnFhcG3nwb67bDqqiQAo9tdTAIe8K2Ajz4,3255
|
117
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=HOxWmCI2ifHmWc0or2y8nEen86jDeLDov1-tuMzuhxo,3256
|
118
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=_5weLBinQCDzyV75hHKIT95Y0ce94KWft2_5BC6EkbQ,3254
|
119
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=Ru460ZgnUP4U8OsJfwF8n-AI-gfcolNR3_qzoxG6DtY,3254
|
120
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=K6BGrKw_oHTAtHjsZldcjp-BUM1dIecKXrrRn9OpRGs,3254
|
121
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json",sha256=HPKsnFQDO4jlX7y_r2NBoao1Gb0b_adH_TWS25FTF2Q,3114
|
122
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=-5nkLIunjG1ghPoUEtt2AXEQw9oGiilP7K3UvQv9CqE,3252
|
123
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=DxYu8regZOSFu8ugFGA_QbwWK4g8xwQUZF9a_nNY4Cs,3255
|
124
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=qwKy8oaMsd3QrXgQbM_x9xcfYiHK_Ou1CEwDPL5Gbgo,3259
|
125
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=BAJnXTZoewwCtzJLUPJ0oYuALv640MvDuLseGcsYaaw,3252
|
126
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json",sha256=bHXioE5sSuwOjFikC9T7Pq__z4hrNILgLwbEAYopXoI,3268
|
127
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=tme0ydWzIxdABZLk4tU8G_X2dJUYGGZNkQzNGcmcvUc,3261
|
128
|
+
vllm/model_executor/layers/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
129
|
+
vllm/model_executor/layers/ops/rand.py,sha256=wZmJEze7ZIhMo4xTZZh_T5FVD3fcAxSQM7A49cP2rHA,5091
|
130
|
+
vllm/model_executor/layers/ops/sample.py,sha256=YcnzpDhkwHX8W33ROKg1KAzFaJIRGBppUcB5RDn7mpk,16675
|
131
|
+
vllm/model_executor/layers/quantization/__init__.py,sha256=LWcJems66lRpBUfoQz3LCl9m2cXmPXFJrlyhyVCa94o,1234
|
132
|
+
vllm/model_executor/layers/quantization/aqlm.py,sha256=vAFAst6I0G4soDm_-OwxGN4MRByQb3IrF3Eje7si9tQ,13684
|
133
|
+
vllm/model_executor/layers/quantization/awq.py,sha256=YdWZxp4NGu_TkxzMLvYHasqkq17UlAiE7H3Cv_1-hLQ,6132
|
134
|
+
vllm/model_executor/layers/quantization/base_config.py,sha256=HItnLkQHI2BHbnG-0hMNWW5ziWJuaG4W7uk58HBVUPk,3113
|
135
|
+
vllm/model_executor/layers/quantization/fp8.py,sha256=R85ZaoBwYdyAy8xxhLNFElf2N3cz6iUuR458zYdtv-c,10065
|
136
|
+
vllm/model_executor/layers/quantization/gptq.py,sha256=fFRTnbE9TTVl5rfwiq6m5SzVwAnakKamG6BOtD8nAds,7881
|
137
|
+
vllm/model_executor/layers/quantization/gptq_marlin.py,sha256=KrZkwPgdq5ZAJV2PrJf6V8488AdROoG-tsVlHKOGHts,14891
|
138
|
+
vllm/model_executor/layers/quantization/marlin.py,sha256=BxJcJam4yAC0bc64cK9bb4EkInlBATOtYMkF4jMR2u4,7641
|
139
|
+
vllm/model_executor/layers/quantization/schema.py,sha256=XMe02jWK5tzYXCOclOoX8HrObh33s9950yzWMqQ9dlE,3648
|
140
|
+
vllm/model_executor/layers/quantization/squeezellm.py,sha256=KUJBQc80QS7w5oup08s-OwBPe5Qg5NPu96MJOMnvPXo,4558
|
141
|
+
vllm/model_executor/model_loader/__init__.py,sha256=NwGMzGtcAMxaVZ66oDMIjvE-m2wTImBO17RbrEWTRoI,1309
|
142
|
+
vllm/model_executor/model_loader/loader.py,sha256=d6LgzSeug4LrZsFJe5UXIeEfb1TmnAWY3gUSdZHz3Uc,16203
|
143
|
+
vllm/model_executor/model_loader/neuron.py,sha256=TfD1qlFMnJQv1FdSsKJxDpP-nd3HOhRy0bRT7aj_3ko,5003
|
144
|
+
vllm/model_executor/model_loader/tensorizer.py,sha256=XLQa8m_FAJlQLk4xMDLZH61-qU9eXocn6eNrBjpOoMQ,15600
|
145
|
+
vllm/model_executor/model_loader/utils.py,sha256=7u45FtITChfUfH0D0Ztz1uYLaY8rP2LhQPNdcZvO7t0,1405
|
146
|
+
vllm/model_executor/model_loader/weight_utils.py,sha256=bv2vbCISDyoYpVhcmWO2XCFiHIojGEXUzqWq72YvTfQ,13657
|
147
|
+
vllm/model_executor/models/__init__.py,sha256=FB81MIjX7541CRWPpsEAvqjeVbzvxPW2noVvR8ggwkY,4885
|
148
|
+
vllm/model_executor/models/baichuan.py,sha256=aVDLEmoBCOSmCRTE4ljzQwZreXslCumH94xroILiOR0,15574
|
149
|
+
vllm/model_executor/models/bloom.py,sha256=Ik6F6vz2TXBjhl38M4VlL6GLsqAQ4_F3kuwTFseag7k,11863
|
150
|
+
vllm/model_executor/models/chatglm.py,sha256=CHXVoIa6IrmzkXbIWxYE1Xdrwn2D5LmWcQeuEQTTeag,13290
|
151
|
+
vllm/model_executor/models/commandr.py,sha256=6rLFDHXwyzq-tlb2SNQ8qKMdZ4FlLbdzj0rxLmVi73Q,14462
|
152
|
+
vllm/model_executor/models/dbrx.py,sha256=p9GeJfdAoB0ylazJwvTwr_XPJFEcFowKr-3YwfhyH5Q,14762
|
153
|
+
vllm/model_executor/models/decilm.py,sha256=1NeTcuFMmjALprLFF9LvYwPLXD_n43rJIETkrM1OFyo,5288
|
154
|
+
vllm/model_executor/models/deepseek.py,sha256=wo7XToUkoF9UtwH4-mSswIFbAWRjDwveRlDZCsqP9Z4,17369
|
155
|
+
vllm/model_executor/models/falcon.py,sha256=UGwCq5_wW4OLbTqRfgEeRBgH6b0pfOEQXOCMpNIMggA,17689
|
156
|
+
vllm/model_executor/models/gemma.py,sha256=1emvZweTB9GOC_7B3Zexwc-SQgi3Rrl1ui33Xq5WO7M,14689
|
157
|
+
vllm/model_executor/models/gpt2.py,sha256=ZGy2PN3YDqsnRQPenG-N5Q5sYaoXEvkFIB9w5geepbA,9802
|
158
|
+
vllm/model_executor/models/gpt_bigcode.py,sha256=xxjvE4hSQAYCgGWNanvKRyeUBcit-cDqM_RQmjWSZtM,9767
|
159
|
+
vllm/model_executor/models/gpt_j.py,sha256=b73AelsGjR6FlB3wNsExUyDwWzAQbYcotlqMFQQNka0,10126
|
160
|
+
vllm/model_executor/models/gpt_neox.py,sha256=kaMSTdm-dXXgDsjnIVM76dryEJRwgFsGsFYRnQVPamo,11098
|
161
|
+
vllm/model_executor/models/internlm2.py,sha256=L-blE9O_90E5VBxXkNiuaY3Llv962KdZSNB_8mrFvKY,12361
|
162
|
+
vllm/model_executor/models/jais.py,sha256=WMZdmNL5rFE2hNeZpUatDpJzQjssvCF5Ww_oizxSJrU,12256
|
163
|
+
vllm/model_executor/models/llama.py,sha256=29UkoC7G-X8gt_hf7zpbXtnfz8TFu_eiRRqQif-op-Q,17518
|
164
|
+
vllm/model_executor/models/llava.py,sha256=BkI6Qot_awnOiLi3ZP3dzOSeKGt1o89as5yugiSg2xc,10849
|
165
|
+
vllm/model_executor/models/minicpm.py,sha256=jEC6_ueMMfqCDnEBZRNSRjp0ClzUFGUKV7lwpSrwe3U,20813
|
166
|
+
vllm/model_executor/models/mixtral.py,sha256=CVOEvuB2z7dABGkjmrL6928vDe_iKAnXJWQ39qEmbMo,24088
|
167
|
+
vllm/model_executor/models/mixtral_quant.py,sha256=Z8R1UxXjM4EC7gj20etQPdRfGKqMNU7iYPb8ukSxta4,15988
|
168
|
+
vllm/model_executor/models/mpt.py,sha256=D77a1ooSr9Az761UqC7fo5xIuR2WcDv-ltTMDeyi1NI,10622
|
169
|
+
vllm/model_executor/models/olmo.py,sha256=Dpvb1bijiK_7PWvVGUE1mr-8kZvnNllk79lnXyaCRGs,13007
|
170
|
+
vllm/model_executor/models/opt.py,sha256=wAT86ZFjbYBj_KMZbwKAhNXxzXIP2Uq9fuclYHEQTg4,13212
|
171
|
+
vllm/model_executor/models/orion.py,sha256=RIznxoWtqb2QyCuI0H4rqXrWHHssGwDoGJraVNP12oo,11981
|
172
|
+
vllm/model_executor/models/phi.py,sha256=U2cv52LDBks_6qdTuXdYRAcbxMQlQYVsPea76stS9YE,11405
|
173
|
+
vllm/model_executor/models/qwen.py,sha256=UShrRhcMy0dQ__9i6Emb4kYYa0Hzw2UvtABNkpPQNlI,10218
|
174
|
+
vllm/model_executor/models/qwen2.py,sha256=DFjNNkQzdR3MX_gch7bVGu2bQeuC9qbhlhC_21j-wp4,13773
|
175
|
+
vllm/model_executor/models/qwen2_moe.py,sha256=y38HUpmj4KEl2LTt4WlUMMriA7BVX0-8O2xr2m3wE7M,17796
|
176
|
+
vllm/model_executor/models/stablelm.py,sha256=P_hv9zbp317mrhwsDIw968BoNGpTUP0u4t-rgRJ3EEU,12390
|
177
|
+
vllm/model_executor/models/starcoder2.py,sha256=Bj48OYCD3U4hNv4xVbiDPBYBcqdKgRmV7ixscuOpwRE,11915
|
178
|
+
vllm/model_executor/models/xverse.py,sha256=mxzt54-H1U5n7AAj7pAJlw26HW94QnT4mGo_-E53Gp0,13652
|
179
|
+
vllm/spec_decode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
180
|
+
vllm/spec_decode/batch_expansion.py,sha256=qKXK1bA8qxlgsmrstvvEmjnfsnMMvcP3iMqEu8zBjIw,16695
|
181
|
+
vllm/spec_decode/interfaces.py,sha256=YabyUJAKEuNBbr0PUR8Kf30k0I1NW_6AZDXEAPiOVy4,2070
|
182
|
+
vllm/spec_decode/metrics.py,sha256=mpeHEIuFPuKd7GaX0--QdcZnAJdpwsAhq7IWp9xhywM,7243
|
183
|
+
vllm/spec_decode/multi_step_worker.py,sha256=jgQKV4pjYDSRflAbj9xFPtPqqMvvHEvU4yUU8rFvB38,8440
|
184
|
+
vllm/spec_decode/ngram_worker.py,sha256=eCZ7RfhTPRWPuU5y_ue7R1fuc3vtcbNx6Kfi1I4LQtY,6601
|
185
|
+
vllm/spec_decode/spec_decode_worker.py,sha256=PlRYVfBypXj5WBcqT0Y27zEnM7zt0dIIAi3h2B33vH8,20366
|
186
|
+
vllm/spec_decode/top1_proposer.py,sha256=Qm69FUdjz6tfQcmxjPGOOGBY81KELMbvREuKyqVwshY,7807
|
187
|
+
vllm/spec_decode/util.py,sha256=QeSDbwT4771urh8FbkI-84tcqCAeKBDn-xMJ7eRh8WI,8008
|
188
|
+
vllm/transformers_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
189
|
+
vllm/transformers_utils/config.py,sha256=JuMqdcmNfGZnlhP8tk_yCzwK9GV9BkzQrZVaM5vmHc8,2328
|
190
|
+
vllm/transformers_utils/detokenizer.py,sha256=83YfdbRQwvLPxpCLPOm_FL7Fayc-_RBAC-Rsh710iok,13148
|
191
|
+
vllm/transformers_utils/tokenizer.py,sha256=6ujAVMHfXfcQAAe8is0mV-aVth5OZBMPlSd36mdN5ug,5488
|
192
|
+
vllm/transformers_utils/configs/__init__.py,sha256=R4sVRF1yv4C5-u1hZI3XnSExgsSNoyzAWv1sQoHm6iQ,619
|
193
|
+
vllm/transformers_utils/configs/chatglm.py,sha256=77q3UL8VKz1ly1ozm0aXMQkzTUIu4DlQkuVt1D219D0,2747
|
194
|
+
vllm/transformers_utils/configs/dbrx.py,sha256=M0QtsnSAKkLdVArfOAL42XRSHXNi8C3VoFpuEAXBwjM,10918
|
195
|
+
vllm/transformers_utils/configs/falcon.py,sha256=3LMoztRxWVGj0bQZyCVav6jc6HXmygHONIvdk6xYruo,2878
|
196
|
+
vllm/transformers_utils/configs/jais.py,sha256=Xo4mh8cd_JHW6UjO6TADEWvbO1lhVxM_4GLqWtYQYCI,10335
|
197
|
+
vllm/transformers_utils/configs/mpt.py,sha256=4HQQ3PKw7Wpbmwmtti2LhBZTJWsbq9v-NipuLSzzYgk,7562
|
198
|
+
vllm/transformers_utils/tokenizer_group/__init__.py,sha256=sGbY-UQJ_s7_L53FIfLW0_VyJ-pVMhVQLku4juvgv7Q,1267
|
199
|
+
vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py,sha256=QOpqD4xYlr4ixbFWPCqUO-cJRqNJbv8-oUps62nnBeE,1607
|
200
|
+
vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py,sha256=CzmmUngdzgvJ9gb5cn3XTFynkpMzOzl_Ta895E7m44E,6515
|
201
|
+
vllm/transformers_utils/tokenizer_group/tokenizer_group.py,sha256=sy2J9atKx_kZBeAva4aPATUvXnkYgMghmTKAs1lkZ6M,3226
|
202
|
+
vllm/transformers_utils/tokenizers/__init__.py,sha256=yNjHrv9o-X4OEvO3-ydyjzWWmqUuuJmYcVmkSmtnYLI,114
|
203
|
+
vllm/transformers_utils/tokenizers/baichuan.py,sha256=BCyRKgU8v6JrhPad1eqPq5UHo-3MvduREqxFKYAD5Nk,9390
|
204
|
+
vllm/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
205
|
+
vllm/usage/usage_lib.py,sha256=uXjKEKdGwNNjx56bAZmYwTbL9aMfthzC_5v8C3I_Ly0,7164
|
206
|
+
vllm/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
207
|
+
vllm/worker/cache_engine.py,sha256=yT5Ir5T0uYDoSbzlOZarcPV42vFvzw0_y7Ipne6oMxk,3922
|
208
|
+
vllm/worker/cpu_model_runner.py,sha256=IXE1mPLny3chd4x1Yy85YvQX1ymjTUfMmVZsouJtEMk,13934
|
209
|
+
vllm/worker/cpu_worker.py,sha256=p1qUrKL4Bf2gFHfAG7vbjRktsBKBDLPiubYpqU_pFkQ,12654
|
210
|
+
vllm/worker/model_runner.py,sha256=qmIM8N_A0j0eTUxJzN5MC9R-xwuwSYwfUXCJRhpbCI4,49867
|
211
|
+
vllm/worker/neuron_model_runner.py,sha256=0CAkAavF3k4X9hoyT5RNtCQtyUwq3L-O4mWo_9Zra28,7901
|
212
|
+
vllm/worker/neuron_worker.py,sha256=sX6fWmNan9M4iP1PEO2UyWcxckuu89i9sf6725incVk,3480
|
213
|
+
vllm/worker/worker.py,sha256=SPnZrMFSkKrFqJGBtN9dD3VJagmhHGIXeoV_7_q9zv4,14644
|
214
|
+
vllm/worker/worker_base.py,sha256=B_VaAmgcLQ15t3_9sjOzTQJvflIOuMXvNnF1M-bbOZs,5263
|
215
|
+
vllm_npu-0.4.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
216
|
+
vllm_npu-0.4.2.dist-info/METADATA,sha256=shtf1Bfhi7_a-iMVg2Ax9BHYJXotyjHD4-pEpPvNrKI,9238
|
217
|
+
vllm_npu-0.4.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
218
|
+
vllm_npu-0.4.2.dist-info/top_level.txt,sha256=fAgb8Pt4zQoKTUA3ZnKEIgcjh0L97_dwEjYDTL5MEEo,5
|
219
|
+
vllm_npu-0.4.2.dist-info/RECORD,,
|
@@ -0,0 +1 @@
|
|
1
|
+
vllm
|