vllm-cpu 0.8.5.post2__cp310-cp310-manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of vllm-cpu might be problematic. Click here for more details.
- vllm/_C.abi3.so +0 -0
- vllm/__init__.py +170 -0
- vllm/_custom_ops.py +1536 -0
- vllm/_ipex_ops.py +241 -0
- vllm/_version.py +34 -0
- vllm/adapter_commons/__init__.py +0 -0
- vllm/adapter_commons/layers.py +16 -0
- vllm/adapter_commons/models.py +105 -0
- vllm/adapter_commons/request.py +25 -0
- vllm/adapter_commons/utils.py +92 -0
- vllm/adapter_commons/worker_manager.py +38 -0
- vllm/assets/__init__.py +0 -0
- vllm/assets/audio.py +38 -0
- vllm/assets/base.py +40 -0
- vllm/assets/image.py +31 -0
- vllm/assets/video.py +103 -0
- vllm/attention/__init__.py +19 -0
- vllm/attention/backends/__init__.py +0 -0
- vllm/attention/backends/abstract.py +306 -0
- vllm/attention/backends/blocksparse_attn.py +457 -0
- vllm/attention/backends/cpu_mla.py +303 -0
- vllm/attention/backends/flash_attn.py +999 -0
- vllm/attention/backends/flashinfer.py +1092 -0
- vllm/attention/backends/flashmla.py +242 -0
- vllm/attention/backends/hpu_attn.py +301 -0
- vllm/attention/backends/ipex_attn.py +396 -0
- vllm/attention/backends/mla/__init__.py +0 -0
- vllm/attention/backends/mla/common.py +1444 -0
- vllm/attention/backends/pallas.py +346 -0
- vllm/attention/backends/placeholder_attn.py +399 -0
- vllm/attention/backends/rocm_aiter_mla.py +412 -0
- vllm/attention/backends/rocm_flash_attn.py +969 -0
- vllm/attention/backends/torch_sdpa.py +691 -0
- vllm/attention/backends/triton_mla.py +113 -0
- vllm/attention/backends/utils.py +609 -0
- vllm/attention/backends/xformers.py +798 -0
- vllm/attention/layer.py +443 -0
- vllm/attention/ops/__init__.py +0 -0
- vllm/attention/ops/blocksparse_attention/__init__.py +0 -0
- vllm/attention/ops/blocksparse_attention/blocksparse_attention_kernel.py +432 -0
- vllm/attention/ops/blocksparse_attention/interface.py +238 -0
- vllm/attention/ops/blocksparse_attention/utils.py +244 -0
- vllm/attention/ops/chunked_prefill_paged_decode.py +366 -0
- vllm/attention/ops/flashmla.py +115 -0
- vllm/attention/ops/hpu_paged_attn.py +105 -0
- vllm/attention/ops/ipex_attn.py +193 -0
- vllm/attention/ops/merge_attn_states.py +42 -0
- vllm/attention/ops/nki_flash_attn.py +905 -0
- vllm/attention/ops/paged_attn.py +255 -0
- vllm/attention/ops/prefix_prefill.py +902 -0
- vllm/attention/ops/rocm_aiter_mla.py +42 -0
- vllm/attention/ops/rocm_aiter_paged_attn.py +101 -0
- vllm/attention/ops/triton_decode_attention.py +675 -0
- vllm/attention/ops/triton_flash_attention.py +1375 -0
- vllm/attention/ops/triton_merge_attn_states.py +96 -0
- vllm/attention/selector.py +186 -0
- vllm/attention/utils/fa_utils.py +54 -0
- vllm/beam_search.py +82 -0
- vllm/benchmarks/__init__.py +0 -0
- vllm/benchmarks/datasets.py +831 -0
- vllm/benchmarks/endpoint_request_func.py +160 -0
- vllm/benchmarks/latency.py +181 -0
- vllm/benchmarks/serve.py +925 -0
- vllm/benchmarks/throughput.py +608 -0
- vllm/benchmarks/utils.py +69 -0
- vllm/collect_env.py +795 -0
- vllm/compilation/__init__.py +0 -0
- vllm/compilation/backends.py +715 -0
- vllm/compilation/compiler_interface.py +437 -0
- vllm/compilation/counter.py +33 -0
- vllm/compilation/decorators.py +249 -0
- vllm/compilation/fix_functionalization.py +182 -0
- vllm/compilation/fusion.py +617 -0
- vllm/compilation/fx_utils.py +60 -0
- vllm/compilation/inductor_pass.py +114 -0
- vllm/compilation/monitor.py +38 -0
- vllm/compilation/multi_output_match.py +108 -0
- vllm/compilation/noop_elimination.py +135 -0
- vllm/compilation/pass_manager.py +74 -0
- vllm/compilation/sequence_parallelism.py +266 -0
- vllm/compilation/torch25_custom_graph_pass.py +41 -0
- vllm/compilation/vllm_inductor_pass.py +68 -0
- vllm/compilation/wrapper.py +129 -0
- vllm/config.py +4179 -0
- vllm/connections.py +170 -0
- vllm/core/__init__.py +0 -0
- vllm/core/block/__init__.py +0 -0
- vllm/core/block/block_table.py +398 -0
- vllm/core/block/common.py +370 -0
- vllm/core/block/cpu_gpu_block_allocator.py +440 -0
- vllm/core/block/interfaces.py +318 -0
- vllm/core/block/naive_block.py +465 -0
- vllm/core/block/prefix_caching_block.py +1134 -0
- vllm/core/block/utils.py +27 -0
- vllm/core/block_manager.py +520 -0
- vllm/core/evictor.py +156 -0
- vllm/core/interfaces.py +134 -0
- vllm/core/placeholder_block_space_manager.py +99 -0
- vllm/core/scheduler.py +2060 -0
- vllm/device_allocator/__init__.py +0 -0
- vllm/device_allocator/cumem.py +280 -0
- vllm/distributed/__init__.py +5 -0
- vllm/distributed/communication_op.py +40 -0
- vllm/distributed/device_communicators/__init__.py +0 -0
- vllm/distributed/device_communicators/base_device_communicator.py +151 -0
- vllm/distributed/device_communicators/cpu_communicator.py +139 -0
- vllm/distributed/device_communicators/cuda_communicator.py +131 -0
- vllm/distributed/device_communicators/cuda_wrapper.py +179 -0
- vllm/distributed/device_communicators/custom_all_reduce.py +301 -0
- vllm/distributed/device_communicators/custom_all_reduce_utils.py +257 -0
- vllm/distributed/device_communicators/hpu_communicator.py +45 -0
- vllm/distributed/device_communicators/neuron_communicator.py +19 -0
- vllm/distributed/device_communicators/pynccl.py +217 -0
- vllm/distributed/device_communicators/pynccl_wrapper.py +340 -0
- vllm/distributed/device_communicators/shm_broadcast.py +557 -0
- vllm/distributed/device_communicators/tpu_communicator.py +93 -0
- vllm/distributed/device_communicators/xpu_communicator.py +54 -0
- vllm/distributed/kv_transfer/README.md +29 -0
- vllm/distributed/kv_transfer/__init__.py +11 -0
- vllm/distributed/kv_transfer/disagg_prefill_workflow.jpg +0 -0
- vllm/distributed/kv_transfer/kv_connector/__init__.py +0 -0
- vllm/distributed/kv_transfer/kv_connector/base.py +127 -0
- vllm/distributed/kv_transfer/kv_connector/factory.py +107 -0
- vllm/distributed/kv_transfer/kv_connector/lmcache_connector.py +98 -0
- vllm/distributed/kv_transfer/kv_connector/mooncake_store_connector.py +201 -0
- vllm/distributed/kv_transfer/kv_connector/simple_connector.py +328 -0
- vllm/distributed/kv_transfer/kv_connector/utils.py +90 -0
- vllm/distributed/kv_transfer/kv_connector/v1/__init__.py +8 -0
- vllm/distributed/kv_transfer/kv_connector/v1/base.py +209 -0
- vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py +131 -0
- vllm/distributed/kv_transfer/kv_connector/v1/shared_storage_connector.py +383 -0
- vllm/distributed/kv_transfer/kv_connector_agent.py +76 -0
- vllm/distributed/kv_transfer/kv_lookup_buffer/__init__.py +0 -0
- vllm/distributed/kv_transfer/kv_lookup_buffer/base.py +174 -0
- vllm/distributed/kv_transfer/kv_lookup_buffer/mooncake_store.py +160 -0
- vllm/distributed/kv_transfer/kv_lookup_buffer/simple_buffer.py +236 -0
- vllm/distributed/kv_transfer/kv_pipe/__init__.py +0 -0
- vllm/distributed/kv_transfer/kv_pipe/base.py +66 -0
- vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py +279 -0
- vllm/distributed/kv_transfer/kv_pipe/pynccl_pipe.py +279 -0
- vllm/distributed/kv_transfer/kv_transfer_state.py +70 -0
- vllm/distributed/parallel_state.py +1209 -0
- vllm/distributed/utils.py +366 -0
- vllm/engine/__init__.py +0 -0
- vllm/engine/arg_utils.py +1724 -0
- vllm/engine/async_llm_engine.py +1261 -0
- vllm/engine/async_timeout.py +191 -0
- vllm/engine/llm_engine.py +2150 -0
- vllm/engine/metrics.py +717 -0
- vllm/engine/metrics_types.py +96 -0
- vllm/engine/multiprocessing/__init__.py +183 -0
- vllm/engine/multiprocessing/client.py +745 -0
- vllm/engine/multiprocessing/engine.py +450 -0
- vllm/engine/output_processor/__init__.py +0 -0
- vllm/engine/output_processor/interfaces.py +74 -0
- vllm/engine/output_processor/multi_step.py +210 -0
- vllm/engine/output_processor/single_step.py +136 -0
- vllm/engine/output_processor/stop_checker.py +130 -0
- vllm/engine/output_processor/util.py +27 -0
- vllm/engine/protocol.py +302 -0
- vllm/entrypoints/__init__.py +0 -0
- vllm/entrypoints/api_server.py +177 -0
- vllm/entrypoints/chat_utils.py +1259 -0
- vllm/entrypoints/cli/__init__.py +0 -0
- vllm/entrypoints/cli/benchmark/__init__.py +0 -0
- vllm/entrypoints/cli/benchmark/base.py +38 -0
- vllm/entrypoints/cli/benchmark/latency.py +29 -0
- vllm/entrypoints/cli/benchmark/main.py +53 -0
- vllm/entrypoints/cli/benchmark/serve.py +29 -0
- vllm/entrypoints/cli/benchmark/throughput.py +29 -0
- vllm/entrypoints/cli/collect_env.py +35 -0
- vllm/entrypoints/cli/main.py +59 -0
- vllm/entrypoints/cli/openai.py +175 -0
- vllm/entrypoints/cli/serve.py +59 -0
- vllm/entrypoints/cli/types.py +24 -0
- vllm/entrypoints/launcher.py +146 -0
- vllm/entrypoints/llm.py +1450 -0
- vllm/entrypoints/logger.py +44 -0
- vllm/entrypoints/openai/__init__.py +0 -0
- vllm/entrypoints/openai/api_server.py +1130 -0
- vllm/entrypoints/openai/cli_args.py +296 -0
- vllm/entrypoints/openai/logits_processors.py +89 -0
- vllm/entrypoints/openai/protocol.py +1806 -0
- vllm/entrypoints/openai/run_batch.py +439 -0
- vllm/entrypoints/openai/serving_chat.py +1210 -0
- vllm/entrypoints/openai/serving_completion.py +557 -0
- vllm/entrypoints/openai/serving_embedding.py +245 -0
- vllm/entrypoints/openai/serving_engine.py +569 -0
- vllm/entrypoints/openai/serving_models.py +314 -0
- vllm/entrypoints/openai/serving_pooling.py +237 -0
- vllm/entrypoints/openai/serving_score.py +439 -0
- vllm/entrypoints/openai/serving_tokenization.py +147 -0
- vllm/entrypoints/openai/serving_transcription.py +421 -0
- vllm/entrypoints/openai/tool_parsers/__init__.py +19 -0
- vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py +163 -0
- vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py +254 -0
- vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py +232 -0
- vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py +370 -0
- vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py +211 -0
- vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py +303 -0
- vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py +262 -0
- vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py +342 -0
- vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py +110 -0
- vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py +292 -0
- vllm/entrypoints/openai/tool_parsers/utils.py +123 -0
- vllm/entrypoints/score_utils.py +49 -0
- vllm/entrypoints/ssl.py +74 -0
- vllm/entrypoints/utils.py +136 -0
- vllm/env_override.py +34 -0
- vllm/envs.py +800 -0
- vllm/executor/__init__.py +0 -0
- vllm/executor/executor_base.py +400 -0
- vllm/executor/mp_distributed_executor.py +243 -0
- vllm/executor/msgspec_utils.py +29 -0
- vllm/executor/multiproc_worker_utils.py +312 -0
- vllm/executor/ray_distributed_executor.py +700 -0
- vllm/executor/ray_utils.py +400 -0
- vllm/executor/uniproc_executor.py +141 -0
- vllm/forward_context.py +159 -0
- vllm/inputs/__init__.py +37 -0
- vllm/inputs/data.py +248 -0
- vllm/inputs/parse.py +121 -0
- vllm/inputs/preprocess.py +745 -0
- vllm/inputs/registry.py +212 -0
- vllm/jsontree.py +79 -0
- vllm/logger.py +210 -0
- vllm/logging_utils/__init__.py +7 -0
- vllm/logging_utils/formatter.py +17 -0
- vllm/logits_process.py +121 -0
- vllm/lora/__init__.py +0 -0
- vllm/lora/fully_sharded_layers.py +335 -0
- vllm/lora/layers.py +1263 -0
- vllm/lora/lora.py +198 -0
- vllm/lora/models.py +802 -0
- vllm/lora/ops/__init__.py +0 -0
- vllm/lora/ops/torch_ops/__init__.py +15 -0
- vllm/lora/ops/torch_ops/lora_ops.py +115 -0
- vllm/lora/ops/triton_ops/__init__.py +11 -0
- vllm/lora/ops/triton_ops/kernel_utils.py +243 -0
- vllm/lora/ops/triton_ops/lora_expand.py +293 -0
- vllm/lora/ops/triton_ops/lora_kernel_metadata.py +147 -0
- vllm/lora/ops/triton_ops/lora_shrink.py +247 -0
- vllm/lora/ops/triton_ops/utils.py +121 -0
- vllm/lora/peft_helper.py +115 -0
- vllm/lora/punica_wrapper/__init__.py +9 -0
- vllm/lora/punica_wrapper/punica_base.py +483 -0
- vllm/lora/punica_wrapper/punica_cpu.py +348 -0
- vllm/lora/punica_wrapper/punica_gpu.py +289 -0
- vllm/lora/punica_wrapper/punica_hpu.py +144 -0
- vllm/lora/punica_wrapper/punica_selector.py +20 -0
- vllm/lora/punica_wrapper/utils.py +161 -0
- vllm/lora/request.py +97 -0
- vllm/lora/resolver.py +83 -0
- vllm/lora/utils.py +237 -0
- vllm/lora/worker_manager.py +251 -0
- vllm/model_executor/__init__.py +15 -0
- vllm/model_executor/custom_op.py +153 -0
- vllm/model_executor/guided_decoding/__init__.py +180 -0
- vllm/model_executor/guided_decoding/guidance_decoding.py +63 -0
- vllm/model_executor/guided_decoding/guidance_logits_processors.py +85 -0
- vllm/model_executor/guided_decoding/guided_fields.py +42 -0
- vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py +66 -0
- vllm/model_executor/guided_decoding/outlines_decoding.py +154 -0
- vllm/model_executor/guided_decoding/outlines_logits_processors.py +271 -0
- vllm/model_executor/guided_decoding/reasoner/__init__.py +35 -0
- vllm/model_executor/guided_decoding/utils.py +241 -0
- vllm/model_executor/guided_decoding/xgrammar_decoding.py +425 -0
- vllm/model_executor/layers/__init__.py +0 -0
- vllm/model_executor/layers/activation.py +368 -0
- vllm/model_executor/layers/fused_moe/__init__.py +51 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H20.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=96,device_name=NVIDIA_H20.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=NVIDIA_H100.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +130 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +130 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +130 -0
- vllm/model_executor/layers/fused_moe/configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325X,block_shape=[128,128].json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=60,N=1408,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=60,N=176,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=60,N=352,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=60,N=704,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +138 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_L40S.json +173 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/README +12 -0
- vllm/model_executor/layers/fused_moe/cutlass_moe.py +180 -0
- vllm/model_executor/layers/fused_moe/deep_gemm_moe.py +294 -0
- vllm/model_executor/layers/fused_moe/fused_marlin_moe.py +374 -0
- vllm/model_executor/layers/fused_moe/fused_moe.py +1539 -0
- vllm/model_executor/layers/fused_moe/layer.py +949 -0
- vllm/model_executor/layers/fused_moe/moe_align_block_size.py +243 -0
- vllm/model_executor/layers/fused_moe/moe_pallas.py +64 -0
- vllm/model_executor/layers/fused_moe/moe_torch_iterative.py +59 -0
- vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py +416 -0
- vllm/model_executor/layers/fused_moe/utils.py +48 -0
- vllm/model_executor/layers/layernorm.py +277 -0
- vllm/model_executor/layers/lightning_attn.py +651 -0
- vllm/model_executor/layers/linear.py +1518 -0
- vllm/model_executor/layers/logits_processor.py +196 -0
- vllm/model_executor/layers/mamba/__init__.py +0 -0
- vllm/model_executor/layers/mamba/mamba2_metadata.py +109 -0
- vllm/model_executor/layers/mamba/mamba_mixer.py +244 -0
- vllm/model_executor/layers/mamba/mamba_mixer2.py +538 -0
- vllm/model_executor/layers/mamba/ops/__init__.py +0 -0
- vllm/model_executor/layers/mamba/ops/causal_conv1d.py +104 -0
- vllm/model_executor/layers/mamba/ops/mamba_ssm.py +415 -0
- vllm/model_executor/layers/mamba/ops/ssd_bmm.py +261 -0
- vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py +588 -0
- vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py +750 -0
- vllm/model_executor/layers/mamba/ops/ssd_combined.py +231 -0
- vllm/model_executor/layers/mamba/ops/ssd_state_passing.py +205 -0
- vllm/model_executor/layers/pooler.py +336 -0
- vllm/model_executor/layers/quantization/__init__.py +153 -0
- vllm/model_executor/layers/quantization/aqlm.py +374 -0
- vllm/model_executor/layers/quantization/awq.py +184 -0
- vllm/model_executor/layers/quantization/awq_marlin.py +518 -0
- vllm/model_executor/layers/quantization/awq_triton.py +319 -0
- vllm/model_executor/layers/quantization/base_config.py +145 -0
- vllm/model_executor/layers/quantization/bitblas.py +459 -0
- vllm/model_executor/layers/quantization/bitsandbytes.py +396 -0
- vllm/model_executor/layers/quantization/compressed_tensors/__init__.py +0 -0
- vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py +624 -0
- vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py +1100 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/__init__.py +20 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py +357 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_scheme.py +54 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_24.py +159 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py +119 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py +149 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py +110 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py +200 -0
- vllm/model_executor/layers/quantization/compressed_tensors/triton_scaled_mm.py +205 -0
- vllm/model_executor/layers/quantization/compressed_tensors/utils.py +213 -0
- vllm/model_executor/layers/quantization/deepspeedfp.py +193 -0
- vllm/model_executor/layers/quantization/experts_int8.py +194 -0
- vllm/model_executor/layers/quantization/fbgemm_fp8.py +168 -0
- vllm/model_executor/layers/quantization/fp8.py +832 -0
- vllm/model_executor/layers/quantization/gguf.py +408 -0
- vllm/model_executor/layers/quantization/gptq.py +276 -0
- vllm/model_executor/layers/quantization/gptq_bitblas.py +438 -0
- vllm/model_executor/layers/quantization/gptq_marlin.py +643 -0
- vllm/model_executor/layers/quantization/gptq_marlin_24.py +295 -0
- vllm/model_executor/layers/quantization/hqq_marlin.py +328 -0
- vllm/model_executor/layers/quantization/ipex_quant.py +250 -0
- vllm/model_executor/layers/quantization/kernels/__init__.py +0 -0
- vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py +89 -0
- vllm/model_executor/layers/quantization/kernels/mixed_precision/__init__.py +82 -0
- vllm/model_executor/layers/quantization/kernels/mixed_precision/allspark.py +115 -0
- vllm/model_executor/layers/quantization/kernels/mixed_precision/bitblas.py +299 -0
- vllm/model_executor/layers/quantization/kernels/mixed_precision/exllama.py +142 -0
- vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py +119 -0
- vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py +132 -0
- vllm/model_executor/layers/quantization/kernels/scaled_mm/ScaledMMLinearKernel.py +66 -0
- vllm/model_executor/layers/quantization/kernels/scaled_mm/__init__.py +86 -0
- vllm/model_executor/layers/quantization/kernels/scaled_mm/aiter.py +119 -0
- vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py +136 -0
- vllm/model_executor/layers/quantization/kernels/scaled_mm/triton.py +40 -0
- vllm/model_executor/layers/quantization/kernels/scaled_mm/xla.py +104 -0
- vllm/model_executor/layers/quantization/kv_cache.py +137 -0
- vllm/model_executor/layers/quantization/marlin.py +259 -0
- vllm/model_executor/layers/quantization/modelopt.py +410 -0
- vllm/model_executor/layers/quantization/moe_wna16.py +447 -0
- vllm/model_executor/layers/quantization/neuron_quant.py +67 -0
- vllm/model_executor/layers/quantization/ptpc_fp8.py +125 -0
- vllm/model_executor/layers/quantization/qqq.py +273 -0
- vllm/model_executor/layers/quantization/quark/__init__.py +0 -0
- vllm/model_executor/layers/quantization/quark/quark.py +385 -0
- vllm/model_executor/layers/quantization/quark/quark_moe.py +236 -0
- vllm/model_executor/layers/quantization/quark/schemes/__init__.py +7 -0
- vllm/model_executor/layers/quantization/quark/schemes/quark_scheme.py +54 -0
- vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py +142 -0
- vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_int8.py +121 -0
- vllm/model_executor/layers/quantization/quark/utils.py +102 -0
- vllm/model_executor/layers/quantization/schema.py +85 -0
- vllm/model_executor/layers/quantization/torchao.py +127 -0
- vllm/model_executor/layers/quantization/tpu_int8.py +119 -0
- vllm/model_executor/layers/quantization/utils/__init__.py +5 -0
- vllm/model_executor/layers/quantization/utils/allspark_utils.py +51 -0
- vllm/model_executor/layers/quantization/utils/bitblas_utils.py +198 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +18 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/fp8_utils.py +523 -0
- vllm/model_executor/layers/quantization/utils/gptq_utils.py +94 -0
- vllm/model_executor/layers/quantization/utils/int8_utils.py +459 -0
- vllm/model_executor/layers/quantization/utils/layer_utils.py +39 -0
- vllm/model_executor/layers/quantization/utils/machete_utils.py +32 -0
- vllm/model_executor/layers/quantization/utils/marlin_utils.py +413 -0
- vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py +110 -0
- vllm/model_executor/layers/quantization/utils/marlin_utils_test.py +164 -0
- vllm/model_executor/layers/quantization/utils/marlin_utils_test_24.py +464 -0
- vllm/model_executor/layers/quantization/utils/marlin_utils_test_qqq.py +127 -0
- vllm/model_executor/layers/quantization/utils/quant_utils.py +571 -0
- vllm/model_executor/layers/quantization/utils/w8a8_utils.py +404 -0
- vllm/model_executor/layers/rejection_sampler.py +400 -0
- vllm/model_executor/layers/resampler.py +269 -0
- vllm/model_executor/layers/rotary_embedding.py +1598 -0
- vllm/model_executor/layers/sampler.py +1221 -0
- vllm/model_executor/layers/spec_decode_base_sampler.py +258 -0
- vllm/model_executor/layers/typical_acceptance_sampler.py +172 -0
- vllm/model_executor/layers/utils.py +99 -0
- vllm/model_executor/layers/vocab_parallel_embedding.py +485 -0
- vllm/model_executor/model_loader/__init__.py +20 -0
- vllm/model_executor/model_loader/loader.py +1542 -0
- vllm/model_executor/model_loader/neuron.py +243 -0
- vllm/model_executor/model_loader/tensorizer.py +468 -0
- vllm/model_executor/model_loader/utils.py +171 -0
- vllm/model_executor/model_loader/weight_utils.py +749 -0
- vllm/model_executor/models/__init__.py +27 -0
- vllm/model_executor/models/adapters.py +247 -0
- vllm/model_executor/models/arctic.py +559 -0
- vllm/model_executor/models/aria.py +656 -0
- vllm/model_executor/models/aya_vision.py +461 -0
- vllm/model_executor/models/baichuan.py +469 -0
- vllm/model_executor/models/bamba.py +542 -0
- vllm/model_executor/models/bart.py +936 -0
- vllm/model_executor/models/bert.py +725 -0
- vllm/model_executor/models/blip.py +337 -0
- vllm/model_executor/models/blip2.py +717 -0
- vllm/model_executor/models/bloom.py +358 -0
- vllm/model_executor/models/chameleon.py +1135 -0
- vllm/model_executor/models/chatglm.py +476 -0
- vllm/model_executor/models/clip.py +410 -0
- vllm/model_executor/models/commandr.py +466 -0
- vllm/model_executor/models/constant_size_cache.py +136 -0
- vllm/model_executor/models/dbrx.py +469 -0
- vllm/model_executor/models/deepseek.py +484 -0
- vllm/model_executor/models/deepseek_mtp.py +266 -0
- vllm/model_executor/models/deepseek_v2.py +830 -0
- vllm/model_executor/models/deepseek_vl2.py +647 -0
- vllm/model_executor/models/eagle.py +247 -0
- vllm/model_executor/models/exaone.py +548 -0
- vllm/model_executor/models/fairseq2_llama.py +153 -0
- vllm/model_executor/models/falcon.py +508 -0
- vllm/model_executor/models/florence2.py +1102 -0
- vllm/model_executor/models/fuyu.py +388 -0
- vllm/model_executor/models/gemma.py +423 -0
- vllm/model_executor/models/gemma2.py +423 -0
- vllm/model_executor/models/gemma3.py +531 -0
- vllm/model_executor/models/gemma3_mm.py +716 -0
- vllm/model_executor/models/glm.py +22 -0
- vllm/model_executor/models/glm4.py +303 -0
- vllm/model_executor/models/glm4v.py +647 -0
- vllm/model_executor/models/gpt2.py +313 -0
- vllm/model_executor/models/gpt_bigcode.py +336 -0
- vllm/model_executor/models/gpt_j.py +337 -0
- vllm/model_executor/models/gpt_neox.py +330 -0
- vllm/model_executor/models/granite.py +494 -0
- vllm/model_executor/models/granite_speech.py +777 -0
- vllm/model_executor/models/granitemoe.py +435 -0
- vllm/model_executor/models/granitemoeshared.py +339 -0
- vllm/model_executor/models/gritlm.py +245 -0
- vllm/model_executor/models/grok1.py +560 -0
- vllm/model_executor/models/h2ovl.py +542 -0
- vllm/model_executor/models/idefics2_vision_model.py +387 -0
- vllm/model_executor/models/idefics3.py +767 -0
- vllm/model_executor/models/interfaces.py +569 -0
- vllm/model_executor/models/interfaces_base.py +163 -0
- vllm/model_executor/models/intern_vit.py +476 -0
- vllm/model_executor/models/internlm2.py +453 -0
- vllm/model_executor/models/internlm2_ve.py +146 -0
- vllm/model_executor/models/internvl.py +945 -0
- vllm/model_executor/models/jais.py +371 -0
- vllm/model_executor/models/jamba.py +590 -0
- vllm/model_executor/models/kimi_vl.py +577 -0
- vllm/model_executor/models/llama.py +619 -0
- vllm/model_executor/models/llama4.py +530 -0
- vllm/model_executor/models/llama_eagle.py +152 -0
- vllm/model_executor/models/llama_eagle3.py +232 -0
- vllm/model_executor/models/llava.py +869 -0
- vllm/model_executor/models/llava_next.py +582 -0
- vllm/model_executor/models/llava_next_video.py +470 -0
- vllm/model_executor/models/llava_onevision.py +954 -0
- vllm/model_executor/models/mamba.py +271 -0
- vllm/model_executor/models/mamba2.py +302 -0
- vllm/model_executor/models/mamba_cache.py +76 -0
- vllm/model_executor/models/medusa.py +210 -0
- vllm/model_executor/models/minicpm.py +592 -0
- vllm/model_executor/models/minicpm3.py +229 -0
- vllm/model_executor/models/minicpmo.py +725 -0
- vllm/model_executor/models/minicpmv.py +1287 -0
- vllm/model_executor/models/minimax_cache.py +35 -0
- vllm/model_executor/models/minimax_text_01.py +1261 -0
- vllm/model_executor/models/mistral3.py +598 -0
- vllm/model_executor/models/mixtral.py +485 -0
- vllm/model_executor/models/mixtral_quant.py +447 -0
- vllm/model_executor/models/mllama.py +1623 -0
- vllm/model_executor/models/mllama4.py +838 -0
- vllm/model_executor/models/mlp_speculator.py +205 -0
- vllm/model_executor/models/modernbert.py +325 -0
- vllm/model_executor/models/module_mapping.py +71 -0
- vllm/model_executor/models/molmo.py +1567 -0
- vllm/model_executor/models/moonvit.py +628 -0
- vllm/model_executor/models/mpt.py +329 -0
- vllm/model_executor/models/nemotron.py +506 -0
- vllm/model_executor/models/nemotron_nas.py +446 -0
- vllm/model_executor/models/nvlm_d.py +212 -0
- vllm/model_executor/models/olmo.py +390 -0
- vllm/model_executor/models/olmo2.py +412 -0
- vllm/model_executor/models/olmoe.py +449 -0
- vllm/model_executor/models/opt.py +410 -0
- vllm/model_executor/models/orion.py +356 -0
- vllm/model_executor/models/paligemma.py +397 -0
- vllm/model_executor/models/persimmon.py +342 -0
- vllm/model_executor/models/phi.py +354 -0
- vllm/model_executor/models/phi3.py +18 -0
- vllm/model_executor/models/phi3_small.py +463 -0
- vllm/model_executor/models/phi3v.py +722 -0
- vllm/model_executor/models/phi4mm.py +1263 -0
- vllm/model_executor/models/phi4mm_audio.py +1232 -0
- vllm/model_executor/models/phi4mm_utils.py +1883 -0
- vllm/model_executor/models/phimoe.py +666 -0
- vllm/model_executor/models/pixtral.py +1281 -0
- vllm/model_executor/models/plamo2.py +736 -0
- vllm/model_executor/models/prithvi_geospatial_mae.py +231 -0
- vllm/model_executor/models/qwen.py +360 -0
- vllm/model_executor/models/qwen2.py +552 -0
- vllm/model_executor/models/qwen2_5_omni_thinker.py +901 -0
- vllm/model_executor/models/qwen2_5_vl.py +1136 -0
- vllm/model_executor/models/qwen2_audio.py +402 -0
- vllm/model_executor/models/qwen2_moe.py +531 -0
- vllm/model_executor/models/qwen2_rm.py +130 -0
- vllm/model_executor/models/qwen2_vl.py +1409 -0
- vllm/model_executor/models/qwen3.py +319 -0
- vllm/model_executor/models/qwen3_moe.py +528 -0
- vllm/model_executor/models/qwen_vl.py +784 -0
- vllm/model_executor/models/registry.py +611 -0
- vllm/model_executor/models/roberta.py +332 -0
- vllm/model_executor/models/siglip.py +522 -0
- vllm/model_executor/models/skyworkr1v.py +949 -0
- vllm/model_executor/models/smolvlm.py +51 -0
- vllm/model_executor/models/solar.py +504 -0
- vllm/model_executor/models/stablelm.py +349 -0
- vllm/model_executor/models/starcoder2.py +355 -0
- vllm/model_executor/models/telechat2.py +139 -0
- vllm/model_executor/models/teleflm.py +78 -0
- vllm/model_executor/models/transformers.py +442 -0
- vllm/model_executor/models/ultravox.py +655 -0
- vllm/model_executor/models/utils.py +714 -0
- vllm/model_executor/models/vision.py +149 -0
- vllm/model_executor/models/whisper.py +746 -0
- vllm/model_executor/models/zamba2.py +1008 -0
- vllm/model_executor/parameter.py +458 -0
- vllm/model_executor/pooling_metadata.py +71 -0
- vllm/model_executor/sampling_metadata.py +596 -0
- vllm/model_executor/utils.py +53 -0
- vllm/multimodal/__init__.py +31 -0
- vllm/multimodal/audio.py +105 -0
- vllm/multimodal/base.py +218 -0
- vllm/multimodal/hasher.py +103 -0
- vllm/multimodal/image.py +77 -0
- vllm/multimodal/inputs.py +843 -0
- vllm/multimodal/parse.py +454 -0
- vllm/multimodal/processing.py +1760 -0
- vllm/multimodal/profiling.py +274 -0
- vllm/multimodal/registry.py +321 -0
- vllm/multimodal/utils.py +386 -0
- vllm/multimodal/video.py +166 -0
- vllm/outputs.py +521 -0
- vllm/platforms/__init__.py +286 -0
- vllm/platforms/cpu.py +182 -0
- vllm/platforms/cuda.py +463 -0
- vllm/platforms/hpu.py +94 -0
- vllm/platforms/interface.py +427 -0
- vllm/platforms/neuron.py +69 -0
- vllm/platforms/rocm.py +346 -0
- vllm/platforms/tpu.py +174 -0
- vllm/platforms/xpu.py +142 -0
- vllm/plugins/__init__.py +82 -0
- vllm/pooling_params.py +53 -0
- vllm/profiler/__init__.py +7 -0
- vllm/profiler/layerwise_profile.py +374 -0
- vllm/profiler/utils.py +147 -0
- vllm/prompt_adapter/__init__.py +0 -0
- vllm/prompt_adapter/layers.py +82 -0
- vllm/prompt_adapter/models.py +357 -0
- vllm/prompt_adapter/request.py +36 -0
- vllm/prompt_adapter/utils.py +97 -0
- vllm/prompt_adapter/worker_manager.py +178 -0
- vllm/py.typed +2 -0
- vllm/reasoning/__init__.py +12 -0
- vllm/reasoning/abs_reasoning_parsers.py +189 -0
- vllm/reasoning/deepseek_r1_reasoning_parser.py +172 -0
- vllm/reasoning/granite_reasoning_parser.py +362 -0
- vllm/sampling_params.py +598 -0
- vllm/scalar_type.py +335 -0
- vllm/scripts.py +14 -0
- vllm/sequence.py +1486 -0
- vllm/spec_decode/__init__.py +0 -0
- vllm/spec_decode/batch_expansion.py +505 -0
- vllm/spec_decode/draft_model_runner.py +335 -0
- vllm/spec_decode/interfaces.py +98 -0
- vllm/spec_decode/medusa_worker.py +137 -0
- vllm/spec_decode/metrics.py +212 -0
- vllm/spec_decode/mlp_speculator_worker.py +93 -0
- vllm/spec_decode/mqa_scorer.py +159 -0
- vllm/spec_decode/multi_step_worker.py +416 -0
- vllm/spec_decode/ngram_worker.py +195 -0
- vllm/spec_decode/proposer_worker_base.py +58 -0
- vllm/spec_decode/smaller_tp_proposer_worker.py +194 -0
- vllm/spec_decode/spec_decode_worker.py +1324 -0
- vllm/spec_decode/target_model_runner.py +44 -0
- vllm/spec_decode/top1_proposer.py +274 -0
- vllm/spec_decode/util.py +276 -0
- vllm/test_utils.py +129 -0
- vllm/third_party/__init__.py +0 -0
- vllm/third_party/pynvml.py +6139 -0
- vllm/tracing.py +130 -0
- vllm/transformers_utils/__init__.py +19 -0
- vllm/transformers_utils/config.py +813 -0
- vllm/transformers_utils/configs/__init__.py +52 -0
- vllm/transformers_utils/configs/arctic.py +206 -0
- vllm/transformers_utils/configs/chatglm.py +71 -0
- vllm/transformers_utils/configs/cohere2.py +194 -0
- vllm/transformers_utils/configs/dbrx.py +280 -0
- vllm/transformers_utils/configs/deepseek_vl2.py +216 -0
- vllm/transformers_utils/configs/eagle.py +65 -0
- vllm/transformers_utils/configs/exaone.py +191 -0
- vllm/transformers_utils/configs/falcon.py +89 -0
- vllm/transformers_utils/configs/h2ovl.py +15 -0
- vllm/transformers_utils/configs/internvl.py +53 -0
- vllm/transformers_utils/configs/jais.py +237 -0
- vllm/transformers_utils/configs/kimi_vl.py +36 -0
- vllm/transformers_utils/configs/medusa.py +62 -0
- vllm/transformers_utils/configs/mllama.py +30 -0
- vllm/transformers_utils/configs/mlp_speculator.py +67 -0
- vllm/transformers_utils/configs/moonvit.py +32 -0
- vllm/transformers_utils/configs/mpt.py +179 -0
- vllm/transformers_utils/configs/nemotron.py +204 -0
- vllm/transformers_utils/configs/nvlm_d.py +14 -0
- vllm/transformers_utils/configs/skyworkr1v.py +53 -0
- vllm/transformers_utils/configs/solar.py +246 -0
- vllm/transformers_utils/configs/telechat2.py +63 -0
- vllm/transformers_utils/configs/ultravox.py +107 -0
- vllm/transformers_utils/detokenizer.py +167 -0
- vllm/transformers_utils/detokenizer_utils.py +188 -0
- vllm/transformers_utils/processor.py +210 -0
- vllm/transformers_utils/processors/__init__.py +6 -0
- vllm/transformers_utils/processors/deepseek_vl2.py +363 -0
- vllm/transformers_utils/s3_utils.py +161 -0
- vllm/transformers_utils/tokenizer.py +291 -0
- vllm/transformers_utils/tokenizer_base.py +146 -0
- vllm/transformers_utils/tokenizer_group.py +110 -0
- vllm/transformers_utils/tokenizers/__init__.py +9 -0
- vllm/transformers_utils/tokenizers/mistral.py +483 -0
- vllm/transformers_utils/utils.py +98 -0
- vllm/triton_utils/__init__.py +5 -0
- vllm/triton_utils/importing.py +53 -0
- vllm/usage/__init__.py +0 -0
- vllm/usage/usage_lib.py +255 -0
- vllm/utils.py +2692 -0
- vllm/v1/__init__.py +0 -0
- vllm/v1/attention/__init__.py +0 -0
- vllm/v1/attention/backends/__init__.py +0 -0
- vllm/v1/attention/backends/flash_attn.py +783 -0
- vllm/v1/attention/backends/flashinfer.py +638 -0
- vllm/v1/attention/backends/mla/__init__.py +0 -0
- vllm/v1/attention/backends/mla/common.py +974 -0
- vllm/v1/attention/backends/mla/flashmla.py +149 -0
- vllm/v1/attention/backends/mla/triton_mla.py +118 -0
- vllm/v1/attention/backends/pallas.py +221 -0
- vllm/v1/attention/backends/triton_attn.py +198 -0
- vllm/v1/core/__init__.py +0 -0
- vllm/v1/core/block_pool.py +281 -0
- vllm/v1/core/encoder_cache_manager.py +149 -0
- vllm/v1/core/kv_cache_manager.py +385 -0
- vllm/v1/core/kv_cache_utils.py +744 -0
- vllm/v1/core/sched/__init__.py +0 -0
- vllm/v1/core/sched/interface.py +134 -0
- vllm/v1/core/sched/output.py +126 -0
- vllm/v1/core/sched/scheduler.py +838 -0
- vllm/v1/core/sched/utils.py +22 -0
- vllm/v1/core/specialized_manager.py +161 -0
- vllm/v1/engine/__init__.py +166 -0
- vllm/v1/engine/async_llm.py +532 -0
- vllm/v1/engine/core.py +701 -0
- vllm/v1/engine/core_client.py +942 -0
- vllm/v1/engine/detokenizer.py +260 -0
- vllm/v1/engine/exceptions.py +16 -0
- vllm/v1/engine/llm_engine.py +285 -0
- vllm/v1/engine/logprobs.py +198 -0
- vllm/v1/engine/mm_input_cache.py +82 -0
- vllm/v1/engine/output_processor.py +420 -0
- vllm/v1/engine/parallel_sampling.py +132 -0
- vllm/v1/engine/processor.py +387 -0
- vllm/v1/executor/__init__.py +0 -0
- vllm/v1/executor/abstract.py +112 -0
- vllm/v1/executor/multiproc_executor.py +480 -0
- vllm/v1/executor/ray_distributed_executor.py +61 -0
- vllm/v1/kv_cache_interface.py +166 -0
- vllm/v1/metrics/__init__.py +0 -0
- vllm/v1/metrics/loggers.py +498 -0
- vllm/v1/metrics/stats.py +238 -0
- vllm/v1/outputs.py +111 -0
- vllm/v1/request.py +178 -0
- vllm/v1/sample/__init__.py +0 -0
- vllm/v1/sample/metadata.py +43 -0
- vllm/v1/sample/ops/__init__.py +0 -0
- vllm/v1/sample/ops/bad_words.py +38 -0
- vllm/v1/sample/ops/penalties.py +58 -0
- vllm/v1/sample/ops/topk_topp_sampler.py +315 -0
- vllm/v1/sample/rejection_sampler.py +631 -0
- vllm/v1/sample/sampler.py +270 -0
- vllm/v1/sample/tpu/__init__.py +0 -0
- vllm/v1/sample/tpu/metadata.py +118 -0
- vllm/v1/sample/tpu/sampler.py +154 -0
- vllm/v1/serial_utils.py +274 -0
- vllm/v1/spec_decode/__init__.py +0 -0
- vllm/v1/spec_decode/eagle.py +318 -0
- vllm/v1/spec_decode/metadata.py +61 -0
- vllm/v1/spec_decode/metrics.py +164 -0
- vllm/v1/spec_decode/ngram_proposer.py +131 -0
- vllm/v1/spec_decode/utils.py +18 -0
- vllm/v1/stats/__init__.py +0 -0
- vllm/v1/stats/common.py +453 -0
- vllm/v1/structured_output/__init__.py +113 -0
- vllm/v1/structured_output/backend_guidance.py +215 -0
- vllm/v1/structured_output/backend_types.py +96 -0
- vllm/v1/structured_output/backend_xgrammar.py +299 -0
- vllm/v1/structured_output/request.py +84 -0
- vllm/v1/structured_output/utils.py +174 -0
- vllm/v1/utils.py +249 -0
- vllm/v1/worker/__init__.py +0 -0
- vllm/v1/worker/block_table.py +87 -0
- vllm/v1/worker/gpu_input_batch.py +677 -0
- vllm/v1/worker/gpu_model_runner.py +1776 -0
- vllm/v1/worker/gpu_worker.py +349 -0
- vllm/v1/worker/lora_model_runner_mixin.py +145 -0
- vllm/v1/worker/tpu_model_runner.py +1419 -0
- vllm/v1/worker/tpu_worker.py +260 -0
- vllm/v1/worker/utils.py +74 -0
- vllm/v1/worker/worker_base.py +64 -0
- vllm/version.py +40 -0
- vllm/vllm_flash_attn/.gitkeep +0 -0
- vllm/worker/__init__.py +0 -0
- vllm/worker/cache_engine.py +144 -0
- vllm/worker/cpu_enc_dec_model_runner.py +323 -0
- vllm/worker/cpu_model_runner.py +668 -0
- vllm/worker/cpu_pooling_model_runner.py +122 -0
- vllm/worker/cpu_worker.py +400 -0
- vllm/worker/enc_dec_model_runner.py +542 -0
- vllm/worker/hpu_model_runner.py +2221 -0
- vllm/worker/hpu_worker.py +483 -0
- vllm/worker/model_runner.py +2056 -0
- vllm/worker/model_runner_base.py +281 -0
- vllm/worker/multi_step_hpu_worker.py +122 -0
- vllm/worker/multi_step_model_runner.py +908 -0
- vllm/worker/multi_step_tpu_worker.py +107 -0
- vllm/worker/multi_step_worker.py +196 -0
- vllm/worker/neuron_model_runner.py +336 -0
- vllm/worker/neuron_worker.py +138 -0
- vllm/worker/pooling_model_runner.py +200 -0
- vllm/worker/tpu_model_runner.py +908 -0
- vllm/worker/tpu_worker.py +332 -0
- vllm/worker/utils.py +52 -0
- vllm/worker/worker.py +570 -0
- vllm/worker/worker_base.py +644 -0
- vllm/worker/xpu_model_runner.py +603 -0
- vllm/worker/xpu_worker.py +185 -0
- vllm_cpu-0.8.5.post2.dist-info/METADATA +309 -0
- vllm_cpu-0.8.5.post2.dist-info/RECORD +1103 -0
- vllm_cpu-0.8.5.post2.dist-info/WHEEL +5 -0
- vllm_cpu-0.8.5.post2.dist-info/entry_points.txt +2 -0
- vllm_cpu-0.8.5.post2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1103 @@
|
|
|
1
|
+
vllm/_C.abi3.so,sha256=DmNC_Cb3URhfcK_4ku6Rtb2KO4qbZp_3BG9-tL1HJJ0,19280616
|
|
2
|
+
vllm/__init__.py,sha256=W0nZA68TuQKHgaVuYMvu4cR1R65gqrgNQOa3JdEVwAA,6582
|
|
3
|
+
vllm/_custom_ops.py,sha256=nSfiXvbc-JHJihhQW4OkLJq3Uy74YaILzP2eB3RhRLI,63292
|
|
4
|
+
vllm/_ipex_ops.py,sha256=RD5-a6UtbwTQ6yahU1y_2Zckh4E2j2TlUsY6-efxnRM,8687
|
|
5
|
+
vllm/_version.py,sha256=bjIAh3HUnB-ZgZi6fckX1QuV5_Zo7VIDIYuxsk5BjKQ,719
|
|
6
|
+
vllm/beam_search.py,sha256=bL5N3-Whe38DNaBMnUywj9JoDyqpXYbNJwt9oSf2P84,2386
|
|
7
|
+
vllm/collect_env.py,sha256=93OZPRA19A5f5VJzD0g-HaSTD4o6JgFqPcUERPVTmy8,27285
|
|
8
|
+
vllm/config.py,sha256=AW8SgaBRnGNrrOEEAS_40cGZ24mroZDW2c2BDWfJGkg,183428
|
|
9
|
+
vllm/connections.py,sha256=3mo1DsM_BnR4SgURHhMxea5pkKKYRv0WAXCqz9khWHA,4989
|
|
10
|
+
vllm/env_override.py,sha256=v5bFL5pZEz9a1Q8xPmyKb7mrxNIb_y7cWr4E-FcBbS4,1475
|
|
11
|
+
vllm/envs.py,sha256=W9C4Ir1vvOH39QkN0Bd-yg4aftWvS4E1hRBC8EqCoF8,32112
|
|
12
|
+
vllm/forward_context.py,sha256=vf7Ld8ERTl0ioJmUxB_shruNfcvPW12sJ-tWu1PGS94,6528
|
|
13
|
+
vllm/jsontree.py,sha256=uEJ99TCQv26_ZrRmZuZAuIrh6U8n8KAAQod4WUP4pLM,2143
|
|
14
|
+
vllm/logger.py,sha256=wIy4Qe8RvxqyZjebeX2sR8OE2g9Wq2MSu7H7lzTraxs,7339
|
|
15
|
+
vllm/logits_process.py,sha256=OBOGJ6XdkTYyAbstBAcngW4JN1mr9euaFnHES6uCX-A,4649
|
|
16
|
+
vllm/outputs.py,sha256=dgBIWA0VrQ9KXhru_0_NtAaU35urHTcNnqhrXkOWxS4,20255
|
|
17
|
+
vllm/pooling_params.py,sha256=TQcDIM8CKnH9_Io24akBrVhZUZIl9_zhuO5szeEmBqs,2017
|
|
18
|
+
vllm/py.typed,sha256=F5LUrt0voM87SNuuOky2X9veCVDqJUgRg_VohYqDigY,65
|
|
19
|
+
vllm/sampling_params.py,sha256=yP7zdMacdcAfd_81kTolWukiMb9TY6QaaS_O1m8Qswg,26489
|
|
20
|
+
vllm/scalar_type.py,sha256=WkMflqmaY02SmZAngPoP1-x8cmzfBj_rcpl2dhph-r8,11968
|
|
21
|
+
vllm/scripts.py,sha256=f4JQeU_63yCFEvUth0qKfLX18lsPDgcxBveXqvG7js8,432
|
|
22
|
+
vllm/sequence.py,sha256=rFgzP-vtzBEK-zsIJijhkx91bTg8DR-nIh3z_ieky28,58721
|
|
23
|
+
vllm/test_utils.py,sha256=uVMsqTrMcwyNNJaxlO6mvka5pmUE7xUAvOBapd_R7hs,5996
|
|
24
|
+
vllm/tracing.py,sha256=u98azd2ER4HnjempIUdqZhPOvQaK2tZfbSOUTZn_OMo,4776
|
|
25
|
+
vllm/utils.py,sha256=O-sWK3pQ-d8hHP6rzdDsQj0DOV7yMIFqf8PP_v_bR50,90469
|
|
26
|
+
vllm/version.py,sha256=FOUZzkwMR0KzHLv4gr6R25HwulikpB9H8Vver3VX8sE,1306
|
|
27
|
+
vllm/adapter_commons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
|
+
vllm/adapter_commons/layers.py,sha256=rdsvBlYTiblidwK2EYkl3UdB4xvopcrd8li3vPFTbwo,406
|
|
29
|
+
vllm/adapter_commons/models.py,sha256=tuuVafwk9Yvfl8uCXSg1Whzm2Wsq8W7JqypRm_XpBzg,2807
|
|
30
|
+
vllm/adapter_commons/request.py,sha256=GoLdKUNCU6x-8plK95CuLOy56QOSYW6IQAg6ZQg76C8,617
|
|
31
|
+
vllm/adapter_commons/utils.py,sha256=ytCCfLdk-FwWCflWMSTazUPg2gNmXjaovEWbpvQ6fe0,3271
|
|
32
|
+
vllm/adapter_commons/worker_manager.py,sha256=qBj7swkk7LJoQi2GpueMQFMbVPjphnuzOKIc36oQ6Ts,928
|
|
33
|
+
vllm/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
|
+
vllm/assets/audio.py,sha256=nUfScoi0ZPE4E0VsmcIp5-qT-0yJXMVHqbKC7HUkJTw,1082
|
|
35
|
+
vllm/assets/base.py,sha256=IdwWieuPqaAaYKo2ybSfg07dt86k4-NrP1BHPCrFN2s,1196
|
|
36
|
+
vllm/assets/image.py,sha256=Kn7HrcJEIXKUcx7IUUcixkslwKannvgGVXWa77bI19s,922
|
|
37
|
+
vllm/assets/video.py,sha256=xh4lUSaWIKJcnAFVvxnzWDGPC23WW5GZiXDNCUpeOhI,3159
|
|
38
|
+
vllm/attention/__init__.py,sha256=YW7x8Ahq9TPxx8GiduskAliKrJUM5i-kDrOjuLep7aA,610
|
|
39
|
+
vllm/attention/layer.py,sha256=ow66LS13K7O3wkn9horD-OIZ-ckr2fh3nbhQUy0ddKY,18033
|
|
40
|
+
vllm/attention/selector.py,sha256=Jz6nwwL_GqmQyMfrPvt5ju9fAnM8v5sGZVpGLSHOPbU,5865
|
|
41
|
+
vllm/attention/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
|
+
vllm/attention/backends/abstract.py,sha256=4E2pYsrrdxQ2ztuyyBQj2C_ud4O4crvrS3ipADOCXCY,9357
|
|
43
|
+
vllm/attention/backends/blocksparse_attn.py,sha256=uv0BHWZJJpyqG4b-4LTKFIgtrtu2AgsVpCenGnCGd5s,18006
|
|
44
|
+
vllm/attention/backends/cpu_mla.py,sha256=gUquZuAA_avJVo5cdwRTIxzc3AlihZ8ZKoPxnhQSp8U,11115
|
|
45
|
+
vllm/attention/backends/flash_attn.py,sha256=iCHOfdsb7waXXokUMa4TbFqwoYaU_7DaCGlbBvNhiPs,44480
|
|
46
|
+
vllm/attention/backends/flashinfer.py,sha256=CPaaTlZaN4A5FD3EEgfr4mMHBxwpvlWSwKogc-ZBWjo,47373
|
|
47
|
+
vllm/attention/backends/flashmla.py,sha256=A18b0mudfHs6-KukAkXmYE4JNO5anhSdvDZcKSNF7dE,9023
|
|
48
|
+
vllm/attention/backends/hpu_attn.py,sha256=J44NMVBFVi9Im7NMayWhR-FWoINsVy41Re6IQ6b7t3s,11856
|
|
49
|
+
vllm/attention/backends/ipex_attn.py,sha256=TRo1vH7RcrLM8XxY1cMF0FadFNJchPUnoXGbcF1zgYs,14996
|
|
50
|
+
vllm/attention/backends/pallas.py,sha256=hk32u6fUdChSnwAPQeSymWtAD1eLwt63KPZxW9-EFjs,13639
|
|
51
|
+
vllm/attention/backends/placeholder_attn.py,sha256=0noOZ6cJXaPmL_rhs1UKI90Xj9hnHTVfDxdAsyWQjlg,16096
|
|
52
|
+
vllm/attention/backends/rocm_aiter_mla.py,sha256=NrVW5fgaPd0eum9nx2ZNOdyChHqU_l36bIrjrlBW0kg,17009
|
|
53
|
+
vllm/attention/backends/rocm_flash_attn.py,sha256=SJ1b0jdYyrSjVTjjBpgEjeI0--yuCf_ahy5PFMttSuw,42404
|
|
54
|
+
vllm/attention/backends/torch_sdpa.py,sha256=3IJ1wAK73YuIpeAZOMGN1gMhjD55wNJLgPEWorZLCrg,27346
|
|
55
|
+
vllm/attention/backends/triton_mla.py,sha256=ZIAU3VLXDYY0o5W_pblhEkIyMfjPbWkceASUTMdYf14,3954
|
|
56
|
+
vllm/attention/backends/utils.py,sha256=UgUuOyNyPu3TU71pLHWtvBFzR8uhZrIun7zFgFMP2-w,25864
|
|
57
|
+
vllm/attention/backends/xformers.py,sha256=PFtsIRNkym9ceLMS_KWqUGGd0sk13-hbR5tiAqHSjn0,33633
|
|
58
|
+
vllm/attention/backends/mla/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
59
|
+
vllm/attention/backends/mla/common.py,sha256=rj86xTKsbfY12yx9m4SkVSTw_Eh3rBVMKoqHoM_IH2s,60795
|
|
60
|
+
vllm/attention/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
61
|
+
vllm/attention/ops/chunked_prefill_paged_decode.py,sha256=fAhWkjmY7Epy34L07KV5r0KGl_9F-Y5zq76lQY7-G6s,12441
|
|
62
|
+
vllm/attention/ops/flashmla.py,sha256=Jsx27AMgaf6_XLF68n-zkBcZx8P5wap7CmAZhgX7IK4,3884
|
|
63
|
+
vllm/attention/ops/hpu_paged_attn.py,sha256=Re6jJVV6Mz6IrgWwXhvFOwO6pUcRdmKl0ssh8cifiqw,3460
|
|
64
|
+
vllm/attention/ops/ipex_attn.py,sha256=US7b2ANDdhbPuglGfaCcO5XcvgRB0kZiDLwT462jteU,5526
|
|
65
|
+
vllm/attention/ops/merge_attn_states.py,sha256=2BvJ9H7A30OfnnntsG5tjLPZNfbNteLG_JMicqhyuFE,1637
|
|
66
|
+
vllm/attention/ops/nki_flash_attn.py,sha256=0tEpBTS-QdBZvj0Zh9NWOVu7rta3ITcWjdIOBPV2u7I,32612
|
|
67
|
+
vllm/attention/ops/paged_attn.py,sha256=c0lJ_5D-rniDyKadqr3h_WIGewY4rcHS1as2EWsb0ow,8319
|
|
68
|
+
vllm/attention/ops/prefix_prefill.py,sha256=J6UJwFg_0wkvDqipBrdUY0OT1QhrJW-C_N1-3LCqtxk,30979
|
|
69
|
+
vllm/attention/ops/rocm_aiter_mla.py,sha256=15lHeCs8TnD8YkAtcVawTIwb9mjWXwO1rlifpL2f_hY,1475
|
|
70
|
+
vllm/attention/ops/rocm_aiter_paged_attn.py,sha256=WVCMDB-wDHPc8zRfmZb1M76UGGxbZO5y2dJk60GB-3o,3885
|
|
71
|
+
vllm/attention/ops/triton_decode_attention.py,sha256=pQ1V0nxf2fIBxHIxr4CpAEV5bJ6ZBhKNsFHOa-AmpNQ,19124
|
|
72
|
+
vllm/attention/ops/triton_flash_attention.py,sha256=TNXhU3ZBfiy2-h1Cb_YubHQS0jcDX2l1W5is9nOiUI4,51287
|
|
73
|
+
vllm/attention/ops/triton_merge_attn_states.py,sha256=udVXvfHxIxHYq2mGT_5dzWsRIkXXtphyhT0s0cfR9aA,3495
|
|
74
|
+
vllm/attention/ops/blocksparse_attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
75
|
+
vllm/attention/ops/blocksparse_attention/blocksparse_attention_kernel.py,sha256=u_96xPRC15lmK8hK5npn3-G8DyY-I1k3spa5sfzPi3U,11535
|
|
76
|
+
vllm/attention/ops/blocksparse_attention/interface.py,sha256=QfWOzV2KjpJu7goLLOFegFbkIOA484Io3T6E-T8KXwo,9326
|
|
77
|
+
vllm/attention/ops/blocksparse_attention/utils.py,sha256=vitFMc2NXAqjiyWKt23ELVxzKT_1BE10TvA3wYPjooY,8085
|
|
78
|
+
vllm/attention/utils/fa_utils.py,sha256=S0UZ-Ew2gJ0rpD53YEvEf4-_X-Ppd8om4o8N_HmT2rY,2018
|
|
79
|
+
vllm/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
80
|
+
vllm/benchmarks/datasets.py,sha256=yLwGVj3GuBCnlCJaTSxZnx-CBmREwwJy1siEJ90bFi8,31245
|
|
81
|
+
vllm/benchmarks/endpoint_request_func.py,sha256=3-qqeaNP0Y6mbxyoQYgSjmAW-JQiM1wpNRjN7LkRFqk,5873
|
|
82
|
+
vllm/benchmarks/latency.py,sha256=aBg2dEQgPkCwy5XrLyrFxM5_cvPPwL5QennF906NbWI,6385
|
|
83
|
+
vllm/benchmarks/serve.py,sha256=uuXCjdsOCp-1j4ZHrQiZsJR299nKExbAaO6DqyaLRfA,36139
|
|
84
|
+
vllm/benchmarks/throughput.py,sha256=cOw0zwhN2tnTcxQg_IjijOvSvf1PV-2aBroUSPo_Czw,24771
|
|
85
|
+
vllm/benchmarks/utils.py,sha256=gpiQUYOxOG6Bve3zQeOoKWpj6fTIm3pyJ8JllU0vK0Y,2178
|
|
86
|
+
vllm/compilation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
87
|
+
vllm/compilation/backends.py,sha256=fEwWj-YYjgqr8vlbu5wJSfN4arzl8VzrIOtoIssBrTo,28909
|
|
88
|
+
vllm/compilation/compiler_interface.py,sha256=4wyv8RFXuhwsFPSTTxqIEDRlQ-T-mv-UmqBkTkMwDMM,18234
|
|
89
|
+
vllm/compilation/counter.py,sha256=lGVEXL9lTnNWEBc17xMUna4WplhdPUlQaLQPn5qjHWA,937
|
|
90
|
+
vllm/compilation/decorators.py,sha256=u0kOMbxoEOtxTPAChFijXWyw3nU3QwmlkX26J5uQYsc,10246
|
|
91
|
+
vllm/compilation/fix_functionalization.py,sha256=z8l6h_C6-UkbOR5uxo55R1ETOIRfCTj6dawujHzap-o,7953
|
|
92
|
+
vllm/compilation/fusion.py,sha256=lxyDdi-VBQ3gRIUeOS4nhPoFP0bSsr6XYzhpduTD8nE,24533
|
|
93
|
+
vllm/compilation/fx_utils.py,sha256=LdfaaWz7sReX4FYBTLDeg7SQ2enmFnXsNRin30KaLBA,2023
|
|
94
|
+
vllm/compilation/inductor_pass.py,sha256=P1f2VP2ODFNx0MFYGI32bd5TPkznKg1fSbC90f5uk-Q,3388
|
|
95
|
+
vllm/compilation/monitor.py,sha256=p4LQ1roNhKQE8M8zhlYwgdnSFBFi6EkDUazr6X2usc0,1346
|
|
96
|
+
vllm/compilation/multi_output_match.py,sha256=FKsWQk73_olG_sIh7veaHzfBecoKURWD6SRH5_Ozw20,3839
|
|
97
|
+
vllm/compilation/noop_elimination.py,sha256=Eeu6RBwaLjuNBdlp5KgvKy58whQbWDy-KLqA3k-xans,5207
|
|
98
|
+
vllm/compilation/pass_manager.py,sha256=P2zVO1hOc31xR2GbX_jNpWfm5kd4xZI90-_u7PvyhoE,2656
|
|
99
|
+
vllm/compilation/sequence_parallelism.py,sha256=W-MIPU-5CaIn2WizyTCOqw5iRYUxLnu9iNnSu5IzYRQ,9636
|
|
100
|
+
vllm/compilation/torch25_custom_graph_pass.py,sha256=2Uegyh-fmx5OsvKOFU4byAXdvN_XqTel3VdHTU-XplQ,1361
|
|
101
|
+
vllm/compilation/vllm_inductor_pass.py,sha256=I06mbGhQLX4gj2W6CUDoqw1aZ36Pr9Xi6VnQL5ypCyc,2460
|
|
102
|
+
vllm/compilation/wrapper.py,sha256=pCzXlNs-_49mKiQJFE3Kg02lEkHf_CfbVk6dcPlhnok,5631
|
|
103
|
+
vllm/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
104
|
+
vllm/core/block_manager.py,sha256=XoH1P87ofSfcNqcPpKuhDlrpV_8TlO2zkodkAezveJQ,22204
|
|
105
|
+
vllm/core/evictor.py,sha256=Jy-eZwgdA4Q2F0buFgDNv2fPeiIxJAUEFtyKYz1VL40,5446
|
|
106
|
+
vllm/core/interfaces.py,sha256=Uou6g2s9rlGrSYtk8x-TmeFRHK-SsvB2w2-pNxgfUAs,3590
|
|
107
|
+
vllm/core/placeholder_block_space_manager.py,sha256=7HEHgCYHMNdAvd7ESfplHkjUQbqf8jOn6zXBEk_ShRo,2971
|
|
108
|
+
vllm/core/scheduler.py,sha256=rR1ELyMQNljfzwZmtuNOwPmXIYW-gBLS-axLczR_Whw,90120
|
|
109
|
+
vllm/core/block/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
110
|
+
vllm/core/block/block_table.py,sha256=HMwMwVY8pHLjlje6gfVsrHvyvLupcd3SMAvgcsUcnxM,16022
|
|
111
|
+
vllm/core/block/common.py,sha256=cfDse1iNYLehOXrSfUypTmakGAdSSXrX0YmodFPpJjI,13200
|
|
112
|
+
vllm/core/block/cpu_gpu_block_allocator.py,sha256=1zWQKDaRTIkkZIwP-pJ_i2bIbIcTRk3MBhpT3V06V3k,16947
|
|
113
|
+
vllm/core/block/interfaces.py,sha256=yx7jEGmrXqAKyDQ76oEGZdfCAKBIld_5Tv7mmf7ra5E,8144
|
|
114
|
+
vllm/core/block/naive_block.py,sha256=EgYRm94K88DyFM3Xjfa1A8hWuGZStL0nIqiZqbVxQMI,16355
|
|
115
|
+
vllm/core/block/prefix_caching_block.py,sha256=tBjZ58xTQmmzx2s24BcneVyCf5F-aaOqgJz07bxbfoo,44182
|
|
116
|
+
vllm/core/block/utils.py,sha256=osLxVwSUYjOsLeal8RzpmGT72F4aU3qbTGuYMdWIsHY,928
|
|
117
|
+
vllm/device_allocator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
118
|
+
vllm/device_allocator/cumem.py,sha256=se3229P3JRyk_iiXngyqnno4iqMESYTJHaMSF6YSkkw,10962
|
|
119
|
+
vllm/distributed/__init__.py,sha256=Rk8k7bXtcPNaihFk5qOn__toXjElImWbszyRJBzeYHA,122
|
|
120
|
+
vllm/distributed/communication_op.py,sha256=RauC0Jv4NtSia7pdV5eZaSfzXUAt1g1d5mYaxWi6f8U,1499
|
|
121
|
+
vllm/distributed/parallel_state.py,sha256=SupbqBGESH9G1oMpWgYFStv8URv-AMHR-eLIwriZSLI,46729
|
|
122
|
+
vllm/distributed/utils.py,sha256=FmlqkrolzZumCbBZESxtAkYnDxxOQSyUx9UF6Gke9nE,14559
|
|
123
|
+
vllm/distributed/device_communicators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
124
|
+
vllm/distributed/device_communicators/base_device_communicator.py,sha256=S2m9N5_I4ilFgY9gU0hxIBNLtCa-bGoho0U1_tVq79I,6227
|
|
125
|
+
vllm/distributed/device_communicators/cpu_communicator.py,sha256=5Z9j1ziqDwYs3hQPwKpMk9er605nGf9JSIW6C6ncHmY,5276
|
|
126
|
+
vllm/distributed/device_communicators/cuda_communicator.py,sha256=JJl2m8gbFM5o8Hjxpiju-ImizmJDEhxOJa4iaEacpcU,5018
|
|
127
|
+
vllm/distributed/device_communicators/cuda_wrapper.py,sha256=X6-QuSBtX2ImLvcLGJANUt_FxTYubJnVAKEewwdztqc,7128
|
|
128
|
+
vllm/distributed/device_communicators/custom_all_reduce.py,sha256=cV6XxtIF-GTLONnJdPelX7uO3IfWaYYhsNjlyxaiKXc,12544
|
|
129
|
+
vllm/distributed/device_communicators/custom_all_reduce_utils.py,sha256=g-GEhCjE6dGzRDYHKYDP5FqFW9snR-YMLcIQOYt0Zz8,10474
|
|
130
|
+
vllm/distributed/device_communicators/hpu_communicator.py,sha256=LSIPK-d_v2ICOii97Du1VhDAVHq0lggXnW8GOahmYiM,1767
|
|
131
|
+
vllm/distributed/device_communicators/neuron_communicator.py,sha256=qrRh1kLgdB2bBYrEJnSs5nTD0YJ6DW9n7_CPJ9x15eo,624
|
|
132
|
+
vllm/distributed/device_communicators/pynccl.py,sha256=Z4QXdGf_qzz2J3PfKtU7bEG29oxbXy4dEorGB9WNB3k,9142
|
|
133
|
+
vllm/distributed/device_communicators/pynccl_wrapper.py,sha256=3J0dyH47BKC4tFGkIaSoJe033WoimULq9SdM76LqMTI,13706
|
|
134
|
+
vllm/distributed/device_communicators/shm_broadcast.py,sha256=P4P6ngwyzmVyYRnLGAbeWreTS52P1ihrSvBTAmYE7dc,24100
|
|
135
|
+
vllm/distributed/device_communicators/tpu_communicator.py,sha256=48PMU9TmB9eFh45p3J-a4OIcm0Oa2VgWaLWyP2LclPk,3808
|
|
136
|
+
vllm/distributed/device_communicators/xpu_communicator.py,sha256=mP9w-kIxqxd98-htC2QHLLQqc7T9YtXGJqLR6K4hWKA,2107
|
|
137
|
+
vllm/distributed/kv_transfer/README.md,sha256=B4s4s-6F9FP4wbgmrYJDSpdUu0_Yq4EeWLEyZMNkAyk,2006
|
|
138
|
+
vllm/distributed/kv_transfer/__init__.py,sha256=hgB0f7tJWprC53X4NYq-5y0YAhf-lF2OfOQcaF_14Bk,371
|
|
139
|
+
vllm/distributed/kv_transfer/disagg_prefill_workflow.jpg,sha256=fOFUEx-2Fm1uxHCGopvCREaRqdvR87Z7C0bMqEVH3Iw,142656
|
|
140
|
+
vllm/distributed/kv_transfer/kv_connector_agent.py,sha256=ZvPS-iDdCIqCYI5XUnwR-hiws15sjsgKoksHCthsQ90,2433
|
|
141
|
+
vllm/distributed/kv_transfer/kv_transfer_state.py,sha256=HvxD6knnUDmnjXK-TYA27idyEIz1Th-jkTKHd2Te6Rs,2266
|
|
142
|
+
vllm/distributed/kv_transfer/kv_connector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
143
|
+
vllm/distributed/kv_transfer/kv_connector/base.py,sha256=qEPxC3gSvvM3oQHr3KL-ktd_53k2fs8sVpJzZUvjyF4,4434
|
|
144
|
+
vllm/distributed/kv_transfer/kv_connector/factory.py,sha256=SJPW3ZmV7bCW4C9_usjYEIelatPGmv_QLshA-NhCf4s,4005
|
|
145
|
+
vllm/distributed/kv_transfer/kv_connector/lmcache_connector.py,sha256=bc90F18oq95nkPNCx5-WnZqX90aOgCdbLyeRHiLbXE8,3683
|
|
146
|
+
vllm/distributed/kv_transfer/kv_connector/mooncake_store_connector.py,sha256=Y4Ry3ljzTR3przGc9x0twV0tqo-MUdJDqAa91z7cl2w,8550
|
|
147
|
+
vllm/distributed/kv_transfer/kv_connector/simple_connector.py,sha256=q6kT6N4Gv9qnAqp_HNRz267v0lF5GY81R7zFcrPWiGs,13871
|
|
148
|
+
vllm/distributed/kv_transfer/kv_connector/utils.py,sha256=F7b-JTvj7MGlmaQ3LktzxqQBTY65Z9O-6B8XPg9KNxs,3750
|
|
149
|
+
vllm/distributed/kv_transfer/kv_connector/v1/__init__.py,sha256=vjF_iq8G1cGemr1Vtj11t7O0TJRaHuUjhb5VNegzNrQ,207
|
|
150
|
+
vllm/distributed/kv_transfer/kv_connector/v1/base.py,sha256=AyThfkIvLIubwvvCWhySG_BWKChgVUsZRKRqJSchKNQ,6810
|
|
151
|
+
vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py,sha256=ACF4Z4Psxwg3EC2HeqeyCdm6qgRYGHUYzrP2sMFfb9I,4978
|
|
152
|
+
vllm/distributed/kv_transfer/kv_connector/v1/shared_storage_connector.py,sha256=l4TcUH75Tl6rPUobmup1Wi-ADv1Wyk4WH9ffi-Me4lg,15572
|
|
153
|
+
vllm/distributed/kv_transfer/kv_lookup_buffer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
154
|
+
vllm/distributed/kv_transfer/kv_lookup_buffer/base.py,sha256=onwGEXNRYYiOCXHzkVbQ-WVBAWJ_w4SeBuQF_Z9TLrk,6217
|
|
155
|
+
vllm/distributed/kv_transfer/kv_lookup_buffer/mooncake_store.py,sha256=qU8ulFs4EZOi0udp1DeDakHCqSHHzdGKcOxR_WMVl5c,5610
|
|
156
|
+
vllm/distributed/kv_transfer/kv_lookup_buffer/simple_buffer.py,sha256=OP6qAlDx90p86x-QhRSm22aW2tuoMB7HDKws0DCpVhs,9100
|
|
157
|
+
vllm/distributed/kv_transfer/kv_pipe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
158
|
+
vllm/distributed/kv_transfer/kv_pipe/base.py,sha256=U4hivz-zJkjhTGgNdtcuupc_ArsoUPFuWEv_AXJ9rqs,2087
|
|
159
|
+
vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py,sha256=BXVNE9q2w6jsrrnyaFS_qBwUbfMW6rUcOhTslH1GrJ4,12042
|
|
160
|
+
vllm/distributed/kv_transfer/kv_pipe/pynccl_pipe.py,sha256=v_lF2Vija5coPEFil4lIH___xvuT6XvX563Y682M_U8,9699
|
|
161
|
+
vllm/engine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
162
|
+
vllm/engine/arg_utils.py,sha256=Zd-5IvV7WyQqwiYCSuaoBBzBQbWwA4MeoNNy032eHKo,77963
|
|
163
|
+
vllm/engine/async_llm_engine.py,sha256=NVj01NbYxyKN7WEXJmXzciBhcMZmvOP0HzZ350sgk8o,51183
|
|
164
|
+
vllm/engine/async_timeout.py,sha256=JxUaRVK_M5P5wRVkKHQ-QkDMnGxKMTt9S9OhQeQzP-s,7092
|
|
165
|
+
vllm/engine/llm_engine.py,sha256=hRj6md9_j9Z4WWjBa7MPdK-Q6WxFU6b8ku_D1QuHl-o,94629
|
|
166
|
+
vllm/engine/metrics.py,sha256=pgK8JsN_qIDxOAJCb7Rdabv1e5FL-xIQNq3jXeJGBfk,31380
|
|
167
|
+
vllm/engine/metrics_types.py,sha256=9qcaNDFM1xfaQGjY9aPK_Cn-GObdctJiqR_t6cLzy_Y,3309
|
|
168
|
+
vllm/engine/protocol.py,sha256=Hc2CuVEqKXfg6FzjBIkRbMUfXCYarDwRCO8GlUfQwso,10642
|
|
169
|
+
vllm/engine/multiprocessing/__init__.py,sha256=Oxga1HyDxx_K9SJ90gd5PBeVhEQ_9edrrT-DgWT1mo4,4923
|
|
170
|
+
vllm/engine/multiprocessing/client.py,sha256=HAGfCWFxusAbfVvQJO3KmuzMVP0zK9Lk3dVQj3O1FSc,30282
|
|
171
|
+
vllm/engine/multiprocessing/engine.py,sha256=iu4zUfSPqlO-aL4iSSsOZMcpA1pJwXyb5HikWz5h_L8,17848
|
|
172
|
+
vllm/engine/output_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
173
|
+
vllm/engine/output_processor/interfaces.py,sha256=99zPnCsA0H9k8d7uXfv8yGva69mAS1m3apR9sdWIUfY,2994
|
|
174
|
+
vllm/engine/output_processor/multi_step.py,sha256=s5GKHbBFncCg4lu9ujcLOykVp_Fe1VfC3fAWTIulEns,9238
|
|
175
|
+
vllm/engine/output_processor/single_step.py,sha256=ToiNI09zyU86zsB8Vbw9-eLcbNk_gsCHc2sKRmBfcF0,6015
|
|
176
|
+
vllm/engine/output_processor/stop_checker.py,sha256=XtOa0t-ZErekuf9SFoxQv-nZN2ddPxEiFIwIJEUK-ig,5067
|
|
177
|
+
vllm/engine/output_processor/util.py,sha256=IoNFmy8vKrK5pn3nGS26Ey5irhKr8mzNOGP30SsT1qA,1056
|
|
178
|
+
vllm/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
179
|
+
vllm/entrypoints/api_server.py,sha256=rjgwdjwCsAUr3FS1CbeyxcKD-jKm4GFB1pyhu49R8Lg,5708
|
|
180
|
+
vllm/entrypoints/chat_utils.py,sha256=6lWTFU91w9Agmq-QhV5MJi-KCYeYl1K2spzZt3C5chA,45402
|
|
181
|
+
vllm/entrypoints/launcher.py,sha256=ZLE3YFhCKhintM-Ra34S8p8L5-eUhzoArdygX8QvVF4,5212
|
|
182
|
+
vllm/entrypoints/llm.py,sha256=yU8FyVqYi_hC4u_GcEpvDLc1JnvNYD6617GdoAglCsk,64044
|
|
183
|
+
vllm/entrypoints/logger.py,sha256=ThXqCnP0Ord214j4_OQh124fOT421rf4-JpXtwQdKx4,1443
|
|
184
|
+
vllm/entrypoints/score_utils.py,sha256=HerqcBBubpl-Oh1UIYoDGkRsbqyhpuRiTopyBlil9v8,1655
|
|
185
|
+
vllm/entrypoints/ssl.py,sha256=JigVmJhUkhrDPvD1z-iCCwOOLE__qD8V_2h94zvt19A,2736
|
|
186
|
+
vllm/entrypoints/utils.py,sha256=varTasRl4FqK7J5FjtCfV_N5T8ZhdY4Ll1M0-Co4bFA,5574
|
|
187
|
+
vllm/entrypoints/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
188
|
+
vllm/entrypoints/cli/collect_env.py,sha256=DhsoUicPdVy4jQaYZIAQFSZlyxGomBpif9dTO_DkN_U,1089
|
|
189
|
+
vllm/entrypoints/cli/main.py,sha256=ECcw2rG854LW1KxsInnwPk9Rwm-Mh0ONPuURSY0JYdE,1568
|
|
190
|
+
vllm/entrypoints/cli/openai.py,sha256=zuqg82yXK6IzEd05-g5VGVGoILz6-xE8LeTYE5QrzRY,5765
|
|
191
|
+
vllm/entrypoints/cli/serve.py,sha256=OAgH3VF1gZTOMoHQthyiyxBuBa6uQDSnvVueO6JkXDo,2054
|
|
192
|
+
vllm/entrypoints/cli/types.py,sha256=9GDzWTOdmPeQk-Z2cXdXSk6DgQMlrhbmLPgQ69Hof0Y,637
|
|
193
|
+
vllm/entrypoints/cli/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
194
|
+
vllm/entrypoints/cli/benchmark/base.py,sha256=blt2nY-bqD8LZV_lt73JcYdVd1f_Pb_fRHhm8wXYYtk,1105
|
|
195
|
+
vllm/entrypoints/cli/benchmark/latency.py,sha256=htqbqDiw338UQ4Imrm8h4BSvHgNe7jXsSE9sHxZDI7c,810
|
|
196
|
+
vllm/entrypoints/cli/benchmark/main.py,sha256=b4ANrwJgpjSMPg8bHnvxhyukxlc_s7x3jrL9n01ni00,1780
|
|
197
|
+
vllm/entrypoints/cli/benchmark/serve.py,sha256=ISS4pL4Q_yF54GOICzDBGcTEDWMnU9e5SLdhosgFK_I,792
|
|
198
|
+
vllm/entrypoints/cli/benchmark/throughput.py,sha256=p6_xRqPiHbXoElsDA2wIT6zIB3c_ClxDQewCk2VROQo,812
|
|
199
|
+
vllm/entrypoints/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
200
|
+
vllm/entrypoints/openai/api_server.py,sha256=H2ylrBtj1bEYudzXHmSI3i_8luuElA1GbIqO48kMnJE,43608
|
|
201
|
+
vllm/entrypoints/openai/cli_args.py,sha256=LeLIxRo7E-NnbIPnr0RCkJX3mgEi9nLG_eaFv5RCx_U,11709
|
|
202
|
+
vllm/entrypoints/openai/logits_processors.py,sha256=zbAeibwgnyMxn99ZlIheZZtlN3iBzk9AzqbOC7qVUr4,3161
|
|
203
|
+
vllm/entrypoints/openai/protocol.py,sha256=QQNcmm-0KvHngElREAjyn_AAm_l8qUKE_ndNeKmSmTc,66479
|
|
204
|
+
vllm/entrypoints/openai/run_batch.py,sha256=7QqKfdo08-mGl7oGRB7ql1y6BHIN4vI3ldQFbk_Kpnc,16821
|
|
205
|
+
vllm/entrypoints/openai/serving_chat.py,sha256=boteYCnqyhM990RuG8gy-S7IyQ9MYxNR0D3NKxO4Iao,56951
|
|
206
|
+
vllm/entrypoints/openai/serving_completion.py,sha256=RGl4Q-z-Joqvahzx8MjKB4sDgV01XPis59Y8p-XDuPY,23479
|
|
207
|
+
vllm/entrypoints/openai/serving_embedding.py,sha256=_zS1NUeIWxGXjOvkBO2WA2DqZ7aAVvMGWE33lH3kSXU,9164
|
|
208
|
+
vllm/entrypoints/openai/serving_engine.py,sha256=lJqfR9xXL_hkPRfMot7hGNpoQSsmqZVJmD40rjVbgUU,22349
|
|
209
|
+
vllm/entrypoints/openai/serving_models.py,sha256=C_NVmE_oimem4sRLOQkIyJTuIhD6fKiSemwRSQQOaoo,12728
|
|
210
|
+
vllm/entrypoints/openai/serving_pooling.py,sha256=L74e9zPrfh7d4O0Alig4VnE4zMzuI6ncX3SDyEbHx2g,8876
|
|
211
|
+
vllm/entrypoints/openai/serving_score.py,sha256=JtMtgtGKFMELJab0uQLEIclpdMhRoGz2TvPwbJrtl_M,16240
|
|
212
|
+
vllm/entrypoints/openai/serving_tokenization.py,sha256=OiUPnXv4UY1K26BVtMHThxLsy_bdZb6vvnjYYv4-vPU,5517
|
|
213
|
+
vllm/entrypoints/openai/serving_transcription.py,sha256=8w8DvHzoAma-T50asNFb66badNOCwwV-tqekxWiG50Q,15629
|
|
214
|
+
vllm/entrypoints/openai/tool_parsers/__init__.py,sha256=Y5LRbwcAwjrDbB0O7zZr6BCqDWiaPA0APHptnB-jDxo,856
|
|
215
|
+
vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py,sha256=tULmZD-It5bOOf08FFJFN-bx2A_0CT5Atr9zgvpiGBg,6026
|
|
216
|
+
vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py,sha256=OnpN_9yo148CIPT1TPH_1U67rTMR8h2NS1W3_1cZI-g,11135
|
|
217
|
+
vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py,sha256=QPOnvPxzWROGBD0P8TQoGCRlvkclYqTAx4GBhu2jktg,10365
|
|
218
|
+
vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py,sha256=tMaFDFJk9gvnbym9nRwgTXrzq_jL7ma3C0SayUF3reg,16799
|
|
219
|
+
vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py,sha256=o1-jp-KTOTXgvO9obkMGOWREXjQV-kIwKj5ujYMZFlw,9135
|
|
220
|
+
vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py,sha256=hxhk0FEsTPeD7Tc3hmItizGHT4RjZVFODT7iqJksnJI,13553
|
|
221
|
+
vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py,sha256=jByIKB-3-EL9url5wIxfbcGuWT4kuumd6_vveItMwhE,11959
|
|
222
|
+
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py,sha256=6-GUAAGvDMVybNR6qvIvraLJc9Z4oUmqiyePZnOc-Lg,15371
|
|
223
|
+
vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py,sha256=ez0cTcApdl2oKpj6RDWDjS1AUUdw2dV10QDiBrC5uT4,4218
|
|
224
|
+
vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py,sha256=AqBVcgTZp5Mr-edJzdn8u8u3tev7sDMQsTHtluZ35us,11971
|
|
225
|
+
vllm/entrypoints/openai/tool_parsers/utils.py,sha256=56zqKHw3Q5XqhqNpwZWnRSbzhx17qzuyFsASFXmeZZk,3805
|
|
226
|
+
vllm/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
227
|
+
vllm/executor/executor_base.py,sha256=WS5uXxYBWl2wGbBBgqRTUZgiwVFiNiaymMRDm-B3r3Y,15664
|
|
228
|
+
vllm/executor/mp_distributed_executor.py,sha256=6mrv5wsBUw6ng08_qyn7LtmYsaGgGgTHoZSdP-hmPtY,9854
|
|
229
|
+
vllm/executor/msgspec_utils.py,sha256=FG5Qh6ghtLjyI6BHiTGmJQ3sGop2Lpm4LoADP_0Hs9o,909
|
|
230
|
+
vllm/executor/multiproc_worker_utils.py,sha256=yXbbNwhImWw7xgSgjoOSwbGLsuW0hCLsye0IFBVU0pU,10725
|
|
231
|
+
vllm/executor/ray_distributed_executor.py,sha256=0bokBxMVvBKogFyi9a7CT8Hmq5gbLDiM_aKuFCyjRR0,30725
|
|
232
|
+
vllm/executor/ray_utils.py,sha256=ADwyUzK3ciZGUNYhogA6m0-YwUFzX4bLQm9aADMijY8,16860
|
|
233
|
+
vllm/executor/uniproc_executor.py,sha256=FY4XQzC1ZpIOsOpS5SDtWBKlGRMPLP-5MTwPj2ijovQ,5657
|
|
234
|
+
vllm/inputs/__init__.py,sha256=CU29-EwaGLoem47wIqFC-mR1433WZh3a67hNOlIsVlI,1155
|
|
235
|
+
vllm/inputs/data.py,sha256=9yF-KJ7fZY-rl5sBdeawmVo-Zon5ZqVB9ceV_YJPY2Q,8254
|
|
236
|
+
vllm/inputs/parse.py,sha256=q0Kx42qRjf0NalObXwBN6eJGjHnfSsOl4mI8PJRzvEI,3716
|
|
237
|
+
vllm/inputs/preprocess.py,sha256=ngy8-vDPWtSFYal1haK7GVfGOBxrrYk-Ji3Zom_YTAk,28089
|
|
238
|
+
vllm/inputs/registry.py,sha256=hSPaEwBKSGmCH1ELqiEyhCMw6UZZBJHbM7oeQVmV9bw,6425
|
|
239
|
+
vllm/logging_utils/__init__.py,sha256=t2aDazCRc19hTrOxiW3eY-d702nQFBOJz_QIfSip9ok,136
|
|
240
|
+
vllm/logging_utils/formatter.py,sha256=AAWbFV4wgQZn_Ek0MKA-TBJwCQiO3ejIuIV1rQm8ADQ,525
|
|
241
|
+
vllm/lora/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
242
|
+
vllm/lora/fully_sharded_layers.py,sha256=qUGiJmRn9JA_JKm4TaHWaVkdP_NrKAAf-h9QGdJjav8,12229
|
|
243
|
+
vllm/lora/layers.py,sha256=GMzaA48SBSbG7pE_PoMf9QB2Ho0eciRFB9tEM4EhelY,46372
|
|
244
|
+
vllm/lora/lora.py,sha256=XfOb94aCsORrhvTnHDy-gF6iGo6nULtpsRcR2wpfYBU,6222
|
|
245
|
+
vllm/lora/models.py,sha256=zugnzfnOHtGnvpUjiLCDpgyBfaVFI7I3tM-cZbBi4e8,35111
|
|
246
|
+
vllm/lora/peft_helper.py,sha256=ag2z855o72BMDHMMt1noP1XoMG875wtKOWVfuwZDiM4,4399
|
|
247
|
+
vllm/lora/request.py,sha256=w_fGpOlDlJpOS-7iw-dKXPkr0zvDmgdB4mr3kOszin8,3059
|
|
248
|
+
vllm/lora/resolver.py,sha256=Ss3VTm7LIoc6eb_7EyNJgdgzMALWnxIBNRs2TqBbTyA,2807
|
|
249
|
+
vllm/lora/utils.py,sha256=oQR3bn0k5is1hbZ6mA5-B9G62QhpFH13fS1Mdrj9Hko,9162
|
|
250
|
+
vllm/lora/worker_manager.py,sha256=Mh-T5-V6kFooiFmgduO4hUw3spQfPDRp_uJs1rN6QAc,10673
|
|
251
|
+
vllm/lora/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
252
|
+
vllm/lora/ops/torch_ops/__init__.py,sha256=z03eb5aCSj_Z-_RPa3huUKuXRBvhxj_M8lK7izkQJHE,466
|
|
253
|
+
vllm/lora/ops/torch_ops/lora_ops.py,sha256=ilxQObKw2wEz3BJJ8X87xWPHGOz0jSII3b13wpj66es,4300
|
|
254
|
+
vllm/lora/ops/triton_ops/__init__.py,sha256=5-wjfUVaVmWcrM1PEFIbmzK2VJufzzPFKg8btwBShr4,309
|
|
255
|
+
vllm/lora/ops/triton_ops/kernel_utils.py,sha256=xbjwaaJDlAW3J-db75xOdd6_Hb5z1ZEfW_9szb3NvJo,8442
|
|
256
|
+
vllm/lora/ops/triton_ops/lora_expand.py,sha256=9aOvCxbKjeXWI0Bl0HgC3VQ_Lw3GcXWv-hHLT6_Gv4U,8971
|
|
257
|
+
vllm/lora/ops/triton_ops/lora_kernel_metadata.py,sha256=WAI_fis0b0f5_3GG58VbvJfOY2slL-L5nX8kd665IFI,5905
|
|
258
|
+
vllm/lora/ops/triton_ops/lora_shrink.py,sha256=1YcPmNXvUw9BELsV3kzHM18vYNhoSlJsfHqPLVUc32I,7998
|
|
259
|
+
vllm/lora/ops/triton_ops/utils.py,sha256=reDlg9D5e97cmSgwkTiiAQLCRcW1nYbf5VnbTx1Fu9A,4885
|
|
260
|
+
vllm/lora/punica_wrapper/__init__.py,sha256=RAbrZogtmoPZNIMImJFX1REM0cydwz5C-ATIp7_qHFA,244
|
|
261
|
+
vllm/lora/punica_wrapper/punica_base.py,sha256=Nf0l2sBxlSX43_i1DVPqgLRTdYoJknSScuxdeqUhb6M,18227
|
|
262
|
+
vllm/lora/punica_wrapper/punica_cpu.py,sha256=1hzv1SchgOGiUo4kYsAN5BcTDX5XNQopMqbbXvxLqlg,12465
|
|
263
|
+
vllm/lora/punica_wrapper/punica_gpu.py,sha256=3-VIBGixPXWlWPrpV4zuTA4H1uEQxcSHI0FzEVXfvCc,10829
|
|
264
|
+
vllm/lora/punica_wrapper/punica_hpu.py,sha256=qmXdhyHJrv4ZBXBlT2tOQSc3Wibu6Z8YkEDd8HINL24,5792
|
|
265
|
+
vllm/lora/punica_wrapper/punica_selector.py,sha256=WP5XsmWE8YJG8fmak0jNPrgYxF4_lxaJt-mcNaMfsRY,755
|
|
266
|
+
vllm/lora/punica_wrapper/utils.py,sha256=CFoSN8wz2TQhQhYdiZFjN_QU4AwQ8wPfa_o7wL9Ufuk,6846
|
|
267
|
+
vllm/model_executor/__init__.py,sha256=cRhmybV9ftoNVy7E91WIczp4wLL4E6y77KQ9vrhWqL0,505
|
|
268
|
+
vllm/model_executor/custom_op.py,sha256=-EYQ_VHfc_JJQRGlBuBx5praOIgZA_lg_1074yh5Q7o,5626
|
|
269
|
+
vllm/model_executor/parameter.py,sha256=uqCaKZy4iKETQMqNleUXoMKcQ0zYA1KwEO32s3d4bwM,16689
|
|
270
|
+
vllm/model_executor/pooling_metadata.py,sha256=FeDxnEg8W8-ZOtg17JE1z26RfrKaU2_ZaWvwkochqKA,2077
|
|
271
|
+
vllm/model_executor/sampling_metadata.py,sha256=NqPjJLXP6xdiy5hQk1lq_sRdFrjwUEQhfOudpag-Wt0,22965
|
|
272
|
+
vllm/model_executor/utils.py,sha256=o1nKePmbzfAwPqVqXJbOGwDUXy_fLUGaFZlsPEbMhpI,1915
|
|
273
|
+
vllm/model_executor/guided_decoding/__init__.py,sha256=TpFJoEV4tiUOJH3q3fLcR5o3BQnUtPEfMLMykMKKidk,8329
|
|
274
|
+
vllm/model_executor/guided_decoding/guidance_decoding.py,sha256=l9e0FB3CXN9JXaKwd5ZHL06hkINp0rxcaalRYGC3YPQ,2579
|
|
275
|
+
vllm/model_executor/guided_decoding/guidance_logits_processors.py,sha256=9zLWQfZwe2MRdLO0n17TKtfhNqn-TM7_w0BwKjvF6y8,2509
|
|
276
|
+
vllm/model_executor/guided_decoding/guided_fields.py,sha256=Xy26Otb4nD52-WXAfzFvajLSiyLFFLBXOzx0sBAtqdw,1563
|
|
277
|
+
vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py,sha256=uUzfxJuP1NDjgLzPdEHJqGCj17J2JInP_LksjCP2utQ,2678
|
|
278
|
+
vllm/model_executor/guided_decoding/outlines_decoding.py,sha256=5fRTcp8uiGQzGSu_X7JxQE3So7VSqH46BB098OcdlGg,5506
|
|
279
|
+
vllm/model_executor/guided_decoding/outlines_logits_processors.py,sha256=7Auegbg5X2GJVNE0Y8IpjZlmSx2cCXpICakNybZUzo0,10438
|
|
280
|
+
vllm/model_executor/guided_decoding/utils.py,sha256=alzXMrED6vcIkwgh-byWqthCP9lC6dqFAxluS_yzqhY,7888
|
|
281
|
+
vllm/model_executor/guided_decoding/xgrammar_decoding.py,sha256=xlYHsakhHXVZqQ3QrWBtVK4AcYVlvSaxSGQqo40DJkI,16738
|
|
282
|
+
vllm/model_executor/guided_decoding/reasoner/__init__.py,sha256=jtFIBQmMxyEj4_f3h5NxOJ3y7wTVDbiHRtFneCTPRBc,1243
|
|
283
|
+
vllm/model_executor/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
284
|
+
vllm/model_executor/layers/activation.py,sha256=j0puBjGjVj5BqTsUDtI3jF7fTdJ_gnAEGESVm1-Jdco,12522
|
|
285
|
+
vllm/model_executor/layers/layernorm.py,sha256=JO55dJIRWlYCBpQ6uYkTNTeu7iTEKudDxWJ53gQsrE0,8820
|
|
286
|
+
vllm/model_executor/layers/lightning_attn.py,sha256=8yOj6NdnSqSa65lYJe5in7yYrRsSg8xiSjju_zTDtCo,20894
|
|
287
|
+
vllm/model_executor/layers/linear.py,sha256=l7Pv_n76G7YKpIZGumI2ZbkoCyPsVRBTbw2b7CO-Y1s,64973
|
|
288
|
+
vllm/model_executor/layers/logits_processor.py,sha256=4cmm-_PEPbWrwdfh9MC5MDwdXkbmmGrQoGwfttxdvCo,7757
|
|
289
|
+
vllm/model_executor/layers/pooler.py,sha256=vT8XtX12oKqGhwAA6z9SAHbqIFuImyR32KlvdWNc0FU,11434
|
|
290
|
+
vllm/model_executor/layers/rejection_sampler.py,sha256=4Rp-lgHMcZ4JFZs2ujBG2Rmm2FmPoGbq-ZbxQW_cVeU,16469
|
|
291
|
+
vllm/model_executor/layers/resampler.py,sha256=fA3oc51Ku2jW2orqWVX0voeMc1zSooV6EyGteRIvGAs,10444
|
|
292
|
+
vllm/model_executor/layers/rotary_embedding.py,sha256=EGzeXYeDmBYwbNIOaRlJG7Df-zsdCSYyWiRwCHgZ0uk,66024
|
|
293
|
+
vllm/model_executor/layers/sampler.py,sha256=aOo-Z6NB9FEM7OMnTFe6NWOEU8_Z-tJntdVXmKzdHLo,50453
|
|
294
|
+
vllm/model_executor/layers/spec_decode_base_sampler.py,sha256=iPRvd0WzUQzr8e6nSdggtKkyxYFv3-7MtnTs71wUzas,10190
|
|
295
|
+
vllm/model_executor/layers/typical_acceptance_sampler.py,sha256=uzlrDmPtV19Mv17iJIceBU4Lj9IiJ1M3kXTxZ_y0mek,7047
|
|
296
|
+
vllm/model_executor/layers/utils.py,sha256=Aa25A_a2vZFKIROJOWEynpZD5HRa4HC7179u-HSwaQY,4031
|
|
297
|
+
vllm/model_executor/layers/vocab_parallel_embedding.py,sha256=1yd_jhRjAOwJ95f0YJvDzZqF-2e4GMgKfimrMITBUw4,22682
|
|
298
|
+
vllm/model_executor/layers/fused_moe/__init__.py,sha256=hH3j2O9YtTlU3G4onWuXho3zxiGYailn_99thya1v6Y,1287
|
|
299
|
+
vllm/model_executor/layers/fused_moe/cutlass_moe.py,sha256=5h6pIi7pJLwyyJO-_1Ir-BrA3KdF1n8C4KMdMMCFt6s,8006
|
|
300
|
+
vllm/model_executor/layers/fused_moe/deep_gemm_moe.py,sha256=pWVAazeIIZ9HdgLIGWvKEnO6pGWNT1vJi8daSAwk3dw,11542
|
|
301
|
+
vllm/model_executor/layers/fused_moe/fused_marlin_moe.py,sha256=TinNuSAB3Rj7eXpS3uAbglA2Qvuw6fAHOJsXVutqq4Y,14343
|
|
302
|
+
vllm/model_executor/layers/fused_moe/fused_moe.py,sha256=BFsieGtPoCz1Un1YmjRT-SCcPI1MmdeNkJTcEXfdKhI,62916
|
|
303
|
+
vllm/model_executor/layers/fused_moe/layer.py,sha256=fFfPdq-vUXsGlTEXSVSG7zg5jI7dGj_wlTQH7bEvxYE,40247
|
|
304
|
+
vllm/model_executor/layers/fused_moe/moe_align_block_size.py,sha256=om1dbnaefHLn8S9j7x95-85VvCAR3Wtx1rvEYAtDvMU,8280
|
|
305
|
+
vllm/model_executor/layers/fused_moe/moe_pallas.py,sha256=20nVD5HOmg1D9Jol0k4IS4PF_k6QIUIZBGIUxPNlrIE,2338
|
|
306
|
+
vllm/model_executor/layers/fused_moe/moe_torch_iterative.py,sha256=XSzX80m9PnrlCgGPzSud-lsro4Xx9qlWoiT9OfBZ6WQ,2087
|
|
307
|
+
vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py,sha256=o-O-vrrYgtF6stNTqU2t73WWOV2cLFBejaHVxumwb9A,16639
|
|
308
|
+
vllm/model_executor/layers/fused_moe/utils.py,sha256=TWZBVHS1XMUYxFCVL-EvcDTHf_f6KIPdk4cWxdgDC9U,1487
|
|
309
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
|
|
310
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=hH5rRN9Wtyv35azxMzyUMHWtiKgOHev5tNjIG8j6dsE,2751
|
|
311
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=qPumkNxaHMvVBnEjPe_Xiuz9ICb6Hqc-9I1DAR8s3gA,4130
|
|
312
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=s47lb8VLnyxMgWlqcIR4BdPBsjKWL4olXF49uZvygzQ,4140
|
|
313
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=gzfjrYDcS0vsACq7ONGVkNA3FqVjr3e89q9fO9kokkg,4133
|
|
314
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=Np7yRX9Z7Y7Z5Nutbl02wpKdZRltbt4WqlPlleiYs2E,4146
|
|
315
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=XsNfNXY8v0eatazkLCDiDclI0FnTudUGLYO01e1_4aA,4149
|
|
316
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=H0K4_O1CMbNLi-srcycT3lSl4JaBl3EGF89GY5Rj9MU,4130
|
|
317
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=arPqstZMzZjz8BNpY3alKT4vGCJyUj5I2hEeK02aq98,4152
|
|
318
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=wjnQ4v-dflJMR3iFDHBuZI_1R0xXjsNoWc2kHu6C8JI,4135
|
|
319
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=7WHPz_0fxeI3Ed0D9VIpZVoeN9RtJVVARvptfcmQu40,4146
|
|
320
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=2kWS9Qvy5Q3mvUFmbPVures5iZAriAXsy8WrtE5wu00,3727
|
|
321
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=I3k416HbXU_rYb8scD8gAI4fuBlElHl06PM347Qa11w,3253
|
|
322
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H20.json",sha256=RgV8C4F1LO09h01YsgF_eqX6GNoBtC7ulPfJRUUbg_g,3241
|
|
323
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H200.json",sha256=nsNEuDNks0tVLfQfIm7xxFwEeptTfQcoa9fJy0NS8xQ,3247
|
|
324
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=qbqjisJ4oKmcYzumHPRk5UyOzsdi8J6xas82UWHMeAI,3263
|
|
325
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20.json",sha256=vS2DRIDOqWyiBvbG6H746ownfkD1F8Aj2YZ0ET9xll8,3232
|
|
326
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=MlpzcrkZo78kFYr6cqmh4lBdpxKcEvlzqvRf0bmeduQ,3264
|
|
327
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200.json",sha256=xqhl748it8GV2KXX0XixitE_ywnsKksqK8AGL7tAgT8,3254
|
|
328
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=FsWbV4Q6AzAtgegVuENBDz2ZcSJsqNiwUIVfQbpP7hQ,3244
|
|
329
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=IuvyC8TNhCVAmUZfLSoETsyCKsmejKXrs_0zuwFLPAU,3265
|
|
330
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20.json",sha256=10Ntu2aVD5vGLonx-jW0qNw-tgZWdZmzMGx7utDVeng,3237
|
|
331
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RFH5FcN2ZCPk6DsxviTti1Q8JU5jzBRFXvUQNgOvnmI,3265
|
|
332
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200.json",sha256=JraM-Nvbg5V_TJkSl6UPFYZN1zHHoIbr2pAcksenoTY,3248
|
|
333
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=96,device_name=NVIDIA_H20.json",sha256=JtcHRlPz8xQEAqJ9EWI63oYvdmjQFG6VTHqtt85VOSA,3221
|
|
334
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=AMD_Instinct_MI300X.json",sha256=f3iM3xm8hGUirJ4ilAIPO6Pe9bs4sm3qaRKMswN9SKE,4731
|
|
335
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=NVIDIA_H100.json",sha256=Bq57MPQXuSib06u6OwiEmSzOr3XvPYoD6ohYDJaBnII,3244
|
|
336
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=pCCKkdUzzuBVtljyk7AEIAbeDf12DUiieXaODZXzm5E,3254
|
|
337
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=trX2-c4N6hTTD6zFNi6A2bT3FkhxKjkM2rPl-o1K9ss,3250
|
|
338
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=I4d56uD7E1JMXD9RAxq3FebdPquDsnNEkVaIY9Ctm9w,3246
|
|
339
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ypuAxMQ7JESPXLBltt68wly2wTrJzlnobhUMip6xAmc,2751
|
|
340
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=tUptlureu5QgyAEedtx5sm7CFudXAE6fIXepOb9gfas,2745
|
|
341
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=h57svdmDlZC_D8w9XWjPRS8ciYVkJiPEYfhrD2NRVVY,4127
|
|
342
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JmXhUnhX6YOy8RsmT0zFLGyNCpRBPV2q2Db9Y9ctZeE,4144
|
|
343
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=G4PKqWxh0MlBhg7QHKj0m--_fP3Ll0gs7VJaeg-NIDM,3254
|
|
344
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=bKX9AvcxN6k-i3RUmHSchZZ3rjoYRYb4iBqhCI4L3MY,3257
|
|
345
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=bWR6XBZ4nJ_ROg8rEgrQGc04I3BDbwILDHMZxATO-H4,2740
|
|
346
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=Gu1wROuky-xS0dsFgbXS2QD_hOVV8yol9a5iqiYyq3s,2749
|
|
347
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=_9HO7SaR6aQeh6vqCDpo3kjHnGJ9BVKLiMwYYgd3SmQ,2913
|
|
348
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=2ONiQSa9odzdPe1dIgBpP24l5z-5wB1eos06xOj0V_Q,2738
|
|
349
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=Twkm9DVNxijpowfvioJ_4cKwIIlAWdyNWO9TA3gxAHs,4149
|
|
350
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=THQWP1o2bWhnJh0rq3ZIVvs_sagIJgoK4x3pJbiFbHk,2910
|
|
351
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=o1pR3rNpO1eW4BHOKpPIQLjviw4P2X5Fr4HQBcdHA-I,2747
|
|
352
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=iySqae0zI_PRBLqV-vfSCwDS4Jxcl5QjWa2NnhndL0U,2752
|
|
353
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=Uhq0SrWiCrldkWbb0ZZZhWaCZ0SsvpiNL4z30KZUN5g,2747
|
|
354
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=ydsFUdXdVE_ZSScVhUxvxOFwKG-nkTraNeN69wqzxIM,2903
|
|
355
|
+
"vllm/model_executor/layers/fused_moe/configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=TtDngG7ljrU5RtWZ7g-xxdBT3uEuawiKhP8EwPr97XM,3254
|
|
356
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325X,block_shape=[128,128].json",sha256=fT7fwjuit4HbbyREYV3ECJ9Rm88FW-V54e27nG9nA_Q,4741
|
|
357
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fT7fwjuit4HbbyREYV3ECJ9Rm88FW-V54e27nG9nA_Q,4741
|
|
358
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=HNvrgcXxV-eVMLwb7zY_R5KgJ7uBz-YIyQsKq1lWnWA,3263
|
|
359
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json",sha256=bHJEVy-CeImiY9JBRCMlHfHPAUi5xO7ENxgVVboN2Yo,3258
|
|
360
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=PnNmKSRFznCIUzZ4ZfaYTrMHeF2_kCQr4_bsEy_9Zu8,3259
|
|
361
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json",sha256=0Vlxxzp4wrvkFj-NF4OAsJAaPkm-hhisJg0tgNl-W9g,3254
|
|
362
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0aSYzpv_cBAlpWCPrfGgNTCfae1KdKQnT56E8XFQl7A,3262
|
|
363
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Lqom_VMIPduSZTZQdeL2Wl_x3r9q6RmI9bojJrYwQZ4,3255
|
|
364
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fd2p65T9OboKIgw7MQc4IdKaJsoO73Nu3VQiKjV6Ffk,3261
|
|
365
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FUGuYbs_QhqKfErofvbTUplhAVN465A7NR_-ryXvebE,3741
|
|
366
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bpDPbTyrXLyCSy-o0diveVVeVUF_xj-fdSzCzWmEcKA,4733
|
|
367
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bpDPbTyrXLyCSy-o0diveVVeVUF_xj-fdSzCzWmEcKA,4733
|
|
368
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=skSJdv0Pr4rba5ODxp-fHZ6dpxn8KkvACGzNf74j81I,3257
|
|
369
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=mtm7RgEBEJJkHsOis9BtAFo1OCk3vBbt7l7eumDzd7k,3263
|
|
370
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=R4B2n2vGt4pPo6jS4Bmnx8AYtcfF9qQJE5bD7OhmXHs,3265
|
|
371
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=JnqtO0t2HBcQECdYavi18mu9_MwblGr4zfRcW4zU7_c,3265
|
|
372
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bpDPbTyrXLyCSy-o0diveVVeVUF_xj-fdSzCzWmEcKA,4733
|
|
373
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=rVORXxNsxy4WmO5SJR8Sd4k7vozKqhYf50wZNCMeQzs,3239
|
|
374
|
+
"vllm/model_executor/layers/fused_moe/configs/E=60,N=1408,device_name=AMD_Instinct_MI300X.json",sha256=4UXbsSNHmrSWnD85SdRMLp4cFGRufndzJjB6hoQPclU,4736
|
|
375
|
+
"vllm/model_executor/layers/fused_moe/configs/E=60,N=176,device_name=AMD_Instinct_MI300X.json",sha256=p6TKUp-KDeLB9E9LqThR1e7J2-ogSXPJojISdHgCxaY,4727
|
|
376
|
+
"vllm/model_executor/layers/fused_moe/configs/E=60,N=352,device_name=AMD_Instinct_MI300X.json",sha256=gHxtmO_uvpueLVlsJgXBVE3_pS1S9EeRxNmHG_ZQszg,4729
|
|
377
|
+
"vllm/model_executor/layers/fused_moe/configs/E=60,N=704,device_name=AMD_Instinct_MI300X.json",sha256=tVdpbIU1scsylx6oz3IADhkcwvZaNqw-_QVb7a6oVX8,4732
|
|
378
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=6QPLhZurIqcMVdy3w0Dd7gLViKxsyJRBz-qd8agpi6Q,3248
|
|
379
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=WPu80-OWyEJBy1hdnewLN1H1neFW8UVJrqyeDGegXc0,3250
|
|
380
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=PaFLbT5ftJiiVSOVkq_DH01EcbIs0sBVkCd9PdYYmw4,3253
|
|
381
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=ozS2ECxk-Dsd4Y9DgCGGwDwJlCf5T20ANf5gnTUMuSc,3252
|
|
382
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=KEN6xt8pgPH_FbLT2fsAD4s03_V-Z9GXuEC4IKe3cPg,3262
|
|
383
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200.json",sha256=w18R3eHB4oUhfbcCXjHyDvp0RiDSeCrfM-VFESim2hQ,3253
|
|
384
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=iz4W1UAV1fcz1ZFh4hNQSLJ_F1MdXW-V3msy7t0WrRM,3262
|
|
385
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=dYpKgvuG7Jji0W0zg_E9NfIojStBAdBcKd4B3nhimqk,3263
|
|
386
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200.json",sha256=CXiHlGpea5cEGmFi28Jec34uxEZITF2XldVFcJteZX0,3251
|
|
387
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=W1q4PfievvgJ_SiPsDhOsR0Q0eJKb4o8JZhMcVhC-_4,3264
|
|
388
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=tku4-yTbIr0H5TNrm1Pq3tJJFYTXqHpdzJDSEF3bk9A,3238
|
|
389
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=HJcV-Tzt-yojzNQkPCgi84B44F_RppXxOIicRyg20-U,3264
|
|
390
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200.json",sha256=bM9g-XpirsThO3Q2x8ChSx3PPtHuHRXLvVMnTWt8jLI,3243
|
|
391
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=oxOKFDrgmw1YmgxTtRa1uoe3p09ylTLrkj_jOTqNh1Q,3249
|
|
392
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=-B6gZAEYLwMJZOnpO81pTxqs-YVKs_144Nn9BSLaMh0,3247
|
|
393
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json",sha256=GPjPHicomrS7ntHu7nnvgNXcHCoUw9vhyTUewkXpppo,3252
|
|
394
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=ObHUCUAgHTnld8Cq9Dy1n3ilmbBzyNC4jZcz6YYhMXA,3264
|
|
395
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=WegYsHl39QVlHu_4EZJSrgA4LQ5fYxSVNWFhoL6W2Rc,3251
|
|
396
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Hrlas0Nt7d3JMr1vTpI3OVgkzxqcRziSMfFf_U5pQ58,3267
|
|
397
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200.json",sha256=J59rmqF8NQWkqmay__ahA3t3IwaPXNu5AVNLnTaDfYA,3252
|
|
398
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=X8FVPE7rLblDs_Dw_Iu-KDw9H7PaC417EHyVclYjfv8,3733
|
|
399
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json",sha256=FsIv5bqSpkWbxK2dBfg1N6tX9epZ55ZhgkJCD7hENlY,4733
|
|
400
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=CnjQX3SlQn6fIGsX6P_dbNO0TYgAd-sVUb1FfDcDFUo,3732
|
|
401
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json",sha256=fnO-v4YqBz0vUo0UtOTTD0n7VDG_ivczeQ1tR6Qm9f0,4734
|
|
402
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=V_sgDtEtGEuBsGVa0maYJHhhGqe1NE7l-1ek2ed9WP8,3082
|
|
403
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=QaITFIJU4UsrOBXaGdPYJwTmYJ0nT9kiiqeUiZzvd1k,3270
|
|
404
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200.json",sha256=CC_jsMhXzrYne7eIOroDa0fCBKNnffiaVW2TKd4P-ek,3260
|
|
405
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=LgHbxG1kQV36zZPkJcnurHYzwAjMh04lvEHEsfzS1t0,3732
|
|
406
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X.json",sha256=_fcdkmWvdMqHiH8ZAGke-zXhH7qVPQx5CmKELW5hRCA,4735
|
|
407
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=mVH8Rl4sLATinf7_0A9lTS83kv1E7Cm9oC0BL-pc9n4,3732
|
|
408
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X.json",sha256=JKYW21c0CzR0fgE5ZnYp6C1sY_tVRlm8L_lgak5V5zE,4736
|
|
409
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=yTf2R9cngSf4OafucAYlDDn4-bftaMFKaY7qhaBZPqQ,3739
|
|
410
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json",sha256=_1eVE7ok935L2V43-3D3bVNWSVaoViia19sh0VrXmXM,4735
|
|
411
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=5exlPUKvZxGDR0UT4_Dn5fp-_ZETJ6_Dbw_Vk1u8bbE,3735
|
|
412
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json",sha256=18v6YruKbQ95pXPV8ocV4VdM1zNw3aZFp3WByeUkNSM,4736
|
|
413
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=AffDc0_51ML8HiA3757zbD10TZJdUsUDIYIqO4g0yUw,3250
|
|
414
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=IEYBNjt9HGnzoOVSWvL0A0jUqq926QD0_BvVYR4RA1Y,3252
|
|
415
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=Ns9Y12aZbJnFhcG3nwb67bDqqiQAo9tdTAIe8K2Ajz4,3255
|
|
416
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=uGSLFPZXK_JQ3GTDUAEiIecDor1yjbC3bJvMolF0Xl8,3267
|
|
417
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200.json",sha256=8q6ol5JQBWj6yVfzFOn7Gz5MSXTaW9javL7qQmYVOwg,3245
|
|
418
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=6jRC0oOpVpq5c1xePFKNRy-Xtmb038i4LE9N2zao2W4,3730
|
|
419
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X.json",sha256=cFWeyNJtEbs-Bfohgzclxo1rcYGU863oV0BzJyQ4T0w,4734
|
|
420
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=SMtsqtQeqcyy8aNwl9hPxRvx_XQdT7I3SBDNJ3OIvwY,3728
|
|
421
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X.json",sha256=ZyOFJB6GUgGZsAjjT43XJwG8P-QrZ5yTvmgzQP7ThQY,4734
|
|
422
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=HOxWmCI2ifHmWc0or2y8nEen86jDeLDov1-tuMzuhxo,3256
|
|
423
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=csHezh0HGWaNwrblGzMgcE95hqbqjWS8HImLRJYr_ts,3266
|
|
424
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=_5weLBinQCDzyV75hHKIT95Y0ce94KWft2_5BC6EkbQ,3254
|
|
425
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=DlatRLPaSr8HJuO50gRZ2lzXoelx55EP3SDUdgIT2v4,3269
|
|
426
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200.json",sha256=TXSOoqvi-x8H13xPqrB9qz2T3opEGA-2D0v_4n5BEG4,3259
|
|
427
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=ro3drDpWAdeXH7IjMvx8wYGhIuDPOl0bpbJaIB5Msns,3732
|
|
428
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json",sha256=w_R2LL8k5jNVUARcqvSgGLvNoQiQC0Mh73ciqSIAz54,4734
|
|
429
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=hjDoTXRmEFLKhhmBFEjPowQus_z23ISonxFljql3c9k,3732
|
|
430
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json",sha256=AdOTy7ASetdAXUhNM8buoU8_rLLjcUYF0m8RGFrLWRo,4733
|
|
431
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=Ru460ZgnUP4U8OsJfwF8n-AI-gfcolNR3_qzoxG6DtY,3254
|
|
432
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=K6BGrKw_oHTAtHjsZldcjp-BUM1dIecKXrrRn9OpRGs,3254
|
|
433
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json",sha256=4aK_plqztXcJ-hs5_PsAvM0jclMzcO3hd3zTo0FhDro,3251
|
|
434
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=qqFoMaObuO8pFWcSb9q0wYsdC4eSCO7B-_ruQhR1N9M,3264
|
|
435
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=-5nkLIunjG1ghPoUEtt2AXEQw9oGiilP7K3UvQv9CqE,3252
|
|
436
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=WKzddrIXo-KavpuXuouW3aLLAptu5Q4XJUb5K2PLgDM,3262
|
|
437
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200.json",sha256=ad1ZkkSyLJwRGb4Kf24qg5hW_DPmt0BXrKR85oAiV34,3257
|
|
438
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_L40S.json",sha256=qX5_yErBEwDRzhv2FvxrS3pEMa8zn0GHzLp5TUMX90g,3872
|
|
439
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=ysRCWmxV20K2BYD9XEUtxwREFGtA3QHI191vHRA0k_Q,3733
|
|
440
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X.json",sha256=L8VA1sfygHoyLJ-Ybfs8DP5c0YWFmMkwxHT8yJ9PEFM,4732
|
|
441
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=FJWpDLr13XF3hHiHfJykpjbLiP7Ccu2en3U6BL-QwXw,3732
|
|
442
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X.json",sha256=FnVcfzf5gXkQRt0XgsRzIQVbDPaUDOwWJX_9qOlyvRc,4731
|
|
443
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=DxYu8regZOSFu8ugFGA_QbwWK4g8xwQUZF9a_nNY4Cs,3255
|
|
444
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=obzfE_9XgsbFNfC9biYOHxR-V_Bgc7PKT8qZZJaiJJc,3262
|
|
445
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=qwKy8oaMsd3QrXgQbM_x9xcfYiHK_Ou1CEwDPL5Gbgo,3259
|
|
446
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=qUifbWbE4cOKZbIHWmmLx68VRaslQX69eZHwRIQx-7I,3269
|
|
447
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200.json",sha256=JT-ZMLhAqqzSkqivOW5ATTKRlyyaFQkqQDnaPS4DE10,3262
|
|
448
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=QsR-Xr9vyuiArMTSo-dX-1DFgATfqwIGOzFuQJAuE_Y,3734
|
|
449
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json",sha256=EtVorGY4khTEuimlqZu0AAlPz84PH3ZkDZmVpxLtgQw,4735
|
|
450
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=D3wX0_s_ylo3nLIUfaWZmGYtMvX7oiieOLMdQ9k7mng,3734
|
|
451
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json",sha256=JPdO0azlh4yUvbpC9dEHYpRT11ELEr5LXBSb5XP4E_4,4735
|
|
452
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=BAJnXTZoewwCtzJLUPJ0oYuALv640MvDuLseGcsYaaw,3252
|
|
453
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=-Tj7ImS6ZFDof_0VTyq7kVm8XD9B54RD6CUOPSf3Jjg,3265
|
|
454
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=tme0ydWzIxdABZLk4tU8G_X2dJUYGGZNkQzNGcmcvUc,3261
|
|
455
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=g6Ivy4wvadaCAMJ4ZElbUU-CwyTMdbaa49M7IVQhVjk,3273
|
|
456
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200.json",sha256=GstQosPPHUn_I2DV3eMGtn3xXOw6kl1hb8L0EvRsbEU,3261
|
|
457
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=kF4Fx0yHUmiMSLFNXT6xqAEA4AgCaHOoy_3irv4dNss,3732
|
|
458
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X.json",sha256=uOlVzTdJl_4VrRK4wmxIb8JKfveFZRjO9syjw_oEeL0,4732
|
|
459
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=plnx7r9jkcYXkhvapbeeNvUg3NMGdGsIgIPSrfVy2qU,3733
|
|
460
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X.json",sha256=UC-iTgh8_dUSXRaYHOIhDH31KOiJmcfqM_Bv_UBf3ks,4733
|
|
461
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265
|
|
462
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=WQLKugnKzlQ0avf1N-41lRHtG6wJ56DfVPv_nip6NBc,3273
|
|
463
|
+
vllm/model_executor/layers/fused_moe/configs/README,sha256=W2yIZkP9O8GGlg97We9BJfTtWUtPbuz5ZH3esrrjBX0,572
|
|
464
|
+
vllm/model_executor/layers/mamba/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
465
|
+
vllm/model_executor/layers/mamba/mamba2_metadata.py,sha256=R3UJ_ZGgdei4h6wzpaZqT-B7fEjk6spOM2UMgRy8ij8,4109
|
|
466
|
+
vllm/model_executor/layers/mamba/mamba_mixer.py,sha256=Zf4FM5tdpJDsZwMMehWnNOjhpXnKJRPVvoLBZoONLaM,10141
|
|
467
|
+
vllm/model_executor/layers/mamba/mamba_mixer2.py,sha256=srdZB5YXCDBAKjRxK3fkT1tPv7JopvKW9uvyqc4rM6c,22608
|
|
468
|
+
vllm/model_executor/layers/mamba/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
469
|
+
vllm/model_executor/layers/mamba/ops/causal_conv1d.py,sha256=_ZiWUKMLApKDWDH8iB_8Zw_GGGAFHDJRxbcMWQlMYac,4470
|
|
470
|
+
vllm/model_executor/layers/mamba/ops/mamba_ssm.py,sha256=bQX50q_1z93VsJc_t5gRqhWqqLn0LZluEVfha4CaPDk,14196
|
|
471
|
+
vllm/model_executor/layers/mamba/ops/ssd_bmm.py,sha256=zXsgPJWuo-VSxzqNWJKoviGj0jtO8TLrnQo8bvZGmIY,8572
|
|
472
|
+
vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py,sha256=ZRoGCYVz3LIcju1txuH0mQLbqcuqhg0muZtoPes6lK0,20837
|
|
473
|
+
vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py,sha256=87dl7hjZm8kkYcKjTk16eV0_RiYRmEgoFGbZgEhOFUo,25613
|
|
474
|
+
vllm/model_executor/layers/mamba/ops/ssd_combined.py,sha256=0Hzm4NoZseWT278uikG88FyxCzol_II_XyYG8Sx7Wmw,9363
|
|
475
|
+
vllm/model_executor/layers/mamba/ops/ssd_state_passing.py,sha256=VWtXQkc0_UMoEKx9rIDDXvbVjQ3uHdA49p9PpTqzveU,7370
|
|
476
|
+
vllm/model_executor/layers/quantization/__init__.py,sha256=TalnUVZVCDZF5x9UfXECwkeGW-o_5AZ8f2iNtTSbjkk,5042
|
|
477
|
+
vllm/model_executor/layers/quantization/aqlm.py,sha256=QfdUGmbgpk8AvVcu5GhyBZILT2naouXuj9swKMAGK9k,13632
|
|
478
|
+
vllm/model_executor/layers/quantization/awq.py,sha256=N25NTWjYJBKg540sEuB6ScaYOxQkVk_QR_6N-d_Rlbs,7077
|
|
479
|
+
vllm/model_executor/layers/quantization/awq_marlin.py,sha256=ztPXUH2uPQbts1Q4-PFYQ9ztwXV3AWr3wwxKCXL2xrg,21336
|
|
480
|
+
vllm/model_executor/layers/quantization/awq_triton.py,sha256=SNr8Xro-iH8IrPJBvGdI6R_gsV0QmNnEzWOZhx2p014,12415
|
|
481
|
+
vllm/model_executor/layers/quantization/base_config.py,sha256=uo3fpOXNR-au-FXNVR6oSXTj3ZBapvRgV5xzwKrzLM8,5022
|
|
482
|
+
vllm/model_executor/layers/quantization/bitblas.py,sha256=JqYdMguKdC5Gixwe1qO9CSyCckX8CRfgodhTWnx0_iY,17423
|
|
483
|
+
vllm/model_executor/layers/quantization/bitsandbytes.py,sha256=sCoJs5fEGPtVoaRwmuvzlOrR2AfvncfbMCSzhVfaPTA,15220
|
|
484
|
+
vllm/model_executor/layers/quantization/deepspeedfp.py,sha256=dFYeJS3w1hV488ycJaEFh9vhhYsx_L1BkVkRIULrH_Y,7141
|
|
485
|
+
vllm/model_executor/layers/quantization/experts_int8.py,sha256=ylZ29BXWbbguYfjESmAYJXcPjAiJMRFcgSGClK8_f8U,7544
|
|
486
|
+
vllm/model_executor/layers/quantization/fbgemm_fp8.py,sha256=3s4OcrfLBwWdwBBEmJhOD3qzAtXW-01jvn95XBDxbOg,6690
|
|
487
|
+
vllm/model_executor/layers/quantization/fp8.py,sha256=SOG2TmjHXtXfWlLg_MnOlMA0FBveiEbhZZkjPtzjVzc,38798
|
|
488
|
+
vllm/model_executor/layers/quantization/gguf.py,sha256=xKXwqosINZjqmH6UDxY-sgYoLqjWHhTOM3rYwN5pAUs,15605
|
|
489
|
+
vllm/model_executor/layers/quantization/gptq.py,sha256=UvvQgccjma1BWX2qiEzEChAS-oksybpCyl_Ih3PRJpI,10693
|
|
490
|
+
vllm/model_executor/layers/quantization/gptq_bitblas.py,sha256=WEZ9xLDkMTKs7wJN5ljGvNn2rMuZKF927OYlyynbm18,16726
|
|
491
|
+
vllm/model_executor/layers/quantization/gptq_marlin.py,sha256=-BvLrewcU-BV_JYrc2TWN5DFp0fxxgECls4VLfAArMU,25889
|
|
492
|
+
vllm/model_executor/layers/quantization/gptq_marlin_24.py,sha256=ofvuwNh5iMi14XVeJDccjqgmmhngH8JOGS-NfevN31c,10881
|
|
493
|
+
vllm/model_executor/layers/quantization/hqq_marlin.py,sha256=8Rzl1cD3lQF6x3_9VrULM5wE4utzsfk1_PAW8svW8Wo,12722
|
|
494
|
+
vllm/model_executor/layers/quantization/ipex_quant.py,sha256=5-XtI2qXAh_isk47Ym0tmI8hAz0xMGjLr9vkBIXXmXw,9761
|
|
495
|
+
vllm/model_executor/layers/quantization/kv_cache.py,sha256=za-3W4pn--yEnGmbNT7H_cTy7dq-qzrhyOtRL5J2OSo,6058
|
|
496
|
+
vllm/model_executor/layers/quantization/marlin.py,sha256=2kmoUfynMW0rZtCwXFwUPktdD5gB4-yerAI67S1LUCY,9590
|
|
497
|
+
vllm/model_executor/layers/quantization/modelopt.py,sha256=QxYzmdjPYnQel0IiR8Z05-UcYamAfvH0P5b9MwiCRt8,17050
|
|
498
|
+
vllm/model_executor/layers/quantization/moe_wna16.py,sha256=9U8onQRLdwVfTtf5aseOMG8_d3G93jFOIUcj7sJFa0w,19705
|
|
499
|
+
vllm/model_executor/layers/quantization/neuron_quant.py,sha256=rtAjtdeIxcydC1MVLUMNwZAUJiitTofmOQC0OCNdfTI,2421
|
|
500
|
+
vllm/model_executor/layers/quantization/ptpc_fp8.py,sha256=KvJDkBrNNBfQcgrzVOdB0NlBUQZrPgxxXizl9b4pzKk,5206
|
|
501
|
+
vllm/model_executor/layers/quantization/qqq.py,sha256=8f-hMJ2I65hrCwtItBwkNtsu2MRBmEBM67hMY-chYZA,9938
|
|
502
|
+
vllm/model_executor/layers/quantization/schema.py,sha256=vb8XZXdDPgY437o96uNlQOUDhPoGeowrlwO1b4QdugE,3686
|
|
503
|
+
vllm/model_executor/layers/quantization/torchao.py,sha256=sZS7l6lrRrBErqtW-8kwlY9bxs-87JDNmiwS3CefzCA,4236
|
|
504
|
+
vllm/model_executor/layers/quantization/tpu_int8.py,sha256=CggIxU5-tKOS-yNuzzZCurwqfH3YHEz6feir6MABzwY,4443
|
|
505
|
+
vllm/model_executor/layers/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
506
|
+
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py,sha256=U4ZRH7ltm8AvyQ0B26EhvK_aJGNPYKDrWv-AgM71-XY,27174
|
|
507
|
+
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=k8zxaxRSSKmNQCk_BrvRXekROO71F_MsT9w2cmuG2OE,48701
|
|
508
|
+
vllm/model_executor/layers/quantization/compressed_tensors/triton_scaled_mm.py,sha256=gMLaK9uSOVqYw1wv8klnGLc1qass2K-DnpWTSm0rvuM,7731
|
|
509
|
+
vllm/model_executor/layers/quantization/compressed_tensors/utils.py,sha256=YSJeIjPjkJ_dZIzKEnGGz419L7rC3HJnvhhohySbntE,7707
|
|
510
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/__init__.py,sha256=BwKgT5i-Gu1d45Z72x5PUP3Yect8hO37yW9rfUeIoug,931
|
|
511
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py,sha256=-Wi5n6G025-iZdIreWf5_yNUHXwmkCTcax4nDTG-5Sk,14065
|
|
512
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_scheme.py,sha256=Jxb5WOHR4m98DRO4y3XKJMDpT_NIp3EwkAJe8HYqwqE,1527
|
|
513
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_24.py,sha256=v0u6xXo4zJ530YWbnnJXbJFoMrjUVveF6z4HVKhCqrk,6207
|
|
514
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py,sha256=GmDa7Kwrqj7Ja5-VZ7DRY8MTcmPChOKOK7ku3gguLT8,5409
|
|
515
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py,sha256=OZWeSv-JNl0Qi8OO9xg1qCoGPuSnTNKyTb063iVWLCI,6443
|
|
516
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py,sha256=yns8-JTL4TM6boMggXk8E-yOalJgQ9O1neAs57oMh3g,4872
|
|
517
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py,sha256=Y5XH3IUwKHPA1fqI0xrurXEkdsSEnDcVNcl4j4KN5R0,8483
|
|
518
|
+
vllm/model_executor/layers/quantization/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
519
|
+
vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py,sha256=Qx5iCPjsnW69RjhPY6qRRsTWddK4gf4sbrgh6czvtYE,2879
|
|
520
|
+
vllm/model_executor/layers/quantization/kernels/mixed_precision/__init__.py,sha256=qfYaz1R-8tYyZA681HiD2es0Cefy9pyMQaRxbcNF62o,3148
|
|
521
|
+
vllm/model_executor/layers/quantization/kernels/mixed_precision/allspark.py,sha256=sRi51JVWhw-NAI2j4UZu1aM-GhD-3TGqjJq25naNQYE,4382
|
|
522
|
+
vllm/model_executor/layers/quantization/kernels/mixed_precision/bitblas.py,sha256=rKl3U-GOjP8AHYvLT7Nb7KnmuEU0SJeFHcS0I4QLRfo,11986
|
|
523
|
+
vllm/model_executor/layers/quantization/kernels/mixed_precision/exllama.py,sha256=pqVO7rcPUT28KaVAoYJ1e96JSF18scX--uz_WprnUKI,6151
|
|
524
|
+
vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py,sha256=IkHizcr5RzUPsATGSqiZ4B35GatUhVZdiBkDHM6psBc,4981
|
|
525
|
+
vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py,sha256=cRqXQU_vDoSbq0dx1F6kLA9_dhZtFA65LREKpQqpl0E,5810
|
|
526
|
+
vllm/model_executor/layers/quantization/kernels/scaled_mm/ScaledMMLinearKernel.py,sha256=EKNHGvrBoiElOpU-LOa4V0pui-MkLDcwxE9AScv9Sdc,2046
|
|
527
|
+
vllm/model_executor/layers/quantization/kernels/scaled_mm/__init__.py,sha256=tEOkluEyfDASQrKY7k9kmyqJ7h7tC1bbXl49YrU9WGA,3456
|
|
528
|
+
vllm/model_executor/layers/quantization/kernels/scaled_mm/aiter.py,sha256=i_V3XGjjUgq9UqqjG61jGongXwhoyFPSJpg1RSzAEgU,4822
|
|
529
|
+
vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py,sha256=-xzsBNkRLZ9WnMq4Iq2YvXjuSDMtmNdP2oEa4QGJIcE,5989
|
|
530
|
+
vllm/model_executor/layers/quantization/kernels/scaled_mm/triton.py,sha256=3ssC8LJcQ-iPtlxEdoAZfPngiAuCaAy3-RWuBGz3ldE,1283
|
|
531
|
+
vllm/model_executor/layers/quantization/kernels/scaled_mm/xla.py,sha256=pDL9lpK8gSsbeUTZ2KpRx6n5G5MvgxhYLVP5FEAA8cg,4303
|
|
532
|
+
vllm/model_executor/layers/quantization/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
533
|
+
vllm/model_executor/layers/quantization/quark/quark.py,sha256=n2bagwPQ1X0En9wVRUDgiu9G9eR7MfJBJ5lqFtj3ok8,16645
|
|
534
|
+
vllm/model_executor/layers/quantization/quark/quark_moe.py,sha256=N66KTUle8IvOpQt8lgkikXkGp0n21pS5PMVln44jWh0,10782
|
|
535
|
+
vllm/model_executor/layers/quantization/quark/utils.py,sha256=9VCOhQ_qNa31Se-LBuV11m1_HXNV4Fp8UJU9q0pRljM,3558
|
|
536
|
+
vllm/model_executor/layers/quantization/quark/schemes/__init__.py,sha256=uEyTk05b5tNt-oEmSSkySwnWJ87o-oVphnSJoTf3MAM,221
|
|
537
|
+
vllm/model_executor/layers/quantization/quark/schemes/quark_scheme.py,sha256=f_5qRmZ3SZvIgBWiMlNjfu-WN1uVwSvfLGFe9QS25R4,1491
|
|
538
|
+
vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py,sha256=2PAM1IXP9h5SvGvixnq9uETU4MglVV4iT0SJpq9f3K4,6138
|
|
539
|
+
vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_int8.py,sha256=8vBLb1dXAQ11hfK4Bzq_FMXtg6idTF2MWZ0dQ3GnFQU,5404
|
|
540
|
+
vllm/model_executor/layers/quantization/utils/__init__.py,sha256=VbdLnvlGCFpa2o9SRnEMflRyJ3NOXd6j6d1fPN_xm5w,166
|
|
541
|
+
vllm/model_executor/layers/quantization/utils/allspark_utils.py,sha256=OCRUcTMINyOKCWe0tik98msBXKTGXX2beZQZeTMyXz0,2191
|
|
542
|
+
vllm/model_executor/layers/quantization/utils/bitblas_utils.py,sha256=z8mYjocXD6MjznFURj9RdGGFAdpu90oheDQ__IHk480,7798
|
|
543
|
+
vllm/model_executor/layers/quantization/utils/fp8_utils.py,sha256=mPRip_w_O2frmjtBEqhBkg37b5zF7JcPrccXpgM3ioc,17523
|
|
544
|
+
vllm/model_executor/layers/quantization/utils/gptq_utils.py,sha256=ZRB27yUulJhuTEMbAncFwVAj65PQnnvLVI52VTfHuUA,3802
|
|
545
|
+
vllm/model_executor/layers/quantization/utils/int8_utils.py,sha256=lMQcnPvRgmfasNxPPMweOvdMNjf69wXqkGIgbjNoSdE,14169
|
|
546
|
+
vllm/model_executor/layers/quantization/utils/layer_utils.py,sha256=HmjtrTYHbc5u6kKfE8cnu5O2Nqg9ZyRy9BAGHYgenDY,1562
|
|
547
|
+
vllm/model_executor/layers/quantization/utils/machete_utils.py,sha256=OeMLMQDbifbxz-HRYmjrC4RnMOutos17544P-h_dNE8,1074
|
|
548
|
+
vllm/model_executor/layers/quantization/utils/marlin_utils.py,sha256=I9sz2ajMJ7dAhsyQNPUPxN5ZsPtIrmyp8zMtEQ5m_RA,16540
|
|
549
|
+
vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py,sha256=bASNhLOrC-7AaC-xGUIQiDqUx0HRWPX1liD4iqzjas8,3672
|
|
550
|
+
vllm/model_executor/layers/quantization/utils/marlin_utils_test.py,sha256=HkXMXXwpP4mfxpWI8p6BRjoq2ipVGpWLe1eWYSYxi-c,5311
|
|
551
|
+
vllm/model_executor/layers/quantization/utils/marlin_utils_test_24.py,sha256=zk_DLYsG7n7Y9mvtEkY89fXSH9kPEsuPLfbX0W38DEA,17558
|
|
552
|
+
vllm/model_executor/layers/quantization/utils/marlin_utils_test_qqq.py,sha256=ybGP1JEn-Mti_vvEs8Ch4cl1NfRK4TMDxdYWEYZSvxI,4101
|
|
553
|
+
vllm/model_executor/layers/quantization/utils/quant_utils.py,sha256=XOrnnp5RJdyfggUTrMsjR4OBPaNhfTd55HvoZw5plt0,19458
|
|
554
|
+
vllm/model_executor/layers/quantization/utils/w8a8_utils.py,sha256=Jl7P05H9N2c5WA1IAze-6cnbWizj0-kmuwp66Tfbvl8,16722
|
|
555
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=t8TaODfMF2Nq0qg6KOc8NSTs7m90Jcu6Ih3BXUvFb04,3799
|
|
556
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=CNI-I9ncqHJ7ukpzgyxdJtz0bd29vsgC38tvMM6TV1U,3803
|
|
557
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=CNI-I9ncqHJ7ukpzgyxdJtz0bd29vsgC38tvMM6TV1U,3803
|
|
558
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=-j7Xyk4xFaiAD90FeH4AqRSnS82f4owKRGMHbObrrHQ,3250
|
|
559
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=sW_T-BdLbjJoFqlr-B5f9emF8E0IdKfy_1wUSIEi55g,3253
|
|
560
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244
|
|
561
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=akDNAjUZ3EXBznF9w6qUcpXxaLWq7oXnX5jy-R9cleI,3246
|
|
562
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=JAycl7EaUZtmCoXMjq4JwKXCeXxZ6S4Ts_DricRUw_o,549
|
|
563
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=q5KZyi9T-l07P3r1u9i6-Dpw89Upjw1gpTp3f1CluEo,3799
|
|
564
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RTnTPFQNg5JULbPLWJDTRNRZHI7FsrTxqSDkZfSbmzw,3806
|
|
565
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RTnTPFQNg5JULbPLWJDTRNRZHI7FsrTxqSDkZfSbmzw,3806
|
|
566
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=DLCfW5tQ9k74AGZ2yER1etP-HgUGglPp_woJiaPuxgQ,3249
|
|
567
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=8v9mdWPs1eXczo3iwFrNnRo2LF9wPU4Scm-r9bL7Fz8,3251
|
|
568
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Qoj9rLLRDbKM4IKBCXvN8RcxzSmNPd0TQUiM7CXDqHI,3241
|
|
569
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=7OFCbBqqEA7vQ1oiygfW-7Tqqx8OJATaLujtcQIgyTU,3247
|
|
570
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4D3Ku4y7BCVEJzueKvQC_KvOR026w3ONWsxfsA_YrEc,3249
|
|
571
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=iJZ_tAzoYGUmg9ltil4e8vzKlKi980yTmswEMWqV1Jw,546
|
|
572
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fDomA7uBQKX8kbO_4MFcoBwHhIR_7sOkngQPv6cQq4Y,548
|
|
573
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ucrZBIN_ivmmfMAvkT40xQpH87LdQK38lZbeLWMyV4M,3806
|
|
574
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=zDnVqBqgT-nLkz_Cou-KTPsNIVh-YbTBno9L2MgdRTM,3803
|
|
575
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=zDnVqBqgT-nLkz_Cou-KTPsNIVh-YbTBno9L2MgdRTM,3803
|
|
576
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=zd5cMYrxQ6PD0jKpd3YF6ThT9RGdqgEQnCW6F4W-r4E,3249
|
|
577
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=CjO6dh_qt1iTu5kYRs98tTLL-W6FOzLO4AESMUFHz5s,3254
|
|
578
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=7v4tp0RaT4vxF4urSBrkK5FR_5ikeFQ1htF3DwDl1lk,3249
|
|
579
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=M5F5wzSmFokEm0X8__ogLvdE1QVC6EW8atqq-kp3rVA,3253
|
|
580
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0J2MFgaLkv-mfVE5x363lgVKYU6miLG_xRO3tJUga_M,3249
|
|
581
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=983yfFeeo-BClL_H1g-owXwbA6t0l-kREiy7kLURUMw,550
|
|
582
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=C2eM8RudmP-qXEf_Apg-qcB5n2Ugxf8-7uG8hQDSt1g,3801
|
|
583
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=llI6PWlSDgQf-ouTDXkFYOoSz9u3bzklwBtZYY_fWVM,3807
|
|
584
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=llI6PWlSDgQf-ouTDXkFYOoSz9u3bzklwBtZYY_fWVM,3807
|
|
585
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=q9HUcoL0cdZCOWZ8MKbcpR8NSy5iNEBq6NPTaHLgRB0,3242
|
|
586
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=uJu6Gv4e80vxVrDyBo8_y47tOV03RmWVsMIWQ-bbW6Q,3251
|
|
587
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4ubbhwSFX_XbefRLEkLoWxJkcetFWPzsszPu0X3_Wrw,3242
|
|
588
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=euiKvhb3DXkvPPQJLqNE_xN2evsTOoZnVIiquyN2Cm4,3246
|
|
589
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FhyniGTx5QeCuVrBSVTQys6q05Pr5lPEcPykpAX7Iyo,3247
|
|
590
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=pLQvMaVvlet_JenEz25mxxplAaHNisl6SFTSZ7lYP2w,548
|
|
591
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=uAa-ZQmASwlqZbr1l1CM6FyJI9irNdLBzc1U5Hdyw1E,3802
|
|
592
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RnN7lfu15CE-4ywMjAbEz8wWV743AP-1Fq5U_j8EQeI,3812
|
|
593
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RnN7lfu15CE-4ywMjAbEz8wWV743AP-1Fq5U_j8EQeI,3812
|
|
594
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=cE3BscS_zEtF_m_jr51IPfpaZZgIEojmhTHsrb9jABM,3260
|
|
595
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=SScyo-oYCBxJR9C7ZIKu_pJJNiXdpT13kYe26rddvPQ,3261
|
|
596
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0v17v78pETXv6S2ZoibekxOVhiTmCm807DYG4DONUck,3259
|
|
597
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=I44PvJj758-sw_fCOVROLTpG0NQ5_5PCYyQcpZC1YSY,3259
|
|
598
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ulvOEAFO8c-UOa34FEZrjOkCR6ovhJlfFFDhmaKIBiU,3245
|
|
599
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=BiZowqExbvXftuE37SYcheOdtYX7Z5BEXyykJ6GbYSk,3254
|
|
600
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-CVHqClROli9FWe_FnlnuAG2LiFivDFK_nghH6t-BWc,3261
|
|
601
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=7ok0uooTihvRSckZMNd6jInRvht_xkC5posHO66ejqc,552
|
|
602
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=h_Z6wBKdSGBEo5BfQKaxuFlxztrnbbZR0pkcYKv92sk,551
|
|
603
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=k63VgniyN3Rl_-h1hYmT_q9QZtSFqQmXBqhEXJQkxqE,3800
|
|
604
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=icswqRYUsUdoQMrv4YIqO46GG9BzepmBJmnTre9-VjU,3800
|
|
605
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=icswqRYUsUdoQMrv4YIqO46GG9BzepmBJmnTre9-VjU,3800
|
|
606
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=sL0E4zZzb01g6GHaTCXltg20uSbthXHSJFQ0SaxZ7PU,3245
|
|
607
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=MZcJz7BjwVOHHHxvYqGrWw77WnxslYhwW80bZw-jSKQ,3249
|
|
608
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=GsLoYkaZ2p4Qu0Coj-X90s7JWyfZBOloIHPlyNKSIes,3246
|
|
609
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4--7YWnJYUK4XmQ2zZ4M1ZYdKvUkET0VkNgIBn6xaOA,3247
|
|
610
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=NjEA2QjOVXyOaVSMPch5qa1Dq3igbW7MmE986-7taW0,547
|
|
611
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=21Buh2aiGTHjpW45Rm-TwZD8MSaAy8NMUrK5l_hGT5k,3803
|
|
612
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=P8p-dZZt_D61G6k3PgUetF01xzTRmCDJAnqCIsSDW8I,3805
|
|
613
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=P8p-dZZt_D61G6k3PgUetF01xzTRmCDJAnqCIsSDW8I,3805
|
|
614
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=8zuJhFdd6aXREpiqPFhIKEFWA5lgLVGrG0-a9UXcBqk,3262
|
|
615
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=_42bDZX4VODErI6OL-NrWja36iNHC4DzgF1l5Mk67-c,3248
|
|
616
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Zn1TvhAoPOv0zQBYHOZhwdDw3oqyxm0zIa7IJkTCHpo,3247
|
|
617
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=H9wONEU0XXSxOJfkx5UkS8Ss3A2QCp9G0XNoJEqE9nQ,548
|
|
618
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=2T2TYZhXgC97slH92HQ8GvZS3KuUt1ZiC3RtudPVEPA,3802
|
|
619
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=b6_bhUuQrI9HYvvwmAvUYh4v1GZ8w0sjApOmwuj_t8Y,3806
|
|
620
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=b6_bhUuQrI9HYvvwmAvUYh4v1GZ8w0sjApOmwuj_t8Y,3806
|
|
621
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=yqjO7zML7EseBJw6Bn5MTyHeAitkPsl1dndXeL6Rn6A,3257
|
|
622
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-nQIhKAOVCQrxLV6HDlcD0V8HMWvqrv-vyiORVU7qls,3244
|
|
623
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=KKmCvNh5T_qfD8v7JijMqXxQ5L6-gRX7oc6c5re6EF0,3248
|
|
624
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=M3nwpZd2-0w263ywZt9gaw53z7MN673T5tl4tc43Ntk,3249
|
|
625
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=H9wONEU0XXSxOJfkx5UkS8Ss3A2QCp9G0XNoJEqE9nQ,548
|
|
626
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=KmEgJ7zP2Sr_7GsAfL-12_g2S2a2wVpnxgCiF5dFiLI,3802
|
|
627
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=J4SXwpsioBRdTXOaj2OjrdNrEuW1NF43cLds65UWzCY,3808
|
|
628
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=J4SXwpsioBRdTXOaj2OjrdNrEuW1NF43cLds65UWzCY,3808
|
|
629
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=UjBOmVqYynBH3dJVuMJXjKnuZ6LssohzzEBpLBG4_G4,3256
|
|
630
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=8BJsjc2UUYdotrIqwyzisjrq0wcyW4jnTo_M8J3qYwA,3263
|
|
631
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=vLoV3JMtvHOKpR5D1BeCQPMuYlWUAlrXu54gByNkwKY,3266
|
|
632
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Mtw7a9BSspj2TzC-aPxE82o1LEvwzgbUuIofwRxUNA0,3263
|
|
633
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=N0kCPHvybNK-HvMO2EqNDLkj7m7WrHTl-3AD32LBD4k,3248
|
|
634
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=mjh-AgJN_IoWAc1uwhUiB1lE3ufAPDf-KPP6vUTrDKw,3251
|
|
635
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=NHdx3tZnfLF7NplswMzcTRbQEQFLtChg4rd7GU9lMbM,3262
|
|
636
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=mcF12eQTtGxocrVIA3I98NHd1NLd0-8EyfXtqDgv0PM,549
|
|
637
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=AThoa7FUcGdNXYB_v9iMpBh2X8C0iLfc7y-C0xy2cRY,548
|
|
638
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=MJgIvZHf01ju8IWEVO6vyMedy5OTZxDpzv6A7_8W-Tg,3813
|
|
639
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=AT2yrMoTvmoizi4sxwLtiULZ57P1CBhKGg9-6Gxnuc4,3819
|
|
640
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=AT2yrMoTvmoizi4sxwLtiULZ57P1CBhKGg9-6Gxnuc4,3819
|
|
641
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=laYeH4w0iZOj2Yg3vDgtKoroNQnwBEX4GUGLrO9095I,3260
|
|
642
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=TWcPDZ2miQMD6OWDC1FteRs80ND9RC-oJL3PLVmJbtI,3257
|
|
643
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=cPtr1UJq_B-dTqgMrVm8ptiYXA6qOy_F8rs2f7ljuEI,3811
|
|
644
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=cobt_ZhR3dt2CySr12bGPVwn1oS98YvGLdIh9H8BDQ0,3801
|
|
645
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=cobt_ZhR3dt2CySr12bGPVwn1oS98YvGLdIh9H8BDQ0,3801
|
|
646
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=6Z7kIa14RjVq3ek_C15q5mUu1IrY2r0OP8S-_pm-MYU,3252
|
|
647
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=r63SZkUJJV87B00hAX074_uaC7wwQXdurlJsB1jUA0I,3254
|
|
648
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=hL3doX7zzxld3UcS8p9ACSadDaE6t3xXlYwM7X3GOeI,3252
|
|
649
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=xBhxdCFf3waTUsLxJxA54R90zODbC_DKI3XXBVKjKRw,3252
|
|
650
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=2ks7TQUULAD-Zn5i69YHo_2hpmsmxlocdYmJccSh2No,552
|
|
651
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=eiI8X2fFNknJmiT0uHbzSaEKQwwZk5bxn676gNvcyg0,3802
|
|
652
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fQQDJMlLdYsY5Cosg5HkRzvrJ4asjQmc0WGgoD4bC20,3810
|
|
653
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fQQDJMlLdYsY5Cosg5HkRzvrJ4asjQmc0WGgoD4bC20,3810
|
|
654
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=O_SV2vo_oaABfT6Mxqcmo12pnhKtfX4TnXfe02OcHJk,3254
|
|
655
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=g12Xkurat7oUS7LdS9pHLKFlur4_FaMGiGBvdq-iBCs,3242
|
|
656
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=EWLxbWncwGJyL-dV6EO-s8kk25wfYrESa0STjCnzD64,3244
|
|
657
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=tFdrY5nADmXUlShdN8w8Jzkxuj_RPLXCRceX9FhQ35E,3251
|
|
658
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=M-ewEHbgHLBLYLi1Hgz5Pp4kypnUiCRo0ut2scNnvDw,550
|
|
659
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=zTzLbdff09HwMuWlWpoAIgQZ6NEjsFXSF0Y5z4Be7Ig,3802
|
|
660
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=dcPHbYEbz8T9SM5-a5sP_K_npDkhH7u0KM9aiLn9esE,3806
|
|
661
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=dcPHbYEbz8T9SM5-a5sP_K_npDkhH7u0KM9aiLn9esE,3806
|
|
662
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=TO2qRGmp37v53Zqu8Joeq_BSbtwM_mpVoozGyoNg0-o,3254
|
|
663
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=QqijmgLqIoBUxRPnuUQGsoQASRFRMsCVQKTjEjGecVo,3247
|
|
664
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0xquf00fgfrDODpaxyre0VDcjqfzqExj939rzeJ8pMo,3244
|
|
665
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ipg8iK8w2ySRe1Z08YJUWAHX43rvkrXpR6svxRhSnFE,548
|
|
666
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-wuzdNXf3K0jfFQGB8nFSyoSZ4BfAvIkY10k6FdjnLY,3800
|
|
667
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-o9QqqQQ-9kRVCuDOUGBuKXHRTd0asGTzrDcHGGYJLQ,3799
|
|
668
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-o9QqqQQ-9kRVCuDOUGBuKXHRTd0asGTzrDcHGGYJLQ,3799
|
|
669
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=DbemSQdo2h5vGjSNB6Fovnn-aAGfjti04Bp-5KxLALk,3246
|
|
670
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=6glWpljtfiuspJv_Esg_LWCDDQ57d2HETsOIv0zr2Ec,3249
|
|
671
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=qG6v3n3qF6LE2DdGT-mDIXecZ1a7vg7p3QqXYCMX85k,3254
|
|
672
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=EgFTGyW_YuDwyEDUCoGglyI1ETdj9J7AR0UfJ86jMoI,3249
|
|
673
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4--7YWnJYUK4XmQ2zZ4M1ZYdKvUkET0VkNgIBn6xaOA,3247
|
|
674
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=ZfPPlx0qcuR4WjaFAE-W1QZgSPAMf3NyGcpvQIvyFMs,3245
|
|
675
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=NiorJgOotxkQcP49ID3z5al1UA4QQDrT8MvbCwAWL5Y,3248
|
|
676
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=QgSlDAhlB2W4bzTd2O98UL-C_IKfJm_cVmQz8FqsLF0,361
|
|
677
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=i3wy_CBO7BQQVhKReRC2F0PaRIQDdN9F5lJ7kD0xe1I,548
|
|
678
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=QpkqpJnyjuHH8Zo4U4QZgehUF2F2uQDZFb8fdhixXWI,3794
|
|
679
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=wv5GjGAA-NyJ41SYdYG3tPAgwf6JK7Zf6SaWALQ5c3Y,3806
|
|
680
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=wv5GjGAA-NyJ41SYdYG3tPAgwf6JK7Zf6SaWALQ5c3Y,3806
|
|
681
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=RRMNeM_qiHvlUTOAeqwgs7ukSoAZSlK8XN4z8hgWl0k,3258
|
|
682
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=brB0-FFr-Sv2bdrz4DQJ_NaFhETctf1g4Yzwj_Fcczc,3251
|
|
683
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bPQWtvaJrzOOIgI-R-MIxs_f4yC_FobkDydu3OkOFtg,3252
|
|
684
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RYLh-Uim9U2_djLkFwwpV0rNQHik0tZHzecuj1_hPLw,3248
|
|
685
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ZRgiuHZ2SFC6u-WV5DGwau4k1RiPLI67eENO0e-5Ylg,3253
|
|
686
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4EzbnLWHVwrjyKYPMcDxbxM2o-krjlT0YXvM8oPH5Cg,549
|
|
687
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=OFgOtRkUHwyOT7Hk_BQft_WzuZOwbhMSLP65Fbr4goA,3799
|
|
688
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=AOu05da2LZbCzD9SKsrgnzH-ih3CdXsRIdJc_4J1lps,3807
|
|
689
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=AOu05da2LZbCzD9SKsrgnzH-ih3CdXsRIdJc_4J1lps,3807
|
|
690
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=qzmFm2pqxphir1LBrycDZp5JA4It8OdQeQ5iTrTwLNE,3253
|
|
691
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=2UyOMRMdbvHt6WlZdOKALm3Or0eMCx7vvwgLiCYyoOs,3259
|
|
692
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-hP_P8NM0K04mGzTmpGBNibQ5xxh5gPz5WtoMXhoz1E,3253
|
|
693
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=vEU4_YOMnLdYFf1BkBEdFbGRMG8KLhsO_t0gv7vaO4Y,3244
|
|
694
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FB5Le4obvPoCgFSnC_3-Uh59n-Mt4Rol8saXVcK3RPw,3252
|
|
695
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=k1rzpgm9m19AHf_HPQcNCuSBtAwFgMePUYB1jZeFyYY,549
|
|
696
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=9IbzTwLRgTCfFLSvjEWKiajCjG81R-wTljIV2zUYUA8,3809
|
|
697
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=B4uEwuftvaj9gHGdoDBnVhxbNRmzUtzu4LH0u-O7voA,3804
|
|
698
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=B4uEwuftvaj9gHGdoDBnVhxbNRmzUtzu4LH0u-O7voA,3804
|
|
699
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=ZTPWtJA3JBL2jhy7C60RdsntKCN8oQ-DDIL17ok7OB4,3257
|
|
700
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=mokCWoXdKi8p4mLYqgljjwDRJWK5I2oF6_MJuObi5sU,3254
|
|
701
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=kLviGvVngpgOuelfKtvv9Is7MWQ89rGxlomMRP6t0Ic,3250
|
|
702
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bIVRtaaHThozH54VIte0Nk0sOGV67K4s2YZUE6QWx2s,3252
|
|
703
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=_YXzQ6N3QpF3Ou1Fy-51YyL-J3i5gOBVCgSM42vOT9I,549
|
|
704
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=csaz7AaVDTvCuzaptN-e8K1PNuIwZm9OwnPSJydHI90,3803
|
|
705
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=scfO3_ncCtyrqcYSnIoAZTMfvBzjB4o_0_bdiiVSNh4,3803
|
|
706
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=scfO3_ncCtyrqcYSnIoAZTMfvBzjB4o_0_bdiiVSNh4,3803
|
|
707
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=CE1wRLyFONo4_icKO8fcTTX-5giKNJ9_1F-2mr-lGQU,3257
|
|
708
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=JdUaLiMmf8oEbwuhPHMIncvWzXS2SxOEgfM80ZjM7l0,3259
|
|
709
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=vlys0Zi_CaaU41OHGbWSBtbVglFi98bgqEySBMc9Sdg,3258
|
|
710
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=YWyByOlKSqp5lbcUa8eu6N2dHRKJqJDbCDSjdDQJngg,3249
|
|
711
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=GY9VBPi21K6vJlF1NOEzCyqMS7LX3xq5dRxrK0jvIHk,3244
|
|
712
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=8LWF55ZPjrOY_sEdRGqf1eLcTNySgUiiWNWsN4EGxLY,3247
|
|
713
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=j5PTW0IC4Z2yQIygcdICaOsvb639u6Mv-ZpJYkrBQ2k,3254
|
|
714
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=_Cc0EqUzl6d93OxWJRWYbYpEaTIp0glJhdfV-GSAi5M,552
|
|
715
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ZSHvdnC2vOXI2HPW1iNI9HdihoLcNYlRLMF85pqjWZE,551
|
|
716
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=SkyMLsoxGoHdO4kgTerihone7eEi0nmHlrvZUI1I_V4,3804
|
|
717
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=6Jo2hw2gQpyiNoCRZpGItu4MBkYytzdW-VggWUC4fPE,3804
|
|
718
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=6Jo2hw2gQpyiNoCRZpGItu4MBkYytzdW-VggWUC4fPE,3804
|
|
719
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=xbDfUYLphVtZWJojZWODlxGMCoiIgxn4LsnD9ge3r9A,3257
|
|
720
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=hqh8TQw3t5hPM9u7rmHPuaMjwgxmQ-Zt35fSTgOS0HQ,3261
|
|
721
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Ggy4hejkcWjiw5Bi-wGzSP5JLVuvOjip_rbjXFBJZbs,3257
|
|
722
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Xy4mgZx5iiEvuv2ydO4dFNIT8s0jgBhNHE1vu93fGJM,3250
|
|
723
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=qKG9hmaxN_7tCB_06L1dh0csxs3TGeya9B-X6W-tNhg,3245
|
|
724
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=jb7vGi1RJefImkT3BZU_9iOkiCulcd5oDjxpVSt7big,3246
|
|
725
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=x476nFeltB_2iO9_6y-z2P_unAbh7ghLPFi5z2LOTOo,3253
|
|
726
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=_Cc0EqUzl6d93OxWJRWYbYpEaTIp0glJhdfV-GSAi5M,552
|
|
727
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=TWpzs48j0QwApAsBWt3iIlu6cqR46Meslyp96MOANcc,551
|
|
728
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=i5b52A1Oe8kCdPrPLBGud7OMHm8779JD0rBocYO_lo4,3797
|
|
729
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=U20Q4JwG63kU-6cc241VHGdpettCWbBXRJ9EZ-fbkqA,3803
|
|
730
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=U20Q4JwG63kU-6cc241VHGdpettCWbBXRJ9EZ-fbkqA,3803
|
|
731
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4uWiQMh3cZY_EtLA0a3PU8Z1VCunF2PpolTPYeP9Rjo,3256
|
|
732
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=D0moiKqS73oril32iNj5gRJUWpT2SZ5jf-ZesUZnNv4,3254
|
|
733
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=toHzCprq0KetQI0-9IrLYCIm1bQ0nSeP1gXArU0GogI,3245
|
|
734
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=N37dUL_J2JVpgLFlnlz__Ck7Z4njROnNAO8V2oiDqr8,3253
|
|
735
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=pGZZj_gZms1T9Zgjs4tbIm90LhbEy1UUkkgrto9jPts,551
|
|
736
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fqnjZCn0gbY7fO9JwZOHMYJJHe8gceWhWCZOFPRUlYM,3802
|
|
737
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=OTZt3ell0OZ7Cg5L17K2NPU4UwayAkTihV5HjUmUiAw,3810
|
|
738
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=OTZt3ell0OZ7Cg5L17K2NPU4UwayAkTihV5HjUmUiAw,3810
|
|
739
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=LdtOyXsA9r18GiFkmDOkiRinsDSZBZ8NYapL59EZ4iM,3264
|
|
740
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=07GarBHmiiYkyqn-qxEtrAcgCETuUbqm6HqlbH9yJi8,3252
|
|
741
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=xMNxtLL_8tyg4TWSt_llz_IJ2qlxc2NEwhUzhV1VsG8,3252
|
|
742
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=kEuvCsW3YNByF-DALYqPZpW3TL8ZbtQ5gUNq7-8YvZ4,3252
|
|
743
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4uNqB71a6ctZ-c4tF3r66vOsHFrqcR28g_UWy0N8iBo,550
|
|
744
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=QkrfZ69jxW_mweigtHL5R0Sv_WcSBp7wjFX75G9kbHw,3805
|
|
745
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=xMGmoN2ZTjKQBZS-k75mFTPpAEbPR3kyMwqZVtgbEiM,3802
|
|
746
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=xMGmoN2ZTjKQBZS-k75mFTPpAEbPR3kyMwqZVtgbEiM,3802
|
|
747
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=PD4AJYCkHfy2ivv9baMouFXzBTy0eKMumbAfxfm91HI,3256
|
|
748
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=iu8M35YR-RDpKWbjXSRzk02sW9nr_dtbhalfLSNtxNs,3251
|
|
749
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249
|
|
750
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=41m0bvskFUzVtlr_yppBr4PZ0cVkqHvy9Hrc5pUCUyY,552
|
|
751
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=2VxMGfWtxTzXcF0bP3d5s7rc1cKb5TNBAn-WiCKAngw,3804
|
|
752
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=VtQGO3fEiyhbKG4sl07cuVc6id2EtKeV05ozLmN_ENQ,3807
|
|
753
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=VtQGO3fEiyhbKG4sl07cuVc6id2EtKeV05ozLmN_ENQ,3807
|
|
754
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=W3cYFteFIZLu5c1K41cOh4_-WZzFU6-jGnZocDzmKaA,3796
|
|
755
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=HIoWSUgAOcNaK2kj2YwDjDa23PzQVTT2C2ePW985Ovw,3805
|
|
756
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=HIoWSUgAOcNaK2kj2YwDjDa23PzQVTT2C2ePW985Ovw,3805
|
|
757
|
+
vllm/model_executor/model_loader/__init__.py,sha256=SCcBIQqgBnp-uob0RHRC2La8mElcfAkXaJ_jwPLbxWI,646
|
|
758
|
+
vllm/model_executor/model_loader/loader.py,sha256=xYk5bYtTfU00Cf4JjaVRpFPlxgga2dNEcPDl343dtQU,66049
|
|
759
|
+
vllm/model_executor/model_loader/neuron.py,sha256=Dn0Qwd-YCyAjaiYjJFw5uoydNabYbYiIe04vnj4y3As,9686
|
|
760
|
+
vllm/model_executor/model_loader/tensorizer.py,sha256=IrDTxwTz43NIJxiWJ-Ravu3iw20O3Opv-xKntAW4oUs,20260
|
|
761
|
+
vllm/model_executor/model_loader/utils.py,sha256=44ASNuDlfDC_3QsHCBh5kzrlYMOqP5dl0VgtO8vojlI,7356
|
|
762
|
+
vllm/model_executor/model_loader/weight_utils.py,sha256=Jp2te6DJ0UzJINDnbA0mUBbI6nLTNyknWAt5J4Zcf5g,28909
|
|
763
|
+
vllm/model_executor/models/__init__.py,sha256=VzcvFDQ-L55rengF3g9oFlD02efze16wV6pIYiAH_RA,863
|
|
764
|
+
vllm/model_executor/models/adapters.py,sha256=_GZXxOBj2I_xJCZlnFjlc_E8zlI8vzBb9GREAtsxVnA,8257
|
|
765
|
+
vllm/model_executor/models/arctic.py,sha256=AIV_GtER1fvdJjoI6R80Ksc0YogBbOLG_HEJOB5vW5M,24491
|
|
766
|
+
vllm/model_executor/models/aria.py,sha256=qyQl3ZWEll6pJIIOg3KtKOcnDQmCpp9wtkVeELkkDKQ,25583
|
|
767
|
+
vllm/model_executor/models/aya_vision.py,sha256=MokcLQ0bnMcyJV2ymu7QPTYn8AfpP83_pw0Obl2tJJw,18719
|
|
768
|
+
vllm/model_executor/models/baichuan.py,sha256=370cX3NY3GgPZavmDFtxaTWKgC0A6PNqQ4WX57P5OzI,18660
|
|
769
|
+
vllm/model_executor/models/bamba.py,sha256=vksG42qQ80g0NkdoqWVyG7t5O9ZtZAFR-fxqAiDbbso,21325
|
|
770
|
+
vllm/model_executor/models/bart.py,sha256=aiDRpWuA7mApgJ697hFBLcTW82OD-8xZdUst0nhwpvY,33800
|
|
771
|
+
vllm/model_executor/models/bert.py,sha256=bZCtgl_FB--ayGiXQcQOfZQMxiB1VIrcq_amUaxakCs,28166
|
|
772
|
+
vllm/model_executor/models/blip.py,sha256=Izi6sUm8nw_YxUY319DHbfBqf2l24y2ItJi-wF4e8ZI,12318
|
|
773
|
+
vllm/model_executor/models/blip2.py,sha256=vljxoKINHPc2bS9EjrqdMthEcipShmZPfoDuLv8jRgw,25878
|
|
774
|
+
vllm/model_executor/models/bloom.py,sha256=iUoBUbxCkiUtxt3iVcFbX9S_A-G3wxdjwL9adY20pDU,13977
|
|
775
|
+
vllm/model_executor/models/chameleon.py,sha256=eFc06QWJAPosLN17mIQgGKtrBYoGDldvKEF_Xo-kM0w,45367
|
|
776
|
+
vllm/model_executor/models/chatglm.py,sha256=nLjpyX6nDCiznwhwqgKRlEOmXBOmd9-sq0KGkLwBrTM,18376
|
|
777
|
+
vllm/model_executor/models/clip.py,sha256=wi0JtGdnVlkyZyImEiXgDwYkvxOHaIAIOfiMntsCn-U,15085
|
|
778
|
+
vllm/model_executor/models/commandr.py,sha256=4E1HtH59Meq7z2qJKn-yBtjw4pkWIb0IhuGQHkX-cds,19156
|
|
779
|
+
vllm/model_executor/models/constant_size_cache.py,sha256=tfXeoXB4rKcq5MW7T2WN16V1-4GUEnHJZjDKEdyTl_k,5858
|
|
780
|
+
vllm/model_executor/models/dbrx.py,sha256=umk6_ODzf1l_i02dYJB5j4qga_xlT4a4O71Xi2yX4Ks,18260
|
|
781
|
+
vllm/model_executor/models/deepseek.py,sha256=1WAj2Dqp-KhsJO3qsmSQMkpiNCoVYReLWbE3htGuTaQ,19791
|
|
782
|
+
vllm/model_executor/models/deepseek_mtp.py,sha256=OICW9sPppw5DIUFCflJAOU77bzVaP0Sz_P312kgDdNA,10901
|
|
783
|
+
vllm/model_executor/models/deepseek_v2.py,sha256=BS1g0F6HAaO5NzDSbJ5xZBC3Kmn5u0kKklawFsgrkGk,35035
|
|
784
|
+
vllm/model_executor/models/deepseek_vl2.py,sha256=WNvgAMB-Rqfqd34M7Zd4KXUvxFuhXkUKyJA-O7SqGbE,25191
|
|
785
|
+
vllm/model_executor/models/eagle.py,sha256=ZMJY04MAj4EWB8pLTHN8chWAaDQxd5qwlsbYy86ZUYk,11175
|
|
786
|
+
vllm/model_executor/models/exaone.py,sha256=HgACkkML5jAvbIWtLYsjkYqL2D7OyeLpxWdFjw2mSlQ,21044
|
|
787
|
+
vllm/model_executor/models/fairseq2_llama.py,sha256=J983ipTj1VNjEtr8AO5zse78kpdRqnL2WzUIC-gRZSU,6489
|
|
788
|
+
vllm/model_executor/models/falcon.py,sha256=dggYGIXA2eayEaBMxnwxe-QCVAkxfbR8dNpPH_vUoIs,21299
|
|
789
|
+
vllm/model_executor/models/florence2.py,sha256=4ybjy_3UJMgrReWq3fEhLobdcxtiYGiRs237PznlvjM,39211
|
|
790
|
+
vllm/model_executor/models/fuyu.py,sha256=0Skwsa-Bqjye6x1Ha9LkC7QV7WaAz0lHXjEwRoOHZdg,14240
|
|
791
|
+
vllm/model_executor/models/gemma.py,sha256=1QQr4_hvrIw3WXWcKAFsBD4BrwXu9jCRfCc72x8UBAI,16224
|
|
792
|
+
vllm/model_executor/models/gemma2.py,sha256=R0pjkiNZLgzCnzs0nLCS5x6TfHpNCfpzI_l9D2Ywn_c,17455
|
|
793
|
+
vllm/model_executor/models/gemma3.py,sha256=lFhtYrcKTAmZ4sDJIE8h0XlrN_6b4lux1-9k5G4F0o4,21666
|
|
794
|
+
vllm/model_executor/models/gemma3_mm.py,sha256=K-zpj013BHzybVzYLlNVaJZRHISswIhHVvRrNXw6hX8,25837
|
|
795
|
+
vllm/model_executor/models/glm.py,sha256=kI2a6yL-Pa91knqI7SHxgrV5kUlACHATeiIWy_C14H4,990
|
|
796
|
+
vllm/model_executor/models/glm4.py,sha256=pmtD41T0NlfE95MXSvMpBzGz_zkr0FROiAUfl0PKAhg,11780
|
|
797
|
+
vllm/model_executor/models/glm4v.py,sha256=Sv1zWlKhohg-KiHBSblhxe4R52pMjW-xGlg6HWDbM7s,22106
|
|
798
|
+
vllm/model_executor/models/gpt2.py,sha256=WtErZpi9ldpe4JG1awFO8R1mctJIZXd71QkL-odBxFU,12363
|
|
799
|
+
vllm/model_executor/models/gpt_bigcode.py,sha256=Wk9Bs5z4UEEmOf2VJoMEkWRaQ1O6ClvFu0Osn3iws3Y,13120
|
|
800
|
+
vllm/model_executor/models/gpt_j.py,sha256=1BYJxR-AZoJnjKJjR7nHPwSTVwm3ENA1Q7qm5AnMyFE,13164
|
|
801
|
+
vllm/model_executor/models/gpt_neox.py,sha256=u8fiL298xnsW3tG_YukPbw0GXHwIDOEATsGDM1cr8oE,13303
|
|
802
|
+
vllm/model_executor/models/granite.py,sha256=pP8Jze9pyOlQ0SmtDtuBPpg34q5rg6bkTrt93__WflM,20224
|
|
803
|
+
vllm/model_executor/models/granite_speech.py,sha256=5E4zCGHm64gto8HgzcC-pGfCfU_eQgVucO_YYBPmsyI,31239
|
|
804
|
+
vllm/model_executor/models/granitemoe.py,sha256=nh6OaWyIlCeHWKHZabhs2lzpR7wY_qzlJed_hu6XVYk,17892
|
|
805
|
+
vllm/model_executor/models/granitemoeshared.py,sha256=HHqElp3Y8tqAbIizGwymoEoL2qz7VCGIVTRvaM-4ZBk,13679
|
|
806
|
+
vllm/model_executor/models/gritlm.py,sha256=TLmcp8jMok5g_Vy5ANUl9Lor2l0nBtqEBMM0LlBWZ60,8988
|
|
807
|
+
vllm/model_executor/models/grok1.py,sha256=XI-v9vhRUqBsQTcZa7AYGoSAVkPcOM6D7zrUfqKO8OU,23300
|
|
808
|
+
vllm/model_executor/models/h2ovl.py,sha256=09DM8KzX9OdXS7uk7AUa1tOy-TbnVmBKb-nS_HBxkhY,18188
|
|
809
|
+
vllm/model_executor/models/idefics2_vision_model.py,sha256=ymx5ptNiC4Se1quA4s4vjhAMhsIUru7ituClLqkIeJY,14948
|
|
810
|
+
vllm/model_executor/models/idefics3.py,sha256=03iHweR7PCGAF2fD-OMdmp-jxQjOQxcfxSBexTS0wwU,27776
|
|
811
|
+
vllm/model_executor/models/interfaces.py,sha256=PVXjyHqzRg1jE4ZYrjwOyNvgOuc0bfxBqNoeeDNUYmg,16022
|
|
812
|
+
vllm/model_executor/models/interfaces_base.py,sha256=qJgt8EwFMFAHUw7jIzCD0kV39nCGyM7GRW2JuYrQ7Cg,4369
|
|
813
|
+
vllm/model_executor/models/intern_vit.py,sha256=jcUq9Q6xp0BHdWs7Zgo-iDoUAiO3bcIrBsYrrGr6xsc,17332
|
|
814
|
+
vllm/model_executor/models/internlm2.py,sha256=QiTjGIwiniAUDNC6ExBIGXe8dpoC97wb0-vo_TmaCXM,17186
|
|
815
|
+
vllm/model_executor/models/internlm2_ve.py,sha256=M_goPBHj8PdFD7OUWFR40swNfC3oV6ddcVoZgpl6ugQ,5739
|
|
816
|
+
vllm/model_executor/models/internvl.py,sha256=uhT9KxcjnUiwNYDJHLXhcr3rgaR2JLpFWM4TpVNJa6E,33749
|
|
817
|
+
vllm/model_executor/models/jais.py,sha256=Qr5ov59m3vhICiwUFrAHu_BhGae90U_KIOxizSqUAiE,14531
|
|
818
|
+
vllm/model_executor/models/jamba.py,sha256=p6dp3GJJgFRSdpQV-ZDbm-a6XQ9x2COIavzHHxGgdqg,24255
|
|
819
|
+
vllm/model_executor/models/kimi_vl.py,sha256=GmD35Q60u0gMgP4974alNjT0w-wPgOseL3LwrmgjRjY,24399
|
|
820
|
+
vllm/model_executor/models/llama.py,sha256=Ys-db2DZNyXXRL_wfsmvGPgi8kGAZ1TJnjV_O76ujsA,24996
|
|
821
|
+
vllm/model_executor/models/llama4.py,sha256=znTmnqUFR4VNB7nPWnl1Z-7d7pfxakY-QBNqmJZVu9Q,21815
|
|
822
|
+
vllm/model_executor/models/llama_eagle.py,sha256=HN2hTxGF7cNmWgZMzjExlPdYEPV_6sf28z0QwsjxDyQ,5399
|
|
823
|
+
vllm/model_executor/models/llama_eagle3.py,sha256=pTPX7rt5O9YKkt3MPeOgKYbb87BcLWOMPuMjQn1qinI,8289
|
|
824
|
+
vllm/model_executor/models/llava.py,sha256=2s4OASAETYny2HU8o-2g484yC3mlUYau-XcixcmdxeI,32449
|
|
825
|
+
vllm/model_executor/models/llava_next.py,sha256=Bg7ToDnBZ_sDePfasxhBHsXZl5hFEWqOy3zGM7sO8Qw,23679
|
|
826
|
+
vllm/model_executor/models/llava_next_video.py,sha256=YJ9MCQc2rTO_VJ9oS6yNg4p0-283IUGPIm262GM3_HU,17619
|
|
827
|
+
vllm/model_executor/models/llava_onevision.py,sha256=drhY2vavac15GlH6CQ6rBVY7zkV_K7Uw-8_7yYKPqko,36917
|
|
828
|
+
vllm/model_executor/models/mamba.py,sha256=vMuY9BDlbRQ8zGGfkhq-8o3an475aucnGO4eA0xtFnw,11564
|
|
829
|
+
vllm/model_executor/models/mamba2.py,sha256=8XzlYoUalkc84Q1C_4szHSUSSH6LlsT9nbvzpg_ymaU,12391
|
|
830
|
+
vllm/model_executor/models/mamba_cache.py,sha256=VsrGAeFHiPSSoK_RE9pfEfiBMZ4uTVFTBXV9xcSKmQI,2923
|
|
831
|
+
vllm/model_executor/models/medusa.py,sha256=c0jYTHJXo8FfueT0-y_yYjA7e91TIFz4PYRu7G5Kqlo,8558
|
|
832
|
+
vllm/model_executor/models/minicpm.py,sha256=wfBfofWrsECes9ot3GmbyK3bWAcP56z4--c1puMcuRQ,23996
|
|
833
|
+
vllm/model_executor/models/minicpm3.py,sha256=7ETT1RdWTbVueHwDaZfWpQTz9IXLaQNLgTBh8AON9Dw,9366
|
|
834
|
+
vllm/model_executor/models/minicpmo.py,sha256=LDxONdHDibO5XuYuYvZxjbfDA2jT-fCw9aedmj5BLMo,27349
|
|
835
|
+
vllm/model_executor/models/minicpmv.py,sha256=Q9JNS2VaITCFBOibthFk28vNti52BKK9Rr0EDXB-Dvg,47256
|
|
836
|
+
vllm/model_executor/models/minimax_cache.py,sha256=29FVD-sSoymCb8yrJxxE7jdvgvt5VctucnI3WkS9Yo0,1143
|
|
837
|
+
vllm/model_executor/models/minimax_text_01.py,sha256=VKRyuPwcjtwcPGMlJtFzJaGcfYDOfPpUNjffWhXK7Qs,50241
|
|
838
|
+
vllm/model_executor/models/mistral3.py,sha256=uycOm-vP9SZ9NgCWZT4nvMN7EbvrQ3kg75Ns5yn9ze4,22820
|
|
839
|
+
vllm/model_executor/models/mixtral.py,sha256=Q5B0jFFz4kX4pyU_xk672ghtph_0MWBacwK1A3JMEkg,20127
|
|
840
|
+
vllm/model_executor/models/mixtral_quant.py,sha256=gV4_4xtsb_Cr8Alr9Ofx2kUbydaKX0kWQpx0xSXjW7s,18238
|
|
841
|
+
vllm/model_executor/models/mllama.py,sha256=Q_iVpzwweFs788Q5s0YvDGne3RvDPJBdfWK87wGexf0,66761
|
|
842
|
+
vllm/model_executor/models/mllama4.py,sha256=LcK-yD4xu-o9jCV4P-d2uCcQ0U-CxfZDFtwu3NMcRMQ,31798
|
|
843
|
+
vllm/model_executor/models/mlp_speculator.py,sha256=hv7XRnxENUo4R07K_Ox_9RVTZyKEDS96fYNECVKjz0A,7905
|
|
844
|
+
vllm/model_executor/models/modernbert.py,sha256=3sg7jB2eT9sKNC5ym0XGGwh54IoQPcqaLgOXgOkl08k,12414
|
|
845
|
+
vllm/model_executor/models/module_mapping.py,sha256=d_72RpeQ7wVO9LXl2eeQDIRyurGtEMsN33ey5IdDG_8,1781
|
|
846
|
+
vllm/model_executor/models/molmo.py,sha256=CZ5FBB6mwmv27uJvvcZOkosK3FU7PHtx3SUePXBrzQY,54927
|
|
847
|
+
vllm/model_executor/models/moonvit.py,sha256=6PUh8VsQDfU7jRTKA4f58ytNoRFJZ5y5XjXERXMVNJ0,24037
|
|
848
|
+
vllm/model_executor/models/mpt.py,sha256=PtejQHucpoAKhF_b6rhOZGLxmYh2oiSyoq25Gf-no6Y,12670
|
|
849
|
+
vllm/model_executor/models/nemotron.py,sha256=jgbTyggSi7qsb9NQEnHyLwrkU241nAYNOB2BPBdXAs0,20651
|
|
850
|
+
vllm/model_executor/models/nemotron_nas.py,sha256=vN9RUCCUn8fS96jacX_0lchE8oe-mZwgpqRBITHPLyo,17577
|
|
851
|
+
vllm/model_executor/models/nvlm_d.py,sha256=d09N62ey6RKEye3UlJJkFr0dde5tiCx-HrCYdqMuIF4,7919
|
|
852
|
+
vllm/model_executor/models/olmo.py,sha256=oXpLGitcmHS5mbgWagH23Gc0Lzi3389Fj3hw41G98vE,15098
|
|
853
|
+
vllm/model_executor/models/olmo2.py,sha256=JcJeWkqZaPcLK2TyLdqQwauC-kOuuycedJ_Or_DjTwQ,16078
|
|
854
|
+
vllm/model_executor/models/olmoe.py,sha256=X2_wKZLXRLjyNKEVIyoO00N0rDcLRH-vgYPGK-1BPQQ,18492
|
|
855
|
+
vllm/model_executor/models/opt.py,sha256=sxm4w5iBWqN31zvguGXJG6DQ8oTKRj3Jd7hLRKqJEHY,16488
|
|
856
|
+
vllm/model_executor/models/orion.py,sha256=9jwSuqmAZkZal83_tKUz5eEAIr9TO9Qwidh5YK3yEs4,14096
|
|
857
|
+
vllm/model_executor/models/paligemma.py,sha256=25MJ65uhdaKO_YSfALMRa0RPfB9CTJ1egrJCI4nL-Mo,14591
|
|
858
|
+
vllm/model_executor/models/persimmon.py,sha256=XMXRgEisHKFzg_D52tyLcwwDxEWAz8zKSKK7jQjFmqM,14244
|
|
859
|
+
vllm/model_executor/models/phi.py,sha256=SisXMazkQQRuPL5SHHOENSYEVmR2Er2PC1lxkZzPjxw,14088
|
|
860
|
+
vllm/model_executor/models/phi3.py,sha256=dfxzNvOZ2XPpQIy5mdRFsxFZObV1_h5vlC-ak2hw3HI,388
|
|
861
|
+
vllm/model_executor/models/phi3_small.py,sha256=t47UhryllQukBHwQ93_nG7dRfjFleHWFXOpMdFR8BCM,18198
|
|
862
|
+
vllm/model_executor/models/phi3v.py,sha256=oEHNayK6qX7cFOIGN1EXakjTEkfPSe5fo_q9q34Rp8Y,28423
|
|
863
|
+
vllm/model_executor/models/phi4mm.py,sha256=f6VUEV5PWoW6thRs3ZNX3fqxAAKR9JZm3lpZ1ZBRFkk,50134
|
|
864
|
+
vllm/model_executor/models/phi4mm_audio.py,sha256=GZiQxE8saU4RAfaY1L2B-s3wEXSrXXCFGYyW8CHzxFw,49073
|
|
865
|
+
vllm/model_executor/models/phi4mm_utils.py,sha256=HTzN13FbJhJmcWtH8yWbLRFIO6yz85XJNcH7P8fpvfw,66669
|
|
866
|
+
vllm/model_executor/models/phimoe.py,sha256=2HYcPPKpQ1nxEMPEY3v0qv7aStvMr-Xu5nds7wHzvbc,24730
|
|
867
|
+
vllm/model_executor/models/pixtral.py,sha256=3lPcsidgqrvdZuBP6kS64LG8lm9k0eP3fqFEViJO3Yo,46914
|
|
868
|
+
vllm/model_executor/models/plamo2.py,sha256=hFxlepNKOOfz4QrMIqHLvU_UrEcUxWjXp59fPuvgrO0,30036
|
|
869
|
+
vllm/model_executor/models/prithvi_geospatial_mae.py,sha256=peK17WmcwS_xRV1VGXlqC4_uL7hp2-qfYQCfHS1jknc,9199
|
|
870
|
+
vllm/model_executor/models/qwen.py,sha256=mq7vPonGJ1T_c1Ruispjz9lIYeZ0s_QvJHtyJ8mGw5I,13838
|
|
871
|
+
vllm/model_executor/models/qwen2.py,sha256=QqdNmJFcLMrweXV9cU7l_NgdECqu9PBZvYejZbO6lz8,21776
|
|
872
|
+
vllm/model_executor/models/qwen2_5_omni_thinker.py,sha256=8Hb8Xe951jc0Dqph4VmxJViXNaBPejX4tk47_W8l5-Y,37144
|
|
873
|
+
vllm/model_executor/models/qwen2_5_vl.py,sha256=NGSpoT5W06mqvP9Pml6GiH3Ue7VQGgNKo586TWJ7RVo,46525
|
|
874
|
+
vllm/model_executor/models/qwen2_audio.py,sha256=UVNmNJalmX59vtuf7BamNQIrVzpRkPPxm6kkuF-v0zI,16225
|
|
875
|
+
vllm/model_executor/models/qwen2_moe.py,sha256=O5A5h5qtooq9uB6xdmr9EvaaS3ZJXAV2s8q7JgXqd2M,22531
|
|
876
|
+
vllm/model_executor/models/qwen2_rm.py,sha256=aeG0z9uoIUUsTGE8LA_c48c8T8Hh3p1LfrRjMFuClJU,4536
|
|
877
|
+
vllm/model_executor/models/qwen2_vl.py,sha256=bYFFm2mm0h0b5tuvptSgiPggg5N4_fhXfVIE5oaQqJ4,53580
|
|
878
|
+
vllm/model_executor/models/qwen3.py,sha256=6j6SSIVZdRIgrIjwxYlFNpHfPb12olnSdv1jKW6gyaQ,12420
|
|
879
|
+
vllm/model_executor/models/qwen3_moe.py,sha256=mruQjfPqLGQGV2xsNy-Db34AESABKvy5yJHd1LXulLQ,22928
|
|
880
|
+
vllm/model_executor/models/qwen_vl.py,sha256=VcdGeNX31d-v6JBDNhSGmO2iAVL5OfiPhV6FYW3grks,26765
|
|
881
|
+
vllm/model_executor/models/registry.py,sha256=K0iMbH47XSqw-xkT9hJuDfCyJm2toVdlFAkjaQN8Prw,24956
|
|
882
|
+
vllm/model_executor/models/roberta.py,sha256=qT8H5UwdGpSa6s6f3Q1Kj1CocFbNE-ikwOexMXGfRNg,13442
|
|
883
|
+
vllm/model_executor/models/siglip.py,sha256=fj5XB_1xoi0IMrELjAQMdJjtvjNBVghUDP3LZKSlSS4,18685
|
|
884
|
+
vllm/model_executor/models/skyworkr1v.py,sha256=QqUuPRKXhzUVe6YiIG2fI9cChgJOj8hyoIpA_fUAKuk,33947
|
|
885
|
+
vllm/model_executor/models/smolvlm.py,sha256=gxdwi7cC_5AohDdTQ6kwe2L71uk8-04JjgBcKsIx0XM,1736
|
|
886
|
+
vllm/model_executor/models/solar.py,sha256=aclPlsjrhMGW-Z6D-B4_ZujzQ2X9dx5hoHYEa-5J67E,19892
|
|
887
|
+
vllm/model_executor/models/stablelm.py,sha256=UlKpFpOVn93UOTNCo2R-XObKnIR2zfMQnVA12ugumxQ,15209
|
|
888
|
+
vllm/model_executor/models/starcoder2.py,sha256=KhkRQBSiuoUBktzHBVdfWjmpka-B34-H9Ryrg3yrScI,14571
|
|
889
|
+
vllm/model_executor/models/telechat2.py,sha256=Nqk3c6Ou3XeqayxXBby-lTLmfsxPNXHbnJco6LGRBs4,5997
|
|
890
|
+
vllm/model_executor/models/teleflm.py,sha256=VybsbNx-m48o364REHE8HczV5yH8G8AISCRvMs96vrA,3143
|
|
891
|
+
vllm/model_executor/models/transformers.py,sha256=d98i8BFWweTnmoNnWuSJ9AOZ0ogJEVPg1YnsMgF0Q50,17546
|
|
892
|
+
vllm/model_executor/models/ultravox.py,sha256=Ho3UzmhsU0OLeJrnV_G_ytR2UKVZPNc6XYfAw4sDcJc,26910
|
|
893
|
+
vllm/model_executor/models/utils.py,sha256=rF4Bh94fgZaI9I-MMHvKvhKwVtypWysTT1CfLrdDogY,24220
|
|
894
|
+
vllm/model_executor/models/vision.py,sha256=J6biI1Hx6ls2E-DauGCdhscdqY7dg4k__m6p5Xu-o3M,5706
|
|
895
|
+
vllm/model_executor/models/whisper.py,sha256=tRVRvUNYNtXcQKpsEE3HcCTMqfcy8jPdDE-b0z9UC8A,27565
|
|
896
|
+
vllm/model_executor/models/zamba2.py,sha256=VilHoaUknriXRwYOOPTGMvy2E50Ib_AFXV1ltJ8YROw,39629
|
|
897
|
+
vllm/multimodal/__init__.py,sha256=H8ox_Aiqk10St_5AgW59cvdksUl92INLWDopBY98LOY,924
|
|
898
|
+
vllm/multimodal/audio.py,sha256=pCpnPG0OhQMS-7SqVmO0VdcdwJombEhiwj2C4AMidy8,3079
|
|
899
|
+
vllm/multimodal/base.py,sha256=oO4hXRWCURMmTeP6SRXNq0fo_sldIhCr2CxBcR8GpVA,7014
|
|
900
|
+
vllm/multimodal/hasher.py,sha256=sHLPsqzjONd2Ra2mZirZLnxfSdV9cV5_PUGpm_IRNuA,2865
|
|
901
|
+
vllm/multimodal/image.py,sha256=sRFpwj1KfKYVrUePkau3tLU65KGYGCOEXFrPTr-_qRo,2233
|
|
902
|
+
vllm/multimodal/inputs.py,sha256=twLmIe_O-WReMqKLfBwwbvtyJwLke47n_ugGBTgFSf0,27246
|
|
903
|
+
vllm/multimodal/parse.py,sha256=k7r6YzgQD1a7hPP8vV8WnUwUhHbSZx9NRUEP7jirRhs,14141
|
|
904
|
+
vllm/multimodal/processing.py,sha256=QGgFqzsflYchwgXIiQ_o9zSBxg7xwl4ZrKBulMfoFoI,56963
|
|
905
|
+
vllm/multimodal/profiling.py,sha256=c8uD2tqvJ9C4nhWxiwLWC-KXmnnBwcHxFustg_fMhB0,9185
|
|
906
|
+
vllm/multimodal/registry.py,sha256=yUyvY1IdcdnfMt83qYcw4Qyp0FLqiLFH5Fwlt2p80dM,10989
|
|
907
|
+
vllm/multimodal/utils.py,sha256=daLxvepBkNZbCdcbHKTr6H9hzaSpkAcCuqIqJQCnf9I,11840
|
|
908
|
+
vllm/multimodal/video.py,sha256=fxRALl53g3Ryz8wC60YNIi11OWTqllwOdxh6iSCiqd0,5159
|
|
909
|
+
vllm/platforms/__init__.py,sha256=ATk-4qv0zzMxRXaXjqyDvEb4bpwaOzn58xVRrXNXdws,10364
|
|
910
|
+
vllm/platforms/cpu.py,sha256=7IaxOy93fXVD-RcyIJarbxf1-9M811pHFRdzffMdXMM,7292
|
|
911
|
+
vllm/platforms/cuda.py,sha256=8qgNTfVzP10DpcooYVReuK2nXfdSYNm2pmW2meKpFoE,18453
|
|
912
|
+
vllm/platforms/hpu.py,sha256=9jcNjaC55sqeV6oHq9hm5JwXJu66lUupMcCArBLaCcY,3522
|
|
913
|
+
vllm/platforms/interface.py,sha256=9HHuvFStExwThlA0AUe4IAzxZCHR2raxghisEpQwuMw,13801
|
|
914
|
+
vllm/platforms/neuron.py,sha256=m6vNECPtIUl7o7oi2dFn9sgz7XfM-hwwbEmlhWDB1ak,2204
|
|
915
|
+
vllm/platforms/rocm.py,sha256=GtILVlxitkldK-8nKqgHyEuzd_focEGa9A9XOYPPKAs,13744
|
|
916
|
+
vllm/platforms/tpu.py,sha256=sPdnT5cWrLQtQjVPLdYy8ys2Utoai8rrsLyStanBzFA,6562
|
|
917
|
+
vllm/platforms/xpu.py,sha256=Bgm3GepKegOQeQPT0jQQ1Asc8_8LW-O0z597FbFCrNc,5498
|
|
918
|
+
vllm/plugins/__init__.py,sha256=eZT4FNCwDJhjMADo5KgKBaGLd_7YR9LzhFgnLuwhAnc,2962
|
|
919
|
+
vllm/profiler/__init__.py,sha256=GWIaNWjYFXuIGl4qCtZyhz-Z88ysZH-mz4G7pYKOe78,128
|
|
920
|
+
vllm/profiler/layerwise_profile.py,sha256=WIh-o0z6si4ua2sCvqRjs4OekSY5PvP7XPgBLud9DTI,13837
|
|
921
|
+
vllm/profiler/utils.py,sha256=itpGYyhLXCQ07nT_dL7seAPJyDlPv9f2Dg6Grh5lpAE,4663
|
|
922
|
+
vllm/prompt_adapter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
923
|
+
vllm/prompt_adapter/layers.py,sha256=aoeVO5L_abEwB92Groio_5lB9al37OiEJac_GkSuCko,2726
|
|
924
|
+
vllm/prompt_adapter/models.py,sha256=ut5JOYkO4AA0UIGEhIGuDqW3u0SjcObmu_c_L7k7sG8,13722
|
|
925
|
+
vllm/prompt_adapter/request.py,sha256=2_iVKLKdWE3kbt7ja_wPHhCerMqVXlg6ztDw344Yd6I,838
|
|
926
|
+
vllm/prompt_adapter/utils.py,sha256=fMMwK-mzuhT9Wu9araO0rSdtNkAmTNvsCAfQXfOkWQk,3668
|
|
927
|
+
vllm/prompt_adapter/worker_manager.py,sha256=qMEPVkdg2_L4bYSIBg_XPEM5As8UgrPrgudoiG4kEAE,7536
|
|
928
|
+
vllm/reasoning/__init__.py,sha256=nioiAm98By9GYD1U2CCj0wgwLpOceBoiOS7umdylkcA,374
|
|
929
|
+
vllm/reasoning/abs_reasoning_parsers.py,sha256=U2D3-63pXXDPpK50_r5OAM_ZIFW584A0wEywfyRNYjU,6507
|
|
930
|
+
vllm/reasoning/deepseek_r1_reasoning_parser.py,sha256=ohpIV6s2j0fha3YmuxKVUFFZjirvTtq8TGTs5r9qkM4,7396
|
|
931
|
+
vllm/reasoning/granite_reasoning_parser.py,sha256=4Wc3hv7tSOjg8ML0V30hRBIFAaVaCY6XCpHxKvBzwyE,15821
|
|
932
|
+
vllm/spec_decode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
933
|
+
vllm/spec_decode/batch_expansion.py,sha256=_n2h_IcPtL1XhAEVtv0pOPNPvf9GDqfojdZhCAneUjk,22740
|
|
934
|
+
vllm/spec_decode/draft_model_runner.py,sha256=QmXaBxVBHCReo9Qzjm-wJVlWSDn75RG66MRRvXTOlHc,14588
|
|
935
|
+
vllm/spec_decode/interfaces.py,sha256=_hnecfFBgi-_wKMIxPobP-DRaefD769WqethORvcofA,3086
|
|
936
|
+
vllm/spec_decode/medusa_worker.py,sha256=liPn4iIMqJ8owL4cXgrDTVb3bjpUKBlqeNDlFlwEvjc,4900
|
|
937
|
+
vllm/spec_decode/metrics.py,sha256=DuJxAXU0jQT5bF9MXjTmHTm1rg-KkIt_ZkMBCfTcSjY,8063
|
|
938
|
+
vllm/spec_decode/mlp_speculator_worker.py,sha256=NuOIAQtZxCWPs7YXWRJW3KwFCT4i3ETCobsA0gK-qVs,3737
|
|
939
|
+
vllm/spec_decode/mqa_scorer.py,sha256=GdhVN2Ef3L_LlnoftqivY0rRKgPF7EV81XUu3PyMouo,7508
|
|
940
|
+
vllm/spec_decode/multi_step_worker.py,sha256=6a9i56JMBL7Tkd0MY9wF-qDk2nt9vRTZeeZKwBqBX4Q,19251
|
|
941
|
+
vllm/spec_decode/ngram_worker.py,sha256=ClMwq0JnlhRPiBT8hI4fBuHVPgtt-uQe6ttMR2CVIb0,7827
|
|
942
|
+
vllm/spec_decode/proposer_worker_base.py,sha256=Qyl-YKE4xg9kz4RVlK6tO3ZSng3JU515oQmVZntijKQ,2089
|
|
943
|
+
vllm/spec_decode/smaller_tp_proposer_worker.py,sha256=mSFul-wK6bqqd4SAl1Vy_Rso6MhaG2LFEaxPqpMznXM,6835
|
|
944
|
+
vllm/spec_decode/spec_decode_worker.py,sha256=bXt84GE77IWG_43hGKMRaZ8J4VJwUrMTNgYjqZYCSu4,62887
|
|
945
|
+
vllm/spec_decode/target_model_runner.py,sha256=sRW1sqPhEdEwL5MvHCRRmd-lmc-G8LjJgG7R22R6JM0,2073
|
|
946
|
+
vllm/spec_decode/top1_proposer.py,sha256=sEvZMEoj_s9-aXzkX6faUZuffoPtxATuMbEmRnZKecM,12354
|
|
947
|
+
vllm/spec_decode/util.py,sha256=2oG5qDatn0pc99zWPOKoYb2LPDc6fXLUelQRCxHvyBo,9888
|
|
948
|
+
vllm/third_party/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
949
|
+
vllm/third_party/pynvml.py,sha256=Dw3kbk5Rn1l8hXRQgR2KvaLjEqKQM2M6r0WtEqbtANE,234584
|
|
950
|
+
vllm/transformers_utils/__init__.py,sha256=RZEL-BvlcJuQJFMPZLM-LjViwZxSsV2o8Tnx7strgDU,617
|
|
951
|
+
vllm/transformers_utils/config.py,sha256=eyWjoufI_PRdKrbz5YXl3H_XWWUyjyur3rZiV2zooyE,30265
|
|
952
|
+
vllm/transformers_utils/detokenizer.py,sha256=BIuRtDNEOaltof860UDWC3SOc95H4VGnBAB8iLlAFmA,7242
|
|
953
|
+
vllm/transformers_utils/detokenizer_utils.py,sha256=UgzZeQN28OE-y4_VaSGCLryw07cSqM00wJsCs1Ic9YQ,7265
|
|
954
|
+
vllm/transformers_utils/processor.py,sha256=zj16dQMvRA84X4Dn7dCZfnPFP-EcNP1AOuGU4dLM8Wk,7152
|
|
955
|
+
vllm/transformers_utils/s3_utils.py,sha256=W1Pkv_vXDlm5thS1mPtaPxEYMuPUteHjyMzc_p1hgY4,4885
|
|
956
|
+
vllm/transformers_utils/tokenizer.py,sha256=5HIzBALDZm2cMz8xTRWaVqxPF7YrvmyAr9QJ792CZFU,10437
|
|
957
|
+
vllm/transformers_utils/tokenizer_base.py,sha256=ubDCoJEppRd8jMCFCiwAXjUp2AZyVezOyLOgTQohIyY,3861
|
|
958
|
+
vllm/transformers_utils/tokenizer_group.py,sha256=1s24_MSQUE1qiebXjoiqXbA2m1xm7-3Ta44NzWJnBK4,4799
|
|
959
|
+
vllm/transformers_utils/utils.py,sha256=qqwuNDdVLyQSMErmpsD5SI3McRGF2JUkGKgffncVwD0,2636
|
|
960
|
+
vllm/transformers_utils/configs/__init__.py,sha256=3UGWqL8_bh1OB37sTPeo76aBp6VM5d048GJpJpCG3Ag,2189
|
|
961
|
+
vllm/transformers_utils/configs/arctic.py,sha256=ifJ5ruBai3c7dyu597cTcJooj2QKzBpGx13f2VYO4no,8990
|
|
962
|
+
vllm/transformers_utils/configs/chatglm.py,sha256=6H5Hv6Z_yziBZG9q4N_0Obj6eHsGL9DrxQeBhkLsZ9Y,2870
|
|
963
|
+
vllm/transformers_utils/configs/cohere2.py,sha256=yeLdn79NO2kyuYH9IqRAO0WGbMJCh5ntd1jabtZaWJs,10353
|
|
964
|
+
vllm/transformers_utils/configs/dbrx.py,sha256=d0xE5WH999Pxqp7v1MXJzmk10cwp0UMOOBADD8bO5rU,10957
|
|
965
|
+
vllm/transformers_utils/configs/deepseek_vl2.py,sha256=QICTzlaoLil0UUXc0fwgl-CImEPpjOnTy1dAUweKLDw,7252
|
|
966
|
+
vllm/transformers_utils/configs/eagle.py,sha256=SxhOrLUcBFHbqKbgjsCi5GQOv_ytGnkE4sIANYr801I,2191
|
|
967
|
+
vllm/transformers_utils/configs/exaone.py,sha256=o82MCDMCtD8cHvrPzJroEpJV5e4xuVK3yzlHBq4IjvE,8883
|
|
968
|
+
vllm/transformers_utils/configs/falcon.py,sha256=1w9gXJJPzvup7Hd05O1xYzp_IDXDdtxByt06U92uy7c,2917
|
|
969
|
+
vllm/transformers_utils/configs/h2ovl.py,sha256=Tsyex8PgWS_WEuzgTZ9vGcgn7Pv1m0yJDs64Q2meT_Q,489
|
|
970
|
+
vllm/transformers_utils/configs/internvl.py,sha256=hqm1INrEucyhhPKQhwRiwEZ6Ulw1gvnFIw1EISnE5QI,1867
|
|
971
|
+
vllm/transformers_utils/configs/jais.py,sha256=VExjql0k4IoUHn9FmjpnmwlNDt0vOW4mlKkChLU5iL8,10363
|
|
972
|
+
vllm/transformers_utils/configs/kimi_vl.py,sha256=IzdW_JZbZyuyhnnUa4ILY_G56TRbbFI56PsY9UIqsh4,1417
|
|
973
|
+
vllm/transformers_utils/configs/medusa.py,sha256=2gSa-OtMNHi2eL_AJDgbslqb9ntcg9fRfhhgRPGoxr0,1943
|
|
974
|
+
vllm/transformers_utils/configs/mllama.py,sha256=lIkiJ83huJQq7kLAV2hfLWAcUxWVT9aa1YAcSRUzz1Y,805
|
|
975
|
+
vllm/transformers_utils/configs/mlp_speculator.py,sha256=MgeWpPARW5jwb8Nw1hnZaqJbdDdBOc_a_ESTeRy3O8g,2437
|
|
976
|
+
vllm/transformers_utils/configs/moonvit.py,sha256=3UjfhLMqkqFcGfnZwu8oM5S11_jmSn1zzjnC-JfCRuQ,1203
|
|
977
|
+
vllm/transformers_utils/configs/mpt.py,sha256=zLtFoXM4PKJK67mQoeBOwLQrT4wR-zdEMYAsGrHV108,7589
|
|
978
|
+
vllm/transformers_utils/configs/nemotron.py,sha256=xw--8lmM5VzLM6Nfyha4vaeRvVYh5v3bjrAP_Z010nk,8974
|
|
979
|
+
vllm/transformers_utils/configs/nvlm_d.py,sha256=2Mr9ZAI6VG0DbLDw0BnFEIgeiZd7ip3bSoVsfcEeNqQ,458
|
|
980
|
+
vllm/transformers_utils/configs/skyworkr1v.py,sha256=Wg_ykY-bUNPdcJ_9KwpY2qfUhCvRfFdSBcCjjKUaJVM,1869
|
|
981
|
+
vllm/transformers_utils/configs/solar.py,sha256=y5b9R4mQXdgi-fUv2ZqMIFouW7P57lT5nppn54aCOuo,10841
|
|
982
|
+
vllm/transformers_utils/configs/telechat2.py,sha256=JsOuzKHPQHqtJBZNi27dtwc-FWelsQ9GlmORN2AubPE,2200
|
|
983
|
+
vllm/transformers_utils/configs/ultravox.py,sha256=xjkeV_uyw_J5XI1dNysZjPGK_uqCiuLLkutkFQMQ1ss,4465
|
|
984
|
+
vllm/transformers_utils/processors/__init__.py,sha256=WloJ524I5uG04zlyJVWoPtDGVzlRvWpVsuwcczjOM3o,165
|
|
985
|
+
vllm/transformers_utils/processors/deepseek_vl2.py,sha256=bCdlXRRXoiCMtV8eRxOuiL-6_wDHzUAD8sKurZeqYKM,14601
|
|
986
|
+
vllm/transformers_utils/tokenizers/__init__.py,sha256=y-jPMUMSBejRx8frDMzFNBD_EBJGQ9gw41EL7s-yy2A,303
|
|
987
|
+
vllm/transformers_utils/tokenizers/mistral.py,sha256=Jgqii7bUFhktqmNDNpakkvMQrOIVYYw82ftB_LsJpcE,18563
|
|
988
|
+
vllm/triton_utils/__init__.py,sha256=YrkNi5zOb7oxykfHQEmAnAuwLJ6fjAz4nHHBQd7t-k8,116
|
|
989
|
+
vllm/triton_utils/importing.py,sha256=_LiSKQ-tG_DhCVFtFo7insrlbobG8_d71hE99c5zs9w,1632
|
|
990
|
+
vllm/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
991
|
+
vllm/usage/usage_lib.py,sha256=1SODVdJ_VB-ViFv_xRSYVpeKzxeZ7uboQ-32_Ifl_QY,8832
|
|
992
|
+
vllm/v1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
993
|
+
vllm/v1/kv_cache_interface.py,sha256=R80KroFDgFGltuXbzC6Qb5pB7H6sdPTqzIJPVeRsUZE,5557
|
|
994
|
+
vllm/v1/outputs.py,sha256=ST8Y30Mrf2ZnTM_JyEvHM4psWabNpxSufxI9zlnfSvs,3257
|
|
995
|
+
vllm/v1/request.py,sha256=e0bPhOLjyIT-YItNO7_LJD04h5UARoPQor253ybgEik,6610
|
|
996
|
+
vllm/v1/serial_utils.py,sha256=AP1SR35HCNAMeWzTHRQ-0QIbcESdF1wnBxXJX24B75g,11569
|
|
997
|
+
vllm/v1/utils.py,sha256=t6_KMCZ8R-iU26FfB4U9AL8JUH_6SIgJ40T60RXfj8Q,7866
|
|
998
|
+
vllm/v1/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
999
|
+
vllm/v1/attention/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1000
|
+
vllm/v1/attention/backends/flash_attn.py,sha256=ij_Y7V7pWjvUjKA0PTOh8k-cOT89k_pn_PhHRnp0njA,33173
|
|
1001
|
+
vllm/v1/attention/backends/flashinfer.py,sha256=pYe1YvBLPSt44Mxwu1tiMyfZBu3w-tUQLwuTpAv-btk,26453
|
|
1002
|
+
vllm/v1/attention/backends/pallas.py,sha256=iItpOGwTam_4YQS5yWsXkhQONsu3C9GMQjasXnNP8vA,8011
|
|
1003
|
+
vllm/v1/attention/backends/triton_attn.py,sha256=iPOwnQFKK3hu2yj87rM86fyKrn27bBGvr29vHNGWlfw,7761
|
|
1004
|
+
vllm/v1/attention/backends/mla/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1005
|
+
vllm/v1/attention/backends/mla/common.py,sha256=R1kP4ervM89OEL-4ogsiFT60xqDz1yUqgQ_6G6-0Xao,38448
|
|
1006
|
+
vllm/v1/attention/backends/mla/flashmla.py,sha256=ccdMmyevDPK20d-XSboFHz1JylvWiW1cPqp2y5ymMy4,5131
|
|
1007
|
+
vllm/v1/attention/backends/mla/triton_mla.py,sha256=4b-_WGFx2N7Iozw0Tzl9pcDUtIC6yb5sXviTKVyF9qA,4116
|
|
1008
|
+
vllm/v1/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1009
|
+
vllm/v1/core/block_pool.py,sha256=ZhhOyBFIkci_OIiMYCiRmK-wV658SLF8A42aWX_Ygig,11630
|
|
1010
|
+
vllm/v1/core/encoder_cache_manager.py,sha256=O4ODe4lL1PGZ6CrN4M_4phwrGitQiEXlsiX8Vp4i_Iw,5257
|
|
1011
|
+
vllm/v1/core/kv_cache_manager.py,sha256=s4LFWo93XndWF2KGr2TdUySI0KfOA13ooYsLE6dtqMA,16404
|
|
1012
|
+
vllm/v1/core/kv_cache_utils.py,sha256=mlFOVWFGr8_C_6xR8GeHpEm-brDmxONEwIM6H-LhrZo,28759
|
|
1013
|
+
vllm/v1/core/specialized_manager.py,sha256=EQsFADKOKl6TiNj-OO00qTex0Uy8uo1dU00nAGLP-2c,6647
|
|
1014
|
+
vllm/v1/core/sched/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1015
|
+
vllm/v1/core/sched/interface.py,sha256=oqqpwYsYrYlpilXKR9PmjO7j51PF0RKy6ztQfL2SgTY,5255
|
|
1016
|
+
vllm/v1/core/sched/output.py,sha256=xzmSlyOsxyBQ6r1MCA_wwWX_2egwP0JKDnxsgXsWx74,4438
|
|
1017
|
+
vllm/v1/core/sched/scheduler.py,sha256=GPNxfuBAqPm3AVnswq-KT1eiUl4cHI4YlH-ePQsHFqQ,38805
|
|
1018
|
+
vllm/v1/core/sched/utils.py,sha256=h5yxzYQEON09QhjJgDyOyAdBVWWNJEsTSv9Bx1G1rP0,813
|
|
1019
|
+
vllm/v1/engine/__init__.py,sha256=ISLoPGzvbYuSbz8VazZMdI1xLmG7GcmN-jpb8Rm_JJU,5034
|
|
1020
|
+
vllm/v1/engine/async_llm.py,sha256=6D-QW9h55OnIf7TORYqkU_MdBr3elFgiKCyiUdfFSnQ,20982
|
|
1021
|
+
vllm/v1/engine/core.py,sha256=wBsE5X3f9_JPlsFEzkQabazKZvb6zwuXFI-35oDyixg,29223
|
|
1022
|
+
vllm/v1/engine/core_client.py,sha256=Xm1iPhT8RXHj3bOO9E1LiMBSGHQIaruSV0PvwSmAKAU,36362
|
|
1023
|
+
vllm/v1/engine/detokenizer.py,sha256=IGnYsLBCx1KqvqnbXLM9p2ChCkKiT5j-vzFx4nv6OQs,9549
|
|
1024
|
+
vllm/v1/engine/exceptions.py,sha256=mxP2NWpoDgXiympaoPM1rhkEWEq2W76Q_wgTH7v6604,662
|
|
1025
|
+
vllm/v1/engine/llm_engine.py,sha256=9tmaD7YI1cGNkmwsfL8c35tFEaNFsRrFwo1HdM0hfFE,11185
|
|
1026
|
+
vllm/v1/engine/logprobs.py,sha256=xJ4mj9i3kXNEc_UbrZl_KwMV5eFtC463hkbVLG9R4gE,7059
|
|
1027
|
+
vllm/v1/engine/mm_input_cache.py,sha256=U_p50tRpmtW1lplYmF-NEYlQhoGn_8on1L8G7qBcmQg,2908
|
|
1028
|
+
vllm/v1/engine/output_processor.py,sha256=-w7t6ZQ9oPdiZnp8ucpwjf8gw7WmY_JO4FmH8rsIRus,15837
|
|
1029
|
+
vllm/v1/engine/parallel_sampling.py,sha256=OjBEE6zzWcY8jfgCiEOGaJfeM4M9-yeV2qWdxdRtLrU,4765
|
|
1030
|
+
vllm/v1/engine/processor.py,sha256=OTT-o-X73ATHFX7-OR28HG7c6ly-jWtwPpsw5VdH-Wo,16896
|
|
1031
|
+
vllm/v1/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1032
|
+
vllm/v1/executor/abstract.py,sha256=RlosW-n6QNCT16VCB2PukDakpdyo318HQKBa3KrFzxI,4464
|
|
1033
|
+
vllm/v1/executor/multiproc_executor.py,sha256=c1zXL6iWuve5sesbCcJ1hRpQm5xErkcvS7YyOcZJwTs,18192
|
|
1034
|
+
vllm/v1/executor/ray_distributed_executor.py,sha256=IJYuJWHHkYUlAfGPrIeoMFnR0oaMYtZrJHv1hmIynBY,1992
|
|
1035
|
+
vllm/v1/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1036
|
+
vllm/v1/metrics/loggers.py,sha256=_5N7YLBtflloVdZSnFB14sKx7pQ7ZzAV62p4txWJLkI,20299
|
|
1037
|
+
vllm/v1/metrics/stats.py,sha256=3naKGjAi9lk9Grrk1ygFUedqTxjk2A2HSfYIisqi1xs,9386
|
|
1038
|
+
vllm/v1/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1039
|
+
vllm/v1/sample/metadata.py,sha256=94neEjn1GPZehdsaQ8zoz2Qzl3btezcSnVUUuKHnvTI,1094
|
|
1040
|
+
vllm/v1/sample/rejection_sampler.py,sha256=vVuw8o5AZ9YFZk0bVh_p0njQ1oG8naoz-KyKqjC59GI,22915
|
|
1041
|
+
vllm/v1/sample/sampler.py,sha256=nRwExvRK26q7ljiMoHZnkTUabIT6lWcr5woRFkmJcrE,10245
|
|
1042
|
+
vllm/v1/sample/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1043
|
+
vllm/v1/sample/ops/bad_words.py,sha256=_Q9GFppyv3mOstR_3-hKVUOaq12uuHR-xUdkcFAP5Ng,1122
|
|
1044
|
+
vllm/v1/sample/ops/penalties.py,sha256=3W-rVWsNwuDnouIRyaqZW3HtnWyXc9Jlz_Eq_xx5mtA,2149
|
|
1045
|
+
vllm/v1/sample/ops/topk_topp_sampler.py,sha256=7t32UApYjnPccmtXpGRrb0uCdj8DEaAXr0WXns-RxW4,12227
|
|
1046
|
+
vllm/v1/sample/tpu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1047
|
+
vllm/v1/sample/tpu/metadata.py,sha256=p_qm2ZhbI7FqjDvTCVFag8r8deZsBrn9sZZWZ1oGKHY,4286
|
|
1048
|
+
vllm/v1/sample/tpu/sampler.py,sha256=b1YZCNM44SqidDMiKEESox0QkgmqW8Cb8tsUIidDawY,5472
|
|
1049
|
+
vllm/v1/spec_decode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1050
|
+
vllm/v1/spec_decode/eagle.py,sha256=YBjH7jWdzd6Q2CKMZHppRFNKWZviTXvuuvyIxN19td0,13481
|
|
1051
|
+
vllm/v1/spec_decode/metadata.py,sha256=E5jGcv8qd1LzIB35ijGHpgTquQDJsrfTzzvUBT9Ba1A,2188
|
|
1052
|
+
vllm/v1/spec_decode/metrics.py,sha256=VP-VTiQD1iA9NhAHakwK2pJIWEDOniUYBTwkwW8RHdc,6451
|
|
1053
|
+
vllm/v1/spec_decode/ngram_proposer.py,sha256=OlsaNfDwu0FYzgo0MZBBy253IR6IDXhkeEIpnolwmmQ,4224
|
|
1054
|
+
vllm/v1/spec_decode/utils.py,sha256=ysp1hU55Mp8VpzjLSgvMeTzmCxUddKb84PiPv4igM5Y,666
|
|
1055
|
+
vllm/v1/stats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1056
|
+
vllm/v1/stats/common.py,sha256=PNWYkSP8dxUXuDNHON1f0PaLU03bWU9otB8j4kQ0YuI,17226
|
|
1057
|
+
vllm/v1/structured_output/__init__.py,sha256=DAccbNg12evGVBg7B8o2xuLMxgr7lJHaPvhZEbPPQj0,4725
|
|
1058
|
+
vllm/v1/structured_output/backend_guidance.py,sha256=eLJayula4vgnIRs1oBcrc1Kp42r2uOSFT2CZIl51ZeA,7986
|
|
1059
|
+
vllm/v1/structured_output/backend_types.py,sha256=HQ62R06dbQwO_kI4Ueix6dbVu6WkzWo8lKqzryogxN4,2646
|
|
1060
|
+
vllm/v1/structured_output/backend_xgrammar.py,sha256=-nLSP19LygHc7J11QHKuz9TFsJC8BQ2ohk1Ny0qx8XI,11762
|
|
1061
|
+
vllm/v1/structured_output/request.py,sha256=ZZyzhtm73unguoSp4aFgqBiLCwnHuHSwPPgB8kD_Klw,3108
|
|
1062
|
+
vllm/v1/structured_output/utils.py,sha256=vgIMRJcubDnh6nRGmIDzCkLEGpg-qdL_hSQumlv0m_Q,5788
|
|
1063
|
+
vllm/v1/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1064
|
+
vllm/v1/worker/block_table.py,sha256=hOeaO0wpiCxrpQMyTrYmWHgIjMsjJizxXTYdrjC-HbA,2804
|
|
1065
|
+
vllm/v1/worker/gpu_input_batch.py,sha256=2pURhfl7rUgMU6FhoMAdAB9ySPmAoAyXtf6HNOApOww,29697
|
|
1066
|
+
vllm/v1/worker/gpu_model_runner.py,sha256=zKSV1NhrzDib0hZ0BNB22VxNS27Ec-tFAS5D6_zJpBo,83083
|
|
1067
|
+
vllm/v1/worker/gpu_worker.py,sha256=KDHgJ7ZdINcmauFQl_3tLHGiV2bcf8v2cQUBx4e6eLo,14379
|
|
1068
|
+
vllm/v1/worker/lora_model_runner_mixin.py,sha256=w3_2YfgEHuTOCQU69F3WSNG4EpxaJZxC_ZakGGmAKC8,5781
|
|
1069
|
+
vllm/v1/worker/tpu_model_runner.py,sha256=RbT3vfLACFQkJxKFv6Zkz30PrO8eBowWYeH7uNa-hWo,66278
|
|
1070
|
+
vllm/v1/worker/tpu_worker.py,sha256=U_uNV9Vh7_bbeeXolIPJMqvMmMQfyFNoQ8iWQmi0b5g,11034
|
|
1071
|
+
vllm/v1/worker/utils.py,sha256=Is726LIfgtRsHmyl6HBH8Ip2zsjAlnA1gVlvyoUZuvY,2522
|
|
1072
|
+
vllm/v1/worker/worker_base.py,sha256=y8BsJDgREwPB8DH8dynVIDfykYydbUzcrSTHLSMqMUU,1976
|
|
1073
|
+
vllm/vllm_flash_attn/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1074
|
+
vllm/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1075
|
+
vllm/worker/cache_engine.py,sha256=iR6mNFDgBUsGYqx7kBKpQB1UQsv7e-AgsB63OCYFGIQ,6006
|
|
1076
|
+
vllm/worker/cpu_enc_dec_model_runner.py,sha256=rXO1k-uUFlN1Rahl5ql-u8m_CgWqDSEcyJnOjcBBwmg,13014
|
|
1077
|
+
vllm/worker/cpu_model_runner.py,sha256=SHWy8uF-KR0bEELUGiSEy-rd5JTg7SLWQADdVUrzXsY,28276
|
|
1078
|
+
vllm/worker/cpu_pooling_model_runner.py,sha256=VJYQ_9wQ3roxiCETjclnUZaVQVKgEx4VTpCFm0o_Hag,4755
|
|
1079
|
+
vllm/worker/cpu_worker.py,sha256=b3S2jYAtr1XIy52IzM2bI9GLVdylH5bx7x-76br18oQ,16024
|
|
1080
|
+
vllm/worker/enc_dec_model_runner.py,sha256=G-QDKfWbT7rek6Yq_WwC3SenIlQfl_218L1IzN5vSmk,23517
|
|
1081
|
+
vllm/worker/hpu_model_runner.py,sha256=LPG2T6-fd2Z2w-isTeZMfIOsHhyIU_B55tajpGY8QQw,100237
|
|
1082
|
+
vllm/worker/hpu_worker.py,sha256=GO7xz8RVIN3qEdD5n0IYr-qn2G7NGjXe_0ZC0RBUWn0,21583
|
|
1083
|
+
vllm/worker/model_runner.py,sha256=RHGkj7qFBeiPI9_AHXRrCwI5yGeSHF_8MRWnzPStK0w,91952
|
|
1084
|
+
vllm/worker/model_runner_base.py,sha256=c76vGfi0-uqdIf-B2A0dU3HrAGd1FQmR6p9f8JVE8Oc,9370
|
|
1085
|
+
vllm/worker/multi_step_hpu_worker.py,sha256=3GKiJPMq_dJ0qrV8PidC3yGqvQW0z9iIMD3A02KFkLo,5296
|
|
1086
|
+
vllm/worker/multi_step_model_runner.py,sha256=z9ZBVJyzYwulftu6lEzbMTAOTval4m7_cPoW5wkeNPE,39186
|
|
1087
|
+
vllm/worker/multi_step_tpu_worker.py,sha256=c6A1A6Aagis6VyskYP6jP35Mt5paZwDV2jn750Sn5LA,4450
|
|
1088
|
+
vllm/worker/multi_step_worker.py,sha256=90a_O9w3bmGHjs5rGVsNEbYzMMhNVZSqLLyOVAHCxk0,9416
|
|
1089
|
+
vllm/worker/neuron_model_runner.py,sha256=87SwSDhX29FKfoXDlewdKdbweJ_9zPI--rsJ_-ScW68,14326
|
|
1090
|
+
vllm/worker/neuron_worker.py,sha256=y-YJYeZmP3g-SnzsFmnFBlRNS0Zwin8ZT_gHStNkC8k,4991
|
|
1091
|
+
vllm/worker/pooling_model_runner.py,sha256=l516VFqfsBcjgEsy7C7NMMi_-aV01sDWoEhxHHmszqw,8738
|
|
1092
|
+
vllm/worker/tpu_model_runner.py,sha256=3SBLtvsqmpBXzIujoQu--IHHfbvfkrluFQaz4aUrzv0,40781
|
|
1093
|
+
vllm/worker/tpu_worker.py,sha256=jZAQpPE0VYxLne4ILNnVBLNP8ItcBNvIIX-VjPGYbE0,14637
|
|
1094
|
+
vllm/worker/utils.py,sha256=qDJF2qtK6Pyfa6JV_ZTpIUDYl-rwn3imoshuS7yXr_A,1918
|
|
1095
|
+
vllm/worker/worker.py,sha256=Il_KV4ICfkq1tPc1H-8ZZQ4d5GIuWBhCv8X6xjWCr1s,25282
|
|
1096
|
+
vllm/worker/worker_base.py,sha256=GhIywLGVRO48Nb9-CxIXnMbvt8CrJayFkxZ7AAHnmu0,26064
|
|
1097
|
+
vllm/worker/xpu_model_runner.py,sha256=xUbVAClSAiYlWfJf6Kg313yR6I6PFMcJrPcBzZkT9Dg,24495
|
|
1098
|
+
vllm/worker/xpu_worker.py,sha256=4aLv0VO2Z8J5_QNKoUzm1dsn5Tr3K2nLOSgRxkUb1F4,7929
|
|
1099
|
+
vllm_cpu-0.8.5.post2.dist-info/METADATA,sha256=vphVSYYj60rArhnjCovjXLNPQd1np7kMB_vP3eqrtyE,15092
|
|
1100
|
+
vllm_cpu-0.8.5.post2.dist-info/WHEEL,sha256=o8YcOpH5mC_n9ScWnBte7z1kkjfSI6p87bunUssSWm8,113
|
|
1101
|
+
vllm_cpu-0.8.5.post2.dist-info/entry_points.txt,sha256=Tzs5SimudQtsKYvM1TICJcRnD-2QYhptzMObwvRogNk,56
|
|
1102
|
+
vllm_cpu-0.8.5.post2.dist-info/top_level.txt,sha256=fAgb8Pt4zQoKTUA3ZnKEIgcjh0L97_dwEjYDTL5MEEo,5
|
|
1103
|
+
vllm_cpu-0.8.5.post2.dist-info/RECORD,,
|