vllm-cpu-avx512bf16 0.9.0.post2__cp310-cp310-manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vllm/_C.abi3.so +0 -0
- vllm/__init__.py +170 -0
- vllm/_custom_ops.py +1742 -0
- vllm/_ipex_ops.py +243 -0
- vllm/_version.py +34 -0
- vllm/adapter_commons/__init__.py +0 -0
- vllm/adapter_commons/layers.py +15 -0
- vllm/adapter_commons/models.py +105 -0
- vllm/adapter_commons/request.py +25 -0
- vllm/adapter_commons/utils.py +92 -0
- vllm/adapter_commons/worker_manager.py +38 -0
- vllm/assets/__init__.py +0 -0
- vllm/assets/audio.py +44 -0
- vllm/assets/base.py +40 -0
- vllm/assets/image.py +33 -0
- vllm/assets/video.py +114 -0
- vllm/attention/__init__.py +19 -0
- vllm/attention/backends/__init__.py +0 -0
- vllm/attention/backends/abstract.py +306 -0
- vllm/attention/backends/blocksparse_attn.py +457 -0
- vllm/attention/backends/cpu_mla.py +305 -0
- vllm/attention/backends/dual_chunk_flash_attn.py +1494 -0
- vllm/attention/backends/flash_attn.py +999 -0
- vllm/attention/backends/flashinfer.py +1100 -0
- vllm/attention/backends/flashmla.py +242 -0
- vllm/attention/backends/hpu_attn.py +309 -0
- vllm/attention/backends/ipex_attn.py +394 -0
- vllm/attention/backends/mla/__init__.py +0 -0
- vllm/attention/backends/mla/common.py +1381 -0
- vllm/attention/backends/pallas.py +347 -0
- vllm/attention/backends/placeholder_attn.py +399 -0
- vllm/attention/backends/rocm_aiter_mla.py +435 -0
- vllm/attention/backends/rocm_flash_attn.py +970 -0
- vllm/attention/backends/torch_sdpa.py +691 -0
- vllm/attention/backends/triton_mla.py +113 -0
- vllm/attention/backends/utils.py +609 -0
- vllm/attention/backends/xformers.py +798 -0
- vllm/attention/layer.py +452 -0
- vllm/attention/ops/__init__.py +0 -0
- vllm/attention/ops/blocksparse_attention/__init__.py +0 -0
- vllm/attention/ops/blocksparse_attention/blocksparse_attention_kernel.py +432 -0
- vllm/attention/ops/blocksparse_attention/interface.py +238 -0
- vllm/attention/ops/blocksparse_attention/utils.py +245 -0
- vllm/attention/ops/chunked_prefill_paged_decode.py +367 -0
- vllm/attention/ops/flashmla.py +115 -0
- vllm/attention/ops/hpu_paged_attn.py +87 -0
- vllm/attention/ops/ipex_attn.py +194 -0
- vllm/attention/ops/merge_attn_states.py +42 -0
- vllm/attention/ops/nki_flash_attn.py +905 -0
- vllm/attention/ops/paged_attn.py +255 -0
- vllm/attention/ops/prefix_prefill.py +901 -0
- vllm/attention/ops/rocm_aiter_mla.py +99 -0
- vllm/attention/ops/rocm_aiter_paged_attn.py +101 -0
- vllm/attention/ops/triton_decode_attention.py +673 -0
- vllm/attention/ops/triton_flash_attention.py +1374 -0
- vllm/attention/ops/triton_merge_attn_states.py +96 -0
- vllm/attention/ops/triton_unified_attention.py +337 -0
- vllm/attention/selector.py +186 -0
- vllm/attention/utils/fa_utils.py +54 -0
- vllm/beam_search.py +82 -0
- vllm/benchmarks/__init__.py +0 -0
- vllm/benchmarks/datasets.py +921 -0
- vllm/benchmarks/endpoint_request_func.py +160 -0
- vllm/benchmarks/latency.py +184 -0
- vllm/benchmarks/serve.py +925 -0
- vllm/benchmarks/throughput.py +609 -0
- vllm/benchmarks/utils.py +69 -0
- vllm/collect_env.py +818 -0
- vllm/compilation/__init__.py +0 -0
- vllm/compilation/activation_quant_fusion.py +88 -0
- vllm/compilation/backends.py +560 -0
- vllm/compilation/base_piecewise_backend.py +71 -0
- vllm/compilation/collective_fusion.py +126 -0
- vllm/compilation/compiler_interface.py +533 -0
- vllm/compilation/counter.py +33 -0
- vllm/compilation/cuda_piecewise_backend.py +213 -0
- vllm/compilation/decorators.py +249 -0
- vllm/compilation/fix_functionalization.py +190 -0
- vllm/compilation/fusion.py +617 -0
- vllm/compilation/fx_utils.py +61 -0
- vllm/compilation/inductor_pass.py +114 -0
- vllm/compilation/monitor.py +38 -0
- vllm/compilation/multi_output_match.py +108 -0
- vllm/compilation/noop_elimination.py +136 -0
- vllm/compilation/pass_manager.py +77 -0
- vllm/compilation/sequence_parallelism.py +267 -0
- vllm/compilation/torch25_custom_graph_pass.py +41 -0
- vllm/compilation/vllm_inductor_pass.py +66 -0
- vllm/compilation/wrapper.py +129 -0
- vllm/config.py +4600 -0
- vllm/connections.py +173 -0
- vllm/core/__init__.py +0 -0
- vllm/core/block/__init__.py +0 -0
- vllm/core/block/block_table.py +398 -0
- vllm/core/block/common.py +370 -0
- vllm/core/block/cpu_gpu_block_allocator.py +440 -0
- vllm/core/block/interfaces.py +318 -0
- vllm/core/block/naive_block.py +465 -0
- vllm/core/block/prefix_caching_block.py +1134 -0
- vllm/core/block/utils.py +27 -0
- vllm/core/block_manager.py +520 -0
- vllm/core/evictor.py +156 -0
- vllm/core/interfaces.py +134 -0
- vllm/core/placeholder_block_space_manager.py +99 -0
- vllm/core/scheduler.py +2092 -0
- vllm/device_allocator/__init__.py +0 -0
- vllm/device_allocator/cumem.py +280 -0
- vllm/distributed/__init__.py +5 -0
- vllm/distributed/communication_op.py +40 -0
- vllm/distributed/device_communicators/__init__.py +0 -0
- vllm/distributed/device_communicators/all2all.py +126 -0
- vllm/distributed/device_communicators/base_device_communicator.py +260 -0
- vllm/distributed/device_communicators/cpu_communicator.py +144 -0
- vllm/distributed/device_communicators/cuda_communicator.py +167 -0
- vllm/distributed/device_communicators/cuda_wrapper.py +179 -0
- vllm/distributed/device_communicators/custom_all_reduce.py +303 -0
- vllm/distributed/device_communicators/custom_all_reduce_utils.py +258 -0
- vllm/distributed/device_communicators/hpu_communicator.py +45 -0
- vllm/distributed/device_communicators/neuron_communicator.py +19 -0
- vllm/distributed/device_communicators/pynccl.py +217 -0
- vllm/distributed/device_communicators/pynccl_wrapper.py +340 -0
- vllm/distributed/device_communicators/shm_broadcast.py +541 -0
- vllm/distributed/device_communicators/tpu_communicator.py +102 -0
- vllm/distributed/device_communicators/xpu_communicator.py +54 -0
- vllm/distributed/kv_events.py +296 -0
- vllm/distributed/kv_transfer/README.md +29 -0
- vllm/distributed/kv_transfer/__init__.py +11 -0
- vllm/distributed/kv_transfer/disagg_prefill_workflow.jpg +0 -0
- vllm/distributed/kv_transfer/kv_connector/__init__.py +0 -0
- vllm/distributed/kv_transfer/kv_connector/base.py +127 -0
- vllm/distributed/kv_transfer/kv_connector/factory.py +126 -0
- vllm/distributed/kv_transfer/kv_connector/lmcache_connector.py +98 -0
- vllm/distributed/kv_transfer/kv_connector/mooncake_store_connector.py +202 -0
- vllm/distributed/kv_transfer/kv_connector/simple_connector.py +328 -0
- vllm/distributed/kv_transfer/kv_connector/utils.py +91 -0
- vllm/distributed/kv_transfer/kv_connector/v1/__init__.py +5 -0
- vllm/distributed/kv_transfer/kv_connector/v1/base.py +259 -0
- vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py +133 -0
- vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py +189 -0
- vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +851 -0
- vllm/distributed/kv_transfer/kv_connector/v1/shared_storage_connector.py +383 -0
- vllm/distributed/kv_transfer/kv_connector_agent.py +76 -0
- vllm/distributed/kv_transfer/kv_lookup_buffer/__init__.py +0 -0
- vllm/distributed/kv_transfer/kv_lookup_buffer/base.py +174 -0
- vllm/distributed/kv_transfer/kv_lookup_buffer/mooncake_store.py +160 -0
- vllm/distributed/kv_transfer/kv_lookup_buffer/simple_buffer.py +236 -0
- vllm/distributed/kv_transfer/kv_pipe/__init__.py +0 -0
- vllm/distributed/kv_transfer/kv_pipe/base.py +66 -0
- vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py +279 -0
- vllm/distributed/kv_transfer/kv_pipe/pynccl_pipe.py +279 -0
- vllm/distributed/kv_transfer/kv_transfer_state.py +70 -0
- vllm/distributed/parallel_state.py +1294 -0
- vllm/distributed/utils.py +520 -0
- vllm/engine/__init__.py +0 -0
- vllm/engine/arg_utils.py +1649 -0
- vllm/engine/async_llm_engine.py +1274 -0
- vllm/engine/async_timeout.py +191 -0
- vllm/engine/llm_engine.py +2153 -0
- vllm/engine/metrics.py +717 -0
- vllm/engine/metrics_types.py +96 -0
- vllm/engine/multiprocessing/__init__.py +188 -0
- vllm/engine/multiprocessing/client.py +755 -0
- vllm/engine/multiprocessing/engine.py +459 -0
- vllm/engine/output_processor/__init__.py +0 -0
- vllm/engine/output_processor/interfaces.py +74 -0
- vllm/engine/output_processor/multi_step.py +215 -0
- vllm/engine/output_processor/single_step.py +144 -0
- vllm/engine/output_processor/stop_checker.py +130 -0
- vllm/engine/output_processor/util.py +27 -0
- vllm/engine/protocol.py +310 -0
- vllm/entrypoints/__init__.py +0 -0
- vllm/entrypoints/api_server.py +177 -0
- vllm/entrypoints/chat_utils.py +1298 -0
- vllm/entrypoints/cli/__init__.py +0 -0
- vllm/entrypoints/cli/benchmark/__init__.py +0 -0
- vllm/entrypoints/cli/benchmark/base.py +38 -0
- vllm/entrypoints/cli/benchmark/latency.py +29 -0
- vllm/entrypoints/cli/benchmark/main.py +53 -0
- vllm/entrypoints/cli/benchmark/serve.py +29 -0
- vllm/entrypoints/cli/benchmark/throughput.py +29 -0
- vllm/entrypoints/cli/collect_env.py +34 -0
- vllm/entrypoints/cli/main.py +62 -0
- vllm/entrypoints/cli/openai.py +204 -0
- vllm/entrypoints/cli/serve.py +141 -0
- vllm/entrypoints/cli/types.py +24 -0
- vllm/entrypoints/launcher.py +146 -0
- vllm/entrypoints/llm.py +1503 -0
- vllm/entrypoints/logger.py +49 -0
- vllm/entrypoints/openai/__init__.py +0 -0
- vllm/entrypoints/openai/api_server.py +1376 -0
- vllm/entrypoints/openai/cli_args.py +306 -0
- vllm/entrypoints/openai/logits_processors.py +89 -0
- vllm/entrypoints/openai/protocol.py +1890 -0
- vllm/entrypoints/openai/run_batch.py +439 -0
- vllm/entrypoints/openai/serving_chat.py +1192 -0
- vllm/entrypoints/openai/serving_classification.py +159 -0
- vllm/entrypoints/openai/serving_completion.py +590 -0
- vllm/entrypoints/openai/serving_embedding.py +200 -0
- vllm/entrypoints/openai/serving_engine.py +985 -0
- vllm/entrypoints/openai/serving_models.py +314 -0
- vllm/entrypoints/openai/serving_pooling.py +231 -0
- vllm/entrypoints/openai/serving_score.py +432 -0
- vllm/entrypoints/openai/serving_tokenization.py +151 -0
- vllm/entrypoints/openai/serving_transcription.py +421 -0
- vllm/entrypoints/openai/tool_parsers/__init__.py +22 -0
- vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py +163 -0
- vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py +369 -0
- vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py +258 -0
- vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py +236 -0
- vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py +370 -0
- vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py +215 -0
- vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py +307 -0
- vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py +302 -0
- vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py +266 -0
- vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py +342 -0
- vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py +111 -0
- vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py +296 -0
- vllm/entrypoints/openai/tool_parsers/utils.py +123 -0
- vllm/entrypoints/score_utils.py +49 -0
- vllm/entrypoints/ssl.py +74 -0
- vllm/entrypoints/utils.py +219 -0
- vllm/env_override.py +34 -0
- vllm/envs.py +896 -0
- vllm/executor/__init__.py +0 -0
- vllm/executor/executor_base.py +400 -0
- vllm/executor/mp_distributed_executor.py +243 -0
- vllm/executor/msgspec_utils.py +29 -0
- vllm/executor/multiproc_worker_utils.py +312 -0
- vllm/executor/ray_distributed_executor.py +700 -0
- vllm/executor/ray_utils.py +398 -0
- vllm/executor/uniproc_executor.py +138 -0
- vllm/forward_context.py +147 -0
- vllm/inputs/__init__.py +40 -0
- vllm/inputs/data.py +330 -0
- vllm/inputs/parse.py +150 -0
- vllm/inputs/preprocess.py +908 -0
- vllm/inputs/registry.py +214 -0
- vllm/jsontree.py +79 -0
- vllm/logger.py +211 -0
- vllm/logging_utils/__init__.py +7 -0
- vllm/logging_utils/dump_input.py +84 -0
- vllm/logging_utils/formatter.py +17 -0
- vllm/logits_process.py +118 -0
- vllm/lora/__init__.py +0 -0
- vllm/lora/fully_sharded_layers.py +354 -0
- vllm/lora/layers.py +1284 -0
- vllm/lora/lora.py +198 -0
- vllm/lora/models.py +817 -0
- vllm/lora/ops/__init__.py +0 -0
- vllm/lora/ops/torch_ops/__init__.py +15 -0
- vllm/lora/ops/torch_ops/lora_ops.py +115 -0
- vllm/lora/ops/triton_ops/__init__.py +11 -0
- vllm/lora/ops/triton_ops/kernel_utils.py +242 -0
- vllm/lora/ops/triton_ops/lora_expand_op.py +289 -0
- vllm/lora/ops/triton_ops/lora_kernel_metadata.py +147 -0
- vllm/lora/ops/triton_ops/lora_shrink_op.py +243 -0
- vllm/lora/ops/triton_ops/utils.py +119 -0
- vllm/lora/ops/xla_ops/__init__.py +6 -0
- vllm/lora/ops/xla_ops/lora_ops.py +106 -0
- vllm/lora/ops/xla_ops/pallas.py +133 -0
- vllm/lora/peft_helper.py +135 -0
- vllm/lora/punica_wrapper/__init__.py +9 -0
- vllm/lora/punica_wrapper/punica_base.py +484 -0
- vllm/lora/punica_wrapper/punica_cpu.py +348 -0
- vllm/lora/punica_wrapper/punica_gpu.py +289 -0
- vllm/lora/punica_wrapper/punica_hpu.py +144 -0
- vllm/lora/punica_wrapper/punica_selector.py +19 -0
- vllm/lora/punica_wrapper/punica_tpu.py +325 -0
- vllm/lora/punica_wrapper/utils.py +163 -0
- vllm/lora/request.py +98 -0
- vllm/lora/resolver.py +84 -0
- vllm/lora/utils.py +239 -0
- vllm/lora/worker_manager.py +253 -0
- vllm/model_executor/__init__.py +15 -0
- vllm/model_executor/custom_op.py +151 -0
- vllm/model_executor/guided_decoding/__init__.py +180 -0
- vllm/model_executor/guided_decoding/guidance_decoding.py +62 -0
- vllm/model_executor/guided_decoding/guidance_logits_processors.py +103 -0
- vllm/model_executor/guided_decoding/guided_fields.py +42 -0
- vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py +66 -0
- vllm/model_executor/guided_decoding/outlines_decoding.py +154 -0
- vllm/model_executor/guided_decoding/outlines_logits_processors.py +283 -0
- vllm/model_executor/guided_decoding/utils.py +241 -0
- vllm/model_executor/guided_decoding/xgrammar_decoding.py +425 -0
- vllm/model_executor/layers/__init__.py +0 -0
- vllm/model_executor/layers/activation.py +368 -0
- vllm/model_executor/layers/fused_moe/__init__.py +53 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H20.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=128,N=96,device_name=NVIDIA_H20.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=NVIDIA_H100.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +130 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +218 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +130 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +130 -0
- vllm/model_executor/layers/fused_moe/configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325X,block_shape=[128,128].json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=60,N=1408,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=60,N=176,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=60,N=352,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=60,N=704,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=64,N=896,device_name=NVIDIA_H20.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +138 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_L40S.json +173 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X.json +200 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
- vllm/model_executor/layers/fused_moe/configs/README +12 -0
- vllm/model_executor/layers/fused_moe/cutlass_moe.py +382 -0
- vllm/model_executor/layers/fused_moe/deep_gemm_moe.py +227 -0
- vllm/model_executor/layers/fused_moe/fused_batched_moe.py +755 -0
- vllm/model_executor/layers/fused_moe/fused_marlin_moe.py +231 -0
- vllm/model_executor/layers/fused_moe/fused_moe.py +1722 -0
- vllm/model_executor/layers/fused_moe/layer.py +1366 -0
- vllm/model_executor/layers/fused_moe/modular_kernel.py +364 -0
- vllm/model_executor/layers/fused_moe/moe_align_block_size.py +242 -0
- vllm/model_executor/layers/fused_moe/moe_pallas.py +83 -0
- vllm/model_executor/layers/fused_moe/moe_permute_unpermute.py +188 -0
- vllm/model_executor/layers/fused_moe/moe_torch_iterative.py +59 -0
- vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py +146 -0
- vllm/model_executor/layers/fused_moe/prepare_finalize.py +60 -0
- vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py +372 -0
- vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py +112 -0
- vllm/model_executor/layers/fused_moe/utils.py +97 -0
- vllm/model_executor/layers/layernorm.py +287 -0
- vllm/model_executor/layers/lightning_attn.py +651 -0
- vllm/model_executor/layers/linear.py +1523 -0
- vllm/model_executor/layers/logits_processor.py +196 -0
- vllm/model_executor/layers/mamba/__init__.py +0 -0
- vllm/model_executor/layers/mamba/mamba2_metadata.py +124 -0
- vllm/model_executor/layers/mamba/mamba_mixer.py +244 -0
- vllm/model_executor/layers/mamba/mamba_mixer2.py +615 -0
- vllm/model_executor/layers/mamba/ops/__init__.py +0 -0
- vllm/model_executor/layers/mamba/ops/causal_conv1d.py +104 -0
- vllm/model_executor/layers/mamba/ops/mamba_ssm.py +413 -0
- vllm/model_executor/layers/mamba/ops/ssd_bmm.py +261 -0
- vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py +588 -0
- vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py +750 -0
- vllm/model_executor/layers/mamba/ops/ssd_combined.py +231 -0
- vllm/model_executor/layers/mamba/ops/ssd_state_passing.py +205 -0
- vllm/model_executor/layers/pooler.py +343 -0
- vllm/model_executor/layers/quantization/__init__.py +156 -0
- vllm/model_executor/layers/quantization/aqlm.py +375 -0
- vllm/model_executor/layers/quantization/auto_round.py +308 -0
- vllm/model_executor/layers/quantization/awq.py +185 -0
- vllm/model_executor/layers/quantization/awq_marlin.py +518 -0
- vllm/model_executor/layers/quantization/awq_triton.py +319 -0
- vllm/model_executor/layers/quantization/base_config.py +150 -0
- vllm/model_executor/layers/quantization/bitblas.py +460 -0
- vllm/model_executor/layers/quantization/bitsandbytes.py +397 -0
- vllm/model_executor/layers/quantization/compressed_tensors/__init__.py +0 -0
- vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py +644 -0
- vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py +1252 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/__init__.py +21 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py +357 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_scheme.py +54 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_24.py +159 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_nvfp4.py +92 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py +120 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py +149 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py +110 -0
- vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py +200 -0
- vllm/model_executor/layers/quantization/compressed_tensors/triton_scaled_mm.py +205 -0
- vllm/model_executor/layers/quantization/compressed_tensors/utils.py +214 -0
- vllm/model_executor/layers/quantization/deepspeedfp.py +194 -0
- vllm/model_executor/layers/quantization/experts_int8.py +195 -0
- vllm/model_executor/layers/quantization/fbgemm_fp8.py +171 -0
- vllm/model_executor/layers/quantization/fp8.py +876 -0
- vllm/model_executor/layers/quantization/gguf.py +564 -0
- vllm/model_executor/layers/quantization/gptq.py +277 -0
- vllm/model_executor/layers/quantization/gptq_bitblas.py +444 -0
- vllm/model_executor/layers/quantization/gptq_marlin.py +647 -0
- vllm/model_executor/layers/quantization/gptq_marlin_24.py +296 -0
- vllm/model_executor/layers/quantization/hqq_marlin.py +331 -0
- vllm/model_executor/layers/quantization/ipex_quant.py +249 -0
- vllm/model_executor/layers/quantization/kernels/__init__.py +0 -0
- vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py +89 -0
- vllm/model_executor/layers/quantization/kernels/mixed_precision/__init__.py +82 -0
- vllm/model_executor/layers/quantization/kernels/mixed_precision/allspark.py +115 -0
- vllm/model_executor/layers/quantization/kernels/mixed_precision/bitblas.py +299 -0
- vllm/model_executor/layers/quantization/kernels/mixed_precision/exllama.py +142 -0
- vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py +119 -0
- vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py +130 -0
- vllm/model_executor/layers/quantization/kernels/scaled_mm/ScaledMMLinearKernel.py +66 -0
- vllm/model_executor/layers/quantization/kernels/scaled_mm/__init__.py +86 -0
- vllm/model_executor/layers/quantization/kernels/scaled_mm/aiter.py +119 -0
- vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py +136 -0
- vllm/model_executor/layers/quantization/kernels/scaled_mm/triton.py +40 -0
- vllm/model_executor/layers/quantization/kernels/scaled_mm/xla.py +104 -0
- vllm/model_executor/layers/quantization/kv_cache.py +138 -0
- vllm/model_executor/layers/quantization/marlin.py +260 -0
- vllm/model_executor/layers/quantization/modelopt.py +734 -0
- vllm/model_executor/layers/quantization/moe_wna16.py +448 -0
- vllm/model_executor/layers/quantization/neuron_quant.py +68 -0
- vllm/model_executor/layers/quantization/ptpc_fp8.py +126 -0
- vllm/model_executor/layers/quantization/qqq.py +274 -0
- vllm/model_executor/layers/quantization/quark/__init__.py +0 -0
- vllm/model_executor/layers/quantization/quark/quark.py +440 -0
- vllm/model_executor/layers/quantization/quark/quark_moe.py +236 -0
- vllm/model_executor/layers/quantization/quark/schemes/__init__.py +8 -0
- vllm/model_executor/layers/quantization/quark/schemes/quark_scheme.py +54 -0
- vllm/model_executor/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py +125 -0
- vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py +145 -0
- vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_int8.py +121 -0
- vllm/model_executor/layers/quantization/quark/utils.py +104 -0
- vllm/model_executor/layers/quantization/schema.py +85 -0
- vllm/model_executor/layers/quantization/torchao.py +143 -0
- vllm/model_executor/layers/quantization/tpu_int8.py +120 -0
- vllm/model_executor/layers/quantization/utils/__init__.py +5 -0
- vllm/model_executor/layers/quantization/utils/allspark_utils.py +51 -0
- vllm/model_executor/layers/quantization/utils/bitblas_utils.py +207 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +18 -0
- vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
- vllm/model_executor/layers/quantization/utils/fp8_utils.py +611 -0
- vllm/model_executor/layers/quantization/utils/gptq_utils.py +94 -0
- vllm/model_executor/layers/quantization/utils/int8_utils.py +484 -0
- vllm/model_executor/layers/quantization/utils/layer_utils.py +39 -0
- vllm/model_executor/layers/quantization/utils/machete_utils.py +32 -0
- vllm/model_executor/layers/quantization/utils/marlin_utils.py +475 -0
- vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py +277 -0
- vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py +324 -0
- vllm/model_executor/layers/quantization/utils/marlin_utils_test.py +164 -0
- vllm/model_executor/layers/quantization/utils/marlin_utils_test_24.py +463 -0
- vllm/model_executor/layers/quantization/utils/marlin_utils_test_qqq.py +125 -0
- vllm/model_executor/layers/quantization/utils/mxfp4_utils.py +44 -0
- vllm/model_executor/layers/quantization/utils/nvfp4_emulation_utils.py +61 -0
- vllm/model_executor/layers/quantization/utils/quant_utils.py +572 -0
- vllm/model_executor/layers/quantization/utils/w8a8_utils.py +404 -0
- vllm/model_executor/layers/rejection_sampler.py +405 -0
- vllm/model_executor/layers/resampler.py +269 -0
- vllm/model_executor/layers/rotary_embedding.py +1861 -0
- vllm/model_executor/layers/sampler.py +1203 -0
- vllm/model_executor/layers/spec_decode_base_sampler.py +258 -0
- vllm/model_executor/layers/typical_acceptance_sampler.py +165 -0
- vllm/model_executor/layers/utils.py +99 -0
- vllm/model_executor/layers/vocab_parallel_embedding.py +486 -0
- vllm/model_executor/model_loader/__init__.py +75 -0
- vllm/model_executor/model_loader/base_loader.py +24 -0
- vllm/model_executor/model_loader/bitsandbytes_loader.py +582 -0
- vllm/model_executor/model_loader/default_loader.py +295 -0
- vllm/model_executor/model_loader/dummy_loader.py +37 -0
- vllm/model_executor/model_loader/gguf_loader.py +113 -0
- vllm/model_executor/model_loader/neuron.py +475 -0
- vllm/model_executor/model_loader/neuronx_distributed.py +622 -0
- vllm/model_executor/model_loader/runai_streamer_loader.py +120 -0
- vllm/model_executor/model_loader/sharded_state_loader.py +211 -0
- vllm/model_executor/model_loader/tensorizer.py +632 -0
- vllm/model_executor/model_loader/tensorizer_loader.py +122 -0
- vllm/model_executor/model_loader/utils.py +301 -0
- vllm/model_executor/model_loader/weight_utils.py +781 -0
- vllm/model_executor/models/__init__.py +27 -0
- vllm/model_executor/models/adapters.py +247 -0
- vllm/model_executor/models/aimv2.py +199 -0
- vllm/model_executor/models/arctic.py +558 -0
- vllm/model_executor/models/aria.py +656 -0
- vllm/model_executor/models/aya_vision.py +461 -0
- vllm/model_executor/models/baichuan.py +473 -0
- vllm/model_executor/models/bamba.py +542 -0
- vllm/model_executor/models/bart.py +937 -0
- vllm/model_executor/models/bert.py +517 -0
- vllm/model_executor/models/bert_with_rope.py +714 -0
- vllm/model_executor/models/blip.py +338 -0
- vllm/model_executor/models/blip2.py +717 -0
- vllm/model_executor/models/bloom.py +372 -0
- vllm/model_executor/models/chameleon.py +1135 -0
- vllm/model_executor/models/chatglm.py +477 -0
- vllm/model_executor/models/clip.py +411 -0
- vllm/model_executor/models/commandr.py +471 -0
- vllm/model_executor/models/constant_size_cache.py +136 -0
- vllm/model_executor/models/dbrx.py +471 -0
- vllm/model_executor/models/deepseek.py +485 -0
- vllm/model_executor/models/deepseek_mtp.py +268 -0
- vllm/model_executor/models/deepseek_v2.py +842 -0
- vllm/model_executor/models/deepseek_vl2.py +647 -0
- vllm/model_executor/models/eagle.py +259 -0
- vllm/model_executor/models/exaone.py +550 -0
- vllm/model_executor/models/fairseq2_llama.py +153 -0
- vllm/model_executor/models/falcon.py +509 -0
- vllm/model_executor/models/falcon_h1.py +684 -0
- vllm/model_executor/models/florence2.py +1102 -0
- vllm/model_executor/models/fuyu.py +388 -0
- vllm/model_executor/models/gemma.py +424 -0
- vllm/model_executor/models/gemma2.py +424 -0
- vllm/model_executor/models/gemma3.py +532 -0
- vllm/model_executor/models/gemma3_mm.py +708 -0
- vllm/model_executor/models/glm.py +22 -0
- vllm/model_executor/models/glm4.py +304 -0
- vllm/model_executor/models/glm4v.py +647 -0
- vllm/model_executor/models/gpt2.py +327 -0
- vllm/model_executor/models/gpt_bigcode.py +334 -0
- vllm/model_executor/models/gpt_j.py +338 -0
- vllm/model_executor/models/gpt_neox.py +331 -0
- vllm/model_executor/models/granite.py +492 -0
- vllm/model_executor/models/granite_speech.py +778 -0
- vllm/model_executor/models/granitemoe.py +436 -0
- vllm/model_executor/models/granitemoehybrid.py +585 -0
- vllm/model_executor/models/granitemoeshared.py +340 -0
- vllm/model_executor/models/gritlm.py +223 -0
- vllm/model_executor/models/grok1.py +545 -0
- vllm/model_executor/models/h2ovl.py +545 -0
- vllm/model_executor/models/idefics2_vision_model.py +388 -0
- vllm/model_executor/models/idefics3.py +767 -0
- vllm/model_executor/models/interfaces.py +571 -0
- vllm/model_executor/models/interfaces_base.py +163 -0
- vllm/model_executor/models/intern_vit.py +475 -0
- vllm/model_executor/models/internlm2.py +454 -0
- vllm/model_executor/models/internlm2_ve.py +146 -0
- vllm/model_executor/models/internvl.py +1405 -0
- vllm/model_executor/models/jais.py +372 -0
- vllm/model_executor/models/jamba.py +591 -0
- vllm/model_executor/models/kimi_vl.py +576 -0
- vllm/model_executor/models/llama.py +643 -0
- vllm/model_executor/models/llama4.py +531 -0
- vllm/model_executor/models/llama_eagle.py +166 -0
- vllm/model_executor/models/llama_eagle3.py +257 -0
- vllm/model_executor/models/llava.py +865 -0
- vllm/model_executor/models/llava_next.py +585 -0
- vllm/model_executor/models/llava_next_video.py +470 -0
- vllm/model_executor/models/llava_onevision.py +955 -0
- vllm/model_executor/models/mamba.py +272 -0
- vllm/model_executor/models/mamba2.py +302 -0
- vllm/model_executor/models/mamba_cache.py +75 -0
- vllm/model_executor/models/medusa.py +218 -0
- vllm/model_executor/models/mimo.py +191 -0
- vllm/model_executor/models/mimo_mtp.py +284 -0
- vllm/model_executor/models/minicpm.py +590 -0
- vllm/model_executor/models/minicpm3.py +229 -0
- vllm/model_executor/models/minicpmo.py +758 -0
- vllm/model_executor/models/minicpmv.py +1286 -0
- vllm/model_executor/models/minimax_cache.py +35 -0
- vllm/model_executor/models/minimax_text_01.py +1303 -0
- vllm/model_executor/models/minimax_vl_01.py +363 -0
- vllm/model_executor/models/mistral3.py +603 -0
- vllm/model_executor/models/mixtral.py +487 -0
- vllm/model_executor/models/mixtral_quant.py +452 -0
- vllm/model_executor/models/mllama.py +1623 -0
- vllm/model_executor/models/mllama4.py +838 -0
- vllm/model_executor/models/mlp_speculator.py +205 -0
- vllm/model_executor/models/modernbert.py +329 -0
- vllm/model_executor/models/module_mapping.py +71 -0
- vllm/model_executor/models/molmo.py +1567 -0
- vllm/model_executor/models/moonvit.py +629 -0
- vllm/model_executor/models/mpt.py +330 -0
- vllm/model_executor/models/nemotron.py +507 -0
- vllm/model_executor/models/nemotron_nas.py +483 -0
- vllm/model_executor/models/nvlm_d.py +215 -0
- vllm/model_executor/models/olmo.py +388 -0
- vllm/model_executor/models/olmo2.py +413 -0
- vllm/model_executor/models/olmoe.py +446 -0
- vllm/model_executor/models/opt.py +411 -0
- vllm/model_executor/models/orion.py +348 -0
- vllm/model_executor/models/ovis.py +554 -0
- vllm/model_executor/models/paligemma.py +397 -0
- vllm/model_executor/models/persimmon.py +343 -0
- vllm/model_executor/models/phi.py +355 -0
- vllm/model_executor/models/phi3.py +18 -0
- vllm/model_executor/models/phi3_small.py +464 -0
- vllm/model_executor/models/phi3v.py +722 -0
- vllm/model_executor/models/phi4mm.py +1245 -0
- vllm/model_executor/models/phi4mm_audio.py +1232 -0
- vllm/model_executor/models/phi4mm_utils.py +1883 -0
- vllm/model_executor/models/phimoe.py +664 -0
- vllm/model_executor/models/pixtral.py +1315 -0
- vllm/model_executor/models/plamo2.py +737 -0
- vllm/model_executor/models/prithvi_geospatial_mae.py +231 -0
- vllm/model_executor/models/qwen.py +361 -0
- vllm/model_executor/models/qwen2.py +567 -0
- vllm/model_executor/models/qwen2_5_omni_thinker.py +903 -0
- vllm/model_executor/models/qwen2_5_vl.py +1171 -0
- vllm/model_executor/models/qwen2_audio.py +409 -0
- vllm/model_executor/models/qwen2_moe.py +539 -0
- vllm/model_executor/models/qwen2_rm.py +131 -0
- vllm/model_executor/models/qwen2_vl.py +1410 -0
- vllm/model_executor/models/qwen3.py +320 -0
- vllm/model_executor/models/qwen3_moe.py +534 -0
- vllm/model_executor/models/qwen_vl.py +784 -0
- vllm/model_executor/models/registry.py +618 -0
- vllm/model_executor/models/roberta.py +273 -0
- vllm/model_executor/models/siglip.py +523 -0
- vllm/model_executor/models/skyworkr1v.py +950 -0
- vllm/model_executor/models/smolvlm.py +51 -0
- vllm/model_executor/models/solar.py +505 -0
- vllm/model_executor/models/stablelm.py +342 -0
- vllm/model_executor/models/starcoder2.py +355 -0
- vllm/model_executor/models/telechat2.py +139 -0
- vllm/model_executor/models/teleflm.py +78 -0
- vllm/model_executor/models/transformers.py +507 -0
- vllm/model_executor/models/ultravox.py +655 -0
- vllm/model_executor/models/utils.py +730 -0
- vllm/model_executor/models/vision.py +146 -0
- vllm/model_executor/models/whisper.py +746 -0
- vllm/model_executor/models/zamba2.py +1008 -0
- vllm/model_executor/parameter.py +458 -0
- vllm/model_executor/pooling_metadata.py +71 -0
- vllm/model_executor/sampling_metadata.py +596 -0
- vllm/model_executor/utils.py +53 -0
- vllm/multimodal/__init__.py +32 -0
- vllm/multimodal/audio.py +105 -0
- vllm/multimodal/base.py +218 -0
- vllm/multimodal/hasher.py +117 -0
- vllm/multimodal/image.py +96 -0
- vllm/multimodal/inputs.py +872 -0
- vllm/multimodal/parse.py +460 -0
- vllm/multimodal/processing.py +1894 -0
- vllm/multimodal/profiling.py +273 -0
- vllm/multimodal/registry.py +330 -0
- vllm/multimodal/utils.py +392 -0
- vllm/multimodal/video.py +197 -0
- vllm/outputs.py +525 -0
- vllm/platforms/__init__.py +290 -0
- vllm/platforms/cpu.py +205 -0
- vllm/platforms/cuda.py +461 -0
- vllm/platforms/hpu.py +105 -0
- vllm/platforms/interface.py +492 -0
- vllm/platforms/neuron.py +152 -0
- vllm/platforms/rocm.py +388 -0
- vllm/platforms/tpu.py +215 -0
- vllm/platforms/xpu.py +155 -0
- vllm/plugins/__init__.py +86 -0
- vllm/plugins/lora_resolvers/README.md +15 -0
- vllm/plugins/lora_resolvers/__init__.py +0 -0
- vllm/plugins/lora_resolvers/filesystem_resolver.py +49 -0
- vllm/pooling_params.py +53 -0
- vllm/profiler/__init__.py +0 -0
- vllm/profiler/layerwise_profile.py +374 -0
- vllm/profiler/utils.py +147 -0
- vllm/prompt_adapter/__init__.py +0 -0
- vllm/prompt_adapter/layers.py +82 -0
- vllm/prompt_adapter/models.py +357 -0
- vllm/prompt_adapter/request.py +36 -0
- vllm/prompt_adapter/utils.py +97 -0
- vllm/prompt_adapter/worker_manager.py +178 -0
- vllm/py.typed +2 -0
- vllm/reasoning/__init__.py +14 -0
- vllm/reasoning/abs_reasoning_parsers.py +191 -0
- vllm/reasoning/deepseek_r1_reasoning_parser.py +172 -0
- vllm/reasoning/granite_reasoning_parser.py +362 -0
- vllm/reasoning/qwen3_reasoning_parser.py +150 -0
- vllm/sampling_params.py +590 -0
- vllm/scalar_type.py +346 -0
- vllm/scripts.py +14 -0
- vllm/sequence.py +1567 -0
- vllm/spec_decode/__init__.py +0 -0
- vllm/spec_decode/batch_expansion.py +505 -0
- vllm/spec_decode/draft_model_runner.py +349 -0
- vllm/spec_decode/interfaces.py +98 -0
- vllm/spec_decode/medusa_worker.py +137 -0
- vllm/spec_decode/metrics.py +212 -0
- vllm/spec_decode/mlp_speculator_worker.py +93 -0
- vllm/spec_decode/mqa_scorer.py +159 -0
- vllm/spec_decode/multi_step_worker.py +422 -0
- vllm/spec_decode/ngram_worker.py +195 -0
- vllm/spec_decode/proposer_worker_base.py +58 -0
- vllm/spec_decode/smaller_tp_proposer_worker.py +195 -0
- vllm/spec_decode/spec_decode_worker.py +1325 -0
- vllm/spec_decode/target_model_runner.py +44 -0
- vllm/spec_decode/top1_proposer.py +274 -0
- vllm/spec_decode/util.py +276 -0
- vllm/test_utils.py +129 -0
- vllm/third_party/__init__.py +0 -0
- vllm/third_party/pynvml.py +6139 -0
- vllm/tracing.py +130 -0
- vllm/transformers_utils/__init__.py +23 -0
- vllm/transformers_utils/chat_templates/__init__.py +4 -0
- vllm/transformers_utils/chat_templates/registry.py +59 -0
- vllm/transformers_utils/chat_templates/template_basic.jinja +3 -0
- vllm/transformers_utils/chat_templates/template_blip2.jinja +11 -0
- vllm/transformers_utils/chat_templates/template_chatml.jinja +10 -0
- vllm/transformers_utils/chat_templates/template_deepseek_vl2.jinja +23 -0
- vllm/transformers_utils/chat_templates/template_fuyu.jinja +3 -0
- vllm/transformers_utils/config.py +835 -0
- vllm/transformers_utils/configs/__init__.py +58 -0
- vllm/transformers_utils/configs/arctic.py +206 -0
- vllm/transformers_utils/configs/chatglm.py +71 -0
- vllm/transformers_utils/configs/cohere2.py +194 -0
- vllm/transformers_utils/configs/dbrx.py +279 -0
- vllm/transformers_utils/configs/deepseek_vl2.py +215 -0
- vllm/transformers_utils/configs/eagle.py +84 -0
- vllm/transformers_utils/configs/exaone.py +189 -0
- vllm/transformers_utils/configs/falcon.py +89 -0
- vllm/transformers_utils/configs/h2ovl.py +15 -0
- vllm/transformers_utils/configs/internvl.py +53 -0
- vllm/transformers_utils/configs/jais.py +237 -0
- vllm/transformers_utils/configs/kimi_vl.py +36 -0
- vllm/transformers_utils/configs/medusa.py +62 -0
- vllm/transformers_utils/configs/minimax_text_01.py +69 -0
- vllm/transformers_utils/configs/minimax_vl_01.py +70 -0
- vllm/transformers_utils/configs/mllama.py +30 -0
- vllm/transformers_utils/configs/mlp_speculator.py +67 -0
- vllm/transformers_utils/configs/moonvit.py +32 -0
- vllm/transformers_utils/configs/mpt.py +179 -0
- vllm/transformers_utils/configs/nemotron.py +204 -0
- vllm/transformers_utils/configs/nvlm_d.py +14 -0
- vllm/transformers_utils/configs/ovis.py +183 -0
- vllm/transformers_utils/configs/skyworkr1v.py +53 -0
- vllm/transformers_utils/configs/solar.py +246 -0
- vllm/transformers_utils/configs/telechat2.py +63 -0
- vllm/transformers_utils/configs/ultravox.py +107 -0
- vllm/transformers_utils/detokenizer.py +167 -0
- vllm/transformers_utils/detokenizer_utils.py +188 -0
- vllm/transformers_utils/processor.py +220 -0
- vllm/transformers_utils/processors/__init__.py +7 -0
- vllm/transformers_utils/processors/deepseek_vl2.py +362 -0
- vllm/transformers_utils/processors/ovis.py +419 -0
- vllm/transformers_utils/s3_utils.py +161 -0
- vllm/transformers_utils/tokenizer.py +301 -0
- vllm/transformers_utils/tokenizer_base.py +148 -0
- vllm/transformers_utils/tokenizer_group.py +119 -0
- vllm/transformers_utils/tokenizers/__init__.py +9 -0
- vllm/transformers_utils/tokenizers/mistral.py +490 -0
- vllm/transformers_utils/utils.py +98 -0
- vllm/triton_utils/__init__.py +13 -0
- vllm/triton_utils/importing.py +49 -0
- vllm/usage/__init__.py +0 -0
- vllm/usage/usage_lib.py +255 -0
- vllm/utils.py +2844 -0
- vllm/v1/__init__.py +0 -0
- vllm/v1/attention/__init__.py +0 -0
- vllm/v1/attention/backends/__init__.py +0 -0
- vllm/v1/attention/backends/flash_attn.py +833 -0
- vllm/v1/attention/backends/flashinfer.py +639 -0
- vllm/v1/attention/backends/mla/__init__.py +0 -0
- vllm/v1/attention/backends/mla/common.py +926 -0
- vllm/v1/attention/backends/mla/flashmla.py +150 -0
- vllm/v1/attention/backends/mla/rocm_aiter_mla.py +221 -0
- vllm/v1/attention/backends/mla/triton_mla.py +118 -0
- vllm/v1/attention/backends/pallas.py +235 -0
- vllm/v1/attention/backends/triton_attn.py +279 -0
- vllm/v1/attention/backends/utils.py +18 -0
- vllm/v1/core/__init__.py +0 -0
- vllm/v1/core/block_pool.py +328 -0
- vllm/v1/core/encoder_cache_manager.py +149 -0
- vllm/v1/core/kv_cache_manager.py +372 -0
- vllm/v1/core/kv_cache_utils.py +748 -0
- vllm/v1/core/sched/__init__.py +0 -0
- vllm/v1/core/sched/interface.py +143 -0
- vllm/v1/core/sched/output.py +153 -0
- vllm/v1/core/sched/scheduler.py +1015 -0
- vllm/v1/core/sched/utils.py +22 -0
- vllm/v1/core/single_type_kv_cache_manager.py +358 -0
- vllm/v1/engine/__init__.py +171 -0
- vllm/v1/engine/async_llm.py +546 -0
- vllm/v1/engine/core.py +801 -0
- vllm/v1/engine/core_client.py +1020 -0
- vllm/v1/engine/detokenizer.py +260 -0
- vllm/v1/engine/exceptions.py +16 -0
- vllm/v1/engine/llm_engine.py +316 -0
- vllm/v1/engine/logprobs.py +198 -0
- vllm/v1/engine/mm_input_cache.py +90 -0
- vllm/v1/engine/output_processor.py +427 -0
- vllm/v1/engine/parallel_sampling.py +132 -0
- vllm/v1/engine/processor.py +398 -0
- vllm/v1/executor/__init__.py +0 -0
- vllm/v1/executor/abstract.py +112 -0
- vllm/v1/executor/multiproc_executor.py +532 -0
- vllm/v1/executor/ray_distributed_executor.py +61 -0
- vllm/v1/kv_cache_interface.py +208 -0
- vllm/v1/metrics/__init__.py +0 -0
- vllm/v1/metrics/loggers.py +511 -0
- vllm/v1/metrics/ray_wrappers.py +120 -0
- vllm/v1/metrics/reader.py +245 -0
- vllm/v1/metrics/stats.py +238 -0
- vllm/v1/outputs.py +115 -0
- vllm/v1/request.py +191 -0
- vllm/v1/sample/__init__.py +0 -0
- vllm/v1/sample/metadata.py +43 -0
- vllm/v1/sample/ops/__init__.py +0 -0
- vllm/v1/sample/ops/bad_words.py +38 -0
- vllm/v1/sample/ops/penalties.py +58 -0
- vllm/v1/sample/ops/topk_topp_sampler.py +292 -0
- vllm/v1/sample/rejection_sampler.py +630 -0
- vllm/v1/sample/sampler.py +270 -0
- vllm/v1/sample/tpu/__init__.py +0 -0
- vllm/v1/sample/tpu/metadata.py +123 -0
- vllm/v1/sample/tpu/sampler.py +144 -0
- vllm/v1/serial_utils.py +313 -0
- vllm/v1/spec_decode/__init__.py +0 -0
- vllm/v1/spec_decode/eagle.py +424 -0
- vllm/v1/spec_decode/medusa.py +61 -0
- vllm/v1/spec_decode/metadata.py +61 -0
- vllm/v1/spec_decode/metrics.py +177 -0
- vllm/v1/spec_decode/ngram_proposer.py +131 -0
- vllm/v1/spec_decode/utils.py +45 -0
- vllm/v1/structured_output/__init__.py +215 -0
- vllm/v1/structured_output/backend_guidance.py +244 -0
- vllm/v1/structured_output/backend_types.py +133 -0
- vllm/v1/structured_output/backend_xgrammar.py +317 -0
- vllm/v1/structured_output/request.py +85 -0
- vllm/v1/structured_output/utils.py +174 -0
- vllm/v1/utils.py +294 -0
- vllm/v1/worker/__init__.py +0 -0
- vllm/v1/worker/block_table.py +139 -0
- vllm/v1/worker/gpu_input_batch.py +680 -0
- vllm/v1/worker/gpu_model_runner.py +2084 -0
- vllm/v1/worker/gpu_worker.py +373 -0
- vllm/v1/worker/lora_model_runner_mixin.py +145 -0
- vllm/v1/worker/tpu_model_runner.py +1510 -0
- vllm/v1/worker/tpu_worker.py +276 -0
- vllm/v1/worker/utils.py +74 -0
- vllm/v1/worker/worker_base.py +64 -0
- vllm/version.py +40 -0
- vllm/vllm_flash_attn/.gitkeep +0 -0
- vllm/worker/__init__.py +0 -0
- vllm/worker/cache_engine.py +144 -0
- vllm/worker/cpu_enc_dec_model_runner.py +326 -0
- vllm/worker/cpu_model_runner.py +671 -0
- vllm/worker/cpu_pooling_model_runner.py +125 -0
- vllm/worker/cpu_worker.py +400 -0
- vllm/worker/enc_dec_model_runner.py +555 -0
- vllm/worker/hpu_model_runner.py +2319 -0
- vllm/worker/hpu_worker.py +483 -0
- vllm/worker/model_runner.py +2178 -0
- vllm/worker/model_runner_base.py +281 -0
- vllm/worker/multi_step_hpu_worker.py +122 -0
- vllm/worker/multi_step_model_runner.py +910 -0
- vllm/worker/multi_step_neuron_model_runner.py +84 -0
- vllm/worker/multi_step_neuronx_distributed_model_runner.py +63 -0
- vllm/worker/multi_step_tpu_worker.py +107 -0
- vllm/worker/multi_step_worker.py +196 -0
- vllm/worker/neuron_model_runner.py +418 -0
- vllm/worker/neuron_worker.py +158 -0
- vllm/worker/neuronx_distributed_model_runner.py +136 -0
- vllm/worker/pooling_model_runner.py +211 -0
- vllm/worker/tpu_model_runner.py +908 -0
- vllm/worker/tpu_worker.py +336 -0
- vllm/worker/utils.py +52 -0
- vllm/worker/worker.py +574 -0
- vllm/worker/worker_base.py +644 -0
- vllm/worker/xpu_model_runner.py +606 -0
- vllm/worker/xpu_worker.py +185 -0
- vllm_cpu_avx512bf16-0.9.0.post2.dist-info/METADATA +335 -0
- vllm_cpu_avx512bf16-0.9.0.post2.dist-info/RECORD +1175 -0
- vllm_cpu_avx512bf16-0.9.0.post2.dist-info/WHEEL +5 -0
- vllm_cpu_avx512bf16-0.9.0.post2.dist-info/entry_points.txt +5 -0
- vllm_cpu_avx512bf16-0.9.0.post2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1175 @@
|
|
|
1
|
+
vllm/_C.abi3.so,sha256=Cmf0MTybMrKyi41RfcuNsVQjV5J2CHxB3JapzB_b1BE,20240920
|
|
2
|
+
vllm/__init__.py,sha256=W0nZA68TuQKHgaVuYMvu4cR1R65gqrgNQOa3JdEVwAA,6582
|
|
3
|
+
vllm/_custom_ops.py,sha256=dLyDsRLveg-tbbI8QfwUKs8e5xZhFdODVKEJs2ywI6k,72263
|
|
4
|
+
vllm/_ipex_ops.py,sha256=HvJlM9jLFMdFCaSq7aFvETbtzsLFyJqndgHBV6H1H3I,8646
|
|
5
|
+
vllm/_version.py,sha256=DTAYCCMkFXLphUn1-KbCXe5KUXdXfuexRZUS0QM8FfA,719
|
|
6
|
+
vllm/beam_search.py,sha256=bL5N3-Whe38DNaBMnUywj9JoDyqpXYbNJwt9oSf2P84,2386
|
|
7
|
+
vllm/collect_env.py,sha256=QOk_VvP2ZgNCrWnniopcBWnzdBBaJlycc_LKhhqgfq0,28212
|
|
8
|
+
vllm/config.py,sha256=_2f_wVckL-Ah0DrA9RNBXiDwmr3PsscIhJH0x6aALqU,201710
|
|
9
|
+
vllm/connections.py,sha256=vrp2xhxfnFTi_fKYWAYBHJSu86TpWYNn0rWLlykwb9c,5019
|
|
10
|
+
vllm/env_override.py,sha256=v5bFL5pZEz9a1Q8xPmyKb7mrxNIb_y7cWr4E-FcBbS4,1475
|
|
11
|
+
vllm/envs.py,sha256=pAadJwZUTUpmntLZWPBiQHvbo_147eDFbPp0sYP5sdc,36039
|
|
12
|
+
vllm/forward_context.py,sha256=4B1azRC575gAPKsL6kS04xyIYoBdg7lQWv55DKbQ6es,5848
|
|
13
|
+
vllm/jsontree.py,sha256=uEJ99TCQv26_ZrRmZuZAuIrh6U8n8KAAQod4WUP4pLM,2143
|
|
14
|
+
vllm/logger.py,sha256=JkPmpFURFd-hKBIRFVm5nnBAqP0FSi3SqkkYJEZyeKA,7506
|
|
15
|
+
vllm/logits_process.py,sha256=GVP8y3NfJvsYcTI_jDycguJwF62IvBrhp03mKG-3OK4,4371
|
|
16
|
+
vllm/outputs.py,sha256=ZA0rU45pTHzarymqF7FGmfLaK3UpSiNAGabhMpWLm0A,20535
|
|
17
|
+
vllm/pooling_params.py,sha256=TQcDIM8CKnH9_Io24akBrVhZUZIl9_zhuO5szeEmBqs,2017
|
|
18
|
+
vllm/py.typed,sha256=F5LUrt0voM87SNuuOky2X9veCVDqJUgRg_VohYqDigY,65
|
|
19
|
+
vllm/sampling_params.py,sha256=RV2nXkmYFPXflCwzrlk0E-TQB-IxDsFUmtX4U2HCdaY,26396
|
|
20
|
+
vllm/scalar_type.py,sha256=8q2_jt1ilOgaz0EqDsmlf0I2yy95I-A_xxOwUkz4msE,12302
|
|
21
|
+
vllm/scripts.py,sha256=f4JQeU_63yCFEvUth0qKfLX18lsPDgcxBveXqvG7js8,432
|
|
22
|
+
vllm/sequence.py,sha256=Kg5dThkLBugtCDWlbrROZ4hEl7klZbPhHsK83mrlMY4,62250
|
|
23
|
+
vllm/test_utils.py,sha256=Ft3N8Qh8Yp_c_EACZCcn-M1Sh8oJLYJeiGxHPa4ahyY,5992
|
|
24
|
+
vllm/tracing.py,sha256=u98azd2ER4HnjempIUdqZhPOvQaK2tZfbSOUTZn_OMo,4776
|
|
25
|
+
vllm/utils.py,sha256=wtnDr1dSHIdNqnyQEF7TtGLs702IgCIi9oYoFWn1Z-g,96291
|
|
26
|
+
vllm/version.py,sha256=FOUZzkwMR0KzHLv4gr6R25HwulikpB9H8Vver3VX8sE,1306
|
|
27
|
+
vllm/adapter_commons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
|
+
vllm/adapter_commons/layers.py,sha256=S_e-bDpST43_0dbXWVbFZKRpJagiVCbhEae43Ssqet8,381
|
|
29
|
+
vllm/adapter_commons/models.py,sha256=2PKSrV52xaE9eXczghEMaAffI9RvUejdyRKjbZ9KCYM,2801
|
|
30
|
+
vllm/adapter_commons/request.py,sha256=GoLdKUNCU6x-8plK95CuLOy56QOSYW6IQAg6ZQg76C8,617
|
|
31
|
+
vllm/adapter_commons/utils.py,sha256=EM1MeBzB-aU7jzE5V4-2ryXSpZURHZ1Oco4tnH8qIE0,3260
|
|
32
|
+
vllm/adapter_commons/worker_manager.py,sha256=uVpp1a6vMI7fIP9DlzOyPsQZyawYqFNyGR6T1DoVxPU,923
|
|
33
|
+
vllm/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
|
+
vllm/assets/audio.py,sha256=5VFl-JOAv6TSd7g9kO4CscNn6Il_GIstVatK1gY7lwY,1185
|
|
35
|
+
vllm/assets/base.py,sha256=IdwWieuPqaAaYKo2ybSfg07dt86k4-NrP1BHPCrFN2s,1196
|
|
36
|
+
vllm/assets/image.py,sha256=sUJn_mKEvLm7wQEXIzhUwRuWkgItymv2YSvUgwSeIWo,955
|
|
37
|
+
vllm/assets/video.py,sha256=zEg3K1DUUaLMhB3sFIwcOxFm0e2UqhQJ2V2FrtJ_TGM,3412
|
|
38
|
+
vllm/attention/__init__.py,sha256=YW7x8Ahq9TPxx8GiduskAliKrJUM5i-kDrOjuLep7aA,610
|
|
39
|
+
vllm/attention/layer.py,sha256=9PC_Z9OHw_fbT1T9J38ycuEm9GLn2aVXoQ8YMmh-PdU,18575
|
|
40
|
+
vllm/attention/selector.py,sha256=Jz6nwwL_GqmQyMfrPvt5ju9fAnM8v5sGZVpGLSHOPbU,5865
|
|
41
|
+
vllm/attention/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
|
+
vllm/attention/backends/abstract.py,sha256=4E2pYsrrdxQ2ztuyyBQj2C_ud4O4crvrS3ipADOCXCY,9357
|
|
43
|
+
vllm/attention/backends/blocksparse_attn.py,sha256=uv0BHWZJJpyqG4b-4LTKFIgtrtu2AgsVpCenGnCGd5s,18006
|
|
44
|
+
vllm/attention/backends/cpu_mla.py,sha256=2vhAyuZxbHieHZvKA1MLrL-Xani-Ncd7jhG4X4nMNSM,11162
|
|
45
|
+
vllm/attention/backends/dual_chunk_flash_attn.py,sha256=8JbVxVn0szCf6oqzO9VfNU_EBtj0tWs3O7l0dfeCjak,65978
|
|
46
|
+
vllm/attention/backends/flash_attn.py,sha256=iCHOfdsb7waXXokUMa4TbFqwoYaU_7DaCGlbBvNhiPs,44480
|
|
47
|
+
vllm/attention/backends/flashinfer.py,sha256=JxVGZRoSRQoBdPqTEjKHj-MyTV4yMHWAJPKFCqEDWrU,47704
|
|
48
|
+
vllm/attention/backends/flashmla.py,sha256=38TpdJQ1mnuTXbUpp0Ea_kYF6eUf2PhKC89KiKk6IAc,9012
|
|
49
|
+
vllm/attention/backends/hpu_attn.py,sha256=_4ns2mxdcsPXoIeLtz8u390vG6Vmwo2_6YboeLABy0g,12277
|
|
50
|
+
vllm/attention/backends/ipex_attn.py,sha256=Pdzv6XDQXQ34FoD-yfxfA6RcGXiDzP7tXkUFy6jEdJw,14822
|
|
51
|
+
vllm/attention/backends/pallas.py,sha256=cLecgpgryFQ_9MDhmUD_tsT8o7xumY-tuUfD-87m9wE,13676
|
|
52
|
+
vllm/attention/backends/placeholder_attn.py,sha256=0noOZ6cJXaPmL_rhs1UKI90Xj9hnHTVfDxdAsyWQjlg,16096
|
|
53
|
+
vllm/attention/backends/rocm_aiter_mla.py,sha256=-sT-4wyrBfb5fAsXJDrSjtQTeMd5TJ-J0DggOZUi6j4,18006
|
|
54
|
+
vllm/attention/backends/rocm_flash_attn.py,sha256=up0A5uCIYdiE-tIQjtuuwL3aG5I7C_Z4qOy5q_o7Xkk,42460
|
|
55
|
+
vllm/attention/backends/torch_sdpa.py,sha256=3IJ1wAK73YuIpeAZOMGN1gMhjD55wNJLgPEWorZLCrg,27346
|
|
56
|
+
vllm/attention/backends/triton_mla.py,sha256=aL-YTCLL2G8pGofl_qkKVUgaZvDPX51ahLfdHxelh2k,3943
|
|
57
|
+
vllm/attention/backends/utils.py,sha256=DdUN4Fu1PNwxOyBqoncHPfMlgJlR0ZAnUl7sL4h_uo4,25926
|
|
58
|
+
vllm/attention/backends/xformers.py,sha256=PFtsIRNkym9ceLMS_KWqUGGd0sk13-hbR5tiAqHSjn0,33633
|
|
59
|
+
vllm/attention/backends/mla/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
60
|
+
vllm/attention/backends/mla/common.py,sha256=POGMWitTVdoZffJEh0isGc042U2F9uOSVXTxRTqE134,57622
|
|
61
|
+
vllm/attention/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
|
+
vllm/attention/ops/chunked_prefill_paged_decode.py,sha256=L3-Pv3uBbvKHbUcZALJbVkbruXllwvitvI8LTaviH3A,12577
|
|
63
|
+
vllm/attention/ops/flashmla.py,sha256=Jsx27AMgaf6_XLF68n-zkBcZx8P5wap7CmAZhgX7IK4,3884
|
|
64
|
+
vllm/attention/ops/hpu_paged_attn.py,sha256=iVU8TZT74UleYw9QpYNsrWSZVj9OZVTrXcJD2DYwrVU,2917
|
|
65
|
+
vllm/attention/ops/ipex_attn.py,sha256=ZhtSrFBDunK7xKU7FyDtwnCcQV-9NASW6c39-q65Gso,5650
|
|
66
|
+
vllm/attention/ops/merge_attn_states.py,sha256=2BvJ9H7A30OfnnntsG5tjLPZNfbNteLG_JMicqhyuFE,1637
|
|
67
|
+
vllm/attention/ops/nki_flash_attn.py,sha256=0tEpBTS-QdBZvj0Zh9NWOVu7rta3ITcWjdIOBPV2u7I,32612
|
|
68
|
+
vllm/attention/ops/paged_attn.py,sha256=c0lJ_5D-rniDyKadqr3h_WIGewY4rcHS1as2EWsb0ow,8319
|
|
69
|
+
vllm/attention/ops/prefix_prefill.py,sha256=RVhBauBItx8e__U3zuO7vr1qTKRUbS-egF62F6tZtpI,30977
|
|
70
|
+
vllm/attention/ops/rocm_aiter_mla.py,sha256=KCcvCpdq2d0puDkIXJhqMeuyt0jDwp2Ymx1TyPcdR6Y,3511
|
|
71
|
+
vllm/attention/ops/rocm_aiter_paged_attn.py,sha256=WVCMDB-wDHPc8zRfmZb1M76UGGxbZO5y2dJk60GB-3o,3885
|
|
72
|
+
vllm/attention/ops/triton_decode_attention.py,sha256=yXo67iTjPeDa8eWrh9KqDu_yi_bZXGImRloucxpTRwk,19121
|
|
73
|
+
vllm/attention/ops/triton_flash_attention.py,sha256=ZWHGEnnimoVm-I3uffQt_ajJVC1XTtR1XynFxuLy3Bs,51265
|
|
74
|
+
vllm/attention/ops/triton_merge_attn_states.py,sha256=WnOo1x2U7VY27PFZpi4H6L2lQkvHwkRW61Nt6RZRUzo,3494
|
|
75
|
+
vllm/attention/ops/triton_unified_attention.py,sha256=XQTDlWPXxjb_DWImv664DhlpbDe7gnkvhkU7Y-1cu-E,10850
|
|
76
|
+
vllm/attention/ops/blocksparse_attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
|
+
vllm/attention/ops/blocksparse_attention/blocksparse_attention_kernel.py,sha256=W0ZGV6T8e87yL2Yy65EJfSf63lxWrqU-SSdkiS2cAY0,11534
|
|
78
|
+
vllm/attention/ops/blocksparse_attention/interface.py,sha256=QfWOzV2KjpJu7goLLOFegFbkIOA484Io3T6E-T8KXwo,9326
|
|
79
|
+
vllm/attention/ops/blocksparse_attention/utils.py,sha256=vKs6ho29zhzf68kI8q9aFntRUOu3YxHXc2LaqLUKu3c,8109
|
|
80
|
+
vllm/attention/utils/fa_utils.py,sha256=S0UZ-Ew2gJ0rpD53YEvEf4-_X-Ppd8om4o8N_HmT2rY,2018
|
|
81
|
+
vllm/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
82
|
+
vllm/benchmarks/datasets.py,sha256=HUUKxM42Evi7DsRid4ZfgsEWNxf7cQBaH4C8La7CnCI,34228
|
|
83
|
+
vllm/benchmarks/endpoint_request_func.py,sha256=3-qqeaNP0Y6mbxyoQYgSjmAW-JQiM1wpNRjN7LkRFqk,5873
|
|
84
|
+
vllm/benchmarks/latency.py,sha256=iLv_0jQQdMBWAuZWk1npdnDcYo7Z4zK3WJ6w1uPhwoU,6565
|
|
85
|
+
vllm/benchmarks/serve.py,sha256=uuXCjdsOCp-1j4ZHrQiZsJR299nKExbAaO6DqyaLRfA,36139
|
|
86
|
+
vllm/benchmarks/throughput.py,sha256=2vQ_H6Eg1z0Glwe6M9ZDVl99e1T_Ioau0z53kvZV02Y,24815
|
|
87
|
+
vllm/benchmarks/utils.py,sha256=gpiQUYOxOG6Bve3zQeOoKWpj6fTIm3pyJ8JllU0vK0Y,2178
|
|
88
|
+
vllm/compilation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
89
|
+
vllm/compilation/activation_quant_fusion.py,sha256=bFsrJ-LFJNJ8CRmzq9C3w-hV1O6dKofSopHN2STgw1M,3166
|
|
90
|
+
vllm/compilation/backends.py,sha256=Qh-gub408d1wClG7b4T4_4dHicgP1fNJGYPgOcrbEiE,22251
|
|
91
|
+
vllm/compilation/base_piecewise_backend.py,sha256=n-InrPoCUhwgdy6Nauu9Aus8O7dOUnuyz0PA9qekH_4,2796
|
|
92
|
+
vllm/compilation/collective_fusion.py,sha256=43T8YpenGj_IostiLQhOwNjWY1rhxPY53Ms3aNPC4Ck,4263
|
|
93
|
+
vllm/compilation/compiler_interface.py,sha256=Z7jb9Fg91O0EdEbK3Det7_baBdOqDN1i9Q3OKn3YA8g,21568
|
|
94
|
+
vllm/compilation/counter.py,sha256=lGVEXL9lTnNWEBc17xMUna4WplhdPUlQaLQPn5qjHWA,937
|
|
95
|
+
vllm/compilation/cuda_piecewise_backend.py,sha256=weITLxKlqauc64rH_Z8-Ss-e-tX0zt00v3FskU5aM_8,8990
|
|
96
|
+
vllm/compilation/decorators.py,sha256=WIFgEj7AJ7JBhMNPcMpyrs_n388Cg4V35QIBQqv4gVw,10234
|
|
97
|
+
vllm/compilation/fix_functionalization.py,sha256=iDgJml2HfComqBMm-rPNdM_aWkaZ0uTY2KNfQ0YLiNo,8393
|
|
98
|
+
vllm/compilation/fusion.py,sha256=jOaFlYhC2ly02vK3xwCVGNgvZmrNqPf__DQOvkVNE3o,24514
|
|
99
|
+
vllm/compilation/fx_utils.py,sha256=TTp5xzE1_tt_yT3ghRKd9A5QAg32PPVpdfhtm-tvY3Y,2050
|
|
100
|
+
vllm/compilation/inductor_pass.py,sha256=qbjP_LR8b9VsC6BiaGe1VGf68HnyN2Savpw68llIbHs,3382
|
|
101
|
+
vllm/compilation/monitor.py,sha256=p4LQ1roNhKQE8M8zhlYwgdnSFBFi6EkDUazr6X2usc0,1346
|
|
102
|
+
vllm/compilation/multi_output_match.py,sha256=NKIV1AMx-3KRVdahB8-q9Op55zuG5MywOuyWXx1u3e0,3835
|
|
103
|
+
vllm/compilation/noop_elimination.py,sha256=lQKjYfAvdCPzJcijINMmMREGJ87lnkOOnD8ny1YjpKE,5234
|
|
104
|
+
vllm/compilation/pass_manager.py,sha256=JKzEMnuEStH00QPYbJ2t8TJ-RRrRYwKBg7hkIPLcq54,2902
|
|
105
|
+
vllm/compilation/sequence_parallelism.py,sha256=nr-K-I9eC4A6EXV_6EWfXIW6qw5oscveMBQp5fVwofE,9649
|
|
106
|
+
vllm/compilation/torch25_custom_graph_pass.py,sha256=2Uegyh-fmx5OsvKOFU4byAXdvN_XqTel3VdHTU-XplQ,1361
|
|
107
|
+
vllm/compilation/vllm_inductor_pass.py,sha256=AXYy_jzfe6QMKU3QkrgaRyUCXVdSq0zfp0bc5SsbIxU,2404
|
|
108
|
+
vllm/compilation/wrapper.py,sha256=GFQykPy1BSWoBMaDmVvKR_xEySFrVbTpfXLEz-s3y8M,5625
|
|
109
|
+
vllm/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
110
|
+
vllm/core/block_manager.py,sha256=XoH1P87ofSfcNqcPpKuhDlrpV_8TlO2zkodkAezveJQ,22204
|
|
111
|
+
vllm/core/evictor.py,sha256=Jy-eZwgdA4Q2F0buFgDNv2fPeiIxJAUEFtyKYz1VL40,5446
|
|
112
|
+
vllm/core/interfaces.py,sha256=Uou6g2s9rlGrSYtk8x-TmeFRHK-SsvB2w2-pNxgfUAs,3590
|
|
113
|
+
vllm/core/placeholder_block_space_manager.py,sha256=7HEHgCYHMNdAvd7ESfplHkjUQbqf8jOn6zXBEk_ShRo,2971
|
|
114
|
+
vllm/core/scheduler.py,sha256=OcFIj2SmF2_i2fPtu3Ed5lINNGjXxgtekae5l56Z9Og,91709
|
|
115
|
+
vllm/core/block/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
116
|
+
vllm/core/block/block_table.py,sha256=HMwMwVY8pHLjlje6gfVsrHvyvLupcd3SMAvgcsUcnxM,16022
|
|
117
|
+
vllm/core/block/common.py,sha256=cfDse1iNYLehOXrSfUypTmakGAdSSXrX0YmodFPpJjI,13200
|
|
118
|
+
vllm/core/block/cpu_gpu_block_allocator.py,sha256=1zWQKDaRTIkkZIwP-pJ_i2bIbIcTRk3MBhpT3V06V3k,16947
|
|
119
|
+
vllm/core/block/interfaces.py,sha256=yx7jEGmrXqAKyDQ76oEGZdfCAKBIld_5Tv7mmf7ra5E,8144
|
|
120
|
+
vllm/core/block/naive_block.py,sha256=EgYRm94K88DyFM3Xjfa1A8hWuGZStL0nIqiZqbVxQMI,16355
|
|
121
|
+
vllm/core/block/prefix_caching_block.py,sha256=tBjZ58xTQmmzx2s24BcneVyCf5F-aaOqgJz07bxbfoo,44182
|
|
122
|
+
vllm/core/block/utils.py,sha256=osLxVwSUYjOsLeal8RzpmGT72F4aU3qbTGuYMdWIsHY,928
|
|
123
|
+
vllm/device_allocator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
124
|
+
vllm/device_allocator/cumem.py,sha256=76ZsJjPEYFCQwst-BVf4Rp-U9PUTP9rLYjRuwJbBeo0,10949
|
|
125
|
+
vllm/distributed/__init__.py,sha256=Rk8k7bXtcPNaihFk5qOn__toXjElImWbszyRJBzeYHA,122
|
|
126
|
+
vllm/distributed/communication_op.py,sha256=pkGcPW7t0uhKtLqVsv19yk0Ixn3ndZ0HbwhFptDETZo,1493
|
|
127
|
+
vllm/distributed/kv_events.py,sha256=EKrYnRMm939umT-7Iy5vrOTviuEZmqsBmX4KKSK4isg,9710
|
|
128
|
+
vllm/distributed/parallel_state.py,sha256=bnxVPPCtblAy3I0etcDAFrsGwiukZtgBRYY_X0HnhJI,50118
|
|
129
|
+
vllm/distributed/utils.py,sha256=9AD1ILUtpTOkFa7IrW8A2gDohoyq48PxqxRr68lJ52U,20363
|
|
130
|
+
vllm/distributed/device_communicators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
131
|
+
vllm/distributed/device_communicators/all2all.py,sha256=qH6iuMAEZ3aPu6VYVR4cKz8xeDz3uHJ1ZIs__lvMgFA,4666
|
|
132
|
+
vllm/distributed/device_communicators/base_device_communicator.py,sha256=sWQXmKVQ1B7vF1e2lcPihor-5dcbAyzE_LvTKggyiIo,10278
|
|
133
|
+
vllm/distributed/device_communicators/cpu_communicator.py,sha256=RxQIT9R0qUFpzxRXVpm78KRLpgXpbOTeI640OE_qWrA,5524
|
|
134
|
+
vllm/distributed/device_communicators/cuda_communicator.py,sha256=P3ps007YH4utWJLqv1JefPrO72hsMg72B6-kSj7_Scg,6508
|
|
135
|
+
vllm/distributed/device_communicators/cuda_wrapper.py,sha256=H7HYG8TnGfVD8GMrVOEQ1DI4_LvtNHqaaFpDSfLCdSQ,7116
|
|
136
|
+
vllm/distributed/device_communicators/custom_all_reduce.py,sha256=WSCx1aVAEVaggN4-RFpIj4id52oGNMHYwbDnK40kLz4,12606
|
|
137
|
+
vllm/distributed/device_communicators/custom_all_reduce_utils.py,sha256=i5AbOs46_17Fyr--MULtThGmW3dBgR3d1M2jou25fWA,10489
|
|
138
|
+
vllm/distributed/device_communicators/hpu_communicator.py,sha256=LSIPK-d_v2ICOii97Du1VhDAVHq0lggXnW8GOahmYiM,1767
|
|
139
|
+
vllm/distributed/device_communicators/neuron_communicator.py,sha256=qrRh1kLgdB2bBYrEJnSs5nTD0YJ6DW9n7_CPJ9x15eo,624
|
|
140
|
+
vllm/distributed/device_communicators/pynccl.py,sha256=Z4QXdGf_qzz2J3PfKtU7bEG29oxbXy4dEorGB9WNB3k,9142
|
|
141
|
+
vllm/distributed/device_communicators/pynccl_wrapper.py,sha256=0a2fcE9Bn9KwAYYDVUnQU2XgWmuK1jtxTRG5eilrrPQ,13694
|
|
142
|
+
vllm/distributed/device_communicators/shm_broadcast.py,sha256=v9w2fZ-pAlFr05KpyYNM5WVTmJoKVnS9Ql3nses2MK0,23584
|
|
143
|
+
vllm/distributed/device_communicators/tpu_communicator.py,sha256=5bw_2OQXBOSXNZUJwuSAGbMC1zkawXsiikywBLgoC78,4090
|
|
144
|
+
vllm/distributed/device_communicators/xpu_communicator.py,sha256=mP9w-kIxqxd98-htC2QHLLQqc7T9YtXGJqLR6K4hWKA,2107
|
|
145
|
+
vllm/distributed/kv_transfer/README.md,sha256=B4s4s-6F9FP4wbgmrYJDSpdUu0_Yq4EeWLEyZMNkAyk,2006
|
|
146
|
+
vllm/distributed/kv_transfer/__init__.py,sha256=HhBNRgm9F5n8KWEApw3rFRiPRlDgCT39QaOyDr9ScHY,392
|
|
147
|
+
vllm/distributed/kv_transfer/disagg_prefill_workflow.jpg,sha256=fOFUEx-2Fm1uxHCGopvCREaRqdvR87Z7C0bMqEVH3Iw,142656
|
|
148
|
+
vllm/distributed/kv_transfer/kv_connector_agent.py,sha256=9ZGSa7HIkNQCZgJw1p1ZD-9Jg9OEVjP_e7iiUDRSkjI,2420
|
|
149
|
+
vllm/distributed/kv_transfer/kv_transfer_state.py,sha256=HvxD6knnUDmnjXK-TYA27idyEIz1Th-jkTKHd2Te6Rs,2266
|
|
150
|
+
vllm/distributed/kv_transfer/kv_connector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
151
|
+
vllm/distributed/kv_transfer/kv_connector/base.py,sha256=fAl69CJYictie5R9nwu5nBy1CN4QLu9oy0SDs6HYfRc,4421
|
|
152
|
+
vllm/distributed/kv_transfer/kv_connector/factory.py,sha256=mKlnlx-kj89MWKqFUdaDn40_6_tiMvxNt2jTM92jiBE,4781
|
|
153
|
+
vllm/distributed/kv_transfer/kv_connector/lmcache_connector.py,sha256=Ry1ihjDWaSrA9DnKd6WxBCxHEIhfZ-7siSLSKHGOs28,3670
|
|
154
|
+
vllm/distributed/kv_transfer/kv_connector/mooncake_store_connector.py,sha256=l97Z_-E4eX124xjzd2wCkqRT0xQMbHwZU7wb2YHMiqU,8610
|
|
155
|
+
vllm/distributed/kv_transfer/kv_connector/simple_connector.py,sha256=2guK29okbgA_b0ronMiLazdBLt_zCR1x0HIcUAGwulQ,13857
|
|
156
|
+
vllm/distributed/kv_transfer/kv_connector/utils.py,sha256=H-P_rctk77fAd50NHKTY4MtVXNKesZLFOISrG3pBFsQ,3785
|
|
157
|
+
vllm/distributed/kv_transfer/kv_connector/v1/__init__.py,sha256=NX4qtFDAEBGdOyRCMsvgVXm1WhWyFRLhy4mEloWovF4,196
|
|
158
|
+
vllm/distributed/kv_transfer/kv_connector/v1/base.py,sha256=BeEx9hcC3U6--xwmSfUv8AYclSXN5TVkuKu5Ogjo7OY,8669
|
|
159
|
+
vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py,sha256=At5WkjMU0tni9yrVGyI4WLbs1SlffKM1a8c5-UVJqUk,5116
|
|
160
|
+
vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py,sha256=S5j5oGE7LExzNPUD6CrZZAYMMDTslCjSPTQxEYVBNdQ,7770
|
|
161
|
+
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py,sha256=EDc70-dmgr7IP3Db9varkTdOejf4Y8nlugVAIW-qhoE,35819
|
|
162
|
+
vllm/distributed/kv_transfer/kv_connector/v1/shared_storage_connector.py,sha256=s2XNGdfBTWNwLkhyepEfnXan1tn5KQPEQ3DcBuxrPRQ,15711
|
|
163
|
+
vllm/distributed/kv_transfer/kv_lookup_buffer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
164
|
+
vllm/distributed/kv_transfer/kv_lookup_buffer/base.py,sha256=5avevKT_Zj7xXp5rsg94OhxLWw5jUL2cG8cTmk0kV2s,6211
|
|
165
|
+
vllm/distributed/kv_transfer/kv_lookup_buffer/mooncake_store.py,sha256=qU8ulFs4EZOi0udp1DeDakHCqSHHzdGKcOxR_WMVl5c,5610
|
|
166
|
+
vllm/distributed/kv_transfer/kv_lookup_buffer/simple_buffer.py,sha256=pUbiv-eh4coq5opKw-zhMZUVpTxUBwlpd1ZqH1Xqv1Y,9087
|
|
167
|
+
vllm/distributed/kv_transfer/kv_pipe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
168
|
+
vllm/distributed/kv_transfer/kv_pipe/base.py,sha256=U4hivz-zJkjhTGgNdtcuupc_ArsoUPFuWEv_AXJ9rqs,2087
|
|
169
|
+
vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py,sha256=BXVNE9q2w6jsrrnyaFS_qBwUbfMW6rUcOhTslH1GrJ4,12042
|
|
170
|
+
vllm/distributed/kv_transfer/kv_pipe/pynccl_pipe.py,sha256=MPr5KiReZLSWCOBdsbTdPBv6Z2UFfVZY8RqVhA9nhHc,9651
|
|
171
|
+
vllm/engine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
172
|
+
vllm/engine/arg_utils.py,sha256=o-ZDt1ZgCxv1v-M-JvA1fpFatcWZI05BSxdSQZDVlHc,75859
|
|
173
|
+
vllm/engine/async_llm_engine.py,sha256=AGGSsV6EuqZO_Oa5WYe5HQmnaPtV07Y_wZzHOAsHH1c,51451
|
|
174
|
+
vllm/engine/async_timeout.py,sha256=JxUaRVK_M5P5wRVkKHQ-QkDMnGxKMTt9S9OhQeQzP-s,7092
|
|
175
|
+
vllm/engine/llm_engine.py,sha256=APwaNeK1xTU0Jd5wcfgXRWV6Llbadcjp4MjjXt267fo,94266
|
|
176
|
+
vllm/engine/metrics.py,sha256=wM5GpLLn2PoL2GdDDGAOR0grbovUklopOhKUZh9TD2Y,31398
|
|
177
|
+
vllm/engine/metrics_types.py,sha256=9qcaNDFM1xfaQGjY9aPK_Cn-GObdctJiqR_t6cLzy_Y,3309
|
|
178
|
+
vllm/engine/protocol.py,sha256=NjFT4gP3RWh8_yKaWsVRsk_1ioGSY5xOjpg0xUbcd5w,10843
|
|
179
|
+
vllm/engine/multiprocessing/__init__.py,sha256=iQ8cWj81FuyrFaXBSTKGXhQL7Ru17cHvlXMPK9KSevs,5037
|
|
180
|
+
vllm/engine/multiprocessing/client.py,sha256=HMTKlS9qTBEL8cXJoU0-eEhEdeAI1k12a_6QcCg2mdA,30597
|
|
181
|
+
vllm/engine/multiprocessing/engine.py,sha256=hVkKVPG98oALAScAA6GlTkz111YY7dd9MmQKxDx2ouQ,18331
|
|
182
|
+
vllm/engine/output_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
183
|
+
vllm/engine/output_processor/interfaces.py,sha256=99zPnCsA0H9k8d7uXfv8yGva69mAS1m3apR9sdWIUfY,2994
|
|
184
|
+
vllm/engine/output_processor/multi_step.py,sha256=XV-JnfrywuX4xlpn7wFEiyJruqe-1NrYrJF429XCmV8,9438
|
|
185
|
+
vllm/engine/output_processor/single_step.py,sha256=PRgNXy1MYf5mpCkWckqCtP-V7USu_F-AlYe3n3n8Vqg,6353
|
|
186
|
+
vllm/engine/output_processor/stop_checker.py,sha256=XtOa0t-ZErekuf9SFoxQv-nZN2ddPxEiFIwIJEUK-ig,5067
|
|
187
|
+
vllm/engine/output_processor/util.py,sha256=IoNFmy8vKrK5pn3nGS26Ey5irhKr8mzNOGP30SsT1qA,1056
|
|
188
|
+
vllm/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
189
|
+
vllm/entrypoints/api_server.py,sha256=f0hrA1OIb2ko5S4RZASnvaxPxJT2hXZOwc2zcayzdbg,5744
|
|
190
|
+
vllm/entrypoints/chat_utils.py,sha256=XpFq5VsVSqob9bbm6hQt4gHRWGe6K3yfF7r4SyWAthM,46832
|
|
191
|
+
vllm/entrypoints/launcher.py,sha256=ZLE3YFhCKhintM-Ra34S8p8L5-eUhzoArdygX8QvVF4,5212
|
|
192
|
+
vllm/entrypoints/llm.py,sha256=G7nHLE4lKVh06LaDgf-_k6tBIjwTE16EDfp1cqgS-W4,66421
|
|
193
|
+
vllm/entrypoints/logger.py,sha256=DJ__GtD7pFMtpxTE-5xhEb-WVN9pv6q3HPvB1X1JOYk,1616
|
|
194
|
+
vllm/entrypoints/score_utils.py,sha256=LY8-RqiTJ2G45xK43FiQjGg15IgwJGMOTh3BcooFTIk,1654
|
|
195
|
+
vllm/entrypoints/ssl.py,sha256=JigVmJhUkhrDPvD1z-iCCwOOLE__qD8V_2h94zvt19A,2736
|
|
196
|
+
vllm/entrypoints/utils.py,sha256=nnOCQadcxKV2ZkqhG8OXbG93FRMSiOVBBQ3MA8qowYc,8860
|
|
197
|
+
vllm/entrypoints/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
198
|
+
vllm/entrypoints/cli/collect_env.py,sha256=TmIqovMewYUVy8BVdCctSXiRBxbDZp99QITfhn2t9nI,1029
|
|
199
|
+
vllm/entrypoints/cli/main.py,sha256=4Roxfc-laVuYA3UOWbfnM_er79IIGKZG8qlR68KTj0A,1650
|
|
200
|
+
vllm/entrypoints/cli/openai.py,sha256=yKMBBRav6gykoUCLJqFoyLrPDJJk8Xn5iyaD4Wmsf-A,6852
|
|
201
|
+
vllm/entrypoints/cli/serve.py,sha256=wk7BNthrnvcAr3IHlk3H0TIYk62cCP9xMGD2lNUutPQ,5039
|
|
202
|
+
vllm/entrypoints/cli/types.py,sha256=9GDzWTOdmPeQk-Z2cXdXSk6DgQMlrhbmLPgQ69Hof0Y,637
|
|
203
|
+
vllm/entrypoints/cli/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
204
|
+
vllm/entrypoints/cli/benchmark/base.py,sha256=blt2nY-bqD8LZV_lt73JcYdVd1f_Pb_fRHhm8wXYYtk,1105
|
|
205
|
+
vllm/entrypoints/cli/benchmark/latency.py,sha256=htqbqDiw338UQ4Imrm8h4BSvHgNe7jXsSE9sHxZDI7c,810
|
|
206
|
+
vllm/entrypoints/cli/benchmark/main.py,sha256=b4ANrwJgpjSMPg8bHnvxhyukxlc_s7x3jrL9n01ni00,1780
|
|
207
|
+
vllm/entrypoints/cli/benchmark/serve.py,sha256=ISS4pL4Q_yF54GOICzDBGcTEDWMnU9e5SLdhosgFK_I,792
|
|
208
|
+
vllm/entrypoints/cli/benchmark/throughput.py,sha256=p6_xRqPiHbXoElsDA2wIT6zIB3c_ClxDQewCk2VROQo,812
|
|
209
|
+
vllm/entrypoints/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
210
|
+
vllm/entrypoints/openai/api_server.py,sha256=CVdnN-_BofuFNpSMvhDCSgvRzOvNKZE7RVkAXsj8eFI,52478
|
|
211
|
+
vllm/entrypoints/openai/cli_args.py,sha256=dJ3O_DQCtXLn7Fyoa1ZFK5zGoCE7FdHKkP1akduJzkk,12067
|
|
212
|
+
vllm/entrypoints/openai/logits_processors.py,sha256=zbAeibwgnyMxn99ZlIheZZtlN3iBzk9AzqbOC7qVUr4,3161
|
|
213
|
+
vllm/entrypoints/openai/protocol.py,sha256=oNoV-opM0k_TMr4RdmmQlhrkWfAJQzUBW4zNKRSQWRc,70078
|
|
214
|
+
vllm/entrypoints/openai/run_batch.py,sha256=Ki_67IocrGmNzD1ZrfXoui8zkXaAtKKb8twPJQCD_zo,16786
|
|
215
|
+
vllm/entrypoints/openai/serving_chat.py,sha256=y654CHtANivp_nojdcFuep3YHnFX1tug1EIU9ZXdgUg,56089
|
|
216
|
+
vllm/entrypoints/openai/serving_classification.py,sha256=ocGRfjwy-2a0Eect8J3ZUt9QvOsgKn2E3yigmsM1XKs,5266
|
|
217
|
+
vllm/entrypoints/openai/serving_completion.py,sha256=4M3DxUd4DUdJfQN1dKW2NAmvpF7Q6EF7qo0mn2Y_tWY,25464
|
|
218
|
+
vllm/entrypoints/openai/serving_embedding.py,sha256=e9J82Hy_4ProkbU9FaaH9RR-jkhjXcmHa8qmlu0wuMw,7315
|
|
219
|
+
vllm/entrypoints/openai/serving_engine.py,sha256=MVKEBPbTCOGXwl3pfbWkpKkj6Xy3mWLN-S1uY6Xs9W4,38634
|
|
220
|
+
vllm/entrypoints/openai/serving_models.py,sha256=C_NVmE_oimem4sRLOQkIyJTuIhD6fKiSemwRSQQOaoo,12728
|
|
221
|
+
vllm/entrypoints/openai/serving_pooling.py,sha256=GKkrnG6jCYAypkFALOZTVNRV5jWxx-7OReUtBuikYRo,8650
|
|
222
|
+
vllm/entrypoints/openai/serving_score.py,sha256=yDIRvVcKBv4Bt_vla3ot8_VRBGtuYT7lY-OKiz86Ukk,15907
|
|
223
|
+
vllm/entrypoints/openai/serving_tokenization.py,sha256=HQHxtShdNtiJGhpNzOWH_zriFsR9ZYFGg6RjtD_cB7Q,5833
|
|
224
|
+
vllm/entrypoints/openai/serving_transcription.py,sha256=8w8DvHzoAma-T50asNFb66badNOCwwV-tqekxWiG50Q,15629
|
|
225
|
+
vllm/entrypoints/openai/tool_parsers/__init__.py,sha256=0idz-w5WR3Y-YSduSOMpZRhc05KuiAlzF2JYjOXuACc,1035
|
|
226
|
+
vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py,sha256=tULmZD-It5bOOf08FFJFN-bx2A_0CT5Atr9zgvpiGBg,6026
|
|
227
|
+
vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py,sha256=oB-B2c5VxDVRPosYh47ta7hjHeZdliDacs62ia2UTCc,16609
|
|
228
|
+
vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py,sha256=peoYUHC_DBH6fddG68GxP0JgChSyHjvUJWSo3J7iamA,11433
|
|
229
|
+
vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py,sha256=YquNTZI3QUTlgBw9l51fPJ6oPjXrx00iF5vNvr8-86g,10654
|
|
230
|
+
vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py,sha256=NRn8eWXjTqGWwQTB0Yq8kNDC5UegJ_fcGlfmiJWIi0g,16822
|
|
231
|
+
vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py,sha256=Kl-NgdCqEpQSLFqcFa9jK1z4uVbb1Vi4_oDuh3D4C8A,9392
|
|
232
|
+
vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py,sha256=8vf_T7CzXhF5-UpZRiBYlUz_asL0ZkL6-CQB-KL-XC8,13878
|
|
233
|
+
vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py,sha256=9VYihpiuhJi-BM82D0NbT9uuRchY88TUglqwFq_HpLM,12710
|
|
234
|
+
vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py,sha256=zXORfqdimXcfY3IbryyOAkQvyA1pUtz7OTVaZlqJ8XE,12244
|
|
235
|
+
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py,sha256=pkrkw30ym9Y7Lki7d9ICFkHus04KDor6t-BjbyLg2sk,15380
|
|
236
|
+
vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py,sha256=6BQ_9EZlxIjZJsoSQ0WjlM9IIrdwaONkKLIE9glrDf4,4295
|
|
237
|
+
vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py,sha256=wDtzbkThI3gP-GVy1yxkjULESJYd8ykO-S8jzkFk31c,12090
|
|
238
|
+
vllm/entrypoints/openai/tool_parsers/utils.py,sha256=56zqKHw3Q5XqhqNpwZWnRSbzhx17qzuyFsASFXmeZZk,3805
|
|
239
|
+
vllm/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
240
|
+
vllm/executor/executor_base.py,sha256=HWYmjAhNKt8geatz-MZPjoe-YrAnISuU0W5oC-Kz2hI,15663
|
|
241
|
+
vllm/executor/mp_distributed_executor.py,sha256=6mrv5wsBUw6ng08_qyn7LtmYsaGgGgTHoZSdP-hmPtY,9854
|
|
242
|
+
vllm/executor/msgspec_utils.py,sha256=FG5Qh6ghtLjyI6BHiTGmJQ3sGop2Lpm4LoADP_0Hs9o,909
|
|
243
|
+
vllm/executor/multiproc_worker_utils.py,sha256=yXbbNwhImWw7xgSgjoOSwbGLsuW0hCLsye0IFBVU0pU,10725
|
|
244
|
+
vllm/executor/ray_distributed_executor.py,sha256=MBmbhw1mv9otEVwV60Eb7xKZqveR65wo04PByw60dkI,30706
|
|
245
|
+
vllm/executor/ray_utils.py,sha256=E0ywH6d3cmSzGMrXcenEDaz5juD1kb_vW0DWxvaAm3I,16814
|
|
246
|
+
vllm/executor/uniproc_executor.py,sha256=SNUB-DP5P2se7u6Bqdakyvoy_DA7Iik9teLLxbTmyWI,5479
|
|
247
|
+
vllm/inputs/__init__.py,sha256=_jOVkFxJx9_jHaYLayw7_L_RYkEnEb7UX-TRLM8uFrA,1260
|
|
248
|
+
vllm/inputs/data.py,sha256=nlkg9w3vb9pFYumH75P1UEeGbNtkNwtkHHlyAWGZ854,10760
|
|
249
|
+
vllm/inputs/parse.py,sha256=ZFOAvZ1IAmmpqj8MhlXvS1cKEZHGyU7_u0xUfvhoUZc,4430
|
|
250
|
+
vllm/inputs/preprocess.py,sha256=b8cn-DML6NMoeHL-VO5mg2qVJd_Kdheux5LXdW5b5xQ,34347
|
|
251
|
+
vllm/inputs/registry.py,sha256=3lIiepN6h_M3pVs0oEKpHxnea2pvWrW4mamRS9OHQYY,6499
|
|
252
|
+
vllm/logging_utils/__init__.py,sha256=t2aDazCRc19hTrOxiW3eY-d702nQFBOJz_QIfSip9ok,136
|
|
253
|
+
vllm/logging_utils/dump_input.py,sha256=YPtBMqv-QGnfTAjcAP_XFYxt0dN48s9rhge6pIJApcQ,3041
|
|
254
|
+
vllm/logging_utils/formatter.py,sha256=AAWbFV4wgQZn_Ek0MKA-TBJwCQiO3ejIuIV1rQm8ADQ,525
|
|
255
|
+
vllm/lora/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
256
|
+
vllm/lora/fully_sharded_layers.py,sha256=s0FuN-IxDL3-CF8t9-fDrxvDWOc9ufDcLpjyNbczFBQ,12625
|
|
257
|
+
vllm/lora/layers.py,sha256=_-u5jJzrKBpGFGTBM2luUov66AZGAJ7HP3OOGMjFCmQ,46671
|
|
258
|
+
vllm/lora/lora.py,sha256=KqfRCDAotJK0z-NU5-wtg4VW3HWpjChYKqEjunon-ec,6225
|
|
259
|
+
vllm/lora/models.py,sha256=dZK6YN3VVZuV75fJ3QJhoOWJk_u_ligSjQXuRck4hWY,35962
|
|
260
|
+
vllm/lora/peft_helper.py,sha256=ubfv7Bg4ksQhEXPclK4XOcx7oPhNZ2X2-kBX2Hu_AkM,5265
|
|
261
|
+
vllm/lora/request.py,sha256=TLFUua7VUoHsiIwAsLCU9TFF2c_zvvGbeukI8yNOh6Q,3109
|
|
262
|
+
vllm/lora/resolver.py,sha256=SpZaKKlmfMAvWqkL19XPm0TV1cen0KRaa_ZFlVjarhY,2812
|
|
263
|
+
vllm/lora/utils.py,sha256=-NwSmiKcez26MOH8D9PbkUFCjVi0wwX3nWxenwdXxsU,9249
|
|
264
|
+
vllm/lora/worker_manager.py,sha256=uCnJxHia1IiAlq_wO6UsCct5mq2fBGPcX612tmTYlUQ,10779
|
|
265
|
+
vllm/lora/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
266
|
+
vllm/lora/ops/torch_ops/__init__.py,sha256=z03eb5aCSj_Z-_RPa3huUKuXRBvhxj_M8lK7izkQJHE,466
|
|
267
|
+
vllm/lora/ops/torch_ops/lora_ops.py,sha256=ilxQObKw2wEz3BJJ8X87xWPHGOz0jSII3b13wpj66es,4300
|
|
268
|
+
vllm/lora/ops/triton_ops/__init__.py,sha256=5NLaUPy-HiTkRFincQxa36n-5QjEdkfEyVq7LKoB-BE,315
|
|
269
|
+
vllm/lora/ops/triton_ops/kernel_utils.py,sha256=jDzZUV7CK-9Dg8I-EP91f2d2KeKj7sb2oyRR2QLbSXs,8440
|
|
270
|
+
vllm/lora/ops/triton_ops/lora_expand_op.py,sha256=rOYZ4aLf3XhuxdwSdk1sSv7LnIK382DPJ1u1zIaTbzA,8896
|
|
271
|
+
vllm/lora/ops/triton_ops/lora_kernel_metadata.py,sha256=H_p_IRRT1_v_WTFiZQTlYZG54gi4qmF2HYVOBNhPMmY,5898
|
|
272
|
+
vllm/lora/ops/triton_ops/lora_shrink_op.py,sha256=CabFcucdQtphxgkVoG7aK5DEzAj1QJwd40TMXXPJCdM,7927
|
|
273
|
+
vllm/lora/ops/triton_ops/utils.py,sha256=Po3CiwA-4-ivvlhGomHQ67d85oe4Y8x5dVi8mztDrQA,4847
|
|
274
|
+
vllm/lora/ops/xla_ops/__init__.py,sha256=u7yvnIEYhbY2Gfi5UrtLN_goe5HUzysu6a-EWM4M_f4,235
|
|
275
|
+
vllm/lora/ops/xla_ops/lora_ops.py,sha256=f9wRYwtUKTSJkDA8jHklaUii50GKjqF0jBFEPBP93h8,3782
|
|
276
|
+
vllm/lora/ops/xla_ops/pallas.py,sha256=NOVMGx17zbJUacCuYv8gA_AbckXLx1LBecPQWLlfXqs,4387
|
|
277
|
+
vllm/lora/punica_wrapper/__init__.py,sha256=RAbrZogtmoPZNIMImJFX1REM0cydwz5C-ATIp7_qHFA,244
|
|
278
|
+
vllm/lora/punica_wrapper/punica_base.py,sha256=6orkD8UXUcwOSVKHOGRH3FQJwDZjnhkAwBvCo7f7bdo,18431
|
|
279
|
+
vllm/lora/punica_wrapper/punica_cpu.py,sha256=w4KeY9_dxDkKLpecVCsBwftUsJfWi-OkP0oc0b2TM2E,12458
|
|
280
|
+
vllm/lora/punica_wrapper/punica_gpu.py,sha256=d0hJ5C1Oi9vuQJu5rYMFWPk8D1nEK0CAeiQwDHrY4H0,10816
|
|
281
|
+
vllm/lora/punica_wrapper/punica_hpu.py,sha256=cmXMpLBNtb_2OlX_YwUgn5II09v3-NKuLnrmIn0MhaI,5779
|
|
282
|
+
vllm/lora/punica_wrapper/punica_selector.py,sha256=r4qj8OgaSmx2yVJzU30D8M3-SjnyJdg6T7-bisSpluk,730
|
|
283
|
+
vllm/lora/punica_wrapper/punica_tpu.py,sha256=IMTZyI2B4p7vP_lmFThw4umGW2wyW7DWE2P0hniuV8o,12705
|
|
284
|
+
vllm/lora/punica_wrapper/utils.py,sha256=wYIF3_CbpgAVBBetCG_Sa99XhJIXNFPxmqfC8gFUey8,6981
|
|
285
|
+
vllm/model_executor/__init__.py,sha256=cRhmybV9ftoNVy7E91WIczp4wLL4E6y77KQ9vrhWqL0,505
|
|
286
|
+
vllm/model_executor/custom_op.py,sha256=PMJHMRZZ-OOkJcTOQzjFaePYm93E8yB3Fs5fp_Y2pfM,5600
|
|
287
|
+
vllm/model_executor/parameter.py,sha256=uqCaKZy4iKETQMqNleUXoMKcQ0zYA1KwEO32s3d4bwM,16689
|
|
288
|
+
vllm/model_executor/pooling_metadata.py,sha256=cdeVmpAq93ye4e67W5etqasSLkSW0zeWkcksUU9VUfY,2058
|
|
289
|
+
vllm/model_executor/sampling_metadata.py,sha256=t3QCWt2l6I7oXRwEgJN81ajhdJS-bUcWwlOP9grHUs4,22944
|
|
290
|
+
vllm/model_executor/utils.py,sha256=uSAu77_xLzeOBIlRQ6qSyEqrRm1zqMok3MtiOcpn11M,1909
|
|
291
|
+
vllm/model_executor/guided_decoding/__init__.py,sha256=pE-MqODU9s4YujKFFV6zTNlUUZbBSwE26rZtwQvAm0c,8263
|
|
292
|
+
vllm/model_executor/guided_decoding/guidance_decoding.py,sha256=DXVsduVmbGjoLKQiDWEDyQLafHitNRNZ6KdaZspdQrA,2531
|
|
293
|
+
vllm/model_executor/guided_decoding/guidance_logits_processors.py,sha256=BdK6yf5BaF1X0MWvFq8wWcLb22BSTLt-rHlFtBzkaX8,3275
|
|
294
|
+
vllm/model_executor/guided_decoding/guided_fields.py,sha256=RNX9eRh_qIoVCF3l3OOqcM9MTVS9Cy84dLkNs62YKS0,1551
|
|
295
|
+
vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py,sha256=uUzfxJuP1NDjgLzPdEHJqGCj17J2JInP_LksjCP2utQ,2678
|
|
296
|
+
vllm/model_executor/guided_decoding/outlines_decoding.py,sha256=wb4qSxcmGEsWB2qYXcYgVsy_2_xDFEcSIw0s9cWQjGI,5502
|
|
297
|
+
vllm/model_executor/guided_decoding/outlines_logits_processors.py,sha256=iEKsX5QqQCpylribWKmJ8_zyS4Ny8Vn7DqTcEOD07RY,10820
|
|
298
|
+
vllm/model_executor/guided_decoding/utils.py,sha256=K8wdE8Q0t3MtVp97YsH9ZW7CO_ClmkmrGJ-roBtsZwE,7897
|
|
299
|
+
vllm/model_executor/guided_decoding/xgrammar_decoding.py,sha256=801Lc3eO7olNw4ZpTGCTHpb2_o8TbLkTUScVumlIwCc,16764
|
|
300
|
+
vllm/model_executor/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
301
|
+
vllm/model_executor/layers/activation.py,sha256=c1VCKOkVaHdeuF6jTLd_D5Oc56mrDjsMG0XF0x9JxNo,12515
|
|
302
|
+
vllm/model_executor/layers/layernorm.py,sha256=qP0EhiSas-uQ41_AgzmrK0-y8GsOxmb6Ssa7LXzFJTI,9161
|
|
303
|
+
vllm/model_executor/layers/lightning_attn.py,sha256=tutKvEgCuSl1c3TyQSl4tt-iH1vykGm7klIwUtWgSFw,20893
|
|
304
|
+
vllm/model_executor/layers/linear.py,sha256=Xd6NXtGi_Qmy9fEHVXKcwbCgcJ_9JS8sRUMgmyuj2JQ,65401
|
|
305
|
+
vllm/model_executor/layers/logits_processor.py,sha256=5wOuzlbfGtiki7nuiTpDzJJPdtGmSPm16sAhX7O6h0U,7756
|
|
306
|
+
vllm/model_executor/layers/pooler.py,sha256=rR6blqywbnyMG4-Mj67brZo8T2cwy7wzTOOkKifvQ2g,11680
|
|
307
|
+
vllm/model_executor/layers/rejection_sampler.py,sha256=h6UdSonlTVnp0oF8RPmbnJjRR4YDbh0E1ptE11xfqR0,16315
|
|
308
|
+
vllm/model_executor/layers/resampler.py,sha256=UIvozVCcpEm_IxtLCvOwqPff-FcjljLkHHW28BC2ahg,10437
|
|
309
|
+
vllm/model_executor/layers/rotary_embedding.py,sha256=Flfhg6JXRqydSGntBGbkA2rUjV5lY6eAjBfwwSG775Q,78050
|
|
310
|
+
vllm/model_executor/layers/sampler.py,sha256=ikwJUBvOL8QyN7aMVs1cnSWBOF7hFA3OViOtwh65iW0,49620
|
|
311
|
+
vllm/model_executor/layers/spec_decode_base_sampler.py,sha256=91mkD-yavnUNMcTNxvtV48STOXqeSr-FWwJA9KPXHzc,10184
|
|
312
|
+
vllm/model_executor/layers/typical_acceptance_sampler.py,sha256=8XwpBA-UHesGe0xF6VmUxvGaXBHAA-glUVX1GkA05dw,6957
|
|
313
|
+
vllm/model_executor/layers/utils.py,sha256=DbrpXEH8Y3EWR3sc8l4c2DHuu-l8A3GELKK6gy0Pwdw,4025
|
|
314
|
+
vllm/model_executor/layers/vocab_parallel_embedding.py,sha256=SusZgu5R27CgWb3f9Nd_5fZbU_ryL8GBmda8RitgCY8,22696
|
|
315
|
+
vllm/model_executor/layers/fused_moe/__init__.py,sha256=EqdHkNJifWDGb0vfah2fuebXuNAwbza57ggUOC95ZGg,1365
|
|
316
|
+
vllm/model_executor/layers/fused_moe/cutlass_moe.py,sha256=UA9Pb8J4c0dSpvb5y-_xqI2jYGkwWVfCwF5hVxi2llA,15755
|
|
317
|
+
vllm/model_executor/layers/fused_moe/deep_gemm_moe.py,sha256=I_TKmdFp-0G0_h4I737jIzbnIkruCX0h4DwpkSxupjg,7749
|
|
318
|
+
vllm/model_executor/layers/fused_moe/fused_batched_moe.py,sha256=J2mU8FMcpwYP_5nprB0tfheoO9_5myx8ssX4vi1x6bs,25796
|
|
319
|
+
vllm/model_executor/layers/fused_moe/fused_marlin_moe.py,sha256=6duTPNBrIDvqJx4RUv59P1G_q9Pg2bhkkI_giCckvIM,8842
|
|
320
|
+
vllm/model_executor/layers/fused_moe/fused_moe.py,sha256=ghTENHHnMJqu_4tqSdIeP94rlv-GMBEs7JrKGCgo3FM,69246
|
|
321
|
+
vllm/model_executor/layers/fused_moe/layer.py,sha256=_TUy8elYx-wDPTIsNt_gGRtTrfZGNoyDD14AWtG1xdE,56222
|
|
322
|
+
vllm/model_executor/layers/fused_moe/modular_kernel.py,sha256=Kz5td0Nb4dkmNtrqzrTTEK50IAFcz21ZvrYxtSAJu90,14492
|
|
323
|
+
vllm/model_executor/layers/fused_moe/moe_align_block_size.py,sha256=hHF5Nb-trstfQnhjmo3yruQG84hSWQ-7ytDg9NCyBpQ,8271
|
|
324
|
+
vllm/model_executor/layers/fused_moe/moe_pallas.py,sha256=ZAj9CW1VJrJUVOxa6RHeSgJFKtzNHMSztv5CAaDHJfg,3153
|
|
325
|
+
vllm/model_executor/layers/fused_moe/moe_permute_unpermute.py,sha256=XD3PhPLA7_0Hc42XOyEdX5ynNmzJNe49P2OLBLAMaC8,7914
|
|
326
|
+
vllm/model_executor/layers/fused_moe/moe_torch_iterative.py,sha256=XSzX80m9PnrlCgGPzSud-lsro4Xx9qlWoiT9OfBZ6WQ,2087
|
|
327
|
+
vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py,sha256=GKqP3rPimwLlTe9_YzH_Dce_cJuRodbBkpOB7aXKnDE,5315
|
|
328
|
+
vllm/model_executor/layers/fused_moe/prepare_finalize.py,sha256=5bgsRtwVyujbF35kWeCBGw_CC7CzF6iYqQTXvWnzhN4,2160
|
|
329
|
+
vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py,sha256=lEpKXylmYhlh_6N5UK00dV_DfcPnLCVTvuQ0VhstDGc,13158
|
|
330
|
+
vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py,sha256=4yg5yPhyb-iMplQyDB_nCdpseOHkTEECi3aFlb9SmrU,4058
|
|
331
|
+
vllm/model_executor/layers/fused_moe/utils.py,sha256=l4yOYcYzqU6B-snpEbHJ5H0CbDo8r2fjJYXP6QK2Ckg,3198
|
|
332
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
|
|
333
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=hH5rRN9Wtyv35azxMzyUMHWtiKgOHev5tNjIG8j6dsE,2751
|
|
334
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=qPumkNxaHMvVBnEjPe_Xiuz9ICb6Hqc-9I1DAR8s3gA,4130
|
|
335
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=s47lb8VLnyxMgWlqcIR4BdPBsjKWL4olXF49uZvygzQ,4140
|
|
336
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=gzfjrYDcS0vsACq7ONGVkNA3FqVjr3e89q9fO9kokkg,4133
|
|
337
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=Np7yRX9Z7Y7Z5Nutbl02wpKdZRltbt4WqlPlleiYs2E,4146
|
|
338
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=XsNfNXY8v0eatazkLCDiDclI0FnTudUGLYO01e1_4aA,4149
|
|
339
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=H0K4_O1CMbNLi-srcycT3lSl4JaBl3EGF89GY5Rj9MU,4130
|
|
340
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=arPqstZMzZjz8BNpY3alKT4vGCJyUj5I2hEeK02aq98,4152
|
|
341
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=wjnQ4v-dflJMR3iFDHBuZI_1R0xXjsNoWc2kHu6C8JI,4135
|
|
342
|
+
"vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=7WHPz_0fxeI3Ed0D9VIpZVoeN9RtJVVARvptfcmQu40,4146
|
|
343
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=2kWS9Qvy5Q3mvUFmbPVures5iZAriAXsy8WrtE5wu00,3727
|
|
344
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X.json",sha256=D2dn9vXyN4FCKsZCf7VYgAWLedCx8XpPjbkQVVAvwAA,4737
|
|
345
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=5QqFljwwA8OaPlFnXy1zogl5oi6aE0OqN39xk2IUC64,3245
|
|
346
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=I3k416HbXU_rYb8scD8gAI4fuBlElHl06PM347Qa11w,3253
|
|
347
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H20.json",sha256=RgV8C4F1LO09h01YsgF_eqX6GNoBtC7ulPfJRUUbg_g,3241
|
|
348
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H200.json",sha256=nsNEuDNks0tVLfQfIm7xxFwEeptTfQcoa9fJy0NS8xQ,3247
|
|
349
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=PvRpT_brUV3Y3zMfWEcsXMmdrYKjiq2qI9iHejPhhsU,3743
|
|
350
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=qbqjisJ4oKmcYzumHPRk5UyOzsdi8J6xas82UWHMeAI,3263
|
|
351
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20.json",sha256=vS2DRIDOqWyiBvbG6H746ownfkD1F8Aj2YZ0ET9xll8,3232
|
|
352
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=MlpzcrkZo78kFYr6cqmh4lBdpxKcEvlzqvRf0bmeduQ,3264
|
|
353
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200.json",sha256=xqhl748it8GV2KXX0XixitE_ywnsKksqK8AGL7tAgT8,3254
|
|
354
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=FsWbV4Q6AzAtgegVuENBDz2ZcSJsqNiwUIVfQbpP7hQ,3244
|
|
355
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=rN55MyeJ8U6VGNRg7lwC3aa8BgjxdzVg-CofcZ7LTyk,3743
|
|
356
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=IuvyC8TNhCVAmUZfLSoETsyCKsmejKXrs_0zuwFLPAU,3265
|
|
357
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20.json",sha256=10Ntu2aVD5vGLonx-jW0qNw-tgZWdZmzMGx7utDVeng,3237
|
|
358
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RFH5FcN2ZCPk6DsxviTti1Q8JU5jzBRFXvUQNgOvnmI,3265
|
|
359
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200.json",sha256=JraM-Nvbg5V_TJkSl6UPFYZN1zHHoIbr2pAcksenoTY,3248
|
|
360
|
+
"vllm/model_executor/layers/fused_moe/configs/E=128,N=96,device_name=NVIDIA_H20.json",sha256=JtcHRlPz8xQEAqJ9EWI63oYvdmjQFG6VTHqtt85VOSA,3221
|
|
361
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=AMD_Instinct_MI300X.json",sha256=f3iM3xm8hGUirJ4ilAIPO6Pe9bs4sm3qaRKMswN9SKE,4731
|
|
362
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=NVIDIA_H100.json",sha256=Bq57MPQXuSib06u6OwiEmSzOr3XvPYoD6ohYDJaBnII,3244
|
|
363
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=pCCKkdUzzuBVtljyk7AEIAbeDf12DUiieXaODZXzm5E,3254
|
|
364
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=trX2-c4N6hTTD6zFNi6A2bT3FkhxKjkM2rPl-o1K9ss,3250
|
|
365
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=I4d56uD7E1JMXD9RAxq3FebdPquDsnNEkVaIY9Ctm9w,3246
|
|
366
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ypuAxMQ7JESPXLBltt68wly2wTrJzlnobhUMip6xAmc,2751
|
|
367
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=tUptlureu5QgyAEedtx5sm7CFudXAE6fIXepOb9gfas,2745
|
|
368
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=h57svdmDlZC_D8w9XWjPRS8ciYVkJiPEYfhrD2NRVVY,4127
|
|
369
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JmXhUnhX6YOy8RsmT0zFLGyNCpRBPV2q2Db9Y9ctZeE,4144
|
|
370
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=G4PKqWxh0MlBhg7QHKj0m--_fP3Ll0gs7VJaeg-NIDM,3254
|
|
371
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=bKX9AvcxN6k-i3RUmHSchZZ3rjoYRYb4iBqhCI4L3MY,3257
|
|
372
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=bWR6XBZ4nJ_ROg8rEgrQGc04I3BDbwILDHMZxATO-H4,2740
|
|
373
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=Gu1wROuky-xS0dsFgbXS2QD_hOVV8yol9a5iqiYyq3s,2749
|
|
374
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=_9HO7SaR6aQeh6vqCDpo3kjHnGJ9BVKLiMwYYgd3SmQ,2913
|
|
375
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=2ONiQSa9odzdPe1dIgBpP24l5z-5wB1eos06xOj0V_Q,2738
|
|
376
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=Twkm9DVNxijpowfvioJ_4cKwIIlAWdyNWO9TA3gxAHs,4149
|
|
377
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=THQWP1o2bWhnJh0rq3ZIVvs_sagIJgoK4x3pJbiFbHk,2910
|
|
378
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=o1pR3rNpO1eW4BHOKpPIQLjviw4P2X5Fr4HQBcdHA-I,2747
|
|
379
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=iySqae0zI_PRBLqV-vfSCwDS4Jxcl5QjWa2NnhndL0U,2752
|
|
380
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=Uhq0SrWiCrldkWbb0ZZZhWaCZ0SsvpiNL4z30KZUN5g,2747
|
|
381
|
+
"vllm/model_executor/layers/fused_moe/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=ydsFUdXdVE_ZSScVhUxvxOFwKG-nkTraNeN69wqzxIM,2903
|
|
382
|
+
"vllm/model_executor/layers/fused_moe/configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=TtDngG7ljrU5RtWZ7g-xxdBT3uEuawiKhP8EwPr97XM,3254
|
|
383
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325X,block_shape=[128,128].json",sha256=fT7fwjuit4HbbyREYV3ECJ9Rm88FW-V54e27nG9nA_Q,4741
|
|
384
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fT7fwjuit4HbbyREYV3ECJ9Rm88FW-V54e27nG9nA_Q,4741
|
|
385
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=HNvrgcXxV-eVMLwb7zY_R5KgJ7uBz-YIyQsKq1lWnWA,3263
|
|
386
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json",sha256=bHJEVy-CeImiY9JBRCMlHfHPAUi5xO7ENxgVVboN2Yo,3258
|
|
387
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=PnNmKSRFznCIUzZ4ZfaYTrMHeF2_kCQr4_bsEy_9Zu8,3259
|
|
388
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json",sha256=0Vlxxzp4wrvkFj-NF4OAsJAaPkm-hhisJg0tgNl-W9g,3254
|
|
389
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0aSYzpv_cBAlpWCPrfGgNTCfae1KdKQnT56E8XFQl7A,3262
|
|
390
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Lqom_VMIPduSZTZQdeL2Wl_x3r9q6RmI9bojJrYwQZ4,3255
|
|
391
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fd2p65T9OboKIgw7MQc4IdKaJsoO73Nu3VQiKjV6Ffk,3261
|
|
392
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FUGuYbs_QhqKfErofvbTUplhAVN465A7NR_-ryXvebE,3741
|
|
393
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bpDPbTyrXLyCSy-o0diveVVeVUF_xj-fdSzCzWmEcKA,4733
|
|
394
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bpDPbTyrXLyCSy-o0diveVVeVUF_xj-fdSzCzWmEcKA,4733
|
|
395
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=skSJdv0Pr4rba5ODxp-fHZ6dpxn8KkvACGzNf74j81I,3257
|
|
396
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=wMt0NyoRSdACdmS1Qi3qFiu6GiFX-4lVvbGEno1W4zE,3252
|
|
397
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=mtm7RgEBEJJkHsOis9BtAFo1OCk3vBbt7l7eumDzd7k,3263
|
|
398
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=R4B2n2vGt4pPo6jS4Bmnx8AYtcfF9qQJE5bD7OhmXHs,3265
|
|
399
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=JnqtO0t2HBcQECdYavi18mu9_MwblGr4zfRcW4zU7_c,3265
|
|
400
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bpDPbTyrXLyCSy-o0diveVVeVUF_xj-fdSzCzWmEcKA,4733
|
|
401
|
+
"vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=rVORXxNsxy4WmO5SJR8Sd4k7vozKqhYf50wZNCMeQzs,3239
|
|
402
|
+
"vllm/model_executor/layers/fused_moe/configs/E=60,N=1408,device_name=AMD_Instinct_MI300X.json",sha256=4UXbsSNHmrSWnD85SdRMLp4cFGRufndzJjB6hoQPclU,4736
|
|
403
|
+
"vllm/model_executor/layers/fused_moe/configs/E=60,N=176,device_name=AMD_Instinct_MI300X.json",sha256=p6TKUp-KDeLB9E9LqThR1e7J2-ogSXPJojISdHgCxaY,4727
|
|
404
|
+
"vllm/model_executor/layers/fused_moe/configs/E=60,N=352,device_name=AMD_Instinct_MI300X.json",sha256=gHxtmO_uvpueLVlsJgXBVE3_pS1S9EeRxNmHG_ZQszg,4729
|
|
405
|
+
"vllm/model_executor/layers/fused_moe/configs/E=60,N=704,device_name=AMD_Instinct_MI300X.json",sha256=tVdpbIU1scsylx6oz3IADhkcwvZaNqw-_QVb7a6oVX8,4732
|
|
406
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=6QPLhZurIqcMVdy3w0Dd7gLViKxsyJRBz-qd8agpi6Q,3248
|
|
407
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=WPu80-OWyEJBy1hdnewLN1H1neFW8UVJrqyeDGegXc0,3250
|
|
408
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=PaFLbT5ftJiiVSOVkq_DH01EcbIs0sBVkCd9PdYYmw4,3253
|
|
409
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=ozS2ECxk-Dsd4Y9DgCGGwDwJlCf5T20ANf5gnTUMuSc,3252
|
|
410
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=KEN6xt8pgPH_FbLT2fsAD4s03_V-Z9GXuEC4IKe3cPg,3262
|
|
411
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200.json",sha256=w18R3eHB4oUhfbcCXjHyDvp0RiDSeCrfM-VFESim2hQ,3253
|
|
412
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=iz4W1UAV1fcz1ZFh4hNQSLJ_F1MdXW-V3msy7t0WrRM,3262
|
|
413
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=dYpKgvuG7Jji0W0zg_E9NfIojStBAdBcKd4B3nhimqk,3263
|
|
414
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200.json",sha256=CXiHlGpea5cEGmFi28Jec34uxEZITF2XldVFcJteZX0,3251
|
|
415
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=W1q4PfievvgJ_SiPsDhOsR0Q0eJKb4o8JZhMcVhC-_4,3264
|
|
416
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=tku4-yTbIr0H5TNrm1Pq3tJJFYTXqHpdzJDSEF3bk9A,3238
|
|
417
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=HJcV-Tzt-yojzNQkPCgi84B44F_RppXxOIicRyg20-U,3264
|
|
418
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200.json",sha256=bM9g-XpirsThO3Q2x8ChSx3PPtHuHRXLvVMnTWt8jLI,3243
|
|
419
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=oxOKFDrgmw1YmgxTtRa1uoe3p09ylTLrkj_jOTqNh1Q,3249
|
|
420
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=-B6gZAEYLwMJZOnpO81pTxqs-YVKs_144Nn9BSLaMh0,3247
|
|
421
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json",sha256=GPjPHicomrS7ntHu7nnvgNXcHCoUw9vhyTUewkXpppo,3252
|
|
422
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=ObHUCUAgHTnld8Cq9Dy1n3ilmbBzyNC4jZcz6YYhMXA,3264
|
|
423
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=WegYsHl39QVlHu_4EZJSrgA4LQ5fYxSVNWFhoL6W2Rc,3251
|
|
424
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Hrlas0Nt7d3JMr1vTpI3OVgkzxqcRziSMfFf_U5pQ58,3267
|
|
425
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200.json",sha256=J59rmqF8NQWkqmay__ahA3t3IwaPXNu5AVNLnTaDfYA,3252
|
|
426
|
+
"vllm/model_executor/layers/fused_moe/configs/E=64,N=896,device_name=NVIDIA_H20.json",sha256=GNbp4W4MBoHHN4-0sXJovY0lX6rHfZzGyKicrumupGQ,3225
|
|
427
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=X8FVPE7rLblDs_Dw_Iu-KDw9H7PaC417EHyVclYjfv8,3733
|
|
428
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json",sha256=FsIv5bqSpkWbxK2dBfg1N6tX9epZ55ZhgkJCD7hENlY,4733
|
|
429
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=CnjQX3SlQn6fIGsX6P_dbNO0TYgAd-sVUb1FfDcDFUo,3732
|
|
430
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json",sha256=fnO-v4YqBz0vUo0UtOTTD0n7VDG_ivczeQ1tR6Qm9f0,4734
|
|
431
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=V_sgDtEtGEuBsGVa0maYJHhhGqe1NE7l-1ek2ed9WP8,3082
|
|
432
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=QaITFIJU4UsrOBXaGdPYJwTmYJ0nT9kiiqeUiZzvd1k,3270
|
|
433
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200.json",sha256=CC_jsMhXzrYne7eIOroDa0fCBKNnffiaVW2TKd4P-ek,3260
|
|
434
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=LgHbxG1kQV36zZPkJcnurHYzwAjMh04lvEHEsfzS1t0,3732
|
|
435
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X.json",sha256=_fcdkmWvdMqHiH8ZAGke-zXhH7qVPQx5CmKELW5hRCA,4735
|
|
436
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=mVH8Rl4sLATinf7_0A9lTS83kv1E7Cm9oC0BL-pc9n4,3732
|
|
437
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X.json",sha256=JKYW21c0CzR0fgE5ZnYp6C1sY_tVRlm8L_lgak5V5zE,4736
|
|
438
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=yTf2R9cngSf4OafucAYlDDn4-bftaMFKaY7qhaBZPqQ,3739
|
|
439
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json",sha256=_1eVE7ok935L2V43-3D3bVNWSVaoViia19sh0VrXmXM,4735
|
|
440
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=5exlPUKvZxGDR0UT4_Dn5fp-_ZETJ6_Dbw_Vk1u8bbE,3735
|
|
441
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json",sha256=18v6YruKbQ95pXPV8ocV4VdM1zNw3aZFp3WByeUkNSM,4736
|
|
442
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=AffDc0_51ML8HiA3757zbD10TZJdUsUDIYIqO4g0yUw,3250
|
|
443
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=IEYBNjt9HGnzoOVSWvL0A0jUqq926QD0_BvVYR4RA1Y,3252
|
|
444
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=Ns9Y12aZbJnFhcG3nwb67bDqqiQAo9tdTAIe8K2Ajz4,3255
|
|
445
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=uGSLFPZXK_JQ3GTDUAEiIecDor1yjbC3bJvMolF0Xl8,3267
|
|
446
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200.json",sha256=8q6ol5JQBWj6yVfzFOn7Gz5MSXTaW9javL7qQmYVOwg,3245
|
|
447
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=6jRC0oOpVpq5c1xePFKNRy-Xtmb038i4LE9N2zao2W4,3730
|
|
448
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X.json",sha256=cFWeyNJtEbs-Bfohgzclxo1rcYGU863oV0BzJyQ4T0w,4734
|
|
449
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=SMtsqtQeqcyy8aNwl9hPxRvx_XQdT7I3SBDNJ3OIvwY,3728
|
|
450
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X.json",sha256=ZyOFJB6GUgGZsAjjT43XJwG8P-QrZ5yTvmgzQP7ThQY,4734
|
|
451
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=HOxWmCI2ifHmWc0or2y8nEen86jDeLDov1-tuMzuhxo,3256
|
|
452
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=csHezh0HGWaNwrblGzMgcE95hqbqjWS8HImLRJYr_ts,3266
|
|
453
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=_5weLBinQCDzyV75hHKIT95Y0ce94KWft2_5BC6EkbQ,3254
|
|
454
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=DlatRLPaSr8HJuO50gRZ2lzXoelx55EP3SDUdgIT2v4,3269
|
|
455
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200.json",sha256=TXSOoqvi-x8H13xPqrB9qz2T3opEGA-2D0v_4n5BEG4,3259
|
|
456
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=ro3drDpWAdeXH7IjMvx8wYGhIuDPOl0bpbJaIB5Msns,3732
|
|
457
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json",sha256=w_R2LL8k5jNVUARcqvSgGLvNoQiQC0Mh73ciqSIAz54,4734
|
|
458
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=hjDoTXRmEFLKhhmBFEjPowQus_z23ISonxFljql3c9k,3732
|
|
459
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json",sha256=AdOTy7ASetdAXUhNM8buoU8_rLLjcUYF0m8RGFrLWRo,4733
|
|
460
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=Ru460ZgnUP4U8OsJfwF8n-AI-gfcolNR3_qzoxG6DtY,3254
|
|
461
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=K6BGrKw_oHTAtHjsZldcjp-BUM1dIecKXrrRn9OpRGs,3254
|
|
462
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json",sha256=4aK_plqztXcJ-hs5_PsAvM0jclMzcO3hd3zTo0FhDro,3251
|
|
463
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=qqFoMaObuO8pFWcSb9q0wYsdC4eSCO7B-_ruQhR1N9M,3264
|
|
464
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=-5nkLIunjG1ghPoUEtt2AXEQw9oGiilP7K3UvQv9CqE,3252
|
|
465
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=WKzddrIXo-KavpuXuouW3aLLAptu5Q4XJUb5K2PLgDM,3262
|
|
466
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200.json",sha256=ad1ZkkSyLJwRGb4Kf24qg5hW_DPmt0BXrKR85oAiV34,3257
|
|
467
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_L40S.json",sha256=qX5_yErBEwDRzhv2FvxrS3pEMa8zn0GHzLp5TUMX90g,3872
|
|
468
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=ysRCWmxV20K2BYD9XEUtxwREFGtA3QHI191vHRA0k_Q,3733
|
|
469
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X.json",sha256=L8VA1sfygHoyLJ-Ybfs8DP5c0YWFmMkwxHT8yJ9PEFM,4732
|
|
470
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=FJWpDLr13XF3hHiHfJykpjbLiP7Ccu2en3U6BL-QwXw,3732
|
|
471
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X.json",sha256=FnVcfzf5gXkQRt0XgsRzIQVbDPaUDOwWJX_9qOlyvRc,4731
|
|
472
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=DxYu8regZOSFu8ugFGA_QbwWK4g8xwQUZF9a_nNY4Cs,3255
|
|
473
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=obzfE_9XgsbFNfC9biYOHxR-V_Bgc7PKT8qZZJaiJJc,3262
|
|
474
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=qwKy8oaMsd3QrXgQbM_x9xcfYiHK_Ou1CEwDPL5Gbgo,3259
|
|
475
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=qUifbWbE4cOKZbIHWmmLx68VRaslQX69eZHwRIQx-7I,3269
|
|
476
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200.json",sha256=JT-ZMLhAqqzSkqivOW5ATTKRlyyaFQkqQDnaPS4DE10,3262
|
|
477
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=QsR-Xr9vyuiArMTSo-dX-1DFgATfqwIGOzFuQJAuE_Y,3734
|
|
478
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json",sha256=EtVorGY4khTEuimlqZu0AAlPz84PH3ZkDZmVpxLtgQw,4735
|
|
479
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=D3wX0_s_ylo3nLIUfaWZmGYtMvX7oiieOLMdQ9k7mng,3734
|
|
480
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json",sha256=JPdO0azlh4yUvbpC9dEHYpRT11ELEr5LXBSb5XP4E_4,4735
|
|
481
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=BAJnXTZoewwCtzJLUPJ0oYuALv640MvDuLseGcsYaaw,3252
|
|
482
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=-Tj7ImS6ZFDof_0VTyq7kVm8XD9B54RD6CUOPSf3Jjg,3265
|
|
483
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=tme0ydWzIxdABZLk4tU8G_X2dJUYGGZNkQzNGcmcvUc,3261
|
|
484
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=g6Ivy4wvadaCAMJ4ZElbUU-CwyTMdbaa49M7IVQhVjk,3273
|
|
485
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200.json",sha256=GstQosPPHUn_I2DV3eMGtn3xXOw6kl1hb8L0EvRsbEU,3261
|
|
486
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=kF4Fx0yHUmiMSLFNXT6xqAEA4AgCaHOoy_3irv4dNss,3732
|
|
487
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X.json",sha256=uOlVzTdJl_4VrRK4wmxIb8JKfveFZRjO9syjw_oEeL0,4732
|
|
488
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=plnx7r9jkcYXkhvapbeeNvUg3NMGdGsIgIPSrfVy2qU,3733
|
|
489
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X.json",sha256=UC-iTgh8_dUSXRaYHOIhDH31KOiJmcfqM_Bv_UBf3ks,4733
|
|
490
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265
|
|
491
|
+
"vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=WQLKugnKzlQ0avf1N-41lRHtG6wJ56DfVPv_nip6NBc,3273
|
|
492
|
+
vllm/model_executor/layers/fused_moe/configs/README,sha256=W2yIZkP9O8GGlg97We9BJfTtWUtPbuz5ZH3esrrjBX0,572
|
|
493
|
+
vllm/model_executor/layers/mamba/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
494
|
+
vllm/model_executor/layers/mamba/mamba2_metadata.py,sha256=b_QAqnsaONqgN_rBxJRTEiVNEJjZLnOBqQJgJ1oVPtI,5024
|
|
495
|
+
vllm/model_executor/layers/mamba/mamba_mixer.py,sha256=Zf4FM5tdpJDsZwMMehWnNOjhpXnKJRPVvoLBZoONLaM,10141
|
|
496
|
+
vllm/model_executor/layers/mamba/mamba_mixer2.py,sha256=o9BPoVt90cVxZDCv_uluYQXAQbNttmR-ok4kIFhhqUA,24727
|
|
497
|
+
vllm/model_executor/layers/mamba/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
498
|
+
vllm/model_executor/layers/mamba/ops/causal_conv1d.py,sha256=_ZiWUKMLApKDWDH8iB_8Zw_GGGAFHDJRxbcMWQlMYac,4470
|
|
499
|
+
vllm/model_executor/layers/mamba/ops/mamba_ssm.py,sha256=fgqyeeNgybC7TV7kzk8ZdyZ16dclJPkeeTBaTzwVxcU,14165
|
|
500
|
+
vllm/model_executor/layers/mamba/ops/ssd_bmm.py,sha256=aH6uouYxCKE3dHnrYp4YNQYL0oqdy_AFHAV-AGds6-A,8571
|
|
501
|
+
vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py,sha256=gXXbBrcwMC5HOhQLLpeKkQFRpdVzEJR-Myyytgfqe78,20836
|
|
502
|
+
vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py,sha256=uiKw_a81nr0vTG9SDV84iP2UM3ZQ4OGQ3VFU7HATGvw,25612
|
|
503
|
+
vllm/model_executor/layers/mamba/ops/ssd_combined.py,sha256=Tr5DuGgR2TURLLANw-HxyqW31GGRUbAJdDE_r2xvGsQ,9332
|
|
504
|
+
vllm/model_executor/layers/mamba/ops/ssd_state_passing.py,sha256=dGpTw78VStQt8HRhn1kM7hB63OPCprFAkZiRxX6KGMg,7369
|
|
505
|
+
vllm/model_executor/layers/quantization/__init__.py,sha256=CrPvo_sf6fhSVUQYtyK-CWiwVZZyBdYjoAlgF0GDHDA,5151
|
|
506
|
+
vllm/model_executor/layers/quantization/aqlm.py,sha256=KErqCsHIu-2vOwYq9fq2cRa__mNfljl8wAzL1z5MXXg,13708
|
|
507
|
+
vllm/model_executor/layers/quantization/auto_round.py,sha256=PiklT9P4T04SVZXYbIsUB7NCNr68yZEgXtmC3Z8E-_I,13338
|
|
508
|
+
vllm/model_executor/layers/quantization/awq.py,sha256=nfurTPe6OnIM42lBIOqbhRwROiE-SahIsm2OiZH_DW4,7153
|
|
509
|
+
vllm/model_executor/layers/quantization/awq_marlin.py,sha256=KOdWgZdLyBjxgc6z0ehjkdaxG5lhxnxw5z7HPhneWKo,21314
|
|
510
|
+
vllm/model_executor/layers/quantization/awq_triton.py,sha256=DzFJAWUxQSnffDJpXJgxVxJO40GA23QxCNnjKWp2Ox4,12414
|
|
511
|
+
vllm/model_executor/layers/quantization/base_config.py,sha256=nj-s-IyqrMXyrYHB5BaQzxMjRDWEK9YYKJzxMBXXNqw,5158
|
|
512
|
+
vllm/model_executor/layers/quantization/bitblas.py,sha256=GvGxzBuI4oBnv8wHf1JAiHxj3JmyzbPBUc_veGq14yI,17491
|
|
513
|
+
vllm/model_executor/layers/quantization/bitsandbytes.py,sha256=bB5ziV-8uvM_ptEEb70PSigA8G-aPfHfoIUFdmO1TD8,15296
|
|
514
|
+
vllm/model_executor/layers/quantization/deepspeedfp.py,sha256=2iE8ae3ScLwMEdRv0sPobJluYA64FdSE-EwjtvYHMyU,7217
|
|
515
|
+
vllm/model_executor/layers/quantization/experts_int8.py,sha256=TgPYNnTPhedkinCi2tj1xEnP7n6AG1ZbQ7EH-WS9w_M,7620
|
|
516
|
+
vllm/model_executor/layers/quantization/fbgemm_fp8.py,sha256=J9amor_2KlQ73QYv0eDItO6El2HjuTJB7uxZotBzuBI,6895
|
|
517
|
+
vllm/model_executor/layers/quantization/fp8.py,sha256=eF4W2V5jmDSrm9tu1K0lPOOi0Q86qmbv6Ob32EuyN6I,40839
|
|
518
|
+
vllm/model_executor/layers/quantization/gguf.py,sha256=Xjd3d8gf3EwBOfqHnuCI8RACNw4EC39GdqJX_9XrIx4,21101
|
|
519
|
+
vllm/model_executor/layers/quantization/gptq.py,sha256=-1eDyQOsserFzBiQov5IEt9XpAnJ2VFk47-R-O8HqEA,10769
|
|
520
|
+
vllm/model_executor/layers/quantization/gptq_bitblas.py,sha256=V_85SUPBYiiBFV6zcL3jlFWiEGX7tQGanZDSkkZ_lqE,16949
|
|
521
|
+
vllm/model_executor/layers/quantization/gptq_marlin.py,sha256=m6Mls-UMAT1L_M5cpur4CtjxAIZ929XmCLwIpJgCOxA,26056
|
|
522
|
+
vllm/model_executor/layers/quantization/gptq_marlin_24.py,sha256=d9ceVN0WbvFB2tMiQKZNfLCP3-SbjEcPB0B-jLeOZvw,10949
|
|
523
|
+
vllm/model_executor/layers/quantization/hqq_marlin.py,sha256=jzBaYmCuuZau06o6xarR1kM9NQpLmk8pL8BJVcKHYMo,12843
|
|
524
|
+
vllm/model_executor/layers/quantization/ipex_quant.py,sha256=gjcVseZGoQ_NvGR35YOPrnXDMw0c-p0LUz-gAAnW4vU,9773
|
|
525
|
+
vllm/model_executor/layers/quantization/kv_cache.py,sha256=a-ZtHXlu18ph0tH0wFSzWnvd5LrF9yj9wDe9mU1jly4,6157
|
|
526
|
+
vllm/model_executor/layers/quantization/marlin.py,sha256=uaSHaMqt8iqd0FjNphZdLoYM4B1Bqma_fIyNvUJTi5w,9658
|
|
527
|
+
vllm/model_executor/layers/quantization/modelopt.py,sha256=CKOMTv8Vvkzp00OCnnB5H5FrEn9d51BjrfaYAOn30Uc,30969
|
|
528
|
+
vllm/model_executor/layers/quantization/moe_wna16.py,sha256=Tl2bW7p-8M8EFs1tTVNFGP48CbKguZeajrE0Bna_tRU,19773
|
|
529
|
+
vllm/model_executor/layers/quantization/neuron_quant.py,sha256=XDWJ6X6h8j3UET48QNy7Rzwo2gbfnMdlpCcKzgtCWWA,2497
|
|
530
|
+
vllm/model_executor/layers/quantization/ptpc_fp8.py,sha256=wwj6RfsIfJWgXOJEr2ahuBR2P0vqi-LQLiP9rXYIo90,5282
|
|
531
|
+
vllm/model_executor/layers/quantization/qqq.py,sha256=6qo1G4beLKA5VRw7u0zGuc4PlT3s7KPLOwnOGNQ0P4g,10014
|
|
532
|
+
vllm/model_executor/layers/quantization/schema.py,sha256=M3GFqR5dL5h44hEKK3KtWAVqTbX_6JqaqgnvQS4Wdrk,3680
|
|
533
|
+
vllm/model_executor/layers/quantization/torchao.py,sha256=-sWwHklxXUPCyEx89C0OrhStjClXpkzm7e_QX1E7-dE,4997
|
|
534
|
+
vllm/model_executor/layers/quantization/tpu_int8.py,sha256=uIfcwqeWi22PbK9MjI_C6Xoy2s0uQnooeL9y3lo0zC4,4512
|
|
535
|
+
vllm/model_executor/layers/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
536
|
+
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py,sha256=o94ww4WVBHbzW5dsWhadD7IWMN6BzetvjmSYV5kMS_0,28020
|
|
537
|
+
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=YsZ5Tqtufo1b3D7FBGtaXRBteFR1ru9sMmmH-1-QkuA,55153
|
|
538
|
+
vllm/model_executor/layers/quantization/compressed_tensors/triton_scaled_mm.py,sha256=BbISZdjI8rwC92DJ6HvV9uGzY9gPCQ-00oKCX96Yv74,7724
|
|
539
|
+
vllm/model_executor/layers/quantization/compressed_tensors/utils.py,sha256=etxoR6ruL3Vnd1XwcP4lbxZpBAefYvy5ce1rrAB2W6c,7737
|
|
540
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/__init__.py,sha256=HwT7ylrKlKeILKKmpN_DLLsGcOdFwcKqHo2BrgLfkTc,1030
|
|
541
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py,sha256=QpqKBxr0Xk7i2pu62Qhv7Jiofhh-i9ybmHKvMtgPc4Q,14046
|
|
542
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_scheme.py,sha256=Jxb5WOHR4m98DRO4y3XKJMDpT_NIp3EwkAJe8HYqwqE,1527
|
|
543
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_24.py,sha256=3BxaM0yfjHd00LMZ1nC2zjf3dFauCaShl1onK_wu4s0,6201
|
|
544
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_nvfp4.py,sha256=ew2zLQRSEx1CDy9porGJC3EG2p6imwqEr6AaWfgNhJ8,3973
|
|
545
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py,sha256=97lFruT_TgQFpDqFRR0WjfqFjZ7INHMiXNJgvO_RG9k,5422
|
|
546
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py,sha256=iqUyQSlOkZp_KR_4PhymQD_VNyLa5hZIFZBEqc9mwSM,6437
|
|
547
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py,sha256=DDqEHktl-Eopw4K1bfbvwrnrHfcdjf9t-i9kJvzIYOU,4861
|
|
548
|
+
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py,sha256=tdqgfWW1koBQ65nNZ4SWh1XZJhYNtze40etyAdvKytg,8472
|
|
549
|
+
vllm/model_executor/layers/quantization/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
550
|
+
vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py,sha256=T15LSUc7zsPSOHrsnEH2I1pIeZXv4AxtkBt95nMoRqA,2872
|
|
551
|
+
vllm/model_executor/layers/quantization/kernels/mixed_precision/__init__.py,sha256=S7fxX9Nu3fTgTQdvZOqvUOMxEzGjXWJ0-0rEEvxGLDs,3136
|
|
552
|
+
vllm/model_executor/layers/quantization/kernels/mixed_precision/allspark.py,sha256=jUVwJ1gvXP03FZh2zQXg0lFV8f90c8t93lS8reltNEQ,4375
|
|
553
|
+
vllm/model_executor/layers/quantization/kernels/mixed_precision/bitblas.py,sha256=6yncs3pOh2B5nbisfnHrTQW78mzYNmt-dgpe4sWsznc,11967
|
|
554
|
+
vllm/model_executor/layers/quantization/kernels/mixed_precision/exllama.py,sha256=6lWlotQYbg576slU_o-YZNxfrvDshelluyQW5iyZtMM,6144
|
|
555
|
+
vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py,sha256=vbYH_T_pC4QWhMgA7C4M21HVa_MHQ47q47pi3ieswIc,4974
|
|
556
|
+
vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py,sha256=bf6sXkeywSxhwg8f2HPxWOQeL4JSmzL2saxhI6I9KLc,5691
|
|
557
|
+
vllm/model_executor/layers/quantization/kernels/scaled_mm/ScaledMMLinearKernel.py,sha256=TBRrhA41BDedkexCyJ7Kd5a_npjVS6MQlM1qmZC7JOI,2039
|
|
558
|
+
vllm/model_executor/layers/quantization/kernels/scaled_mm/__init__.py,sha256=Bz50v-RZ4WSOnqG5_dEYmWZzLvyYsp5k17skVjVMbuY,3438
|
|
559
|
+
vllm/model_executor/layers/quantization/kernels/scaled_mm/aiter.py,sha256=h50yW_WdM9-t21kxnUbCz9HigCbJ6UG_sPYnqk3B1OU,4815
|
|
560
|
+
vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py,sha256=mpQpVaNcmcTwdt8DaQRumIz2vXRGdLIkeW-b5ecesSY,5995
|
|
561
|
+
vllm/model_executor/layers/quantization/kernels/scaled_mm/triton.py,sha256=esPYDUziIzqpeoNJAxQmIC3nulYmmdLj7RFRM6NbAVU,1276
|
|
562
|
+
vllm/model_executor/layers/quantization/kernels/scaled_mm/xla.py,sha256=tHGNCXaOH9QfnMMwtTd5VEvw_MiTvhHqi_mOnWD4-r4,4296
|
|
563
|
+
vllm/model_executor/layers/quantization/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
564
|
+
vllm/model_executor/layers/quantization/quark/quark.py,sha256=cvsHN2nhF3FN-u1G13cgMlq9Oe_o5fnQ3zXcat4aDZk,18940
|
|
565
|
+
vllm/model_executor/layers/quantization/quark/quark_moe.py,sha256=y43FIn3ZMftpuJ7XX6E1MS8Ch-_qBVV8cxRtpZLs6po,10776
|
|
566
|
+
vllm/model_executor/layers/quantization/quark/utils.py,sha256=8LvBDUYUe3OJgu8k0WGL0QiiMYPFeONJsiSGFMB0ZiQ,3589
|
|
567
|
+
vllm/model_executor/layers/quantization/quark/schemes/__init__.py,sha256=V13_meleiD0wxEZUXF4qN55BtQ9iND1iDlw7SsGlcbM,284
|
|
568
|
+
vllm/model_executor/layers/quantization/quark/schemes/quark_scheme.py,sha256=f_5qRmZ3SZvIgBWiMlNjfu-WN1uVwSvfLGFe9QS25R4,1491
|
|
569
|
+
vllm/model_executor/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py,sha256=tMArpJi4-su2CStcqszuwUH8NDbttu-UOn6wbw1up_k,4660
|
|
570
|
+
vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py,sha256=PKPtb2xZ9J-rTDUrK4YCIQUnHbht_pCmcTxT6gJN2oI,6233
|
|
571
|
+
vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_int8.py,sha256=Qk6mblu8708BgWariZfWQhQt9LOveiH_pMDN28Qw-Mw,5393
|
|
572
|
+
vllm/model_executor/layers/quantization/utils/__init__.py,sha256=VbdLnvlGCFpa2o9SRnEMflRyJ3NOXd6j6d1fPN_xm5w,166
|
|
573
|
+
vllm/model_executor/layers/quantization/utils/allspark_utils.py,sha256=OCRUcTMINyOKCWe0tik98msBXKTGXX2beZQZeTMyXz0,2191
|
|
574
|
+
vllm/model_executor/layers/quantization/utils/bitblas_utils.py,sha256=hwuBUHWA9bhNrc9SPGAnNau95rZEuATHaabxkyLlUFI,8144
|
|
575
|
+
vllm/model_executor/layers/quantization/utils/fp8_utils.py,sha256=eqPbWdYSL2b0wKgNnhyTDmVqYmMdS2gOzwmk8NknxHs,19761
|
|
576
|
+
vllm/model_executor/layers/quantization/utils/gptq_utils.py,sha256=nJmkoURxP1ndukTISQE1SyUv4_wj4-FAqANTjFObPxQ,3805
|
|
577
|
+
vllm/model_executor/layers/quantization/utils/int8_utils.py,sha256=LVHXoUrDR5vb090WDyMjQ89j_qjLRCwYgmaRrKVFIuw,15045
|
|
578
|
+
vllm/model_executor/layers/quantization/utils/layer_utils.py,sha256=HmjtrTYHbc5u6kKfE8cnu5O2Nqg9ZyRy9BAGHYgenDY,1562
|
|
579
|
+
vllm/model_executor/layers/quantization/utils/machete_utils.py,sha256=lnOgjOV2oVBW3UVACS49rdlolR0Eo2wT2RauMCLx1Rk,1061
|
|
580
|
+
vllm/model_executor/layers/quantization/utils/marlin_utils.py,sha256=yEIeVQf1k6__BOynCoTwnrGVBuA9nadQojK8WRFcl0s,18938
|
|
581
|
+
vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py,sha256=xLk-I0MO9foKRDVpTGY0n9wHXOBcOTP8xYwOaWDrIJw,10846
|
|
582
|
+
vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py,sha256=XGim66Drt_cR9uIuojv4yIr12STITtY5Rqi1Yjgl-AE,13083
|
|
583
|
+
vllm/model_executor/layers/quantization/utils/marlin_utils_test.py,sha256=4o6cqFTmSXYvxfhCBkM85c8zsZJ3nwnpnfRv-nZqr68,5305
|
|
584
|
+
vllm/model_executor/layers/quantization/utils/marlin_utils_test_24.py,sha256=z3o8gkeSa_7kccBqHd_1_8d3WMN-rF3mBc6iWFPQcqo,17534
|
|
585
|
+
vllm/model_executor/layers/quantization/utils/marlin_utils_test_qqq.py,sha256=fnMfIz4ZzmEkZij-ACU_0AZhUozTAPPMxbzecN6QOSs,4076
|
|
586
|
+
vllm/model_executor/layers/quantization/utils/mxfp4_utils.py,sha256=A72EEDCHrQpYHL0Rc2w9zjBE9HoTQUcHUwBs3ZIKeyI,1576
|
|
587
|
+
vllm/model_executor/layers/quantization/utils/nvfp4_emulation_utils.py,sha256=ORffDN95lKyVqNoxoYKeAerJXFD7Y6qNuiD-LWNEQzw,2188
|
|
588
|
+
vllm/model_executor/layers/quantization/utils/quant_utils.py,sha256=h1yl3NME6x60hGyJ6yZ976ND4ML5Iu7n3JL0j8dCFZo,19472
|
|
589
|
+
vllm/model_executor/layers/quantization/utils/w8a8_utils.py,sha256=TrwSQzmTdfXmaLSZw6RSk3uG3QsFeUSGtKaEMYD3GyE,16709
|
|
590
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=t8TaODfMF2Nq0qg6KOc8NSTs7m90Jcu6Ih3BXUvFb04,3799
|
|
591
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=CNI-I9ncqHJ7ukpzgyxdJtz0bd29vsgC38tvMM6TV1U,3803
|
|
592
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=CNI-I9ncqHJ7ukpzgyxdJtz0bd29vsgC38tvMM6TV1U,3803
|
|
593
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=-j7Xyk4xFaiAD90FeH4AqRSnS82f4owKRGMHbObrrHQ,3250
|
|
594
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=sW_T-BdLbjJoFqlr-B5f9emF8E0IdKfy_1wUSIEi55g,3253
|
|
595
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244
|
|
596
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=akDNAjUZ3EXBznF9w6qUcpXxaLWq7oXnX5jy-R9cleI,3246
|
|
597
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=JAycl7EaUZtmCoXMjq4JwKXCeXxZ6S4Ts_DricRUw_o,549
|
|
598
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=q5KZyi9T-l07P3r1u9i6-Dpw89Upjw1gpTp3f1CluEo,3799
|
|
599
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RTnTPFQNg5JULbPLWJDTRNRZHI7FsrTxqSDkZfSbmzw,3806
|
|
600
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RTnTPFQNg5JULbPLWJDTRNRZHI7FsrTxqSDkZfSbmzw,3806
|
|
601
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=DLCfW5tQ9k74AGZ2yER1etP-HgUGglPp_woJiaPuxgQ,3249
|
|
602
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=8v9mdWPs1eXczo3iwFrNnRo2LF9wPU4Scm-r9bL7Fz8,3251
|
|
603
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Qoj9rLLRDbKM4IKBCXvN8RcxzSmNPd0TQUiM7CXDqHI,3241
|
|
604
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=7OFCbBqqEA7vQ1oiygfW-7Tqqx8OJATaLujtcQIgyTU,3247
|
|
605
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4D3Ku4y7BCVEJzueKvQC_KvOR026w3ONWsxfsA_YrEc,3249
|
|
606
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=iJZ_tAzoYGUmg9ltil4e8vzKlKi980yTmswEMWqV1Jw,546
|
|
607
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fDomA7uBQKX8kbO_4MFcoBwHhIR_7sOkngQPv6cQq4Y,548
|
|
608
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ucrZBIN_ivmmfMAvkT40xQpH87LdQK38lZbeLWMyV4M,3806
|
|
609
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=zDnVqBqgT-nLkz_Cou-KTPsNIVh-YbTBno9L2MgdRTM,3803
|
|
610
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=zDnVqBqgT-nLkz_Cou-KTPsNIVh-YbTBno9L2MgdRTM,3803
|
|
611
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=zd5cMYrxQ6PD0jKpd3YF6ThT9RGdqgEQnCW6F4W-r4E,3249
|
|
612
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=CjO6dh_qt1iTu5kYRs98tTLL-W6FOzLO4AESMUFHz5s,3254
|
|
613
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=7v4tp0RaT4vxF4urSBrkK5FR_5ikeFQ1htF3DwDl1lk,3249
|
|
614
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=M5F5wzSmFokEm0X8__ogLvdE1QVC6EW8atqq-kp3rVA,3253
|
|
615
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0J2MFgaLkv-mfVE5x363lgVKYU6miLG_xRO3tJUga_M,3249
|
|
616
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=983yfFeeo-BClL_H1g-owXwbA6t0l-kREiy7kLURUMw,550
|
|
617
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=C2eM8RudmP-qXEf_Apg-qcB5n2Ugxf8-7uG8hQDSt1g,3801
|
|
618
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=llI6PWlSDgQf-ouTDXkFYOoSz9u3bzklwBtZYY_fWVM,3807
|
|
619
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=llI6PWlSDgQf-ouTDXkFYOoSz9u3bzklwBtZYY_fWVM,3807
|
|
620
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=q9HUcoL0cdZCOWZ8MKbcpR8NSy5iNEBq6NPTaHLgRB0,3242
|
|
621
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=uJu6Gv4e80vxVrDyBo8_y47tOV03RmWVsMIWQ-bbW6Q,3251
|
|
622
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4ubbhwSFX_XbefRLEkLoWxJkcetFWPzsszPu0X3_Wrw,3242
|
|
623
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=euiKvhb3DXkvPPQJLqNE_xN2evsTOoZnVIiquyN2Cm4,3246
|
|
624
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FhyniGTx5QeCuVrBSVTQys6q05Pr5lPEcPykpAX7Iyo,3247
|
|
625
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=pLQvMaVvlet_JenEz25mxxplAaHNisl6SFTSZ7lYP2w,548
|
|
626
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=uAa-ZQmASwlqZbr1l1CM6FyJI9irNdLBzc1U5Hdyw1E,3802
|
|
627
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RnN7lfu15CE-4ywMjAbEz8wWV743AP-1Fq5U_j8EQeI,3812
|
|
628
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RnN7lfu15CE-4ywMjAbEz8wWV743AP-1Fq5U_j8EQeI,3812
|
|
629
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=cE3BscS_zEtF_m_jr51IPfpaZZgIEojmhTHsrb9jABM,3260
|
|
630
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=SScyo-oYCBxJR9C7ZIKu_pJJNiXdpT13kYe26rddvPQ,3261
|
|
631
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0v17v78pETXv6S2ZoibekxOVhiTmCm807DYG4DONUck,3259
|
|
632
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=I44PvJj758-sw_fCOVROLTpG0NQ5_5PCYyQcpZC1YSY,3259
|
|
633
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ulvOEAFO8c-UOa34FEZrjOkCR6ovhJlfFFDhmaKIBiU,3245
|
|
634
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=BiZowqExbvXftuE37SYcheOdtYX7Z5BEXyykJ6GbYSk,3254
|
|
635
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-CVHqClROli9FWe_FnlnuAG2LiFivDFK_nghH6t-BWc,3261
|
|
636
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=7ok0uooTihvRSckZMNd6jInRvht_xkC5posHO66ejqc,552
|
|
637
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=h_Z6wBKdSGBEo5BfQKaxuFlxztrnbbZR0pkcYKv92sk,551
|
|
638
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=k63VgniyN3Rl_-h1hYmT_q9QZtSFqQmXBqhEXJQkxqE,3800
|
|
639
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=icswqRYUsUdoQMrv4YIqO46GG9BzepmBJmnTre9-VjU,3800
|
|
640
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=icswqRYUsUdoQMrv4YIqO46GG9BzepmBJmnTre9-VjU,3800
|
|
641
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=sL0E4zZzb01g6GHaTCXltg20uSbthXHSJFQ0SaxZ7PU,3245
|
|
642
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=MZcJz7BjwVOHHHxvYqGrWw77WnxslYhwW80bZw-jSKQ,3249
|
|
643
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=GsLoYkaZ2p4Qu0Coj-X90s7JWyfZBOloIHPlyNKSIes,3246
|
|
644
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4--7YWnJYUK4XmQ2zZ4M1ZYdKvUkET0VkNgIBn6xaOA,3247
|
|
645
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=NjEA2QjOVXyOaVSMPch5qa1Dq3igbW7MmE986-7taW0,547
|
|
646
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=21Buh2aiGTHjpW45Rm-TwZD8MSaAy8NMUrK5l_hGT5k,3803
|
|
647
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=P8p-dZZt_D61G6k3PgUetF01xzTRmCDJAnqCIsSDW8I,3805
|
|
648
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=P8p-dZZt_D61G6k3PgUetF01xzTRmCDJAnqCIsSDW8I,3805
|
|
649
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=8zuJhFdd6aXREpiqPFhIKEFWA5lgLVGrG0-a9UXcBqk,3262
|
|
650
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=_42bDZX4VODErI6OL-NrWja36iNHC4DzgF1l5Mk67-c,3248
|
|
651
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Zn1TvhAoPOv0zQBYHOZhwdDw3oqyxm0zIa7IJkTCHpo,3247
|
|
652
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=H9wONEU0XXSxOJfkx5UkS8Ss3A2QCp9G0XNoJEqE9nQ,548
|
|
653
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=2T2TYZhXgC97slH92HQ8GvZS3KuUt1ZiC3RtudPVEPA,3802
|
|
654
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=b6_bhUuQrI9HYvvwmAvUYh4v1GZ8w0sjApOmwuj_t8Y,3806
|
|
655
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=b6_bhUuQrI9HYvvwmAvUYh4v1GZ8w0sjApOmwuj_t8Y,3806
|
|
656
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=yqjO7zML7EseBJw6Bn5MTyHeAitkPsl1dndXeL6Rn6A,3257
|
|
657
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-nQIhKAOVCQrxLV6HDlcD0V8HMWvqrv-vyiORVU7qls,3244
|
|
658
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=KKmCvNh5T_qfD8v7JijMqXxQ5L6-gRX7oc6c5re6EF0,3248
|
|
659
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=M3nwpZd2-0w263ywZt9gaw53z7MN673T5tl4tc43Ntk,3249
|
|
660
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=H9wONEU0XXSxOJfkx5UkS8Ss3A2QCp9G0XNoJEqE9nQ,548
|
|
661
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=KmEgJ7zP2Sr_7GsAfL-12_g2S2a2wVpnxgCiF5dFiLI,3802
|
|
662
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=J4SXwpsioBRdTXOaj2OjrdNrEuW1NF43cLds65UWzCY,3808
|
|
663
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=J4SXwpsioBRdTXOaj2OjrdNrEuW1NF43cLds65UWzCY,3808
|
|
664
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=UjBOmVqYynBH3dJVuMJXjKnuZ6LssohzzEBpLBG4_G4,3256
|
|
665
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=8BJsjc2UUYdotrIqwyzisjrq0wcyW4jnTo_M8J3qYwA,3263
|
|
666
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=vLoV3JMtvHOKpR5D1BeCQPMuYlWUAlrXu54gByNkwKY,3266
|
|
667
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Mtw7a9BSspj2TzC-aPxE82o1LEvwzgbUuIofwRxUNA0,3263
|
|
668
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=N0kCPHvybNK-HvMO2EqNDLkj7m7WrHTl-3AD32LBD4k,3248
|
|
669
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=mjh-AgJN_IoWAc1uwhUiB1lE3ufAPDf-KPP6vUTrDKw,3251
|
|
670
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=NHdx3tZnfLF7NplswMzcTRbQEQFLtChg4rd7GU9lMbM,3262
|
|
671
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=mcF12eQTtGxocrVIA3I98NHd1NLd0-8EyfXtqDgv0PM,549
|
|
672
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=AThoa7FUcGdNXYB_v9iMpBh2X8C0iLfc7y-C0xy2cRY,548
|
|
673
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=MJgIvZHf01ju8IWEVO6vyMedy5OTZxDpzv6A7_8W-Tg,3813
|
|
674
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=AT2yrMoTvmoizi4sxwLtiULZ57P1CBhKGg9-6Gxnuc4,3819
|
|
675
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=AT2yrMoTvmoizi4sxwLtiULZ57P1CBhKGg9-6Gxnuc4,3819
|
|
676
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=laYeH4w0iZOj2Yg3vDgtKoroNQnwBEX4GUGLrO9095I,3260
|
|
677
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=TWcPDZ2miQMD6OWDC1FteRs80ND9RC-oJL3PLVmJbtI,3257
|
|
678
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=cPtr1UJq_B-dTqgMrVm8ptiYXA6qOy_F8rs2f7ljuEI,3811
|
|
679
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=cobt_ZhR3dt2CySr12bGPVwn1oS98YvGLdIh9H8BDQ0,3801
|
|
680
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=cobt_ZhR3dt2CySr12bGPVwn1oS98YvGLdIh9H8BDQ0,3801
|
|
681
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=6Z7kIa14RjVq3ek_C15q5mUu1IrY2r0OP8S-_pm-MYU,3252
|
|
682
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=r63SZkUJJV87B00hAX074_uaC7wwQXdurlJsB1jUA0I,3254
|
|
683
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=hL3doX7zzxld3UcS8p9ACSadDaE6t3xXlYwM7X3GOeI,3252
|
|
684
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=xBhxdCFf3waTUsLxJxA54R90zODbC_DKI3XXBVKjKRw,3252
|
|
685
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=2ks7TQUULAD-Zn5i69YHo_2hpmsmxlocdYmJccSh2No,552
|
|
686
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=eiI8X2fFNknJmiT0uHbzSaEKQwwZk5bxn676gNvcyg0,3802
|
|
687
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fQQDJMlLdYsY5Cosg5HkRzvrJ4asjQmc0WGgoD4bC20,3810
|
|
688
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fQQDJMlLdYsY5Cosg5HkRzvrJ4asjQmc0WGgoD4bC20,3810
|
|
689
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=O_SV2vo_oaABfT6Mxqcmo12pnhKtfX4TnXfe02OcHJk,3254
|
|
690
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=g12Xkurat7oUS7LdS9pHLKFlur4_FaMGiGBvdq-iBCs,3242
|
|
691
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=EWLxbWncwGJyL-dV6EO-s8kk25wfYrESa0STjCnzD64,3244
|
|
692
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=tFdrY5nADmXUlShdN8w8Jzkxuj_RPLXCRceX9FhQ35E,3251
|
|
693
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=M-ewEHbgHLBLYLi1Hgz5Pp4kypnUiCRo0ut2scNnvDw,550
|
|
694
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=zTzLbdff09HwMuWlWpoAIgQZ6NEjsFXSF0Y5z4Be7Ig,3802
|
|
695
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=dcPHbYEbz8T9SM5-a5sP_K_npDkhH7u0KM9aiLn9esE,3806
|
|
696
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=dcPHbYEbz8T9SM5-a5sP_K_npDkhH7u0KM9aiLn9esE,3806
|
|
697
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=TO2qRGmp37v53Zqu8Joeq_BSbtwM_mpVoozGyoNg0-o,3254
|
|
698
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=QqijmgLqIoBUxRPnuUQGsoQASRFRMsCVQKTjEjGecVo,3247
|
|
699
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0xquf00fgfrDODpaxyre0VDcjqfzqExj939rzeJ8pMo,3244
|
|
700
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ipg8iK8w2ySRe1Z08YJUWAHX43rvkrXpR6svxRhSnFE,548
|
|
701
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-wuzdNXf3K0jfFQGB8nFSyoSZ4BfAvIkY10k6FdjnLY,3800
|
|
702
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-o9QqqQQ-9kRVCuDOUGBuKXHRTd0asGTzrDcHGGYJLQ,3799
|
|
703
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-o9QqqQQ-9kRVCuDOUGBuKXHRTd0asGTzrDcHGGYJLQ,3799
|
|
704
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=DbemSQdo2h5vGjSNB6Fovnn-aAGfjti04Bp-5KxLALk,3246
|
|
705
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=6glWpljtfiuspJv_Esg_LWCDDQ57d2HETsOIv0zr2Ec,3249
|
|
706
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=qG6v3n3qF6LE2DdGT-mDIXecZ1a7vg7p3QqXYCMX85k,3254
|
|
707
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=EgFTGyW_YuDwyEDUCoGglyI1ETdj9J7AR0UfJ86jMoI,3249
|
|
708
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4--7YWnJYUK4XmQ2zZ4M1ZYdKvUkET0VkNgIBn6xaOA,3247
|
|
709
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=ZfPPlx0qcuR4WjaFAE-W1QZgSPAMf3NyGcpvQIvyFMs,3245
|
|
710
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=NiorJgOotxkQcP49ID3z5al1UA4QQDrT8MvbCwAWL5Y,3248
|
|
711
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=QgSlDAhlB2W4bzTd2O98UL-C_IKfJm_cVmQz8FqsLF0,361
|
|
712
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=i3wy_CBO7BQQVhKReRC2F0PaRIQDdN9F5lJ7kD0xe1I,548
|
|
713
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=QpkqpJnyjuHH8Zo4U4QZgehUF2F2uQDZFb8fdhixXWI,3794
|
|
714
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=wv5GjGAA-NyJ41SYdYG3tPAgwf6JK7Zf6SaWALQ5c3Y,3806
|
|
715
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=wv5GjGAA-NyJ41SYdYG3tPAgwf6JK7Zf6SaWALQ5c3Y,3806
|
|
716
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=RRMNeM_qiHvlUTOAeqwgs7ukSoAZSlK8XN4z8hgWl0k,3258
|
|
717
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=brB0-FFr-Sv2bdrz4DQJ_NaFhETctf1g4Yzwj_Fcczc,3251
|
|
718
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bPQWtvaJrzOOIgI-R-MIxs_f4yC_FobkDydu3OkOFtg,3252
|
|
719
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RYLh-Uim9U2_djLkFwwpV0rNQHik0tZHzecuj1_hPLw,3248
|
|
720
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ZRgiuHZ2SFC6u-WV5DGwau4k1RiPLI67eENO0e-5Ylg,3253
|
|
721
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4EzbnLWHVwrjyKYPMcDxbxM2o-krjlT0YXvM8oPH5Cg,549
|
|
722
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=OFgOtRkUHwyOT7Hk_BQft_WzuZOwbhMSLP65Fbr4goA,3799
|
|
723
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=AOu05da2LZbCzD9SKsrgnzH-ih3CdXsRIdJc_4J1lps,3807
|
|
724
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=AOu05da2LZbCzD9SKsrgnzH-ih3CdXsRIdJc_4J1lps,3807
|
|
725
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=qzmFm2pqxphir1LBrycDZp5JA4It8OdQeQ5iTrTwLNE,3253
|
|
726
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=2UyOMRMdbvHt6WlZdOKALm3Or0eMCx7vvwgLiCYyoOs,3259
|
|
727
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-hP_P8NM0K04mGzTmpGBNibQ5xxh5gPz5WtoMXhoz1E,3253
|
|
728
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=vEU4_YOMnLdYFf1BkBEdFbGRMG8KLhsO_t0gv7vaO4Y,3244
|
|
729
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FB5Le4obvPoCgFSnC_3-Uh59n-Mt4Rol8saXVcK3RPw,3252
|
|
730
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=k1rzpgm9m19AHf_HPQcNCuSBtAwFgMePUYB1jZeFyYY,549
|
|
731
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=9IbzTwLRgTCfFLSvjEWKiajCjG81R-wTljIV2zUYUA8,3809
|
|
732
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=B4uEwuftvaj9gHGdoDBnVhxbNRmzUtzu4LH0u-O7voA,3804
|
|
733
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=B4uEwuftvaj9gHGdoDBnVhxbNRmzUtzu4LH0u-O7voA,3804
|
|
734
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=ZTPWtJA3JBL2jhy7C60RdsntKCN8oQ-DDIL17ok7OB4,3257
|
|
735
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=mokCWoXdKi8p4mLYqgljjwDRJWK5I2oF6_MJuObi5sU,3254
|
|
736
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=kLviGvVngpgOuelfKtvv9Is7MWQ89rGxlomMRP6t0Ic,3250
|
|
737
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bIVRtaaHThozH54VIte0Nk0sOGV67K4s2YZUE6QWx2s,3252
|
|
738
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=_YXzQ6N3QpF3Ou1Fy-51YyL-J3i5gOBVCgSM42vOT9I,549
|
|
739
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=csaz7AaVDTvCuzaptN-e8K1PNuIwZm9OwnPSJydHI90,3803
|
|
740
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=scfO3_ncCtyrqcYSnIoAZTMfvBzjB4o_0_bdiiVSNh4,3803
|
|
741
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=scfO3_ncCtyrqcYSnIoAZTMfvBzjB4o_0_bdiiVSNh4,3803
|
|
742
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=CE1wRLyFONo4_icKO8fcTTX-5giKNJ9_1F-2mr-lGQU,3257
|
|
743
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=JdUaLiMmf8oEbwuhPHMIncvWzXS2SxOEgfM80ZjM7l0,3259
|
|
744
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=vlys0Zi_CaaU41OHGbWSBtbVglFi98bgqEySBMc9Sdg,3258
|
|
745
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=YWyByOlKSqp5lbcUa8eu6N2dHRKJqJDbCDSjdDQJngg,3249
|
|
746
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=GY9VBPi21K6vJlF1NOEzCyqMS7LX3xq5dRxrK0jvIHk,3244
|
|
747
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=8LWF55ZPjrOY_sEdRGqf1eLcTNySgUiiWNWsN4EGxLY,3247
|
|
748
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=j5PTW0IC4Z2yQIygcdICaOsvb639u6Mv-ZpJYkrBQ2k,3254
|
|
749
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=_Cc0EqUzl6d93OxWJRWYbYpEaTIp0glJhdfV-GSAi5M,552
|
|
750
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ZSHvdnC2vOXI2HPW1iNI9HdihoLcNYlRLMF85pqjWZE,551
|
|
751
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=SkyMLsoxGoHdO4kgTerihone7eEi0nmHlrvZUI1I_V4,3804
|
|
752
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=6Jo2hw2gQpyiNoCRZpGItu4MBkYytzdW-VggWUC4fPE,3804
|
|
753
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=6Jo2hw2gQpyiNoCRZpGItu4MBkYytzdW-VggWUC4fPE,3804
|
|
754
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=xbDfUYLphVtZWJojZWODlxGMCoiIgxn4LsnD9ge3r9A,3257
|
|
755
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=hqh8TQw3t5hPM9u7rmHPuaMjwgxmQ-Zt35fSTgOS0HQ,3261
|
|
756
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Ggy4hejkcWjiw5Bi-wGzSP5JLVuvOjip_rbjXFBJZbs,3257
|
|
757
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Xy4mgZx5iiEvuv2ydO4dFNIT8s0jgBhNHE1vu93fGJM,3250
|
|
758
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=qKG9hmaxN_7tCB_06L1dh0csxs3TGeya9B-X6W-tNhg,3245
|
|
759
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=jb7vGi1RJefImkT3BZU_9iOkiCulcd5oDjxpVSt7big,3246
|
|
760
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=x476nFeltB_2iO9_6y-z2P_unAbh7ghLPFi5z2LOTOo,3253
|
|
761
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=_Cc0EqUzl6d93OxWJRWYbYpEaTIp0glJhdfV-GSAi5M,552
|
|
762
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=TWpzs48j0QwApAsBWt3iIlu6cqR46Meslyp96MOANcc,551
|
|
763
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=i5b52A1Oe8kCdPrPLBGud7OMHm8779JD0rBocYO_lo4,3797
|
|
764
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=U20Q4JwG63kU-6cc241VHGdpettCWbBXRJ9EZ-fbkqA,3803
|
|
765
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=U20Q4JwG63kU-6cc241VHGdpettCWbBXRJ9EZ-fbkqA,3803
|
|
766
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4uWiQMh3cZY_EtLA0a3PU8Z1VCunF2PpolTPYeP9Rjo,3256
|
|
767
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=D0moiKqS73oril32iNj5gRJUWpT2SZ5jf-ZesUZnNv4,3254
|
|
768
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=toHzCprq0KetQI0-9IrLYCIm1bQ0nSeP1gXArU0GogI,3245
|
|
769
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=N37dUL_J2JVpgLFlnlz__Ck7Z4njROnNAO8V2oiDqr8,3253
|
|
770
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=pGZZj_gZms1T9Zgjs4tbIm90LhbEy1UUkkgrto9jPts,551
|
|
771
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fqnjZCn0gbY7fO9JwZOHMYJJHe8gceWhWCZOFPRUlYM,3802
|
|
772
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=OTZt3ell0OZ7Cg5L17K2NPU4UwayAkTihV5HjUmUiAw,3810
|
|
773
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=OTZt3ell0OZ7Cg5L17K2NPU4UwayAkTihV5HjUmUiAw,3810
|
|
774
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=LdtOyXsA9r18GiFkmDOkiRinsDSZBZ8NYapL59EZ4iM,3264
|
|
775
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=07GarBHmiiYkyqn-qxEtrAcgCETuUbqm6HqlbH9yJi8,3252
|
|
776
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=xMNxtLL_8tyg4TWSt_llz_IJ2qlxc2NEwhUzhV1VsG8,3252
|
|
777
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=kEuvCsW3YNByF-DALYqPZpW3TL8ZbtQ5gUNq7-8YvZ4,3252
|
|
778
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4uNqB71a6ctZ-c4tF3r66vOsHFrqcR28g_UWy0N8iBo,550
|
|
779
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=QkrfZ69jxW_mweigtHL5R0Sv_WcSBp7wjFX75G9kbHw,3805
|
|
780
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=xMGmoN2ZTjKQBZS-k75mFTPpAEbPR3kyMwqZVtgbEiM,3802
|
|
781
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=xMGmoN2ZTjKQBZS-k75mFTPpAEbPR3kyMwqZVtgbEiM,3802
|
|
782
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=PD4AJYCkHfy2ivv9baMouFXzBTy0eKMumbAfxfm91HI,3256
|
|
783
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=iu8M35YR-RDpKWbjXSRzk02sW9nr_dtbhalfLSNtxNs,3251
|
|
784
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249
|
|
785
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=41m0bvskFUzVtlr_yppBr4PZ0cVkqHvy9Hrc5pUCUyY,552
|
|
786
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=2VxMGfWtxTzXcF0bP3d5s7rc1cKb5TNBAn-WiCKAngw,3804
|
|
787
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=VtQGO3fEiyhbKG4sl07cuVc6id2EtKeV05ozLmN_ENQ,3807
|
|
788
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=VtQGO3fEiyhbKG4sl07cuVc6id2EtKeV05ozLmN_ENQ,3807
|
|
789
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=W3cYFteFIZLu5c1K41cOh4_-WZzFU6-jGnZocDzmKaA,3796
|
|
790
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=HIoWSUgAOcNaK2kj2YwDjDa23PzQVTT2C2ePW985Ovw,3805
|
|
791
|
+
"vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=HIoWSUgAOcNaK2kj2YwDjDa23PzQVTT2C2ePW985Ovw,3805
|
|
792
|
+
vllm/model_executor/model_loader/__init__.py,sha256=teALPxcO48a7CpsDEZhdUhhzZjXvh_W9YfhOlmUwHQw,2687
|
|
793
|
+
vllm/model_executor/model_loader/base_loader.py,sha256=PALh_HXia5I1YbL0uNtb5xxeDosRLOQFaHmhhDU6ycc,732
|
|
794
|
+
vllm/model_executor/model_loader/bitsandbytes_loader.py,sha256=i1KwXmp-AZ7bLIfTywOztTGnFbAn7MTSyJTNx28Ee-E,25947
|
|
795
|
+
vllm/model_executor/model_loader/default_loader.py,sha256=_wj9xk-COrlwF53Xvtt33lK5DiHZkEVXAIEvaa0w4_A,12005
|
|
796
|
+
vllm/model_executor/model_loader/dummy_loader.py,sha256=WG2GJuTddszjT_lT00Vl01gwsQ3sDRibWa9diDRzO6o,1577
|
|
797
|
+
vllm/model_executor/model_loader/gguf_loader.py,sha256=i-27_UvLDBpN1YS1aRdoj0yb7zBwRynEUnmalTmczNY,5138
|
|
798
|
+
vllm/model_executor/model_loader/neuron.py,sha256=oXCRwKGiDZh35fTe_GJjU9FPhzft6lpX8ALwy-9P_X0,19800
|
|
799
|
+
vllm/model_executor/model_loader/neuronx_distributed.py,sha256=0yW0G6ATXbOeAcDkqXWNBpbTpOqpMeBnUmv5jgVAjh4,27971
|
|
800
|
+
vllm/model_executor/model_loader/runai_streamer_loader.py,sha256=61Ej9wBFVi_7jb7LaprnyAzLPPVVBCUEU31jRUESChQ,4952
|
|
801
|
+
vllm/model_executor/model_loader/sharded_state_loader.py,sha256=0qrR5XqUwW-uIOJWVLaa9Ed4keGYlJ92LJc2cEkMc1I,8728
|
|
802
|
+
vllm/model_executor/model_loader/tensorizer.py,sha256=URzbyOJNX6C-cvJXnMZfEAtrevo6l8OLKgsWir43aY4,25824
|
|
803
|
+
vllm/model_executor/model_loader/tensorizer_loader.py,sha256=9YQJ354-1cRGV7NPVDNC-n8EEIcuNXYf6bLw0v2kEts,4862
|
|
804
|
+
vllm/model_executor/model_loader/utils.py,sha256=KZo7AuJYdw7Fg33D3axTIplKxBIFRr4qTYZbFlowqKQ,13063
|
|
805
|
+
vllm/model_executor/model_loader/weight_utils.py,sha256=MbTbJvCO4S9Rxu5I2ok5kgZxEUySnFOHQuOeTOlCyNM,29882
|
|
806
|
+
vllm/model_executor/models/__init__.py,sha256=VzcvFDQ-L55rengF3g9oFlD02efze16wV6pIYiAH_RA,863
|
|
807
|
+
vllm/model_executor/models/adapters.py,sha256=_GZXxOBj2I_xJCZlnFjlc_E8zlI8vzBb9GREAtsxVnA,8257
|
|
808
|
+
vllm/model_executor/models/aimv2.py,sha256=NOzKdZjP7ACwFxQK1yBzxC5oVKKrvJSHsmaJDNUggk0,7192
|
|
809
|
+
vllm/model_executor/models/arctic.py,sha256=GdL-QMPsve93DC49P8b37-8sx78SEF4G3puJ4g6CNd4,24403
|
|
810
|
+
vllm/model_executor/models/aria.py,sha256=7cdKlcOhIfCqsLztrRbaLh1a9S-ertJSswSnY0EyevQ,25565
|
|
811
|
+
vllm/model_executor/models/aya_vision.py,sha256=t8ZorwBpREJDMzP8W6OVfKcq5Dj6DO9fXgjQffyBGKI,18712
|
|
812
|
+
vllm/model_executor/models/baichuan.py,sha256=BfEGC-kbgg-aqjLH0DwR3VubchgdGkIwkDxNe3eeD1Y,18878
|
|
813
|
+
vllm/model_executor/models/bamba.py,sha256=u7hJ4IyKyYICU1Tku7tDkNPII1yQtWwBbUdTvGC7kco,21307
|
|
814
|
+
vllm/model_executor/models/bart.py,sha256=dMtns0MEXK3mo9DolomQGcxH-dvo7tWaHi9W_ExyfMA,33820
|
|
815
|
+
vllm/model_executor/models/bert.py,sha256=PhFybWucqFl580rifsAr83q8qwkz1nAPbZPzWgNQDV0,20356
|
|
816
|
+
vllm/model_executor/models/bert_with_rope.py,sha256=PAMsvP9LCPfPxEF7JvNEHZf4AZ55wpjXxDkp03YM_6Q,27785
|
|
817
|
+
vllm/model_executor/models/blip.py,sha256=dxQKBpFNzqsBWxRgIKVeL3C6As2cjQ5Qcq99Y_qPmx8,12333
|
|
818
|
+
vllm/model_executor/models/blip2.py,sha256=4heKored3gF-CRfXkhvOudbPo57KVExClYrIdps1-3w,25857
|
|
819
|
+
vllm/model_executor/models/bloom.py,sha256=-0QlaDOoJCMB8-VUy_PXRrJTCYU-88_31dejbHhT5Hs,14454
|
|
820
|
+
vllm/model_executor/models/chameleon.py,sha256=rWtjI0QVYQ4hJuZaSf8VbvX52aytPaWuwI-H_9cCcmw,45354
|
|
821
|
+
vllm/model_executor/models/chatglm.py,sha256=tudrcW6k8u69DMh8RqXqwefDoHyt4myrTDdHFy_w_Lo,18391
|
|
822
|
+
vllm/model_executor/models/clip.py,sha256=WxSN64fbkoEf1NNk_T-94-xU3a6eA3MLlHe7edLey7M,15100
|
|
823
|
+
vllm/model_executor/models/commandr.py,sha256=9kElm3KdFsjGoPBbYFoF4aZhYkqHcUKml6NWETg8HiQ,19311
|
|
824
|
+
vllm/model_executor/models/constant_size_cache.py,sha256=5F31KAK_Q0Sn-b9qLky0otJv6CiW94Xb8wquIXi-yDI,5839
|
|
825
|
+
vllm/model_executor/models/dbrx.py,sha256=1wloKmHUlUrHbWkRvl8u94KqmE84pytrnv6utAleP_4,18364
|
|
826
|
+
vllm/model_executor/models/deepseek.py,sha256=0EoUC2iUzisLCNkQb4M6crI-Egpi3RPasDQ_twMNwlc,19800
|
|
827
|
+
vllm/model_executor/models/deepseek_mtp.py,sha256=DWkOgqDapX5EPfcGE7vWG1uguC1FDug3lgyAL_JXbYI,10963
|
|
828
|
+
vllm/model_executor/models/deepseek_v2.py,sha256=aHoYPdY1OvB1kh2coIGAv7TOeiUcBX21DOx0e1JRlww,35237
|
|
829
|
+
vllm/model_executor/models/deepseek_vl2.py,sha256=9MhsFGJDH2RYV3VwyGtZWP29IVkAZc4HYhEXhD_gkSs,25147
|
|
830
|
+
vllm/model_executor/models/eagle.py,sha256=1X1Mg2Z25TvltmWAzznfPiL_FUhiYv7s-XiA7DpGszA,11696
|
|
831
|
+
vllm/model_executor/models/exaone.py,sha256=mWdaX0c_J6T0HgYdIbdPLdaUhg2brfUYvtakBU6Mie4,21088
|
|
832
|
+
vllm/model_executor/models/fairseq2_llama.py,sha256=MUWrsR4TSramf_P5RuDMPM5mFXz7kFD4ILcsGiz8wD4,6486
|
|
833
|
+
vllm/model_executor/models/falcon.py,sha256=ZGcxKw__efVYZ37cgWNM4qNR_NQuH_9f90OLX_quVio,21314
|
|
834
|
+
vllm/model_executor/models/falcon_h1.py,sha256=drdyZxPfQ6-pBkwoZXAN_x6f_Rb8Ej4HOfK8NhiR2m0,26832
|
|
835
|
+
vllm/model_executor/models/florence2.py,sha256=_VKxy8eJHvTWELV7C7YcPPSG0iqbkOtulRmSx1la0yA,39193
|
|
836
|
+
vllm/model_executor/models/fuyu.py,sha256=kKyu-Z9lyzqeDW2woj78CijkugtXGjExK8IJC6nWWZM,14228
|
|
837
|
+
vllm/model_executor/models/gemma.py,sha256=JOMLemt4CxmjV9Lpsq3TbjsLsWl-Y2nzIlghAhUZO9o,16239
|
|
838
|
+
vllm/model_executor/models/gemma2.py,sha256=HGAFtLH2QfOS1o33EeUe2bBc5IE-d5UimFaAXj54OEM,17470
|
|
839
|
+
vllm/model_executor/models/gemma3.py,sha256=SvK6e_CY8Q4niAbULJfn0_3KW3FMnDNulzyFsVx-J_k,21681
|
|
840
|
+
vllm/model_executor/models/gemma3_mm.py,sha256=g8Vr8u35341EU2RykyXJWpCj3APGqRMGxv8wd9jXTcI,25604
|
|
841
|
+
vllm/model_executor/models/glm.py,sha256=kI2a6yL-Pa91knqI7SHxgrV5kUlACHATeiIWy_C14H4,990
|
|
842
|
+
vllm/model_executor/models/glm4.py,sha256=SGh4OnIjs3Kl-G1BgWcdDeRnpj3kaG6ZEQRuFbfa3JI,11795
|
|
843
|
+
vllm/model_executor/models/glm4v.py,sha256=Sv1zWlKhohg-KiHBSblhxe4R52pMjW-xGlg6HWDbM7s,22106
|
|
844
|
+
vllm/model_executor/models/gpt2.py,sha256=BLneZpNZBi_EcwxgzTS4Jxj-b6pPG6imlX2k5X-AXz8,12838
|
|
845
|
+
vllm/model_executor/models/gpt_bigcode.py,sha256=hnGzETUvkxF5g9CNnJY-KJR-9kZ4TkZeZvi95oT2Fw0,13110
|
|
846
|
+
vllm/model_executor/models/gpt_j.py,sha256=C04Cb-YAHGvFUfstPTD6RbyyOjeEzwbfvrQ9omEzWPs,13179
|
|
847
|
+
vllm/model_executor/models/gpt_neox.py,sha256=KmlFDCgsFOu_TgU9JIonIh2RvnczNxuW9lZ-qI3VCh8,13318
|
|
848
|
+
vllm/model_executor/models/granite.py,sha256=pzGsv0pifK2UfAuMfW8UeAbn1-M1FLEhV8DpkXMtfw8,20056
|
|
849
|
+
vllm/model_executor/models/granite_speech.py,sha256=Pv_uto4_LLivRuGVu4z4vcTb1jXBVhPXZyvZ4sJjikw,31249
|
|
850
|
+
vllm/model_executor/models/granitemoe.py,sha256=DXNCZlmo0qciyUvisvQzFg22zqbE6GqJMcGDCxsRhd4,17907
|
|
851
|
+
vllm/model_executor/models/granitemoehybrid.py,sha256=4vNhyy5kPL6or_Roos-BwVAeg5_AIsCR2mAJElOxx84,24442
|
|
852
|
+
vllm/model_executor/models/granitemoeshared.py,sha256=OQzPSm1Hb89xxxc-EokMy8gBjImzVESKqoB1wN1HKZM,13694
|
|
853
|
+
vllm/model_executor/models/gritlm.py,sha256=KNlQiop2Ycv3ZM5Wf9YPKQEQ5lv5fVr0kDLwpS_YBPk,8096
|
|
854
|
+
vllm/model_executor/models/grok1.py,sha256=lpr7pp8KvneSbR4cxgYlLCLGYXQg3j-lWnbOywDquII,22438
|
|
855
|
+
vllm/model_executor/models/h2ovl.py,sha256=J7MnQJLFfcoGJgmbz2r1zK5eEPUcAQV1L_z_Sv--Jag,18276
|
|
856
|
+
vllm/model_executor/models/idefics2_vision_model.py,sha256=I4ruDWuklnxZf4MA_IgoYKrMAHqp8TZHBDUc10b4u6E,14963
|
|
857
|
+
vllm/model_executor/models/idefics3.py,sha256=7Iy0raP8iJZoYHN-eo3tzZNoL7lESp71Aew68PjEoOk,27758
|
|
858
|
+
vllm/model_executor/models/interfaces.py,sha256=RVLJSzCeLY3m9zOiYR2EJnk6H94VXdqPeS1oU9pL8Rs,16080
|
|
859
|
+
vllm/model_executor/models/interfaces_base.py,sha256=sExVX7ciSRpdP2_DwIeR10V3XOhaiCK473EFSun2OY4,4363
|
|
860
|
+
vllm/model_executor/models/intern_vit.py,sha256=gvIObFHF7z-ALrZsfFetDHppTKFUtSTjpAlfpAbsrO4,17181
|
|
861
|
+
vllm/model_executor/models/internlm2.py,sha256=t-JpaC0nVSCF6R8nidFcB0JbLlMtpEAOiJ8qH2I7mDU,17189
|
|
862
|
+
vllm/model_executor/models/internlm2_ve.py,sha256=wMuupyp00D5KAT8SFqr0qvhmL3ZUQQhT1aJSkLmQiMs,5732
|
|
863
|
+
vllm/model_executor/models/internvl.py,sha256=NAVoG5jR2f5RlwlTls-61LYtlugJtyx_kGE2vgqJl-A,51018
|
|
864
|
+
vllm/model_executor/models/jais.py,sha256=oty04RnlcLsJrphWA7-XzVYV5wnxY7YP10znqQNFjx4,14546
|
|
865
|
+
vllm/model_executor/models/jamba.py,sha256=zaxBKrsVuCzGZJKMcW6tjPo3av5A12jFLWuVA3yBt1I,24270
|
|
866
|
+
vllm/model_executor/models/kimi_vl.py,sha256=3rG-sg7HVrzsqCNfDg7OsfK4MsZ2JrVJQWJZyNT4rWI,24364
|
|
867
|
+
vllm/model_executor/models/llama.py,sha256=qRx2_SNGQQOP5UFR79BVwjcnTVHcTbfS3kPxsNBK8m4,25884
|
|
868
|
+
vllm/model_executor/models/llama4.py,sha256=WMoxsXEVEVtbshjuORLWd-CSfRdeN8ZgwChMiniKsf8,21822
|
|
869
|
+
vllm/model_executor/models/llama_eagle.py,sha256=ytJ6oVKXI_pAJkCeLQqiGsP1OBuOyV8jWRSlF55l668,5954
|
|
870
|
+
vllm/model_executor/models/llama_eagle3.py,sha256=cxfLvNPwjR1Dr-NtDFM6lO2uZ3kq7g25ko_MLE9igSo,9099
|
|
871
|
+
vllm/model_executor/models/llava.py,sha256=V5FgJ9SrYGMlmyJ-Ht_x7sG-h71xEPOnH9s1oSUEFvQ,32237
|
|
872
|
+
vllm/model_executor/models/llava_next.py,sha256=sUEma6Zqm5WQGx-YZ2nASApqBOcY3BtWVp8N7ds-2xw,23737
|
|
873
|
+
vllm/model_executor/models/llava_next_video.py,sha256=bfk3Fu4lonHOfbY8guEten0904pZmaRSBC85WALC0CA,17601
|
|
874
|
+
vllm/model_executor/models/llava_onevision.py,sha256=fE7mSVzJoTAWxpHM1bZm_SoT6J4qHZeW2L9bebH2Ako,36939
|
|
875
|
+
vllm/model_executor/models/mamba.py,sha256=po8Itod05G9cbJN73lHlYYKIJZFv0DiyiOqhq6hCbZ4,11579
|
|
876
|
+
vllm/model_executor/models/mamba2.py,sha256=XQFt1AseVjFbt3fHfApNnnVkp364VpIPreoP1ozuqHk,12373
|
|
877
|
+
vllm/model_executor/models/mamba_cache.py,sha256=un5CJQN8-8l3UAys8EJOsEbuykbNEpKOS4Hs25SqpM8,2898
|
|
878
|
+
vllm/model_executor/models/medusa.py,sha256=nn18_eYZ4U7YDvSDPOH42hzPfLfJmIh1qAeDw5eiSjU,8934
|
|
879
|
+
vllm/model_executor/models/mimo.py,sha256=WvsVPrLsdNTrWkBucqvCpMFBwDgH3WPGBNtMPMNp3Wg,7847
|
|
880
|
+
vllm/model_executor/models/mimo_mtp.py,sha256=REC8zXfJ9t90xp4pLcohCvVagYk2pOZLmSqD0wcCInI,11376
|
|
881
|
+
vllm/model_executor/models/minicpm.py,sha256=P5Fm4DNxQyFWuCHib6gZsDtjKTzxsUC2ggGnZDIW1Cc,23872
|
|
882
|
+
vllm/model_executor/models/minicpm3.py,sha256=bnHBgK_myyR1NZQ-rSNCAnF0aBlSZcrSrG4Gr6iUw9U,9360
|
|
883
|
+
vllm/model_executor/models/minicpmo.py,sha256=6ttEucHtL2jUQVjQwmq_UXeWVQk2Vr4WkEkaEcazaYM,28786
|
|
884
|
+
vllm/model_executor/models/minicpmv.py,sha256=D0Boex96EBV4npBCSXMFOlJaaoaJSuRFYOGvqC5UQWY,47229
|
|
885
|
+
vllm/model_executor/models/minimax_cache.py,sha256=29FVD-sSoymCb8yrJxxE7jdvgvt5VctucnI3WkS9Yo0,1143
|
|
886
|
+
vllm/model_executor/models/minimax_text_01.py,sha256=UExLmmU0hb7mR8XMRbAg03nalA5MXi_LNoKb0agHcE8,51668
|
|
887
|
+
vllm/model_executor/models/minimax_vl_01.py,sha256=xYcP59L6wKNd_zPB_FUpNsGr8ybfxHJduoND374Prxg,13904
|
|
888
|
+
vllm/model_executor/models/mistral3.py,sha256=e4i-fcLvcXTa--K0QCX-J5Ddn24ew9wmn4Iva714b_M,22899
|
|
889
|
+
vllm/model_executor/models/mixtral.py,sha256=kb5YjhWUdr-6jOc6rIiZROOLF078EFwhuO_W4_XJAtI,20138
|
|
890
|
+
vllm/model_executor/models/mixtral_quant.py,sha256=DMLzwThRV_W3qBovWGXwQphmc9cmXgbRx_8mhBFUqJw,18458
|
|
891
|
+
vllm/model_executor/models/mllama.py,sha256=Z6NhbV00vesxbWHmv02em-hehKc-sCEQjTjcAYtaVac,66729
|
|
892
|
+
vllm/model_executor/models/mllama4.py,sha256=5-g-TCO0Er5fT67Kigz85VK9q3t0byQqtGskiQNv2Zg,31777
|
|
893
|
+
vllm/model_executor/models/mlp_speculator.py,sha256=scq0cIwJUMEFeVz4LR_6Ve4z95ktVg2BlMf8i8vEHH8,7896
|
|
894
|
+
vllm/model_executor/models/modernbert.py,sha256=CMe7Eo5uhcAAoDDvbTN-cZb18BnN6B6mK2uQoPUzHuo,12623
|
|
895
|
+
vllm/model_executor/models/module_mapping.py,sha256=pbHfG9SLwSlMNEGxEMTUCbSY9IgjkChOAEqMMC9VGGE,1775
|
|
896
|
+
vllm/model_executor/models/molmo.py,sha256=DDT7ZXJxfrvRVTyfO-uqCOuPYh58JG1R59RBvN_dvdI,54872
|
|
897
|
+
vllm/model_executor/models/moonvit.py,sha256=t2PK-aKQwT6sBIXVIttE5nKWDXLmxvaHivQ0p2mrOeA,24051
|
|
898
|
+
vllm/model_executor/models/mpt.py,sha256=KGktbWx1AZ5_s80FYmhaEtXhyrriur3MbBGsx4XAgIY,12685
|
|
899
|
+
vllm/model_executor/models/nemotron.py,sha256=W5sPKqSdmbJoJzFDUa-9BEALsSKRCbvzObflwTvObbk,20646
|
|
900
|
+
vllm/model_executor/models/nemotron_nas.py,sha256=2_gjkZ4KVv95gQCRmVvQVCgk_X3weCkpvz8ODcB_4IY,19138
|
|
901
|
+
vllm/model_executor/models/nvlm_d.py,sha256=9uSTlja_rlgB5z8xH_gXFBB4xq7BJVY6JJlMT67vbvw,7997
|
|
902
|
+
vllm/model_executor/models/olmo.py,sha256=-8s7rfi1XVe52Y7VJHiSx4-9TYozvNGvm104FwMHjy8,14855
|
|
903
|
+
vllm/model_executor/models/olmo2.py,sha256=u-81Q7xtE83TYp5uYK40j0p51Gw22yO4XU34Giwb7YQ,15915
|
|
904
|
+
vllm/model_executor/models/olmoe.py,sha256=TuQmMIFa36FSnRh7o-f_3jjues17Q5i93hHBGM1UNqc,18396
|
|
905
|
+
vllm/model_executor/models/opt.py,sha256=JYZVqu4_uidpZtrAVM3jF0f4CBFGKCfoInOdYKgBkoE,16503
|
|
906
|
+
vllm/model_executor/models/orion.py,sha256=8c6JKyhWuDKytw8hvMpeTYneHswkUy8O2i72KoDFNz8,13793
|
|
907
|
+
vllm/model_executor/models/ovis.py,sha256=QaAIyGCSI06SlJdA8Kv2QxvvlmjnIdURC2HcrlBzgFw,21011
|
|
908
|
+
vllm/model_executor/models/paligemma.py,sha256=d4aglFK7OYH55uap2SMNl6wdUiRDyS6dL4fPHTj5dtg,14579
|
|
909
|
+
vllm/model_executor/models/persimmon.py,sha256=Um_Q2_TCNg3W14fn9aGHvzwOxPYsF4KRTEBzMFTbdyE,14259
|
|
910
|
+
vllm/model_executor/models/phi.py,sha256=PBJYTBo8ytdVWV_3EzP_HMvqKTn-WtSvYBQDnlJMRxo,14103
|
|
911
|
+
vllm/model_executor/models/phi3.py,sha256=dfxzNvOZ2XPpQIy5mdRFsxFZObV1_h5vlC-ak2hw3HI,388
|
|
912
|
+
vllm/model_executor/models/phi3_small.py,sha256=eBw2iRbZL7dSMVF88mJVgNURJB8iHPD2Zjg18jZfarA,18213
|
|
913
|
+
vllm/model_executor/models/phi3v.py,sha256=EkY941hdaPsvY7vIkeaisOKCtd2RPZa6ugop_lH5TkM,28414
|
|
914
|
+
vllm/model_executor/models/phi4mm.py,sha256=bIiRShQMbJk7jCKx1IjlfDS58JlobbKK8gYcTbYHVwQ,49314
|
|
915
|
+
vllm/model_executor/models/phi4mm_audio.py,sha256=lWfyFP7yfRDelWelZoj0fJN6-SGfIOxIM04-SWB58q8,49063
|
|
916
|
+
vllm/model_executor/models/phi4mm_utils.py,sha256=JePUMssJBX0_cvKwILEheK_qaX-qOcNCg9_tF145h2E,66670
|
|
917
|
+
vllm/model_executor/models/phimoe.py,sha256=hm_0oOZhoN_CfuyhB_TmK0r_j_KuOHzBiGM7whJ23Hg,24669
|
|
918
|
+
vllm/model_executor/models/pixtral.py,sha256=ZUHJiNLnBlkEo_uJQgc7R1HViE183Tcv8vAHbc0APV4,48166
|
|
919
|
+
vllm/model_executor/models/plamo2.py,sha256=HMUpLALF60CMi85AwheOewGzuRwdCyCwc2RgMWeEclE,30056
|
|
920
|
+
vllm/model_executor/models/prithvi_geospatial_mae.py,sha256=bUyR-dvxsexD_vMqnyLfBpxOY_oNMfFa7zKe06lAouc,9187
|
|
921
|
+
vllm/model_executor/models/qwen.py,sha256=CjiuCkvMz2OQyaLQOwmQdwo3JEApM2ZWl7aVdarxlcs,13847
|
|
922
|
+
vllm/model_executor/models/qwen2.py,sha256=h7ZLW6uDzL5cIxU5S-tteFZUGOzOSYjYac3HVLk5HUQ,22247
|
|
923
|
+
vllm/model_executor/models/qwen2_5_omni_thinker.py,sha256=7zgYwhacEgGJNQb-xD_fX9-SM_0SuCnZIz1CHJqWmEo,37117
|
|
924
|
+
vllm/model_executor/models/qwen2_5_vl.py,sha256=i6nRHj8M3DwDRCrtv26Jx9Tnhip5DvHYkK4zIBsi9qw,47427
|
|
925
|
+
vllm/model_executor/models/qwen2_audio.py,sha256=6gLWrqGsneyjQbBirOD8U1kKmK3oQU79nWLbpcBsgwc,16512
|
|
926
|
+
vllm/model_executor/models/qwen2_moe.py,sha256=vBgmC9KuddqQoU97Vpelp6mz0AplYLMvEs3GL_ROId4,23001
|
|
927
|
+
vllm/model_executor/models/qwen2_rm.py,sha256=wPYRXiWgWG6fOGAnJE_YmRDIOhBgMMatAhgg6YEU6rw,4551
|
|
928
|
+
vllm/model_executor/models/qwen2_vl.py,sha256=1EIuZIL3Hx85yYi6vUBhSMT8J2H-4q9kKR7eKXUkZHA,53566
|
|
929
|
+
vllm/model_executor/models/qwen3.py,sha256=jXoaQDYqZScd5_JtM0ZjGApSfKxml-2z5ONFDrc-_nQ,12405
|
|
930
|
+
vllm/model_executor/models/qwen3_moe.py,sha256=9ly_Fd3cga7zJVBDKq2j2KAnwPp936zLldZ8LAV_BX8,22957
|
|
931
|
+
vllm/model_executor/models/qwen_vl.py,sha256=Bp62rQy4QgXuffIuMMEAcgiXY73sT3gy3IGzIsdhmp0,26769
|
|
932
|
+
vllm/model_executor/models/registry.py,sha256=hWNhXv0RmWSlnT3kegp8bU3SG8wx-vMWQOmzB9NpRrY,25484
|
|
933
|
+
vllm/model_executor/models/roberta.py,sha256=noAGhd4m1xcFGph3f0twu9hhJ4meCdBMlk2I2-9bDwA,11389
|
|
934
|
+
vllm/model_executor/models/siglip.py,sha256=7A1tc0PpGFuBswyEZI-t4UtY-Eu-KhgKZXJ3qtjoijU,18700
|
|
935
|
+
vllm/model_executor/models/skyworkr1v.py,sha256=zDXn6HI-0zzoe_-GhB1pKEKl7Aq9HPo80Fnib2pAUs4,33970
|
|
936
|
+
vllm/model_executor/models/smolvlm.py,sha256=s9ar4gslhX51EY0RJGWXaLMbJCM1DpT4-m63WK-x53Q,1730
|
|
937
|
+
vllm/model_executor/models/solar.py,sha256=kqyIMzRPP2XiUHRZOOxDUEcsvuz_62o_PFYkXRAj2ic,19893
|
|
938
|
+
vllm/model_executor/models/stablelm.py,sha256=x9yVn3d3RKcuWvMIcH9ylQrVEw9XxNRYrF1Dc6i_3sA,14938
|
|
939
|
+
vllm/model_executor/models/starcoder2.py,sha256=reFDB8k9vYCHemDBKp3c19p8_CkkR4TDTpjk9lB58Ts,14541
|
|
940
|
+
vllm/model_executor/models/telechat2.py,sha256=MLDGZKucrXhmPfamO3U43wXoww1mTux17qoFmA6vCCc,5994
|
|
941
|
+
vllm/model_executor/models/teleflm.py,sha256=VybsbNx-m48o364REHE8HczV5yH8G8AISCRvMs96vrA,3143
|
|
942
|
+
vllm/model_executor/models/transformers.py,sha256=MvAV-VIENaW8LTEsr_-mkLYBfvBuFosHjyLaWrspgK0,20395
|
|
943
|
+
vllm/model_executor/models/ultravox.py,sha256=_elRAM5QhsZm9P4pvcB_Zl6hisKiBpuqS_Yhyv3pwCE,26891
|
|
944
|
+
vllm/model_executor/models/utils.py,sha256=7WMHa9wR3IPekP6YfF1pcArMSdCcq4JGxckz6Puqn38,24901
|
|
945
|
+
vllm/model_executor/models/vision.py,sha256=nBA5AYHuDzr2bTKq-8WaTR6e0uBgPIZmZXweojtteHs,5534
|
|
946
|
+
vllm/model_executor/models/whisper.py,sha256=K6QtlLlE-ziHhCrdQXnTTNNW52Cxy1ffBulnl1QXXW8,27547
|
|
947
|
+
vllm/model_executor/models/zamba2.py,sha256=KdKJucE16iKxB8y2WwrhqcATHLek-uCIZ5Nkack3jzY,39599
|
|
948
|
+
vllm/multimodal/__init__.py,sha256=kU_DP-wqEImT-MCI5lTnw050WrFXpaV2wlE_l2vK3PI,990
|
|
949
|
+
vllm/multimodal/audio.py,sha256=pCpnPG0OhQMS-7SqVmO0VdcdwJombEhiwj2C4AMidy8,3079
|
|
950
|
+
vllm/multimodal/base.py,sha256=onVgZRQXU6X_NCrIj4LYp93oRflDnK3vT7v1sup1mP8,6933
|
|
951
|
+
vllm/multimodal/hasher.py,sha256=XtogP-xOrhUGyNnrT2UAeNnw9EsfXOtMJCPF6RXPS1c,3385
|
|
952
|
+
vllm/multimodal/image.py,sha256=9MWVO4Hz6u6aCdjkIBJA2rZ7--u8hG1hJ0t8qRVp3lw,2926
|
|
953
|
+
vllm/multimodal/inputs.py,sha256=nQUnH-0MFmvgWsGgZQoXtBVsBhZyAAl--2l3pvow1L8,28259
|
|
954
|
+
vllm/multimodal/parse.py,sha256=YGRGdOczSP5NA4yJx0A17YAxkz3gE6kKM48OOvgFkjQ,14416
|
|
955
|
+
vllm/multimodal/processing.py,sha256=6ey3KWpaRmsrD7GpL4xoTlR3UNZM5Jf7ho7EgALRRKE,60927
|
|
956
|
+
vllm/multimodal/profiling.py,sha256=risrMTPIWVRxZ-D1_V47F7NzBjwGpzX5mGd3lW08x8E,9295
|
|
957
|
+
vllm/multimodal/registry.py,sha256=hG3U3I5B1EDFndTisquAQ3OpdkQiUs2ZiZU7gCHU8s4,11185
|
|
958
|
+
vllm/multimodal/utils.py,sha256=kE_SpZwA3g2uuI08nUEh8V-mEZjR955ai_Y0-XGjzag,11977
|
|
959
|
+
vllm/multimodal/video.py,sha256=U0QbuMXG8lmR6lIaYwewrDpHHlVXHSmGlaTEv1bx-RQ,6079
|
|
960
|
+
vllm/platforms/__init__.py,sha256=FOqPH_Axx_AV-z7oZVPXdlj6ITPbfP84qcZKqhe7ov0,10568
|
|
961
|
+
vllm/platforms/cpu.py,sha256=LH2OofJ8wrFLF9OI-osn77HOJjOeBLETo-b_xrHNTbQ,8622
|
|
962
|
+
vllm/platforms/cuda.py,sha256=OCKD11beBVojIcgbtLpmDkIvL7PwMBYJts-IvnlR5aw,18618
|
|
963
|
+
vllm/platforms/hpu.py,sha256=MDoJPDQc9DHcS_ZZHiylYPIbdUQQseXfWi6Oy_maom4,4137
|
|
964
|
+
vllm/platforms/interface.py,sha256=xrBfV8Kjfrq59yG8pE_cxtxy23istgCc7FIGLG76WkQ,16214
|
|
965
|
+
vllm/platforms/neuron.py,sha256=bjDnm3G9OBHsgQVY6nax2iRHzrBd3A9tfj3s5zREySs,5603
|
|
966
|
+
vllm/platforms/rocm.py,sha256=yb6UU0iXtKiT2RPQKiKYR8HPVHuxcXOp1QmwI6qu97M,15463
|
|
967
|
+
vllm/platforms/tpu.py,sha256=JMRUj26BIMF17q-V-adPMNg-UKY8fy09T07AjQ09elc,8209
|
|
968
|
+
vllm/platforms/xpu.py,sha256=y0cn9KlXsz4pBTJCqzRVaPOLg5jyq8Yrkxkx5NzpKqI,6131
|
|
969
|
+
vllm/plugins/__init__.py,sha256=i44BS42zbI8ZdAG_qMOcZKhpZiNwWMaMjBWUfsbBJBE,2962
|
|
970
|
+
vllm/plugins/lora_resolvers/README.md,sha256=I4lYxAwarJdoR322hv-UQqsvuqjdQIxexWtBVdwyrL0,828
|
|
971
|
+
vllm/plugins/lora_resolvers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
972
|
+
vllm/plugins/lora_resolvers/filesystem_resolver.py,sha256=b4ha-o09iGftl_naSm-TtQG3E1aM6IxWOeNsnCi1qSk,2017
|
|
973
|
+
vllm/profiler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
974
|
+
vllm/profiler/layerwise_profile.py,sha256=VRyK33Ep7S-JXA8vm0bi9GlHHe_h3gZTjkXoYK-ybt8,13818
|
|
975
|
+
vllm/profiler/utils.py,sha256=oCv3sUoUkyyatfV6mNUSmo76MA4aDrTM_GRO0l7ywto,4645
|
|
976
|
+
vllm/prompt_adapter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
977
|
+
vllm/prompt_adapter/layers.py,sha256=aoeVO5L_abEwB92Groio_5lB9al37OiEJac_GkSuCko,2726
|
|
978
|
+
vllm/prompt_adapter/models.py,sha256=ut5JOYkO4AA0UIGEhIGuDqW3u0SjcObmu_c_L7k7sG8,13722
|
|
979
|
+
vllm/prompt_adapter/request.py,sha256=2_iVKLKdWE3kbt7ja_wPHhCerMqVXlg6ztDw344Yd6I,838
|
|
980
|
+
vllm/prompt_adapter/utils.py,sha256=fMMwK-mzuhT9Wu9araO0rSdtNkAmTNvsCAfQXfOkWQk,3668
|
|
981
|
+
vllm/prompt_adapter/worker_manager.py,sha256=qMEPVkdg2_L4bYSIBg_XPEM5As8UgrPrgudoiG4kEAE,7536
|
|
982
|
+
vllm/reasoning/__init__.py,sha256=II76ZkuKt-7dlu-XNhU2RjtHvIPcdESxgBb2mk-nW58,459
|
|
983
|
+
vllm/reasoning/abs_reasoning_parsers.py,sha256=cDxAFhiRHLy-dcIWWxVR-NBiMGs6e5ZNjrNdtfukggQ,6576
|
|
984
|
+
vllm/reasoning/deepseek_r1_reasoning_parser.py,sha256=ohpIV6s2j0fha3YmuxKVUFFZjirvTtq8TGTs5r9qkM4,7396
|
|
985
|
+
vllm/reasoning/granite_reasoning_parser.py,sha256=0sO2HXDhSCq8xZiZzNy4B-cEmqz3juF0XFknwlpjAz0,15830
|
|
986
|
+
vllm/reasoning/qwen3_reasoning_parser.py,sha256=GTjfJnTeZXqKi7mOuuph_quTfW9F-pEtAXyaorkDIFk,6427
|
|
987
|
+
vllm/spec_decode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
988
|
+
vllm/spec_decode/batch_expansion.py,sha256=_n2h_IcPtL1XhAEVtv0pOPNPvf9GDqfojdZhCAneUjk,22740
|
|
989
|
+
vllm/spec_decode/draft_model_runner.py,sha256=IJgFZN-R4IJscqWGZs_Z1yjOzhfKzaOOOiBxMneXh-g,15221
|
|
990
|
+
vllm/spec_decode/interfaces.py,sha256=_hnecfFBgi-_wKMIxPobP-DRaefD769WqethORvcofA,3086
|
|
991
|
+
vllm/spec_decode/medusa_worker.py,sha256=liPn4iIMqJ8owL4cXgrDTVb3bjpUKBlqeNDlFlwEvjc,4900
|
|
992
|
+
vllm/spec_decode/metrics.py,sha256=qhxaJzzxUhQRLH_DcHZrIJXaMjzLpJqYT97Y1eDPJp0,8083
|
|
993
|
+
vllm/spec_decode/mlp_speculator_worker.py,sha256=NuOIAQtZxCWPs7YXWRJW3KwFCT4i3ETCobsA0gK-qVs,3737
|
|
994
|
+
vllm/spec_decode/mqa_scorer.py,sha256=GdhVN2Ef3L_LlnoftqivY0rRKgPF7EV81XUu3PyMouo,7508
|
|
995
|
+
vllm/spec_decode/multi_step_worker.py,sha256=kWWOWHofUOd2NiqGLomBx1IXGRo2V4hgl7R3u-m6jTE,19604
|
|
996
|
+
vllm/spec_decode/ngram_worker.py,sha256=ClMwq0JnlhRPiBT8hI4fBuHVPgtt-uQe6ttMR2CVIb0,7827
|
|
997
|
+
vllm/spec_decode/proposer_worker_base.py,sha256=Qyl-YKE4xg9kz4RVlK6tO3ZSng3JU515oQmVZntijKQ,2089
|
|
998
|
+
vllm/spec_decode/smaller_tp_proposer_worker.py,sha256=S_GUKm8uK_WTRJwjL6GWvwyf0u-pvabhLlMdz3SU_Ws,6883
|
|
999
|
+
vllm/spec_decode/spec_decode_worker.py,sha256=lzN7uFnfqNKfUv3pB6KfiPQZmSC-XX-VTQvWosrrUHo,62958
|
|
1000
|
+
vllm/spec_decode/target_model_runner.py,sha256=sRW1sqPhEdEwL5MvHCRRmd-lmc-G8LjJgG7R22R6JM0,2073
|
|
1001
|
+
vllm/spec_decode/top1_proposer.py,sha256=sEvZMEoj_s9-aXzkX6faUZuffoPtxATuMbEmRnZKecM,12354
|
|
1002
|
+
vllm/spec_decode/util.py,sha256=2oG5qDatn0pc99zWPOKoYb2LPDc6fXLUelQRCxHvyBo,9888
|
|
1003
|
+
vllm/third_party/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1004
|
+
vllm/third_party/pynvml.py,sha256=Dw3kbk5Rn1l8hXRQgR2KvaLjEqKQM2M6r0WtEqbtANE,234584
|
|
1005
|
+
vllm/transformers_utils/__init__.py,sha256=fPqOR4zeohDgfZpAHbxPkC_4GNuBtTFqVICGwvZ-QcM,841
|
|
1006
|
+
vllm/transformers_utils/config.py,sha256=A2DTyfFYjSiSYGw-WkSD9KPyWEn6GGK5Rj7JdjkwDh8,31448
|
|
1007
|
+
vllm/transformers_utils/detokenizer.py,sha256=pXjyKrFpcOSBDVtO--iPRWJ6CljR-WGvBcmasnBgkmI,7230
|
|
1008
|
+
vllm/transformers_utils/detokenizer_utils.py,sha256=Dq7zJzmZx1eatUhCsz0P4Lgm8sMRY5pMsCL2SEno6gI,7252
|
|
1009
|
+
vllm/transformers_utils/processor.py,sha256=xbCZ6r4RiJPQgyX7YHgDh0AJbQ5PGKhES2KbhUU-DgY,7533
|
|
1010
|
+
vllm/transformers_utils/s3_utils.py,sha256=W1Pkv_vXDlm5thS1mPtaPxEYMuPUteHjyMzc_p1hgY4,4885
|
|
1011
|
+
vllm/transformers_utils/tokenizer.py,sha256=SEtINv9HhrFZVby8OFvis0Oa-XXjkjwdyR1rPBJikbg,10652
|
|
1012
|
+
vllm/transformers_utils/tokenizer_base.py,sha256=lt6xolCs4SExjBgrTgfhtByRfcty3imiahyaFa22m90,3960
|
|
1013
|
+
vllm/transformers_utils/tokenizer_group.py,sha256=l9ly9KBOguL6aOgNktWk-HrD3GHjxCrr6fDLATlV64M,5190
|
|
1014
|
+
vllm/transformers_utils/utils.py,sha256=vf6-aupApkaOC3pS4BWWjrYcBV8QjE8KXS-BQVyddLk,2630
|
|
1015
|
+
vllm/transformers_utils/chat_templates/__init__.py,sha256=haeHYzr5GKTbTMXUd7chuW6cqRbV2eAPQQa8BT-cwgw,139
|
|
1016
|
+
vllm/transformers_utils/chat_templates/registry.py,sha256=VFpMrP4QF6MV91o9cl6RU3n-VhnyodrQfPfmkNXLnW4,1876
|
|
1017
|
+
vllm/transformers_utils/chat_templates/template_basic.jinja,sha256=DMH0156UMA7eoJelXKUMEDzB-SigjbyCOBxIu9OyFJE,78
|
|
1018
|
+
vllm/transformers_utils/chat_templates/template_blip2.jinja,sha256=ltMbjFdK7T4HUcN_OQaX4hj2r0PGlS1EJ9zhSlnTz1c,332
|
|
1019
|
+
vllm/transformers_utils/chat_templates/template_chatml.jinja,sha256=CKxCWf_KemM_DntV70Hf03WNkDvxznolyW-03SJJw54,370
|
|
1020
|
+
vllm/transformers_utils/chat_templates/template_deepseek_vl2.jinja,sha256=WX32uOZ7h8_xqrWvmsI5R-6Ns8ZcXVn74CKB7FJOifA,785
|
|
1021
|
+
vllm/transformers_utils/chat_templates/template_fuyu.jinja,sha256=hzdsPgeUMaZnd5L23QPiz2oC6_wMBy5WgZkXMVs3Dgo,85
|
|
1022
|
+
vllm/transformers_utils/configs/__init__.py,sha256=Wn668YO8l8j65JugeJjNwm1OZd44vBxk-ZpferyV03E,2475
|
|
1023
|
+
vllm/transformers_utils/configs/arctic.py,sha256=-eVYRmwswr_4pB0rt20hFJhWKJrpyihqyK3rs5JWykY,8984
|
|
1024
|
+
vllm/transformers_utils/configs/chatglm.py,sha256=6H5Hv6Z_yziBZG9q4N_0Obj6eHsGL9DrxQeBhkLsZ9Y,2870
|
|
1025
|
+
vllm/transformers_utils/configs/cohere2.py,sha256=qItc6lYMcmlULkKjVilAwnP6sjo6WPjqTsNtVc0awds,10353
|
|
1026
|
+
vllm/transformers_utils/configs/dbrx.py,sha256=gALa5jIyDD0y8xfG5QFb5X4AEZvRWnsn7yuLyhtfru8,10915
|
|
1027
|
+
vllm/transformers_utils/configs/deepseek_vl2.py,sha256=9K2LZtL90X4jG9Gmwee-ssr60Dvn0P_6Cs0BBryQ67s,7227
|
|
1028
|
+
vllm/transformers_utils/configs/eagle.py,sha256=xlp-BXONN86Jlh_de2gkFu3PDE0VBqFjn-MY4ZdSBng,3153
|
|
1029
|
+
vllm/transformers_utils/configs/exaone.py,sha256=-Oit6CQTp7N_I7TZNf1T1URddaMbultyHNVL-_mLZHk,8858
|
|
1030
|
+
vllm/transformers_utils/configs/falcon.py,sha256=1w9gXJJPzvup7Hd05O1xYzp_IDXDdtxByt06U92uy7c,2917
|
|
1031
|
+
vllm/transformers_utils/configs/h2ovl.py,sha256=Tsyex8PgWS_WEuzgTZ9vGcgn7Pv1m0yJDs64Q2meT_Q,489
|
|
1032
|
+
vllm/transformers_utils/configs/internvl.py,sha256=hqm1INrEucyhhPKQhwRiwEZ6Ulw1gvnFIw1EISnE5QI,1867
|
|
1033
|
+
vllm/transformers_utils/configs/jais.py,sha256=8hj7I6zjOk5fvbR-uluBXXNIsZi8HXYgoYZWwniX_6k,10363
|
|
1034
|
+
vllm/transformers_utils/configs/kimi_vl.py,sha256=IzdW_JZbZyuyhnnUa4ILY_G56TRbbFI56PsY9UIqsh4,1417
|
|
1035
|
+
vllm/transformers_utils/configs/medusa.py,sha256=2gSa-OtMNHi2eL_AJDgbslqb9ntcg9fRfhhgRPGoxr0,1943
|
|
1036
|
+
vllm/transformers_utils/configs/minimax_text_01.py,sha256=ow7OVW0_vH0BeK5suaSIz3hIixWkMuvk5UFNGmoED3U,2315
|
|
1037
|
+
vllm/transformers_utils/configs/minimax_vl_01.py,sha256=ctj_tcYXe6TB6zp-OCpsVfQPrm9NGn6Tcu7hr2Vembo,2548
|
|
1038
|
+
vllm/transformers_utils/configs/mllama.py,sha256=lIkiJ83huJQq7kLAV2hfLWAcUxWVT9aa1YAcSRUzz1Y,805
|
|
1039
|
+
vllm/transformers_utils/configs/mlp_speculator.py,sha256=aAFw9R-D5D0u-C-E9R6wb6dXQ9pw9C3oErFuXtvhSY4,2431
|
|
1040
|
+
vllm/transformers_utils/configs/moonvit.py,sha256=3UjfhLMqkqFcGfnZwu8oM5S11_jmSn1zzjnC-JfCRuQ,1203
|
|
1041
|
+
vllm/transformers_utils/configs/mpt.py,sha256=GQ-vlKb7Q8xH4MU9Hea8QQu06vWkMv0N1O7GbvK1b-0,7583
|
|
1042
|
+
vllm/transformers_utils/configs/nemotron.py,sha256=xw--8lmM5VzLM6Nfyha4vaeRvVYh5v3bjrAP_Z010nk,8974
|
|
1043
|
+
vllm/transformers_utils/configs/nvlm_d.py,sha256=2Mr9ZAI6VG0DbLDw0BnFEIgeiZd7ip3bSoVsfcEeNqQ,458
|
|
1044
|
+
vllm/transformers_utils/configs/ovis.py,sha256=gxKlK229W6zBupvfTvoBcEIL48gDxAZGkuzhRG1EDY0,7655
|
|
1045
|
+
vllm/transformers_utils/configs/skyworkr1v.py,sha256=Wg_ykY-bUNPdcJ_9KwpY2qfUhCvRfFdSBcCjjKUaJVM,1869
|
|
1046
|
+
vllm/transformers_utils/configs/solar.py,sha256=ZSGikoKuPglnXffexb3eUzUfbXAV6_9x6Ss2Uq62Mn8,10841
|
|
1047
|
+
vllm/transformers_utils/configs/telechat2.py,sha256=JsOuzKHPQHqtJBZNi27dtwc-FWelsQ9GlmORN2AubPE,2200
|
|
1048
|
+
vllm/transformers_utils/configs/ultravox.py,sha256=gJ3vpq1Ie8GYygu9md2AziRKDcjYJIfwoWKd8l7RJuw,4459
|
|
1049
|
+
vllm/transformers_utils/processors/__init__.py,sha256=Gq4xlB3VUsQhcHeh3HFxtNKr8XSOKwURbMVLgzndSAY,248
|
|
1050
|
+
vllm/transformers_utils/processors/deepseek_vl2.py,sha256=CDbLhURMUxafQXex-Zg5WT11odfU8s3xC8ypejKll-4,14570
|
|
1051
|
+
vllm/transformers_utils/processors/ovis.py,sha256=yYt0b91iecTcr6q-3n4BWsOXG_1ObEuE5TATPaOrYM8,18859
|
|
1052
|
+
vllm/transformers_utils/tokenizers/__init__.py,sha256=y-jPMUMSBejRx8frDMzFNBD_EBJGQ9gw41EL7s-yy2A,303
|
|
1053
|
+
vllm/transformers_utils/tokenizers/mistral.py,sha256=upaoUWmt0yGPUAnCla5OEVH83lOY9nm7OpFEbyNnmxw,18900
|
|
1054
|
+
vllm/triton_utils/__init__.py,sha256=fSHtisZZiqW1wJIQh7cnJeOliy6gyLjLpB06i4rAvP8,364
|
|
1055
|
+
vllm/triton_utils/importing.py,sha256=LFpuKJugxsuIx0MIV9LL2SoW1i41bFhMH1vNPaLVBY4,1361
|
|
1056
|
+
vllm/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1057
|
+
vllm/usage/usage_lib.py,sha256=C8Dvv9RwIh_wFReYlmq-zaY9cmVqvgp4zDyAm5y-Cps,8834
|
|
1058
|
+
vllm/v1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1059
|
+
vllm/v1/kv_cache_interface.py,sha256=-9KU01IF6lpVm6kUcZbGa-IVROJbQIxD_6E4KE-9zXU,7270
|
|
1060
|
+
vllm/v1/outputs.py,sha256=4lJhwgbqXjaQjxzKk6_JAodN48UE48DVuv8OcVn8fCQ,3711
|
|
1061
|
+
vllm/v1/request.py,sha256=Vr4oT4WOg-x86yvZGBk1LTUKgta-KMSBdrz-MRndDzI,7170
|
|
1062
|
+
vllm/v1/serial_utils.py,sha256=dUtVEJZcEtvPjH6wZprPHNrfbw4FnR7d8xHOpgkFh5s,13165
|
|
1063
|
+
vllm/v1/utils.py,sha256=3hu3UjhPjtrGYx604lzUxeccD9YYCYGWZx6c1bNWC1M,9416
|
|
1064
|
+
vllm/v1/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1065
|
+
vllm/v1/attention/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1066
|
+
vllm/v1/attention/backends/flash_attn.py,sha256=yt4r4Sbg93SdQXG8DLJmUWkXmKDdWRIefGssFA9cTLU,35534
|
|
1067
|
+
vllm/v1/attention/backends/flashinfer.py,sha256=SyoOSUH1ahaQJr3mYSh0EgNZEewPxLVxznVgu_Cmx18,26582
|
|
1068
|
+
vllm/v1/attention/backends/pallas.py,sha256=j-ak-zt7-hAuCxR9LlAeu2gLBz3m219-v2PhSvWwAWk,8644
|
|
1069
|
+
vllm/v1/attention/backends/triton_attn.py,sha256=L6y0Pkv0u299SvrfmfWhDdDB7CwPVcGUhNUKLhH4oKM,11088
|
|
1070
|
+
vllm/v1/attention/backends/utils.py,sha256=JGvB-T-s5dfxRkpNEjdWmf4ZS1W2Qf5W-vWzbPnkrGY,537
|
|
1071
|
+
vllm/v1/attention/backends/mla/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1072
|
+
vllm/v1/attention/backends/mla/common.py,sha256=JvpUFl3t6svkMZ3beTpdQvJOO9FMn92eNvj-fKJk0bI,36190
|
|
1073
|
+
vllm/v1/attention/backends/mla/flashmla.py,sha256=Iutwb-OXFsUSmpZPHWDzA1kmAO_vtu6ONuVHmHDjYV8,5239
|
|
1074
|
+
vllm/v1/attention/backends/mla/rocm_aiter_mla.py,sha256=IgGsxcNMRwccC1yBfbCT69BkxtR1WrTrEobN9ZSFtT0,7947
|
|
1075
|
+
vllm/v1/attention/backends/mla/triton_mla.py,sha256=JUbifo-j4QOhfGCZZLu2ciIQYm9AAuaGmGYLMDVTJTk,4105
|
|
1076
|
+
vllm/v1/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1077
|
+
vllm/v1/core/block_pool.py,sha256=EK1yxrxBM2xr_5x_CwMttu8jiaRqBhgz5aWIiBp3iu0,13395
|
|
1078
|
+
vllm/v1/core/encoder_cache_manager.py,sha256=O4ODe4lL1PGZ6CrN4M_4phwrGitQiEXlsiX8Vp4i_Iw,5257
|
|
1079
|
+
vllm/v1/core/kv_cache_manager.py,sha256=3yevpdDSO0u9Uzu4E_0LCcts1LrZtQMLxX9OEAkG2Hw,15434
|
|
1080
|
+
vllm/v1/core/kv_cache_utils.py,sha256=z3wZHzuzeRNmnq9DGjTI0Glv0TClbl-I-7y2sm9v4FQ,29029
|
|
1081
|
+
vllm/v1/core/single_type_kv_cache_manager.py,sha256=mmkxYpKdJVWvngXM-F4_M9DO76hAYv5587LQbiZMfaM,14865
|
|
1082
|
+
vllm/v1/core/sched/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1083
|
+
vllm/v1/core/sched/interface.py,sha256=8r2NssMn_HtrCcfYghU32zj0ahfSMRZg7LUeWosuAxw,5547
|
|
1084
|
+
vllm/v1/core/sched/output.py,sha256=2_19Xm9CymeFPcgkUnhFYaJ7bWqAkJyDfmibPWQWPSk,5669
|
|
1085
|
+
vllm/v1/core/sched/scheduler.py,sha256=M4wdkW79LSX4yB6Kc0Xw9HbuZwj3ItD1yUE4SURon6E,47117
|
|
1086
|
+
vllm/v1/core/sched/utils.py,sha256=h5yxzYQEON09QhjJgDyOyAdBVWWNJEsTSv9Bx1G1rP0,813
|
|
1087
|
+
vllm/v1/engine/__init__.py,sha256=uQtFibES1n5y45Ltu-hCnAXc08sr7OuDmxwCQNc5q-M,5203
|
|
1088
|
+
vllm/v1/engine/async_llm.py,sha256=oIkiJFXjGoyGIcFJCbnmw_uRqAE0sFRK5FF8U7HB9RM,21632
|
|
1089
|
+
vllm/v1/engine/core.py,sha256=XZql00A4XIQFx10DfntGo4oaKRt1zaVgQ-oUjyt8EYo,33340
|
|
1090
|
+
vllm/v1/engine/core_client.py,sha256=InYae7NNNfOtDWtUZiIIqKqCyZLAp2wtdwkDn3LHEM0,40437
|
|
1091
|
+
vllm/v1/engine/detokenizer.py,sha256=IGnYsLBCx1KqvqnbXLM9p2ChCkKiT5j-vzFx4nv6OQs,9549
|
|
1092
|
+
vllm/v1/engine/exceptions.py,sha256=mxP2NWpoDgXiympaoPM1rhkEWEq2W76Q_wgTH7v6604,662
|
|
1093
|
+
vllm/v1/engine/llm_engine.py,sha256=aUqJw-f4fjZx8aN_NEjnKETIjhBJQSmZx_h1K_BKSdc,12491
|
|
1094
|
+
vllm/v1/engine/logprobs.py,sha256=xJ4mj9i3kXNEc_UbrZl_KwMV5eFtC463hkbVLG9R4gE,7059
|
|
1095
|
+
vllm/v1/engine/mm_input_cache.py,sha256=V4cVrhroLDkZygas2LWOgmOQk4QwD22yrE2kIde1LpA,3151
|
|
1096
|
+
vllm/v1/engine/output_processor.py,sha256=yRYqKiyLgjYTlTd1tPV4wF07XuG0IJG5FIM-1dE8Xbk,16299
|
|
1097
|
+
vllm/v1/engine/parallel_sampling.py,sha256=OjBEE6zzWcY8jfgCiEOGaJfeM4M9-yeV2qWdxdRtLrU,4765
|
|
1098
|
+
vllm/v1/engine/processor.py,sha256=wQE5pwhPYm_EKpaLHkdXf8w4dB2k6qtCT6HtC2DmFw0,17390
|
|
1099
|
+
vllm/v1/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1100
|
+
vllm/v1/executor/abstract.py,sha256=RlosW-n6QNCT16VCB2PukDakpdyo318HQKBa3KrFzxI,4464
|
|
1101
|
+
vllm/v1/executor/multiproc_executor.py,sha256=3QOf6Zjm7FSTCYbyievsroPR0hTsKU6WkhXQMcj41_0,20491
|
|
1102
|
+
vllm/v1/executor/ray_distributed_executor.py,sha256=IJYuJWHHkYUlAfGPrIeoMFnR0oaMYtZrJHv1hmIynBY,1992
|
|
1103
|
+
vllm/v1/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1104
|
+
vllm/v1/metrics/loggers.py,sha256=_ZLiwUd1PO6y1-meie7j5kRzYjcoidoPstdTUOSYGXw,20651
|
|
1105
|
+
vllm/v1/metrics/ray_wrappers.py,sha256=tUmHgRI012hP-w_LyUL7_eJbpXZ07G24D9sCtapGwUE,4020
|
|
1106
|
+
vllm/v1/metrics/reader.py,sha256=c3mr9z4MlhCs-DU-VozX1XSy-49sCNVwokbHs9ItJTg,8633
|
|
1107
|
+
vllm/v1/metrics/stats.py,sha256=0jlQO3ARWExbVFzNhtU0MYeEZiyrZm5CELyyQx3ERfw,9386
|
|
1108
|
+
vllm/v1/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1109
|
+
vllm/v1/sample/metadata.py,sha256=94neEjn1GPZehdsaQ8zoz2Qzl3btezcSnVUUuKHnvTI,1094
|
|
1110
|
+
vllm/v1/sample/rejection_sampler.py,sha256=-GmCooKpQfMC79ZHyu1OaFssCQ24kAhtr4kZzuZxjQY,22937
|
|
1111
|
+
vllm/v1/sample/sampler.py,sha256=nRwExvRK26q7ljiMoHZnkTUabIT6lWcr5woRFkmJcrE,10245
|
|
1112
|
+
vllm/v1/sample/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1113
|
+
vllm/v1/sample/ops/bad_words.py,sha256=_Q9GFppyv3mOstR_3-hKVUOaq12uuHR-xUdkcFAP5Ng,1122
|
|
1114
|
+
vllm/v1/sample/ops/penalties.py,sha256=3W-rVWsNwuDnouIRyaqZW3HtnWyXc9Jlz_Eq_xx5mtA,2149
|
|
1115
|
+
vllm/v1/sample/ops/topk_topp_sampler.py,sha256=kBs1j7jkGd9xQDQwlW4QnNN0GCQzNElQHHrQf6gxlGo,11015
|
|
1116
|
+
vllm/v1/sample/tpu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1117
|
+
vllm/v1/sample/tpu/metadata.py,sha256=TsT6TVfQRHpNpbW0K95HRn9R7hCLZsKmKhpMoi24Gcc,4626
|
|
1118
|
+
vllm/v1/sample/tpu/sampler.py,sha256=4VS30ohbmx2d63bay8zhekLTiXEkKYG_z-KS0c6e8W8,5029
|
|
1119
|
+
vllm/v1/spec_decode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1120
|
+
vllm/v1/spec_decode/eagle.py,sha256=p9MmTEqnsgVKuLn354sSErHOGHspwl0ylJ15Oe4xWpg,18696
|
|
1121
|
+
vllm/v1/spec_decode/medusa.py,sha256=exwY2T5ii0mQk8Kkvq8hZiy7SnOHCrXbTl6L4-OoWNk,2079
|
|
1122
|
+
vllm/v1/spec_decode/metadata.py,sha256=E5jGcv8qd1LzIB35ijGHpgTquQDJsrfTzzvUBT9Ba1A,2188
|
|
1123
|
+
vllm/v1/spec_decode/metrics.py,sha256=7FffH2sOaR2pxS124ZozYXfkK7BH6ubBCLHnLSbstOs,6666
|
|
1124
|
+
vllm/v1/spec_decode/ngram_proposer.py,sha256=OlsaNfDwu0FYzgo0MZBBy253IR6IDXhkeEIpnolwmmQ,4224
|
|
1125
|
+
vllm/v1/spec_decode/utils.py,sha256=SMbQimw7JiTXmkHIwZMDet06kRZvk5LavFeGInC2Cms,1375
|
|
1126
|
+
vllm/v1/structured_output/__init__.py,sha256=S5mHzIkzYW1A4zjAvIB1svs0Ck9R_6L612rIlfzTjCw,9274
|
|
1127
|
+
vllm/v1/structured_output/backend_guidance.py,sha256=JCB_F5Pf5SObmJDyoFCAL8uI3SWLIOxRt2jXmLEckWQ,8715
|
|
1128
|
+
vllm/v1/structured_output/backend_types.py,sha256=1ssvbIgYrFQmNQKrz1pYKArKtfWaGfig1hjHDsNohzo,3737
|
|
1129
|
+
vllm/v1/structured_output/backend_xgrammar.py,sha256=MuhmzoEUEFAejqisPnkapZ4GCtQaZ1PUtcgmz4RUBeo,12180
|
|
1130
|
+
vllm/v1/structured_output/request.py,sha256=6T3rqBiRvzRerEc-PYFFZXHqvHxc4ec4iz2p0MIv-GY,3142
|
|
1131
|
+
vllm/v1/structured_output/utils.py,sha256=My1yM9DtLORHJCZj6dVFJngfNRmnZRyAc9Sf5bKM2kc,5797
|
|
1132
|
+
vllm/v1/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1133
|
+
vllm/v1/worker/block_table.py,sha256=Oh-a2xkyV6qJb2VF2gU6JGIw7Ks_g3aPaatNNKPyqK0,4963
|
|
1134
|
+
vllm/v1/worker/gpu_input_batch.py,sha256=Cy65sRB9BRSvcvWDR98Z2fGOApZzvY1vJi_e7ilcoXI,29824
|
|
1135
|
+
vllm/v1/worker/gpu_model_runner.py,sha256=NuEpAzJuwYIKUjyQinQPGQOJe2Jbu2SrWAKkugW9Kxo,97544
|
|
1136
|
+
vllm/v1/worker/gpu_worker.py,sha256=Dk9YejC374cKPgEwCLPU4hSJl4Fx-7p9H9uCDtwvhpw,15487
|
|
1137
|
+
vllm/v1/worker/lora_model_runner_mixin.py,sha256=w3_2YfgEHuTOCQU69F3WSNG4EpxaJZxC_ZakGGmAKC8,5781
|
|
1138
|
+
vllm/v1/worker/tpu_model_runner.py,sha256=jNApadQUKuotupDFrsvSJPDKRWGTrTpJ1lQRvhTIew8,70274
|
|
1139
|
+
vllm/v1/worker/tpu_worker.py,sha256=cAY9qHeWjPkLWq6yjn_v4mLNihqNCS_8H2wlBDcjW-A,11563
|
|
1140
|
+
vllm/v1/worker/utils.py,sha256=MwCMs3WdGg3NT-1i5cvkI7GGC6XSt4R2zWjAu6BEHuQ,2513
|
|
1141
|
+
vllm/v1/worker/worker_base.py,sha256=y8BsJDgREwPB8DH8dynVIDfykYydbUzcrSTHLSMqMUU,1976
|
|
1142
|
+
vllm/vllm_flash_attn/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1143
|
+
vllm/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
1144
|
+
vllm/worker/cache_engine.py,sha256=iR6mNFDgBUsGYqx7kBKpQB1UQsv7e-AgsB63OCYFGIQ,6006
|
|
1145
|
+
vllm/worker/cpu_enc_dec_model_runner.py,sha256=sr5gBdz-wHO7JqcnkxeUzrFYq013-lkNUp4fBhZEF6w,13067
|
|
1146
|
+
vllm/worker/cpu_model_runner.py,sha256=cYPdHGB_5APy6M-PHwgelE8r_JUwbHd-fNGfpZt_VpU,28353
|
|
1147
|
+
vllm/worker/cpu_pooling_model_runner.py,sha256=RoBhMmJU-jvKXFLsV39az9vTUPy5qGn8db8auHwwhw4,4808
|
|
1148
|
+
vllm/worker/cpu_worker.py,sha256=b3S2jYAtr1XIy52IzM2bI9GLVdylH5bx7x-76br18oQ,16024
|
|
1149
|
+
vllm/worker/enc_dec_model_runner.py,sha256=0sDWD1wM4wNDFxKC6WTJl9VLHKdag9nFcamL9ki0sP0,24048
|
|
1150
|
+
vllm/worker/hpu_model_runner.py,sha256=Iyceota6Noj5o_lZLhH9GK5kGfyNDFWfMTkOyDD6mHY,104341
|
|
1151
|
+
vllm/worker/hpu_worker.py,sha256=11gAYhs_wlxrA6DKz9TxdIKdAhSU4jHSQDrviwWR4tc,21579
|
|
1152
|
+
vllm/worker/model_runner.py,sha256=SDP1iJ8fvuXpwoTbkddhkl6EZeYXfy9F0r07Rhn4IrA,97881
|
|
1153
|
+
vllm/worker/model_runner_base.py,sha256=c76vGfi0-uqdIf-B2A0dU3HrAGd1FQmR6p9f8JVE8Oc,9370
|
|
1154
|
+
vllm/worker/multi_step_hpu_worker.py,sha256=3GKiJPMq_dJ0qrV8PidC3yGqvQW0z9iIMD3A02KFkLo,5296
|
|
1155
|
+
vllm/worker/multi_step_model_runner.py,sha256=75-HQbzVbTTYNn6KJruE0gAgsLX7iKX_hnrVP59_kCc,39340
|
|
1156
|
+
vllm/worker/multi_step_neuron_model_runner.py,sha256=GOFXy-_-YPiQcE4XAG8zkcKFoMjX5WBv4WSRGA_t1EE,3270
|
|
1157
|
+
vllm/worker/multi_step_neuronx_distributed_model_runner.py,sha256=ahbE2o_rQKj3DKRGlPgwV3u3Boa6gZcjUskQOPZIIHc,2177
|
|
1158
|
+
vllm/worker/multi_step_tpu_worker.py,sha256=c6A1A6Aagis6VyskYP6jP35Mt5paZwDV2jn750Sn5LA,4450
|
|
1159
|
+
vllm/worker/multi_step_worker.py,sha256=90a_O9w3bmGHjs5rGVsNEbYzMMhNVZSqLLyOVAHCxk0,9416
|
|
1160
|
+
vllm/worker/neuron_model_runner.py,sha256=IIcu4Slcde06QfbF9hVBdF3DdUtzOo5VQdx1MUSmSvk,17853
|
|
1161
|
+
vllm/worker/neuron_worker.py,sha256=NSZK9lTuQ1t6Rtr2YKTZUZd4lwmTdytyTr_PgXDulWQ,5891
|
|
1162
|
+
vllm/worker/neuronx_distributed_model_runner.py,sha256=SeJ4t3RJGmSxiH8wdT8sRqc3fKZKmyQ-hsvT9wb1x1o,5437
|
|
1163
|
+
vllm/worker/pooling_model_runner.py,sha256=T69qjB4xZ1hUSc8Qt-qzD-ZvJPwD08LM2QU90t_NXNw,9175
|
|
1164
|
+
vllm/worker/tpu_model_runner.py,sha256=XAvnSqcHK_o4IJYmOHQjTnxibk0mvvICH2jv7D2fr_c,40779
|
|
1165
|
+
vllm/worker/tpu_worker.py,sha256=_8LAcvhLqC0cL9PgiVzsHJ4ljDcbVq3UaDvdbUp3USU,14792
|
|
1166
|
+
vllm/worker/utils.py,sha256=wOcwZ-qRWMUF7RtKjuZDPpFAePc6DYUFCAvxW382JLo,1911
|
|
1167
|
+
vllm/worker/worker.py,sha256=ErW7lPZLV9EF2t3dbBh81hj89_pjrjDAN7sQ7Ep8Fb8,25387
|
|
1168
|
+
vllm/worker/worker_base.py,sha256=GhIywLGVRO48Nb9-CxIXnMbvt8CrJayFkxZ7AAHnmu0,26064
|
|
1169
|
+
vllm/worker/xpu_model_runner.py,sha256=Vqbm96_ubt2zCAvB5xYdtoZwzp2lzgZ63tHQmRdTWTk,24529
|
|
1170
|
+
vllm/worker/xpu_worker.py,sha256=Ek_2yHb0iz-9Md7TdFA3BtHwzlb0HH32piMl42bI0uo,7921
|
|
1171
|
+
vllm_cpu_avx512bf16-0.9.0.post2.dist-info/METADATA,sha256=bIAEmdNMld4DDKuONAVDdaRZbtAR-2NNkSRDYstJbyg,16012
|
|
1172
|
+
vllm_cpu_avx512bf16-0.9.0.post2.dist-info/WHEEL,sha256=1LnhXYRWWL9rYaadn8flXqk7Fe1mIy4KAeWZuewMlVY,113
|
|
1173
|
+
vllm_cpu_avx512bf16-0.9.0.post2.dist-info/entry_points.txt,sha256=ErfiCUEEMrGDD3jBwf8c54AolBCFv7qrc8Ix9iqzzfs,184
|
|
1174
|
+
vllm_cpu_avx512bf16-0.9.0.post2.dist-info/top_level.txt,sha256=fAgb8Pt4zQoKTUA3ZnKEIgcjh0L97_dwEjYDTL5MEEo,5
|
|
1175
|
+
vllm_cpu_avx512bf16-0.9.0.post2.dist-info/RECORD,,
|