vllm-cpu-amxbf16 0.9.1__cp312-cp312-manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1197)
  1. vllm/_C.abi3.so +0 -0
  2. vllm/__init__.py +53 -0
  3. vllm/_custom_ops.py +1828 -0
  4. vllm/_ipex_ops.py +244 -0
  5. vllm/_version.py +34 -0
  6. vllm/adapter_commons/__init__.py +0 -0
  7. vllm/adapter_commons/layers.py +16 -0
  8. vllm/adapter_commons/models.py +106 -0
  9. vllm/adapter_commons/request.py +26 -0
  10. vllm/adapter_commons/utils.py +93 -0
  11. vllm/adapter_commons/worker_manager.py +39 -0
  12. vllm/assets/__init__.py +0 -0
  13. vllm/assets/audio.py +45 -0
  14. vllm/assets/base.py +41 -0
  15. vllm/assets/image.py +34 -0
  16. vllm/assets/video.py +115 -0
  17. vllm/attention/__init__.py +20 -0
  18. vllm/attention/backends/__init__.py +0 -0
  19. vllm/attention/backends/abstract.py +308 -0
  20. vllm/attention/backends/blocksparse_attn.py +461 -0
  21. vllm/attention/backends/cpu_mla.py +307 -0
  22. vllm/attention/backends/dual_chunk_flash_attn.py +1498 -0
  23. vllm/attention/backends/flash_attn.py +1003 -0
  24. vllm/attention/backends/flashinfer.py +1104 -0
  25. vllm/attention/backends/flashmla.py +244 -0
  26. vllm/attention/backends/hpu_attn.py +313 -0
  27. vllm/attention/backends/ipex_attn.py +398 -0
  28. vllm/attention/backends/mla/__init__.py +0 -0
  29. vllm/attention/backends/mla/common.py +1385 -0
  30. vllm/attention/backends/pallas.py +351 -0
  31. vllm/attention/backends/placeholder_attn.py +400 -0
  32. vllm/attention/backends/rocm_aiter_mla.py +435 -0
  33. vllm/attention/backends/rocm_flash_attn.py +975 -0
  34. vllm/attention/backends/torch_sdpa.py +703 -0
  35. vllm/attention/backends/triton_mla.py +115 -0
  36. vllm/attention/backends/utils.py +610 -0
  37. vllm/attention/backends/xformers.py +802 -0
  38. vllm/attention/layer.py +468 -0
  39. vllm/attention/ops/__init__.py +0 -0
  40. vllm/attention/ops/blocksparse_attention/__init__.py +0 -0
  41. vllm/attention/ops/blocksparse_attention/blocksparse_attention_kernel.py +433 -0
  42. vllm/attention/ops/blocksparse_attention/interface.py +239 -0
  43. vllm/attention/ops/blocksparse_attention/utils.py +246 -0
  44. vllm/attention/ops/chunked_prefill_paged_decode.py +368 -0
  45. vllm/attention/ops/flashmla.py +116 -0
  46. vllm/attention/ops/hpu_paged_attn.py +88 -0
  47. vllm/attention/ops/ipex_attn.py +195 -0
  48. vllm/attention/ops/merge_attn_states.py +43 -0
  49. vllm/attention/ops/nki_flash_attn.py +906 -0
  50. vllm/attention/ops/paged_attn.py +256 -0
  51. vllm/attention/ops/prefix_prefill.py +902 -0
  52. vllm/attention/ops/rocm_aiter_mla.py +100 -0
  53. vllm/attention/ops/rocm_aiter_paged_attn.py +102 -0
  54. vllm/attention/ops/triton_decode_attention.py +674 -0
  55. vllm/attention/ops/triton_flash_attention.py +979 -0
  56. vllm/attention/ops/triton_merge_attn_states.py +97 -0
  57. vllm/attention/ops/triton_unified_attention.py +334 -0
  58. vllm/attention/selector.py +187 -0
  59. vllm/attention/utils/fa_utils.py +55 -0
  60. vllm/beam_search.py +87 -0
  61. vllm/benchmarks/__init__.py +0 -0
  62. vllm/benchmarks/datasets.py +1185 -0
  63. vllm/benchmarks/endpoint_request_func.py +381 -0
  64. vllm/benchmarks/latency.py +168 -0
  65. vllm/benchmarks/serve.py +1135 -0
  66. vllm/benchmarks/throughput.py +609 -0
  67. vllm/benchmarks/utils.py +70 -0
  68. vllm/collect_env.py +820 -0
  69. vllm/compilation/__init__.py +0 -0
  70. vllm/compilation/activation_quant_fusion.py +89 -0
  71. vllm/compilation/backends.py +563 -0
  72. vllm/compilation/base_piecewise_backend.py +72 -0
  73. vllm/compilation/collective_fusion.py +127 -0
  74. vllm/compilation/compiler_interface.py +544 -0
  75. vllm/compilation/counter.py +38 -0
  76. vllm/compilation/cuda_piecewise_backend.py +214 -0
  77. vllm/compilation/decorators.py +250 -0
  78. vllm/compilation/fix_functionalization.py +191 -0
  79. vllm/compilation/fusion.py +618 -0
  80. vllm/compilation/fx_utils.py +62 -0
  81. vllm/compilation/inductor_pass.py +115 -0
  82. vllm/compilation/monitor.py +39 -0
  83. vllm/compilation/multi_output_match.py +109 -0
  84. vllm/compilation/noop_elimination.py +137 -0
  85. vllm/compilation/pass_manager.py +78 -0
  86. vllm/compilation/sequence_parallelism.py +268 -0
  87. vllm/compilation/torch25_custom_graph_pass.py +42 -0
  88. vllm/compilation/vllm_inductor_pass.py +67 -0
  89. vllm/compilation/wrapper.py +135 -0
  90. vllm/config.py +4746 -0
  91. vllm/connections.py +174 -0
  92. vllm/core/__init__.py +0 -0
  93. vllm/core/block/__init__.py +0 -0
  94. vllm/core/block/block_table.py +399 -0
  95. vllm/core/block/common.py +371 -0
  96. vllm/core/block/cpu_gpu_block_allocator.py +441 -0
  97. vllm/core/block/interfaces.py +319 -0
  98. vllm/core/block/naive_block.py +466 -0
  99. vllm/core/block/prefix_caching_block.py +1135 -0
  100. vllm/core/block/utils.py +28 -0
  101. vllm/core/block_manager.py +521 -0
  102. vllm/core/evictor.py +157 -0
  103. vllm/core/interfaces.py +135 -0
  104. vllm/core/placeholder_block_space_manager.py +100 -0
  105. vllm/core/scheduler.py +2093 -0
  106. vllm/device_allocator/__init__.py +0 -0
  107. vllm/device_allocator/cumem.py +281 -0
  108. vllm/distributed/__init__.py +6 -0
  109. vllm/distributed/communication_op.py +41 -0
  110. vllm/distributed/device_communicators/__init__.py +0 -0
  111. vllm/distributed/device_communicators/all2all.py +264 -0
  112. vllm/distributed/device_communicators/base_device_communicator.py +260 -0
  113. vllm/distributed/device_communicators/cpu_communicator.py +145 -0
  114. vllm/distributed/device_communicators/cuda_communicator.py +176 -0
  115. vllm/distributed/device_communicators/cuda_wrapper.py +180 -0
  116. vllm/distributed/device_communicators/custom_all_reduce.py +304 -0
  117. vllm/distributed/device_communicators/custom_all_reduce_utils.py +259 -0
  118. vllm/distributed/device_communicators/hpu_communicator.py +46 -0
  119. vllm/distributed/device_communicators/neuron_communicator.py +20 -0
  120. vllm/distributed/device_communicators/pynccl.py +218 -0
  121. vllm/distributed/device_communicators/pynccl_wrapper.py +341 -0
  122. vllm/distributed/device_communicators/shm_broadcast.py +585 -0
  123. vllm/distributed/device_communicators/tpu_communicator.py +103 -0
  124. vllm/distributed/device_communicators/xpu_communicator.py +55 -0
  125. vllm/distributed/kv_events.py +356 -0
  126. vllm/distributed/kv_transfer/README.md +29 -0
  127. vllm/distributed/kv_transfer/__init__.py +12 -0
  128. vllm/distributed/kv_transfer/disagg_prefill_workflow.jpg +0 -0
  129. vllm/distributed/kv_transfer/kv_connector/__init__.py +0 -0
  130. vllm/distributed/kv_transfer/kv_connector/base.py +128 -0
  131. vllm/distributed/kv_transfer/kv_connector/factory.py +128 -0
  132. vllm/distributed/kv_transfer/kv_connector/lmcache_connector.py +99 -0
  133. vllm/distributed/kv_transfer/kv_connector/mooncake_store_connector.py +203 -0
  134. vllm/distributed/kv_transfer/kv_connector/simple_connector.py +329 -0
  135. vllm/distributed/kv_transfer/kv_connector/utils.py +108 -0
  136. vllm/distributed/kv_transfer/kv_connector/v1/__init__.py +6 -0
  137. vllm/distributed/kv_transfer/kv_connector/v1/base.py +283 -0
  138. vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py +134 -0
  139. vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py +201 -0
  140. vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +1030 -0
  141. vllm/distributed/kv_transfer/kv_connector/v1/shared_storage_connector.py +384 -0
  142. vllm/distributed/kv_transfer/kv_connector_agent.py +77 -0
  143. vllm/distributed/kv_transfer/kv_lookup_buffer/__init__.py +0 -0
  144. vllm/distributed/kv_transfer/kv_lookup_buffer/base.py +175 -0
  145. vllm/distributed/kv_transfer/kv_lookup_buffer/mooncake_store.py +161 -0
  146. vllm/distributed/kv_transfer/kv_lookup_buffer/simple_buffer.py +237 -0
  147. vllm/distributed/kv_transfer/kv_pipe/__init__.py +0 -0
  148. vllm/distributed/kv_transfer/kv_pipe/base.py +67 -0
  149. vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py +280 -0
  150. vllm/distributed/kv_transfer/kv_pipe/pynccl_pipe.py +280 -0
  151. vllm/distributed/kv_transfer/kv_transfer_state.py +71 -0
  152. vllm/distributed/parallel_state.py +1296 -0
  153. vllm/distributed/tpu_distributed_utils.py +177 -0
  154. vllm/distributed/utils.py +536 -0
  155. vllm/engine/__init__.py +0 -0
  156. vllm/engine/arg_utils.py +1708 -0
  157. vllm/engine/async_llm_engine.py +1200 -0
  158. vllm/engine/async_timeout.py +173 -0
  159. vllm/engine/llm_engine.py +2097 -0
  160. vllm/engine/metrics.py +629 -0
  161. vllm/engine/metrics_types.py +94 -0
  162. vllm/engine/multiprocessing/__init__.py +148 -0
  163. vllm/engine/multiprocessing/client.py +681 -0
  164. vllm/engine/multiprocessing/engine.py +460 -0
  165. vllm/engine/output_processor/__init__.py +0 -0
  166. vllm/engine/output_processor/interfaces.py +75 -0
  167. vllm/engine/output_processor/multi_step.py +216 -0
  168. vllm/engine/output_processor/single_step.py +145 -0
  169. vllm/engine/output_processor/stop_checker.py +131 -0
  170. vllm/engine/output_processor/util.py +28 -0
  171. vllm/engine/protocol.py +317 -0
  172. vllm/entrypoints/__init__.py +0 -0
  173. vllm/entrypoints/api_server.py +178 -0
  174. vllm/entrypoints/chat_utils.py +1299 -0
  175. vllm/entrypoints/cli/__init__.py +0 -0
  176. vllm/entrypoints/cli/benchmark/__init__.py +0 -0
  177. vllm/entrypoints/cli/benchmark/base.py +39 -0
  178. vllm/entrypoints/cli/benchmark/latency.py +30 -0
  179. vllm/entrypoints/cli/benchmark/main.py +54 -0
  180. vllm/entrypoints/cli/benchmark/serve.py +30 -0
  181. vllm/entrypoints/cli/benchmark/throughput.py +30 -0
  182. vllm/entrypoints/cli/collect_env.py +35 -0
  183. vllm/entrypoints/cli/main.py +65 -0
  184. vllm/entrypoints/cli/openai.py +205 -0
  185. vllm/entrypoints/cli/run_batch.py +62 -0
  186. vllm/entrypoints/cli/serve.py +328 -0
  187. vllm/entrypoints/cli/types.py +25 -0
  188. vllm/entrypoints/launcher.py +147 -0
  189. vllm/entrypoints/llm.py +1544 -0
  190. vllm/entrypoints/logger.py +50 -0
  191. vllm/entrypoints/openai/__init__.py +0 -0
  192. vllm/entrypoints/openai/api_server.py +1387 -0
  193. vllm/entrypoints/openai/cli_args.py +315 -0
  194. vllm/entrypoints/openai/logits_processors.py +90 -0
  195. vllm/entrypoints/openai/protocol.py +1913 -0
  196. vllm/entrypoints/openai/run_batch.py +463 -0
  197. vllm/entrypoints/openai/serving_chat.py +1221 -0
  198. vllm/entrypoints/openai/serving_classification.py +160 -0
  199. vllm/entrypoints/openai/serving_completion.py +592 -0
  200. vllm/entrypoints/openai/serving_embedding.py +201 -0
  201. vllm/entrypoints/openai/serving_engine.py +986 -0
  202. vllm/entrypoints/openai/serving_models.py +315 -0
  203. vllm/entrypoints/openai/serving_pooling.py +232 -0
  204. vllm/entrypoints/openai/serving_score.py +433 -0
  205. vllm/entrypoints/openai/serving_tokenization.py +157 -0
  206. vllm/entrypoints/openai/serving_transcription.py +424 -0
  207. vllm/entrypoints/openai/tool_parsers/__init__.py +23 -0
  208. vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py +164 -0
  209. vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py +370 -0
  210. vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py +259 -0
  211. vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py +237 -0
  212. vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py +371 -0
  213. vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py +216 -0
  214. vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py +308 -0
  215. vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py +316 -0
  216. vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py +267 -0
  217. vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py +369 -0
  218. vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py +112 -0
  219. vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py +308 -0
  220. vllm/entrypoints/openai/tool_parsers/utils.py +124 -0
  221. vllm/entrypoints/score_utils.py +50 -0
  222. vllm/entrypoints/ssl.py +75 -0
  223. vllm/entrypoints/utils.py +233 -0
  224. vllm/env_override.py +41 -0
  225. vllm/envs.py +944 -0
  226. vllm/executor/__init__.py +0 -0
  227. vllm/executor/executor_base.py +401 -0
  228. vllm/executor/mp_distributed_executor.py +244 -0
  229. vllm/executor/msgspec_utils.py +30 -0
  230. vllm/executor/multiproc_worker_utils.py +313 -0
  231. vllm/executor/ray_distributed_executor.py +701 -0
  232. vllm/executor/ray_utils.py +399 -0
  233. vllm/executor/uniproc_executor.py +139 -0
  234. vllm/forward_context.py +179 -0
  235. vllm/inputs/__init__.py +41 -0
  236. vllm/inputs/data.py +331 -0
  237. vllm/inputs/parse.py +151 -0
  238. vllm/inputs/preprocess.py +909 -0
  239. vllm/inputs/registry.py +237 -0
  240. vllm/jsontree.py +80 -0
  241. vllm/logger.py +212 -0
  242. vllm/logging_utils/__init__.py +8 -0
  243. vllm/logging_utils/dump_input.py +85 -0
  244. vllm/logging_utils/formatter.py +18 -0
  245. vllm/logits_process.py +119 -0
  246. vllm/lora/__init__.py +0 -0
  247. vllm/lora/fully_sharded_layers.py +355 -0
  248. vllm/lora/layers.py +1285 -0
  249. vllm/lora/lora.py +199 -0
  250. vllm/lora/models.py +818 -0
  251. vllm/lora/ops/__init__.py +0 -0
  252. vllm/lora/ops/torch_ops/__init__.py +16 -0
  253. vllm/lora/ops/torch_ops/lora_ops.py +119 -0
  254. vllm/lora/ops/triton_ops/__init__.py +12 -0
  255. vllm/lora/ops/triton_ops/kernel_utils.py +243 -0
  256. vllm/lora/ops/triton_ops/lora_expand_op.py +290 -0
  257. vllm/lora/ops/triton_ops/lora_kernel_metadata.py +148 -0
  258. vllm/lora/ops/triton_ops/lora_shrink_op.py +244 -0
  259. vllm/lora/ops/triton_ops/utils.py +120 -0
  260. vllm/lora/ops/xla_ops/__init__.py +7 -0
  261. vllm/lora/ops/xla_ops/lora_ops.py +145 -0
  262. vllm/lora/peft_helper.py +136 -0
  263. vllm/lora/punica_wrapper/__init__.py +10 -0
  264. vllm/lora/punica_wrapper/punica_base.py +485 -0
  265. vllm/lora/punica_wrapper/punica_cpu.py +349 -0
  266. vllm/lora/punica_wrapper/punica_gpu.py +290 -0
  267. vllm/lora/punica_wrapper/punica_hpu.py +145 -0
  268. vllm/lora/punica_wrapper/punica_selector.py +20 -0
  269. vllm/lora/punica_wrapper/punica_tpu.py +405 -0
  270. vllm/lora/punica_wrapper/utils.py +164 -0
  271. vllm/lora/request.py +99 -0
  272. vllm/lora/resolver.py +85 -0
  273. vllm/lora/utils.py +240 -0
  274. vllm/lora/worker_manager.py +259 -0
  275. vllm/model_executor/__init__.py +16 -0
  276. vllm/model_executor/custom_op.py +152 -0
  277. vllm/model_executor/guided_decoding/__init__.py +181 -0
  278. vllm/model_executor/guided_decoding/guidance_decoding.py +63 -0
  279. vllm/model_executor/guided_decoding/guidance_logits_processors.py +104 -0
  280. vllm/model_executor/guided_decoding/guided_fields.py +41 -0
  281. vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py +67 -0
  282. vllm/model_executor/guided_decoding/outlines_decoding.py +155 -0
  283. vllm/model_executor/guided_decoding/outlines_logits_processors.py +284 -0
  284. vllm/model_executor/guided_decoding/utils.py +242 -0
  285. vllm/model_executor/guided_decoding/xgrammar_decoding.py +426 -0
  286. vllm/model_executor/layers/__init__.py +0 -0
  287. vllm/model_executor/layers/activation.py +369 -0
  288. vllm/model_executor/layers/fused_moe/__init__.py +54 -0
  289. vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py +125 -0
  290. vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py +117 -0
  291. vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +146 -0
  292. vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  293. vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +218 -0
  294. vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +218 -0
  295. vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +218 -0
  296. vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +218 -0
  297. vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json +218 -0
  298. vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +218 -0
  299. vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +218 -0
  300. vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +218 -0
  301. vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +218 -0
  302. vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
  303. vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X.json +200 -0
  304. vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  305. vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  306. vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H20-3e.json +146 -0
  307. vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H20.json +146 -0
  308. vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H200.json +146 -0
  309. vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  310. vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  311. vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20-3e.json +146 -0
  312. vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20.json +146 -0
  313. vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  314. vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200.json +146 -0
  315. vllm/model_executor/layers/fused_moe/configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  316. vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  317. vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  318. vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20.json +146 -0
  319. vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  320. vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200.json +146 -0
  321. vllm/model_executor/layers/fused_moe/configs/E=128,N=96,device_name=NVIDIA_H20.json +146 -0
  322. vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=AMD_Instinct_MI300X.json +200 -0
  323. vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=NVIDIA_H100.json +146 -0
  324. vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json +146 -0
  325. vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  326. vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  327. vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +146 -0
  328. vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  329. vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +218 -0
  330. vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +218 -0
  331. vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  332. vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  333. vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +146 -0
  334. vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +146 -0
  335. vllm/model_executor/layers/fused_moe/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +130 -0
  336. vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +146 -0
  337. vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +218 -0
  338. vllm/model_executor/layers/fused_moe/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +130 -0
  339. vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json +146 -0
  340. vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  341. vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +146 -0
  342. vllm/model_executor/layers/fused_moe/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +130 -0
  343. vllm/model_executor/layers/fused_moe/configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
  344. vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325X,block_shape=[128,128].json +200 -0
  345. vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +200 -0
  346. vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  347. vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json +146 -0
  348. vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  349. vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +146 -0
  350. vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  351. vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  352. vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  353. vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  354. vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +200 -0
  355. vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +200 -0
  356. vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  357. vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  358. vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  359. vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  360. vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  361. vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  362. vllm/model_executor/layers/fused_moe/configs/E=256,N=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +200 -0
  363. vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
  364. vllm/model_executor/layers/fused_moe/configs/E=60,N=1408,device_name=AMD_Instinct_MI300X.json +200 -0
  365. vllm/model_executor/layers/fused_moe/configs/E=60,N=176,device_name=AMD_Instinct_MI300X.json +200 -0
  366. vllm/model_executor/layers/fused_moe/configs/E=60,N=352,device_name=AMD_Instinct_MI300X.json +200 -0
  367. vllm/model_executor/layers/fused_moe/configs/E=60,N=704,device_name=AMD_Instinct_MI300X.json +200 -0
  368. vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  369. vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
  370. vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
  371. vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  372. vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  373. vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200.json +146 -0
  374. vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
  375. vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  376. vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200.json +146 -0
  377. vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
  378. vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  379. vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  380. vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200.json +146 -0
  381. vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  382. vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json +146 -0
  383. vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json +146 -0
  384. vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
  385. vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  386. vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  387. vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200.json +146 -0
  388. vllm/model_executor/layers/fused_moe/configs/E=64,N=896,device_name=NVIDIA_H20.json +146 -0
  389. vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
  390. vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json +200 -0
  391. vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
  392. vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json +200 -0
  393. vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +138 -0
  394. vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  395. vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200.json +146 -0
  396. vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
  397. vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X.json +200 -0
  398. vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
  399. vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X.json +200 -0
  400. vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
  401. vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json +200 -0
  402. vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
  403. vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json +200 -0
  404. vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json +146 -0
  405. vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  406. vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  407. vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  408. vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200.json +146 -0
  409. vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
  410. vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X.json +200 -0
  411. vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
  412. vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X.json +200 -0
  413. vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  414. vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
  415. vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  416. vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  417. vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200.json +146 -0
  418. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
  419. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json +200 -0
  420. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
  421. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json +200 -0
  422. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json +146 -0
  423. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  424. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json +146 -0
  425. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
  426. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  427. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  428. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200.json +146 -0
  429. vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_L40S.json +173 -0
  430. vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
  431. vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X.json +200 -0
  432. vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
  433. vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X.json +200 -0
  434. vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  435. vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
  436. vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  437. vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  438. vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200.json +146 -0
  439. vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
  440. vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json +200 -0
  441. vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
  442. vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json +200 -0
  443. vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
  444. vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
  445. vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  446. vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  447. vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200.json +146 -0
  448. vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json +164 -0
  449. vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X.json +200 -0
  450. vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json +164 -0
  451. vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X.json +200 -0
  452. vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json +146 -0
  453. vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json +146 -0
  454. vllm/model_executor/layers/fused_moe/configs/README +12 -0
  455. vllm/model_executor/layers/fused_moe/cutlass_moe.py +461 -0
  456. vllm/model_executor/layers/fused_moe/deep_gemm_moe.py +240 -0
  457. vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py +240 -0
  458. vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py +186 -0
  459. vllm/model_executor/layers/fused_moe/fused_batched_moe.py +775 -0
  460. vllm/model_executor/layers/fused_moe/fused_marlin_moe.py +232 -0
  461. vllm/model_executor/layers/fused_moe/fused_moe.py +1724 -0
  462. vllm/model_executor/layers/fused_moe/layer.py +1535 -0
  463. vllm/model_executor/layers/fused_moe/modular_kernel.py +446 -0
  464. vllm/model_executor/layers/fused_moe/moe_align_block_size.py +243 -0
  465. vllm/model_executor/layers/fused_moe/moe_pallas.py +80 -0
  466. vllm/model_executor/layers/fused_moe/moe_permute_unpermute.py +190 -0
  467. vllm/model_executor/layers/fused_moe/moe_torch_iterative.py +60 -0
  468. vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py +159 -0
  469. vllm/model_executor/layers/fused_moe/prepare_finalize.py +69 -0
  470. vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py +421 -0
  471. vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py +117 -0
  472. vllm/model_executor/layers/fused_moe/utils.py +98 -0
  473. vllm/model_executor/layers/layernorm.py +288 -0
  474. vllm/model_executor/layers/lightning_attn.py +652 -0
  475. vllm/model_executor/layers/linear.py +1524 -0
  476. vllm/model_executor/layers/logits_processor.py +197 -0
  477. vllm/model_executor/layers/mamba/__init__.py +0 -0
  478. vllm/model_executor/layers/mamba/mamba2_metadata.py +125 -0
  479. vllm/model_executor/layers/mamba/mamba_mixer.py +245 -0
  480. vllm/model_executor/layers/mamba/mamba_mixer2.py +616 -0
  481. vllm/model_executor/layers/mamba/ops/__init__.py +0 -0
  482. vllm/model_executor/layers/mamba/ops/causal_conv1d.py +105 -0
  483. vllm/model_executor/layers/mamba/ops/mamba_ssm.py +414 -0
  484. vllm/model_executor/layers/mamba/ops/ssd_bmm.py +262 -0
  485. vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py +589 -0
  486. vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py +751 -0
  487. vllm/model_executor/layers/mamba/ops/ssd_combined.py +232 -0
  488. vllm/model_executor/layers/mamba/ops/ssd_state_passing.py +206 -0
  489. vllm/model_executor/layers/pooler.py +350 -0
  490. vllm/model_executor/layers/quantization/__init__.py +157 -0
  491. vllm/model_executor/layers/quantization/aqlm.py +376 -0
  492. vllm/model_executor/layers/quantization/auto_round.py +310 -0
  493. vllm/model_executor/layers/quantization/awq.py +194 -0
  494. vllm/model_executor/layers/quantization/awq_marlin.py +519 -0
  495. vllm/model_executor/layers/quantization/awq_triton.py +320 -0
  496. vllm/model_executor/layers/quantization/base_config.py +151 -0
  497. vllm/model_executor/layers/quantization/bitblas.py +461 -0
  498. vllm/model_executor/layers/quantization/bitsandbytes.py +396 -0
  499. vllm/model_executor/layers/quantization/compressed_tensors/__init__.py +0 -0
  500. vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py +668 -0
  501. vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py +1260 -0
  502. vllm/model_executor/layers/quantization/compressed_tensors/schemes/__init__.py +24 -0
  503. vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py +358 -0
  504. vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_scheme.py +55 -0
  505. vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_24.py +160 -0
  506. vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_nvfp4.py +93 -0
  507. vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a4_nvfp4.py +178 -0
  508. vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py +121 -0
  509. vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py +150 -0
  510. vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py +111 -0
  511. vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py +201 -0
  512. vllm/model_executor/layers/quantization/compressed_tensors/triton_scaled_mm.py +206 -0
  513. vllm/model_executor/layers/quantization/compressed_tensors/utils.py +216 -0
  514. vllm/model_executor/layers/quantization/deepspeedfp.py +195 -0
  515. vllm/model_executor/layers/quantization/experts_int8.py +196 -0
  516. vllm/model_executor/layers/quantization/fbgemm_fp8.py +172 -0
  517. vllm/model_executor/layers/quantization/fp8.py +906 -0
  518. vllm/model_executor/layers/quantization/gguf.py +565 -0
  519. vllm/model_executor/layers/quantization/gptq.py +278 -0
  520. vllm/model_executor/layers/quantization/gptq_bitblas.py +445 -0
  521. vllm/model_executor/layers/quantization/gptq_marlin.py +648 -0
  522. vllm/model_executor/layers/quantization/gptq_marlin_24.py +297 -0
  523. vllm/model_executor/layers/quantization/hqq_marlin.py +332 -0
  524. vllm/model_executor/layers/quantization/ipex_quant.py +250 -0
  525. vllm/model_executor/layers/quantization/kernels/__init__.py +0 -0
  526. vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py +90 -0
  527. vllm/model_executor/layers/quantization/kernels/mixed_precision/__init__.py +83 -0
  528. vllm/model_executor/layers/quantization/kernels/mixed_precision/allspark.py +116 -0
  529. vllm/model_executor/layers/quantization/kernels/mixed_precision/bitblas.py +300 -0
  530. vllm/model_executor/layers/quantization/kernels/mixed_precision/exllama.py +143 -0
  531. vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py +120 -0
  532. vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py +131 -0
  533. vllm/model_executor/layers/quantization/kernels/scaled_mm/ScaledMMLinearKernel.py +67 -0
  534. vllm/model_executor/layers/quantization/kernels/scaled_mm/__init__.py +87 -0
  535. vllm/model_executor/layers/quantization/kernels/scaled_mm/aiter.py +120 -0
  536. vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py +137 -0
  537. vllm/model_executor/layers/quantization/kernels/scaled_mm/triton.py +41 -0
  538. vllm/model_executor/layers/quantization/kernels/scaled_mm/xla.py +105 -0
  539. vllm/model_executor/layers/quantization/kv_cache.py +139 -0
  540. vllm/model_executor/layers/quantization/marlin.py +261 -0
  541. vllm/model_executor/layers/quantization/modelopt.py +737 -0
  542. vllm/model_executor/layers/quantization/moe_wna16.py +449 -0
  543. vllm/model_executor/layers/quantization/neuron_quant.py +76 -0
  544. vllm/model_executor/layers/quantization/ptpc_fp8.py +127 -0
  545. vllm/model_executor/layers/quantization/qqq.py +275 -0
  546. vllm/model_executor/layers/quantization/quark/__init__.py +0 -0
  547. vllm/model_executor/layers/quantization/quark/quark.py +441 -0
  548. vllm/model_executor/layers/quantization/quark/quark_moe.py +237 -0
  549. vllm/model_executor/layers/quantization/quark/schemes/__init__.py +9 -0
  550. vllm/model_executor/layers/quantization/quark/schemes/quark_scheme.py +55 -0
  551. vllm/model_executor/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py +126 -0
  552. vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py +146 -0
  553. vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_int8.py +122 -0
  554. vllm/model_executor/layers/quantization/quark/utils.py +105 -0
  555. vllm/model_executor/layers/quantization/schema.py +86 -0
  556. vllm/model_executor/layers/quantization/torchao.py +161 -0
  557. vllm/model_executor/layers/quantization/tpu_int8.py +121 -0
  558. vllm/model_executor/layers/quantization/utils/__init__.py +6 -0
  559. vllm/model_executor/layers/quantization/utils/allspark_utils.py +52 -0
  560. vllm/model_executor/layers/quantization/utils/bitblas_utils.py +208 -0
  561. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  562. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  563. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  564. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  565. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  566. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  567. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  568. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  569. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  570. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  571. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  572. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  573. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  574. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  575. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  576. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  577. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  578. vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  579. vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  580. vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  581. vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  582. vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  583. vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  584. vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  585. vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  586. vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  587. vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  588. vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  589. vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  590. vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  591. vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  592. vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  593. vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  594. vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  595. vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  596. vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  597. vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  598. vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  599. vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  600. vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  601. vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  602. vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  603. vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  604. vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  605. vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  606. vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  607. vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  608. vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  609. vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  610. vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  611. vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  612. vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  613. vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  614. vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  615. vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  616. vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  617. vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  618. vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  619. vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  620. vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  621. vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  622. vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  623. vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  624. vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  625. vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  626. vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  627. vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  628. vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  629. vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  630. vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  631. vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  632. vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  633. vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  634. vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  635. vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  636. vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  637. vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  638. vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  639. vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  640. vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  641. vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  642. vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  643. vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  644. vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  645. vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  646. vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  647. vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  648. vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  649. vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  650. vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  651. vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  652. vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  653. vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  654. vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  655. vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  656. vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  657. vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  658. vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  659. vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  660. vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  661. vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  662. vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  663. vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  664. vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  665. vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  666. vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  667. vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  668. vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  669. vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  670. vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  671. vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  672. vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  673. vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  674. vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  675. vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  676. vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  677. vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  678. vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  679. vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  680. vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  681. vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  682. vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +18 -0
  683. vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  684. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  685. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  686. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  687. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  688. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  689. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  690. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  691. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  692. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  693. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  694. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  695. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  696. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  697. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  698. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  699. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  700. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  701. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  702. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  703. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  704. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  705. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  706. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  707. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  708. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  709. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  710. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  711. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  712. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  713. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  714. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  715. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  716. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  717. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  718. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  719. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  720. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  721. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  722. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  723. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  724. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  725. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  726. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  727. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  728. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  729. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  730. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  731. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  732. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  733. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  734. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  735. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  736. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  737. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  738. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  739. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  740. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  741. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  742. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  743. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  744. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  745. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  746. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  747. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  748. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  749. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  750. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  751. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  752. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  753. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  754. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json +146 -0
  755. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json +146 -0
  756. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json +26 -0
  757. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  758. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  759. vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  760. vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  761. vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  762. vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json +164 -0
  763. vllm/model_executor/layers/quantization/utils/fp8_utils.py +618 -0
  764. vllm/model_executor/layers/quantization/utils/gptq_utils.py +95 -0
  765. vllm/model_executor/layers/quantization/utils/int8_utils.py +485 -0
  766. vllm/model_executor/layers/quantization/utils/layer_utils.py +40 -0
  767. vllm/model_executor/layers/quantization/utils/machete_utils.py +33 -0
  768. vllm/model_executor/layers/quantization/utils/marlin_utils.py +476 -0
  769. vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py +283 -0
  770. vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py +325 -0
  771. vllm/model_executor/layers/quantization/utils/marlin_utils_test.py +165 -0
  772. vllm/model_executor/layers/quantization/utils/marlin_utils_test_24.py +464 -0
  773. vllm/model_executor/layers/quantization/utils/marlin_utils_test_qqq.py +126 -0
  774. vllm/model_executor/layers/quantization/utils/mxfp4_utils.py +45 -0
  775. vllm/model_executor/layers/quantization/utils/nvfp4_emulation_utils.py +104 -0
  776. vllm/model_executor/layers/quantization/utils/quant_utils.py +573 -0
  777. vllm/model_executor/layers/quantization/utils/w8a8_utils.py +405 -0
  778. vllm/model_executor/layers/rejection_sampler.py +406 -0
  779. vllm/model_executor/layers/resampler.py +270 -0
  780. vllm/model_executor/layers/rotary_embedding.py +1862 -0
  781. vllm/model_executor/layers/sampler.py +1204 -0
  782. vllm/model_executor/layers/spec_decode_base_sampler.py +259 -0
  783. vllm/model_executor/layers/typical_acceptance_sampler.py +166 -0
  784. vllm/model_executor/layers/utils.py +95 -0
  785. vllm/model_executor/layers/vocab_parallel_embedding.py +487 -0
  786. vllm/model_executor/model_loader/__init__.py +76 -0
  787. vllm/model_executor/model_loader/base_loader.py +43 -0
  788. vllm/model_executor/model_loader/bitsandbytes_loader.py +570 -0
  789. vllm/model_executor/model_loader/default_loader.py +282 -0
  790. vllm/model_executor/model_loader/dummy_loader.py +27 -0
  791. vllm/model_executor/model_loader/gguf_loader.py +120 -0
  792. vllm/model_executor/model_loader/neuron.py +476 -0
  793. vllm/model_executor/model_loader/neuronx_distributed.py +685 -0
  794. vllm/model_executor/model_loader/runai_streamer_loader.py +109 -0
  795. vllm/model_executor/model_loader/sharded_state_loader.py +201 -0
  796. vllm/model_executor/model_loader/tensorizer.py +600 -0
  797. vllm/model_executor/model_loader/tensorizer_loader.py +123 -0
  798. vllm/model_executor/model_loader/tpu.py +112 -0
  799. vllm/model_executor/model_loader/utils.py +302 -0
  800. vllm/model_executor/model_loader/weight_utils.py +782 -0
  801. vllm/model_executor/models/__init__.py +28 -0
  802. vllm/model_executor/models/adapters.py +248 -0
  803. vllm/model_executor/models/aimv2.py +246 -0
  804. vllm/model_executor/models/arctic.py +559 -0
  805. vllm/model_executor/models/aria.py +657 -0
  806. vllm/model_executor/models/aya_vision.py +466 -0
  807. vllm/model_executor/models/baichuan.py +474 -0
  808. vllm/model_executor/models/bamba.py +543 -0
  809. vllm/model_executor/models/bart.py +938 -0
  810. vllm/model_executor/models/bert.py +523 -0
  811. vllm/model_executor/models/bert_with_rope.py +769 -0
  812. vllm/model_executor/models/blip.py +339 -0
  813. vllm/model_executor/models/blip2.py +718 -0
  814. vllm/model_executor/models/bloom.py +373 -0
  815. vllm/model_executor/models/chameleon.py +1136 -0
  816. vllm/model_executor/models/chatglm.py +478 -0
  817. vllm/model_executor/models/clip.py +407 -0
  818. vllm/model_executor/models/commandr.py +472 -0
  819. vllm/model_executor/models/constant_size_cache.py +137 -0
  820. vllm/model_executor/models/dbrx.py +472 -0
  821. vllm/model_executor/models/deepseek.py +486 -0
  822. vllm/model_executor/models/deepseek_mtp.py +269 -0
  823. vllm/model_executor/models/deepseek_v2.py +843 -0
  824. vllm/model_executor/models/deepseek_vl2.py +648 -0
  825. vllm/model_executor/models/eagle.py +260 -0
  826. vllm/model_executor/models/exaone.py +551 -0
  827. vllm/model_executor/models/fairseq2_llama.py +154 -0
  828. vllm/model_executor/models/falcon.py +510 -0
  829. vllm/model_executor/models/falcon_h1.py +685 -0
  830. vllm/model_executor/models/florence2.py +1103 -0
  831. vllm/model_executor/models/fuyu.py +389 -0
  832. vllm/model_executor/models/gemma.py +425 -0
  833. vllm/model_executor/models/gemma2.py +425 -0
  834. vllm/model_executor/models/gemma3.py +533 -0
  835. vllm/model_executor/models/gemma3_mm.py +709 -0
  836. vllm/model_executor/models/glm.py +23 -0
  837. vllm/model_executor/models/glm4.py +305 -0
  838. vllm/model_executor/models/glm4v.py +648 -0
  839. vllm/model_executor/models/gpt2.py +328 -0
  840. vllm/model_executor/models/gpt_bigcode.py +335 -0
  841. vllm/model_executor/models/gpt_j.py +339 -0
  842. vllm/model_executor/models/gpt_neox.py +332 -0
  843. vllm/model_executor/models/granite.py +493 -0
  844. vllm/model_executor/models/granite_speech.py +779 -0
  845. vllm/model_executor/models/granitemoe.py +437 -0
  846. vllm/model_executor/models/granitemoehybrid.py +586 -0
  847. vllm/model_executor/models/granitemoeshared.py +341 -0
  848. vllm/model_executor/models/gritlm.py +224 -0
  849. vllm/model_executor/models/grok1.py +546 -0
  850. vllm/model_executor/models/h2ovl.py +546 -0
  851. vllm/model_executor/models/idefics2_vision_model.py +389 -0
  852. vllm/model_executor/models/idefics3.py +776 -0
  853. vllm/model_executor/models/interfaces.py +572 -0
  854. vllm/model_executor/models/interfaces_base.py +164 -0
  855. vllm/model_executor/models/intern_vit.py +480 -0
  856. vllm/model_executor/models/internlm2.py +455 -0
  857. vllm/model_executor/models/internlm2_ve.py +147 -0
  858. vllm/model_executor/models/internvl.py +1418 -0
  859. vllm/model_executor/models/jais.py +373 -0
  860. vllm/model_executor/models/jamba.py +592 -0
  861. vllm/model_executor/models/kimi_vl.py +577 -0
  862. vllm/model_executor/models/llama.py +644 -0
  863. vllm/model_executor/models/llama4.py +532 -0
  864. vllm/model_executor/models/llama_eagle.py +165 -0
  865. vllm/model_executor/models/llama_eagle3.py +263 -0
  866. vllm/model_executor/models/llava.py +866 -0
  867. vllm/model_executor/models/llava_next.py +586 -0
  868. vllm/model_executor/models/llava_next_video.py +471 -0
  869. vllm/model_executor/models/llava_onevision.py +956 -0
  870. vllm/model_executor/models/mamba.py +273 -0
  871. vllm/model_executor/models/mamba2.py +308 -0
  872. vllm/model_executor/models/mamba_cache.py +76 -0
  873. vllm/model_executor/models/medusa.py +219 -0
  874. vllm/model_executor/models/mimo.py +192 -0
  875. vllm/model_executor/models/mimo_mtp.py +285 -0
  876. vllm/model_executor/models/minicpm.py +592 -0
  877. vllm/model_executor/models/minicpm3.py +230 -0
  878. vllm/model_executor/models/minicpm_eagle.py +391 -0
  879. vllm/model_executor/models/minicpmo.py +759 -0
  880. vllm/model_executor/models/minicpmv.py +1287 -0
  881. vllm/model_executor/models/minimax_cache.py +36 -0
  882. vllm/model_executor/models/minimax_text_01.py +1301 -0
  883. vllm/model_executor/models/minimax_vl_01.py +364 -0
  884. vllm/model_executor/models/mistral3.py +604 -0
  885. vllm/model_executor/models/mixtral.py +488 -0
  886. vllm/model_executor/models/mixtral_quant.py +453 -0
  887. vllm/model_executor/models/mllama.py +1624 -0
  888. vllm/model_executor/models/mllama4.py +938 -0
  889. vllm/model_executor/models/mlp_speculator.py +206 -0
  890. vllm/model_executor/models/modernbert.py +331 -0
  891. vllm/model_executor/models/module_mapping.py +72 -0
  892. vllm/model_executor/models/molmo.py +1568 -0
  893. vllm/model_executor/models/moonvit.py +630 -0
  894. vllm/model_executor/models/mpt.py +331 -0
  895. vllm/model_executor/models/nemotron.py +508 -0
  896. vllm/model_executor/models/nemotron_h.py +573 -0
  897. vllm/model_executor/models/nemotron_nas.py +484 -0
  898. vllm/model_executor/models/nvlm_d.py +216 -0
  899. vllm/model_executor/models/olmo.py +389 -0
  900. vllm/model_executor/models/olmo2.py +414 -0
  901. vllm/model_executor/models/olmoe.py +468 -0
  902. vllm/model_executor/models/opt.py +412 -0
  903. vllm/model_executor/models/orion.py +349 -0
  904. vllm/model_executor/models/ovis.py +567 -0
  905. vllm/model_executor/models/paligemma.py +398 -0
  906. vllm/model_executor/models/persimmon.py +344 -0
  907. vllm/model_executor/models/phi.py +356 -0
  908. vllm/model_executor/models/phi3.py +19 -0
  909. vllm/model_executor/models/phi3_small.py +465 -0
  910. vllm/model_executor/models/phi3v.py +723 -0
  911. vllm/model_executor/models/phi4mm.py +1246 -0
  912. vllm/model_executor/models/phi4mm_audio.py +1233 -0
  913. vllm/model_executor/models/phi4mm_utils.py +1884 -0
  914. vllm/model_executor/models/phimoe.py +665 -0
  915. vllm/model_executor/models/pixtral.py +1316 -0
  916. vllm/model_executor/models/plamo2.py +738 -0
  917. vllm/model_executor/models/prithvi_geospatial_mae.py +232 -0
  918. vllm/model_executor/models/qwen.py +362 -0
  919. vllm/model_executor/models/qwen2.py +497 -0
  920. vllm/model_executor/models/qwen2_5_omni_thinker.py +904 -0
  921. vllm/model_executor/models/qwen2_5_vl.py +1166 -0
  922. vllm/model_executor/models/qwen2_audio.py +410 -0
  923. vllm/model_executor/models/qwen2_moe.py +540 -0
  924. vllm/model_executor/models/qwen2_rm.py +132 -0
  925. vllm/model_executor/models/qwen2_vl.py +1405 -0
  926. vllm/model_executor/models/qwen3.py +321 -0
  927. vllm/model_executor/models/qwen3_moe.py +535 -0
  928. vllm/model_executor/models/qwen_vl.py +785 -0
  929. vllm/model_executor/models/registry.py +622 -0
  930. vllm/model_executor/models/roberta.py +276 -0
  931. vllm/model_executor/models/siglip.py +524 -0
  932. vllm/model_executor/models/skyworkr1v.py +951 -0
  933. vllm/model_executor/models/smolvlm.py +52 -0
  934. vllm/model_executor/models/solar.py +506 -0
  935. vllm/model_executor/models/stablelm.py +343 -0
  936. vllm/model_executor/models/starcoder2.py +356 -0
  937. vllm/model_executor/models/tarsier.py +643 -0
  938. vllm/model_executor/models/telechat2.py +140 -0
  939. vllm/model_executor/models/teleflm.py +79 -0
  940. vllm/model_executor/models/transformers.py +508 -0
  941. vllm/model_executor/models/ultravox.py +656 -0
  942. vllm/model_executor/models/utils.py +731 -0
  943. vllm/model_executor/models/vision.py +147 -0
  944. vllm/model_executor/models/whisper.py +747 -0
  945. vllm/model_executor/models/zamba2.py +1009 -0
  946. vllm/model_executor/parameter.py +459 -0
  947. vllm/model_executor/pooling_metadata.py +72 -0
  948. vllm/model_executor/sampling_metadata.py +597 -0
  949. vllm/model_executor/utils.py +77 -0
  950. vllm/multimodal/__init__.py +33 -0
  951. vllm/multimodal/audio.py +106 -0
  952. vllm/multimodal/base.py +219 -0
  953. vllm/multimodal/hasher.py +118 -0
  954. vllm/multimodal/image.py +97 -0
  955. vllm/multimodal/inputs.py +876 -0
  956. vllm/multimodal/parse.py +461 -0
  957. vllm/multimodal/processing.py +1895 -0
  958. vllm/multimodal/profiling.py +258 -0
  959. vllm/multimodal/registry.py +331 -0
  960. vllm/multimodal/utils.py +436 -0
  961. vllm/multimodal/video.py +198 -0
  962. vllm/outputs.py +512 -0
  963. vllm/platforms/__init__.py +291 -0
  964. vllm/platforms/cpu.py +266 -0
  965. vllm/platforms/cuda.py +526 -0
  966. vllm/platforms/hpu.py +106 -0
  967. vllm/platforms/interface.py +538 -0
  968. vllm/platforms/neuron.py +150 -0
  969. vllm/platforms/rocm.py +435 -0
  970. vllm/platforms/tpu.py +216 -0
  971. vllm/platforms/xpu.py +156 -0
  972. vllm/plugins/__init__.py +94 -0
  973. vllm/plugins/lora_resolvers/README.md +15 -0
  974. vllm/plugins/lora_resolvers/__init__.py +0 -0
  975. vllm/plugins/lora_resolvers/filesystem_resolver.py +50 -0
  976. vllm/pooling_params.py +54 -0
  977. vllm/profiler/__init__.py +0 -0
  978. vllm/profiler/layerwise_profile.py +375 -0
  979. vllm/profiler/utils.py +148 -0
  980. vllm/prompt_adapter/__init__.py +0 -0
  981. vllm/prompt_adapter/layers.py +83 -0
  982. vllm/prompt_adapter/models.py +358 -0
  983. vllm/prompt_adapter/request.py +37 -0
  984. vllm/prompt_adapter/utils.py +98 -0
  985. vllm/prompt_adapter/worker_manager.py +179 -0
  986. vllm/py.typed +2 -0
  987. vllm/reasoning/__init__.py +15 -0
  988. vllm/reasoning/abs_reasoning_parsers.py +192 -0
  989. vllm/reasoning/deepseek_r1_reasoning_parser.py +173 -0
  990. vllm/reasoning/granite_reasoning_parser.py +363 -0
  991. vllm/reasoning/qwen3_reasoning_parser.py +151 -0
  992. vllm/sampling_params.py +602 -0
  993. vllm/scalar_type.py +347 -0
  994. vllm/scripts.py +15 -0
  995. vllm/sequence.py +1568 -0
  996. vllm/spec_decode/__init__.py +0 -0
  997. vllm/spec_decode/batch_expansion.py +506 -0
  998. vllm/spec_decode/draft_model_runner.py +349 -0
  999. vllm/spec_decode/interfaces.py +99 -0
  1000. vllm/spec_decode/medusa_worker.py +138 -0
  1001. vllm/spec_decode/metrics.py +213 -0
  1002. vllm/spec_decode/mlp_speculator_worker.py +94 -0
  1003. vllm/spec_decode/mqa_scorer.py +160 -0
  1004. vllm/spec_decode/multi_step_worker.py +423 -0
  1005. vllm/spec_decode/ngram_worker.py +196 -0
  1006. vllm/spec_decode/proposer_worker_base.py +59 -0
  1007. vllm/spec_decode/smaller_tp_proposer_worker.py +196 -0
  1008. vllm/spec_decode/spec_decode_worker.py +1326 -0
  1009. vllm/spec_decode/target_model_runner.py +45 -0
  1010. vllm/spec_decode/top1_proposer.py +275 -0
  1011. vllm/spec_decode/util.py +277 -0
  1012. vllm/test_utils.py +130 -0
  1013. vllm/third_party/__init__.py +0 -0
  1014. vllm/third_party/pynvml.py +6140 -0
  1015. vllm/tracing.py +131 -0
  1016. vllm/transformers_utils/__init__.py +24 -0
  1017. vllm/transformers_utils/chat_templates/__init__.py +5 -0
  1018. vllm/transformers_utils/chat_templates/registry.py +60 -0
  1019. vllm/transformers_utils/chat_templates/template_basic.jinja +3 -0
  1020. vllm/transformers_utils/chat_templates/template_blip2.jinja +11 -0
  1021. vllm/transformers_utils/chat_templates/template_chatml.jinja +10 -0
  1022. vllm/transformers_utils/chat_templates/template_deepseek_vl2.jinja +23 -0
  1023. vllm/transformers_utils/chat_templates/template_fuyu.jinja +3 -0
  1024. vllm/transformers_utils/config.py +887 -0
  1025. vllm/transformers_utils/configs/__init__.py +61 -0
  1026. vllm/transformers_utils/configs/arctic.py +207 -0
  1027. vllm/transformers_utils/configs/chatglm.py +72 -0
  1028. vllm/transformers_utils/configs/cohere2.py +195 -0
  1029. vllm/transformers_utils/configs/dbrx.py +280 -0
  1030. vllm/transformers_utils/configs/deepseek_vl2.py +216 -0
  1031. vllm/transformers_utils/configs/eagle.py +85 -0
  1032. vllm/transformers_utils/configs/exaone.py +190 -0
  1033. vllm/transformers_utils/configs/falcon.py +90 -0
  1034. vllm/transformers_utils/configs/h2ovl.py +16 -0
  1035. vllm/transformers_utils/configs/internvl.py +54 -0
  1036. vllm/transformers_utils/configs/jais.py +238 -0
  1037. vllm/transformers_utils/configs/kimi_vl.py +37 -0
  1038. vllm/transformers_utils/configs/medusa.py +63 -0
  1039. vllm/transformers_utils/configs/minimax_text_01.py +70 -0
  1040. vllm/transformers_utils/configs/minimax_vl_01.py +71 -0
  1041. vllm/transformers_utils/configs/mllama.py +31 -0
  1042. vllm/transformers_utils/configs/mlp_speculator.py +68 -0
  1043. vllm/transformers_utils/configs/moonvit.py +33 -0
  1044. vllm/transformers_utils/configs/mpt.py +180 -0
  1045. vllm/transformers_utils/configs/nemotron.py +205 -0
  1046. vllm/transformers_utils/configs/nemotron_h.py +258 -0
  1047. vllm/transformers_utils/configs/nvlm_d.py +15 -0
  1048. vllm/transformers_utils/configs/ovis.py +184 -0
  1049. vllm/transformers_utils/configs/skyworkr1v.py +54 -0
  1050. vllm/transformers_utils/configs/solar.py +247 -0
  1051. vllm/transformers_utils/configs/telechat2.py +64 -0
  1052. vllm/transformers_utils/configs/ultravox.py +108 -0
  1053. vllm/transformers_utils/detokenizer.py +168 -0
  1054. vllm/transformers_utils/detokenizer_utils.py +189 -0
  1055. vllm/transformers_utils/processor.py +221 -0
  1056. vllm/transformers_utils/processors/__init__.py +8 -0
  1057. vllm/transformers_utils/processors/deepseek_vl2.py +363 -0
  1058. vllm/transformers_utils/processors/ovis.py +420 -0
  1059. vllm/transformers_utils/s3_utils.py +162 -0
  1060. vllm/transformers_utils/tokenizer.py +302 -0
  1061. vllm/transformers_utils/tokenizer_base.py +149 -0
  1062. vllm/transformers_utils/tokenizer_group.py +120 -0
  1063. vllm/transformers_utils/tokenizers/__init__.py +10 -0
  1064. vllm/transformers_utils/tokenizers/mistral.py +493 -0
  1065. vllm/transformers_utils/utils.py +99 -0
  1066. vllm/triton_utils/__init__.py +14 -0
  1067. vllm/triton_utils/importing.py +50 -0
  1068. vllm/usage/__init__.py +0 -0
  1069. vllm/usage/usage_lib.py +256 -0
  1070. vllm/utils.py +2910 -0
  1071. vllm/v1/__init__.py +0 -0
  1072. vllm/v1/attention/__init__.py +0 -0
  1073. vllm/v1/attention/backends/__init__.py +0 -0
  1074. vllm/v1/attention/backends/cpu_attn.py +163 -0
  1075. vllm/v1/attention/backends/flash_attn.py +869 -0
  1076. vllm/v1/attention/backends/flashinfer.py +651 -0
  1077. vllm/v1/attention/backends/flex_attention.py +477 -0
  1078. vllm/v1/attention/backends/mla/__init__.py +0 -0
  1079. vllm/v1/attention/backends/mla/common.py +931 -0
  1080. vllm/v1/attention/backends/mla/cutlass_mla.py +97 -0
  1081. vllm/v1/attention/backends/mla/flashmla.py +152 -0
  1082. vllm/v1/attention/backends/mla/rocm_aiter_mla.py +220 -0
  1083. vllm/v1/attention/backends/mla/triton_mla.py +120 -0
  1084. vllm/v1/attention/backends/pallas.py +240 -0
  1085. vllm/v1/attention/backends/triton_attn.py +285 -0
  1086. vllm/v1/attention/backends/utils.py +52 -0
  1087. vllm/v1/core/__init__.py +0 -0
  1088. vllm/v1/core/block_pool.py +349 -0
  1089. vllm/v1/core/encoder_cache_manager.py +150 -0
  1090. vllm/v1/core/kv_cache_coordinator.py +363 -0
  1091. vllm/v1/core/kv_cache_manager.py +392 -0
  1092. vllm/v1/core/kv_cache_utils.py +996 -0
  1093. vllm/v1/core/sched/__init__.py +0 -0
  1094. vllm/v1/core/sched/interface.py +150 -0
  1095. vllm/v1/core/sched/output.py +154 -0
  1096. vllm/v1/core/sched/scheduler.py +1044 -0
  1097. vllm/v1/core/sched/utils.py +23 -0
  1098. vllm/v1/core/single_type_kv_cache_manager.py +403 -0
  1099. vllm/v1/engine/__init__.py +173 -0
  1100. vllm/v1/engine/async_llm.py +558 -0
  1101. vllm/v1/engine/coordinator.py +253 -0
  1102. vllm/v1/engine/core.py +961 -0
  1103. vllm/v1/engine/core_client.py +1129 -0
  1104. vllm/v1/engine/detokenizer.py +261 -0
  1105. vllm/v1/engine/exceptions.py +17 -0
  1106. vllm/v1/engine/llm_engine.py +317 -0
  1107. vllm/v1/engine/logprobs.py +199 -0
  1108. vllm/v1/engine/mm_input_cache.py +91 -0
  1109. vllm/v1/engine/output_processor.py +428 -0
  1110. vllm/v1/engine/parallel_sampling.py +133 -0
  1111. vllm/v1/engine/processor.py +407 -0
  1112. vllm/v1/executor/__init__.py +0 -0
  1113. vllm/v1/executor/abstract.py +113 -0
  1114. vllm/v1/executor/multiproc_executor.py +537 -0
  1115. vllm/v1/executor/ray_distributed_executor.py +62 -0
  1116. vllm/v1/kv_cache_interface.py +194 -0
  1117. vllm/v1/metrics/__init__.py +0 -0
  1118. vllm/v1/metrics/loggers.py +523 -0
  1119. vllm/v1/metrics/prometheus.py +82 -0
  1120. vllm/v1/metrics/ray_wrappers.py +131 -0
  1121. vllm/v1/metrics/reader.py +246 -0
  1122. vllm/v1/metrics/stats.py +239 -0
  1123. vllm/v1/outputs.py +116 -0
  1124. vllm/v1/request.py +193 -0
  1125. vllm/v1/sample/__init__.py +0 -0
  1126. vllm/v1/sample/metadata.py +44 -0
  1127. vllm/v1/sample/ops/__init__.py +0 -0
  1128. vllm/v1/sample/ops/bad_words.py +39 -0
  1129. vllm/v1/sample/ops/penalties.py +59 -0
  1130. vllm/v1/sample/ops/topk_topp_sampler.py +293 -0
  1131. vllm/v1/sample/rejection_sampler.py +631 -0
  1132. vllm/v1/sample/sampler.py +286 -0
  1133. vllm/v1/sample/tpu/__init__.py +0 -0
  1134. vllm/v1/sample/tpu/metadata.py +124 -0
  1135. vllm/v1/sample/tpu/sampler.py +145 -0
  1136. vllm/v1/serial_utils.py +315 -0
  1137. vllm/v1/spec_decode/__init__.py +0 -0
  1138. vllm/v1/spec_decode/eagle.py +432 -0
  1139. vllm/v1/spec_decode/medusa.py +62 -0
  1140. vllm/v1/spec_decode/metadata.py +62 -0
  1141. vllm/v1/spec_decode/metrics.py +178 -0
  1142. vllm/v1/spec_decode/ngram_proposer.py +132 -0
  1143. vllm/v1/spec_decode/utils.py +46 -0
  1144. vllm/v1/structured_output/__init__.py +222 -0
  1145. vllm/v1/structured_output/backend_guidance.py +245 -0
  1146. vllm/v1/structured_output/backend_types.py +134 -0
  1147. vllm/v1/structured_output/backend_xgrammar.py +318 -0
  1148. vllm/v1/structured_output/request.py +86 -0
  1149. vllm/v1/structured_output/utils.py +175 -0
  1150. vllm/v1/utils.py +743 -0
  1151. vllm/v1/worker/__init__.py +0 -0
  1152. vllm/v1/worker/block_table.py +142 -0
  1153. vllm/v1/worker/cpu_model_runner.py +86 -0
  1154. vllm/v1/worker/cpu_worker.py +152 -0
  1155. vllm/v1/worker/gpu_input_batch.py +681 -0
  1156. vllm/v1/worker/gpu_model_runner.py +2320 -0
  1157. vllm/v1/worker/gpu_worker.py +393 -0
  1158. vllm/v1/worker/lora_model_runner_mixin.py +173 -0
  1159. vllm/v1/worker/tpu_model_runner.py +1673 -0
  1160. vllm/v1/worker/tpu_worker.py +299 -0
  1161. vllm/v1/worker/utils.py +111 -0
  1162. vllm/v1/worker/worker_base.py +65 -0
  1163. vllm/version.py +41 -0
  1164. vllm/vllm_flash_attn/.gitkeep +0 -0
  1165. vllm/worker/__init__.py +0 -0
  1166. vllm/worker/cache_engine.py +145 -0
  1167. vllm/worker/cpu_enc_dec_model_runner.py +326 -0
  1168. vllm/worker/cpu_model_runner.py +671 -0
  1169. vllm/worker/cpu_pooling_model_runner.py +125 -0
  1170. vllm/worker/cpu_worker.py +450 -0
  1171. vllm/worker/enc_dec_model_runner.py +555 -0
  1172. vllm/worker/hpu_model_runner.py +2320 -0
  1173. vllm/worker/hpu_worker.py +484 -0
  1174. vllm/worker/model_runner.py +2178 -0
  1175. vllm/worker/model_runner_base.py +282 -0
  1176. vllm/worker/multi_step_hpu_worker.py +123 -0
  1177. vllm/worker/multi_step_model_runner.py +911 -0
  1178. vllm/worker/multi_step_neuron_model_runner.py +84 -0
  1179. vllm/worker/multi_step_neuronx_distributed_model_runner.py +63 -0
  1180. vllm/worker/multi_step_tpu_worker.py +108 -0
  1181. vllm/worker/multi_step_worker.py +197 -0
  1182. vllm/worker/neuron_model_runner.py +460 -0
  1183. vllm/worker/neuron_worker.py +193 -0
  1184. vllm/worker/neuronx_distributed_model_runner.py +294 -0
  1185. vllm/worker/pooling_model_runner.py +211 -0
  1186. vllm/worker/tpu_model_runner.py +909 -0
  1187. vllm/worker/tpu_worker.py +337 -0
  1188. vllm/worker/utils.py +53 -0
  1189. vllm/worker/worker.py +577 -0
  1190. vllm/worker/worker_base.py +646 -0
  1191. vllm/worker/xpu_model_runner.py +606 -0
  1192. vllm/worker/xpu_worker.py +186 -0
  1193. vllm_cpu_amxbf16-0.9.1.dist-info/METADATA +305 -0
  1194. vllm_cpu_amxbf16-0.9.1.dist-info/RECORD +1197 -0
  1195. vllm_cpu_amxbf16-0.9.1.dist-info/WHEEL +5 -0
  1196. vllm_cpu_amxbf16-0.9.1.dist-info/entry_points.txt +5 -0
  1197. vllm_cpu_amxbf16-0.9.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1197 @@
1
+ vllm/_C.abi3.so,sha256=Opd1cgWqrRGSwVWwdu_j0zYp3L_9di3FQgN5i5DaHqQ,20240976
2
+ vllm/__init__.py,sha256=Kem0adJsL5ncZMaclrWZFD3aQPlyXDpPx7JEM40_zXE,1984
3
+ vllm/_custom_ops.py,sha256=Z2ZcP0Gn-WzcHhhUt8WzQVWj3pIFCqi07tmMDIcjZmU,76683
4
+ vllm/_ipex_ops.py,sha256=Bus71ROUjsvueWC_fGEODjrUZ0aYgfzyNT_kVYXmmqQ,8715
5
+ vllm/_version.py,sha256=LwGndsRSpclYq-j3wgRr2nzOXwUYj0Jtg7Kof7R0BEw,704
6
+ vllm/beam_search.py,sha256=X6PLlBDWaeZNKK4ttym328Rvjnup5wQnIDmSafMNSQI,2639
7
+ vllm/collect_env.py,sha256=Z2dauhcwD50tOczPNrk5Ynqqvckc6N3tJk7scouvms4,28292
8
+ vllm/config.py,sha256=juBa1hTLqWRyhwLt6q8-cE9epvw8spbOhAp_H4fskUY,207019
9
+ vllm/connections.py,sha256=YL5sQpi0vg05ZwNhpnn-7h9oCGjQeKWjTRYPCB_pKGQ,5088
10
+ vllm/env_override.py,sha256=fbUyMmMdoOLUP9aht_CHUPfvG3vkbKoxZHbEMR-T3sA,1698
11
+ vllm/envs.py,sha256=pdiltv1CrrmNlrzgcUiMOmIBWxvLlTqWHf0ez0jgsY8,38271
12
+ vllm/forward_context.py,sha256=PllkGCMC5x00RlodMHBwWeUerZvweoHxWvUUR5mm1pc,7241
13
+ vllm/jsontree.py,sha256=Fl-Ae-1z0GOKsDrdx3ibbXg2IFhbL_B3L3S90In0vQY,2212
14
+ vllm/logger.py,sha256=DI8-5MoH8fubVMAeqDWYQSAS79T2h6pfgErjaP8T7bE,7575
15
+ vllm/logits_process.py,sha256=l7EmiG8n4-zYS1ZHr3jnCJDHSvWUfXdfaIx7y6yYzXw,4440
16
+ vllm/outputs.py,sha256=lG2fRt1bpCYRtAYxEuMn_beWdt3JBhsAofq_EzQwnEw,20029
17
+ vllm/pooling_params.py,sha256=VYOF0KqoseeB5lnjekAOUA_GQfTiSdEqK4MjNeFLD4c,2086
18
+ vllm/py.typed,sha256=F5LUrt0voM87SNuuOky2X9veCVDqJUgRg_VohYqDigY,65
19
+ vllm/sampling_params.py,sha256=sqUvh-RT8aMtr-j7EGzjWjhoUQcy-2PEjLR5oyTNVRw,27008
20
+ vllm/scalar_type.py,sha256=L6WsIC1s15P7cczrNMo4Kp-t0PhJa2N9hooM7sx4RQA,12371
21
+ vllm/scripts.py,sha256=GBLs96wGlHWNj7WDtb-kuwHCR7HCdQIJN-vDoPh2Ud8,501
22
+ vllm/sequence.py,sha256=6Ufm46WIR2LMC0GW1pEEzfXr9yYw7EtuQqIvqEv-Ihw,62319
23
+ vllm/test_utils.py,sha256=kefc-m-RCrSmJFGtlla-jPk6F9uxO7kibBSjIIM-lco,6061
24
+ vllm/tracing.py,sha256=6fIL7vcKohcG2tbilH0UheKvev9N7TCLxWCvfYJOwSI,4845
25
+ vllm/utils.py,sha256=8O-nJAyxq0MRnrTJUPb9F41YZPKDXP5Eqy-lgkSyHqw,98828
26
+ vllm/version.py,sha256=j5_jpV6lcpUIkq56JF2uxJS02TJjG_7nGrzjvf7ptDI,1375
27
+ vllm/adapter_commons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
+ vllm/adapter_commons/layers.py,sha256=ahfExqPZiZfRoi8gmzp31Hj3MKGvcalVGGR6w9lhdNA,450
29
+ vllm/adapter_commons/models.py,sha256=5XvXSRMnC8TufQZwf7O01SH83chilebej3Y4LnQKrhU,2870
30
+ vllm/adapter_commons/request.py,sha256=PGKCbvlRywX4PTQgT9hPBGYwkc9scpiic-Hi3K6Uokg,686
31
+ vllm/adapter_commons/utils.py,sha256=eAbRtVVso5fvSLp28k9VjArFUjrbl8BT0NkJPmJIgF4,3329
32
+ vllm/adapter_commons/worker_manager.py,sha256=PKuagBEoFxWzPz9AbUogHGaNJst7mWlAOarcZkUBa7I,992
33
+ vllm/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
+ vllm/assets/audio.py,sha256=03OUK2QRgJDgEucc2HDl3vdrwgiRAuBxzckas015Ces,1254
35
+ vllm/assets/base.py,sha256=Yv1bK5DmfZbWr_1Uqhb-mawh5M6yC1tlFT6gRkHtcgs,1265
36
+ vllm/assets/image.py,sha256=vW3KXy3bM-0UfQ4ZSFZPwOQfIQCIRd-4D8Xp3oY1Ics,1024
37
+ vllm/assets/video.py,sha256=71Tk4-IEQJmpVSoCyLS0Az1HYwQrpjhu1ahomIeu8h0,3481
38
+ vllm/attention/__init__.py,sha256=yfqspQ_l0_9DtVE42eeurvzLLQCyx_TCBV6Uo4_nZK8,679
39
+ vllm/attention/layer.py,sha256=_l3YBKs3F5SYU25uvmlCy0EgzeYgUbWhGYCVoCRPdvs,19268
40
+ vllm/attention/selector.py,sha256=_2J2G9oDqZaIwvz0Md7kJ8K4jpGXFRrUPYT0n2PIBwA,5934
41
+ vllm/attention/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
+ vllm/attention/backends/abstract.py,sha256=116UUlAFXWKKWhtJTcz6_IEQ4vG-DUlF48xgFCInjOo,9486
43
+ vllm/attention/backends/blocksparse_attn.py,sha256=EQb3Ek_ZyJYBgasxIlCtQt6DkU9gKUe9csx4W5nPwY8,18264
44
+ vllm/attention/backends/cpu_mla.py,sha256=I5vbO4xEMp0kb7IZySOaA1z7qy6bIBysP-4hPKFTiGw,11342
45
+ vllm/attention/backends/dual_chunk_flash_attn.py,sha256=n2Cj2cq4_kWmvzMvdoPLdUhoJQhHjK9h7pkLvTLGS6E,66236
46
+ vllm/attention/backends/flash_attn.py,sha256=2mZqQZiNfxnfFyR4i3hlSu5YUyNDQBFX8eR_5khkg6I,44738
47
+ vllm/attention/backends/flashinfer.py,sha256=resEqV9oL4a2Hwfdgk3rcT8OguStktTyaaMyaBlWYeg,47962
48
+ vllm/attention/backends/flashmla.py,sha256=l0Rf9Jj08GqugdjE38XvDeiVVsG4E7g7fy1WlaQiJp4,9175
49
+ vllm/attention/backends/hpu_attn.py,sha256=bZ74G0qwa-obI_hg4H46yzpJu_MslFRO6oQXTKpj8ms,12535
50
+ vllm/attention/backends/ipex_attn.py,sha256=-s73WbXIrOqtC2f5-r15ncQa5Ts18gQyLrawRz5pZoI,15080
51
+ vllm/attention/backends/pallas.py,sha256=N8Nrk-WKp_FJIJvHJPqKOr9AlTF8IFoImzRpYbIRDZs,13934
52
+ vllm/attention/backends/placeholder_attn.py,sha256=lY1GvhayOg_v04gsJbLHCyF9-Eb3gY9bNGUY8rG3ZZM,16165
53
+ vllm/attention/backends/rocm_aiter_mla.py,sha256=c19DnPe_k5tCOnQ8gTK5H2DU1G1maYjjffbj1BPa7A8,18027
54
+ vllm/attention/backends/rocm_flash_attn.py,sha256=fjnqqUGW052tBbrsHNG3eTsR6lI88AHkaxl0yupPV2g,42770
55
+ vllm/attention/backends/torch_sdpa.py,sha256=lBP2dzsNndin1-9CCR5TO19fTBDTQPe3AkHo70N3vos,27801
56
+ vllm/attention/backends/triton_mla.py,sha256=fwawwKUDQPKQ6ytss1usWLn78F3TBe-V7--pUvADrXk,4099
57
+ vllm/attention/backends/utils.py,sha256=56GrHMcL6g-7MKW_VPzE6b-SNGJIIV-UiWYRmDbnIXk,25995
58
+ vllm/attention/backends/xformers.py,sha256=4bxITBZMb_W3__8ueY3GR1ZCwrnU-6sVSx5kgQTz1Zk,33891
59
+ vllm/attention/backends/mla/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
60
+ vllm/attention/backends/mla/common.py,sha256=Hm7B1DYStT5H6UPBIiTeKdvQCGlhayyO1fpOSBSENHA,57866
61
+ vllm/attention/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
+ vllm/attention/ops/chunked_prefill_paged_decode.py,sha256=T63JOKLYGZErqGFnDGaAbrxKN6czkIKeSG34m-r4Jak,12710
63
+ vllm/attention/ops/flashmla.py,sha256=khmeRslq_BU7yrGhRUzHShRLeALP8OXVNzfdCVhEzkM,3953
64
+ vllm/attention/ops/hpu_paged_attn.py,sha256=h_kMJN7z3JV0ckHOcRRYmCHiA3UIIDxf630B_c5-S6k,2986
65
+ vllm/attention/ops/ipex_attn.py,sha256=7AVjr3jnRivgTgZxVSQYj3EEJ9TuAmKGjW5QF1YYXRs,5719
66
+ vllm/attention/ops/merge_attn_states.py,sha256=1ed_lE3BuH_ahf1PW4d0I6izUasZuKbQdKSeVLu6ESw,1706
67
+ vllm/attention/ops/nki_flash_attn.py,sha256=XIqlLhupMDdX4vGTyoHyTsRT2OWlxzCJoudEE6cTL5Q,32681
68
+ vllm/attention/ops/paged_attn.py,sha256=79sb3nWpOaZF-5YxqhqUSmOnhc209_Kf0l1fkfsaY9w,8388
69
+ vllm/attention/ops/prefix_prefill.py,sha256=ZelX65iQLi91xl7y9DpsfYiEfXjFmTjqHkeLPzY6kas,31106
70
+ vllm/attention/ops/rocm_aiter_mla.py,sha256=6RKmanIe8iZTQAMju9p9PCEXar7Q1oF6DamqmdkIkY0,3580
71
+ vllm/attention/ops/rocm_aiter_paged_attn.py,sha256=NofcR9IMgoqBVT-iyHNNFmJCmq6pv41deXucZcF-WPg,3954
72
+ vllm/attention/ops/triton_decode_attention.py,sha256=Wv9ZicJpuhnruLom2yeN4LMf4_mQOqN5ZXaxqTnaXOw,19190
73
+ vllm/attention/ops/triton_flash_attention.py,sha256=PyUtobu-7ZHIO8Fv-swpx58L6r9FFd-wUP4AILuRhbE,32259
74
+ vllm/attention/ops/triton_merge_attn_states.py,sha256=gKRLVpClXSa4wBO3HkFk4OBKtUPaNOUq7oJkKHQ5X0o,3563
75
+ vllm/attention/ops/triton_unified_attention.py,sha256=hMpRDwYSwaS-Yj-0R6pIazCGzmvOhWxOqpZJJ_VyDSQ,10771
76
+ vllm/attention/ops/blocksparse_attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
+ vllm/attention/ops/blocksparse_attention/blocksparse_attention_kernel.py,sha256=-l-CrX89lHlSlWKX8oPm1YICFLgJmWOj-yOMrc0mVXA,11603
78
+ vllm/attention/ops/blocksparse_attention/interface.py,sha256=B8Nam87zhetCakxhplBaTrGWgTfx5zYPW562KOIqDVI,9395
79
+ vllm/attention/ops/blocksparse_attention/utils.py,sha256=wS5dY0KCSVr9czfs0co93WuHLFziTIhlePjlD_cs33g,8178
80
+ vllm/attention/utils/fa_utils.py,sha256=H0xd2ONZzoibdctyBAzuOdDA6-jpjP7ntMbhteihyVE,2087
81
+ vllm/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
82
+ vllm/benchmarks/datasets.py,sha256=9bCaWBNVE8JHeagDNmV-wmAJYx8KjbrziFK-Z-TD8Xk,44157
83
+ vllm/benchmarks/endpoint_request_func.py,sha256=r0fMEXJhQIxsIOyIxcgvR89HqUlKmG4B_CfAPQMZ4eM,14528
84
+ vllm/benchmarks/latency.py,sha256=res4151NBrj9zX_-qBVXGNmihj490wfSq8cdpbpqWsk,6047
85
+ vllm/benchmarks/serve.py,sha256=8TK8gi2Ht7mLOkexn6letAKsD4l8ykqtz-GypB3rW68,44843
86
+ vllm/benchmarks/throughput.py,sha256=9UFSkX4ATBFvIpxHmvkgij3lGU6ES3H08JEGXtDnl9E,24868
87
+ vllm/benchmarks/utils.py,sha256=a1KVuL7NttB3GP_aGBCls90lULsr2ryIA4PEv_92yr0,2247
88
+ vllm/compilation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
89
+ vllm/compilation/activation_quant_fusion.py,sha256=QLg7qfOaIWv8nCslmq_JtMEZAlxaSmB4CSFWyET6a-w,3235
90
+ vllm/compilation/backends.py,sha256=ZLABN3Gva0F08PyU8nFWvNxrPFOy4m0w15zKp698ljg,22489
91
+ vllm/compilation/base_piecewise_backend.py,sha256=upn_akHBpZsFqq6N6b5dKKn0K-6mvEqIAkt1wtnaK5w,2865
92
+ vllm/compilation/collective_fusion.py,sha256=NoJ0fo8hdY9xP7sWsaRSlBA5yZms_G1zJjqA7bFOmLo,4332
93
+ vllm/compilation/compiler_interface.py,sha256=qnlAWsJQcrbA18jGXX-5ae1TgUvjGozPQyChoPEaxWk,22207
94
+ vllm/compilation/counter.py,sha256=3SgHVIe4QBRwnrhTfGYjWwV2em9JGYBCrPUU7d6benY,1141
95
+ vllm/compilation/cuda_piecewise_backend.py,sha256=0syIQ7CEgUn1EATdhlaiyCze6ZoDquWxL3NJy8CCTL8,9058
96
+ vllm/compilation/decorators.py,sha256=SOy5q_vIbYAQUfj2NpTGiOzmiSBODgDtFQ0FRpU0jEo,10303
97
+ vllm/compilation/fix_functionalization.py,sha256=SGqUN-XC3PlEhSQFSfi3SM5hzlPWaadKeujLP1jwF_I,8462
98
+ vllm/compilation/fusion.py,sha256=zgzbD4fhoqO_pr0MdiitG7fKF_msJqjiluzwYZDBRwg,24583
99
+ vllm/compilation/fx_utils.py,sha256=HD0dNMaq6RCx0LJ0R86S9GZ4BVwJgA5dP4zNeY3r6zI,2119
100
+ vllm/compilation/inductor_pass.py,sha256=C3vwg9EQGkqcK32ffo6lxkU1BB400gb3X7NO2mlydJg,3451
101
+ vllm/compilation/monitor.py,sha256=3eICJCWnpV8yFqh1W3Dc2pb5e78-lu_BoiVr5EvE1mU,1415
102
+ vllm/compilation/multi_output_match.py,sha256=uuVLvzaBuEZjOLbrfQVH9tZvaBD2GdNwbidSNaWv9dY,3904
103
+ vllm/compilation/noop_elimination.py,sha256=ABGKVff7gMb0gCkWAj-4Z5_tSqSVyy_LpyKixcPVPZk,5303
104
+ vllm/compilation/pass_manager.py,sha256=KBIYnBgOBiV_H1DBamEYgEwthWc51NH9ZcfkUTIE02U,2971
105
+ vllm/compilation/sequence_parallelism.py,sha256=AVR_vm3BSTJYNEGgyoJvEY-FrkPhbmMDnbpRiLZd5RM,9718
106
+ vllm/compilation/torch25_custom_graph_pass.py,sha256=OTZc1hc3eLlS8LhG4MvHwpglY5_1E_voPW7ShGS5HJs,1430
107
+ vllm/compilation/vllm_inductor_pass.py,sha256=DT6-FafI6GoMsatha-63vGGtJwZ8W6aR39cpd8T8MYg,2473
108
+ vllm/compilation/wrapper.py,sha256=J4zjVIUTfQ5nXO16kG2zUEfoi4J7LHO0H0bWoB0OIrE,5933
109
+ vllm/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
110
+ vllm/core/block_manager.py,sha256=0ZUWAiPHfAsP2vHK5o6eid0cI6qmVE4Abbl883ZE4NY,22273
111
+ vllm/core/evictor.py,sha256=FUiGVDiY8cmE59jf6ydo5CtYMEMGWWu6Zg6OLVyyQkw,5515
112
+ vllm/core/interfaces.py,sha256=90Km4Z25RxvUjTZr23sorms3bSyDsqA-viC9g903Ty4,3659
113
+ vllm/core/placeholder_block_space_manager.py,sha256=SXJsXrm2QoTI6vuLrxsBqHTITxyvzAkpO7bgAE1w5T8,3040
114
+ vllm/core/scheduler.py,sha256=YNrXuXDizcQJ4Ocz5ZMB7gbTOB8K-kGMqEvYDUG6AMQ,91778
115
+ vllm/core/block/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
116
+ vllm/core/block/block_table.py,sha256=Yokf5KU-onU1gp7dlqfWa0mQlCbtx9vNceGcg1dFDDg,16091
117
+ vllm/core/block/common.py,sha256=nTf2wtA0nWtbGoh5qsN8XmwIP8r9xlP8ry1lInaZTRY,13269
118
+ vllm/core/block/cpu_gpu_block_allocator.py,sha256=SKxpxXRGXLQuY3aFChuGkOnp_Ojpx_rFZJ0CMZY1FLc,17016
119
+ vllm/core/block/interfaces.py,sha256=BLv8rVkTVINbae_-Y5MLKUuZK-9Tix5qxlMRcb1jMRY,8213
120
+ vllm/core/block/naive_block.py,sha256=XPf0jZ5e5fJoXUss_Y6_mXAQ0D5bdYkCXZ2_XxlVoGU,16424
121
+ vllm/core/block/prefix_caching_block.py,sha256=BG_PRgkcH9vv25RhUlEVXPVSHBJw3ysKA-DSLBF2kk0,44251
122
+ vllm/core/block/utils.py,sha256=ecLOYLGeBIeqMCpbUNBoW6hJIHlG5EINgsf2Or1BHyk,997
123
+ vllm/device_allocator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
124
+ vllm/device_allocator/cumem.py,sha256=8mgglTfOAPBTO3A-aPswwh3HmGwye7UFoFoyhW49MuY,11018
125
+ vllm/distributed/__init__.py,sha256=l_KMMLMmYq-MqxY5OeTuQjA38RYwdS4TtVy8JafQq3E,191
126
+ vllm/distributed/communication_op.py,sha256=igCBXNoAhJ8eZooR79KhpzgYlVP1TUgnPF5C7BSpSJE,1562
127
+ vllm/distributed/kv_events.py,sha256=3A-SxHLe1Dar2IMHTtTlVHUTRjg1VSTvBUJzMMLmZ18,12357
128
+ vllm/distributed/parallel_state.py,sha256=xlv9jwsT7sVzpq7bX0VdL78WClFUR73MaNsSOxF16ZU,50271
129
+ vllm/distributed/tpu_distributed_utils.py,sha256=8xkJxtqRonnjlFMmbPA8-NeGF3KvjvQTgVr_3mMVQeo,7758
130
+ vllm/distributed/utils.py,sha256=K2CNfdyDZChfy6QWoMx-n6pDzbG8X4SBmdutEnhPNqQ,21012
131
+ vllm/distributed/device_communicators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
132
+ vllm/distributed/device_communicators/all2all.py,sha256=Vi-UzA5LJFl497LGOua0xTV8P-YP7uX0raQk6WVBE2I,9819
133
+ vllm/distributed/device_communicators/base_device_communicator.py,sha256=YeqXOmte0UY-kApX7BKBSjZ0TuQyOGGE1nh3OmgPsyY,10312
134
+ vllm/distributed/device_communicators/cpu_communicator.py,sha256=BQEdx2b0lsrjFKzWeCDmsKIMEUyPoUo-rssTiRIoHyk,5593
135
+ vllm/distributed/device_communicators/cuda_communicator.py,sha256=3muBQSx8tzsCSt0yiVxW69wIqTguhFmCgSjwt4ZLy18,7123
136
+ vllm/distributed/device_communicators/cuda_wrapper.py,sha256=1I1OZOc9-St5Zlr4gUmoDm7HxdS-T9ZE1ixOJGJK55s,7185
137
+ vllm/distributed/device_communicators/custom_all_reduce.py,sha256=h-P7nPNq6ZflvXdeqPvceE4aDxe3DVB60okDzVqbGWM,12675
138
+ vllm/distributed/device_communicators/custom_all_reduce_utils.py,sha256=FCWwNPzVLllZLBiK50myd3SlBiLIf0m1Ydpip5SM6OY,10558
139
+ vllm/distributed/device_communicators/hpu_communicator.py,sha256=SCAWX8SxecNzR79UE5AKysm7OP20nw_q-ABkLzqOtpk,1836
140
+ vllm/distributed/device_communicators/neuron_communicator.py,sha256=NF-k7fuSxpLtEatxVzTCt162N9rkRDTsxd82gV6J7VI,693
141
+ vllm/distributed/device_communicators/pynccl.py,sha256=6aOeCmloJqswF8rnfLhyuggNSN2D2SNDi4deLS7wj3E,9211
142
+ vllm/distributed/device_communicators/pynccl_wrapper.py,sha256=xeMmSohTi4kc9vFSnW25ss91HoVOvKNsff7IdIxtrEI,13763
143
+ vllm/distributed/device_communicators/shm_broadcast.py,sha256=x4C6QKStqUotHdkmI5OjcqWR24gA3U07v8V3t8EmC6Q,24959
144
+ vllm/distributed/device_communicators/tpu_communicator.py,sha256=mDqS1cHYs0zeptNKlMjkG667moa68Dc_Anx1iAVrfmY,4159
145
+ vllm/distributed/device_communicators/xpu_communicator.py,sha256=_a34w4noXIZW0099sLO3Z7CdLc-QC_NxKsnxH5wn5zk,2176
146
+ vllm/distributed/kv_transfer/README.md,sha256=B4s4s-6F9FP4wbgmrYJDSpdUu0_Yq4EeWLEyZMNkAyk,2006
147
+ vllm/distributed/kv_transfer/__init__.py,sha256=s4nYYd_WS6iDGPpYMvXnq8-x9vt-776yOsxnnRU2cvU,461
148
+ vllm/distributed/kv_transfer/disagg_prefill_workflow.jpg,sha256=fOFUEx-2Fm1uxHCGopvCREaRqdvR87Z7C0bMqEVH3Iw,142656
149
+ vllm/distributed/kv_transfer/kv_connector_agent.py,sha256=Pg8790EwU7LAYJMVdK8_jWs4V-7SYfQZjdzvSU0nvAE,2489
150
+ vllm/distributed/kv_transfer/kv_transfer_state.py,sha256=aYn9HaM0YH-4S-8V7R4wacCJ4kaYbXuOgZnhA7J2xvE,2335
151
+ vllm/distributed/kv_transfer/kv_connector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
152
+ vllm/distributed/kv_transfer/kv_connector/base.py,sha256=pU3LM2zTdnzLb-GlxUeppL0i9Gl-ZoTew-Qu2U8ZSmQ,4490
153
+ vllm/distributed/kv_transfer/kv_connector/factory.py,sha256=i-kS8KBLQrH6U4YArb99wo6JqKrfXg0nxoHf9RKxXq0,4918
154
+ vllm/distributed/kv_transfer/kv_connector/lmcache_connector.py,sha256=14YN5LyPfFUPOlqXtAGp5t1c6d6AkeA7orahdQ7W2Xs,3739
155
+ vllm/distributed/kv_transfer/kv_connector/mooncake_store_connector.py,sha256=-Dd0D8xfrt6dVXhWy2CL6Dy6-rHaXkoBIFGWcYlOUqY,8679
156
+ vllm/distributed/kv_transfer/kv_connector/simple_connector.py,sha256=n-613lm8SPqXM2_fpJin3lYqePhbir3UpQu2eR48pp4,13926
157
+ vllm/distributed/kv_transfer/kv_connector/utils.py,sha256=aj28w0bLGPpUAc1FRnTNATv-B1TVTTUI77Gvc1XhZWg,4464
158
+ vllm/distributed/kv_transfer/kv_connector/v1/__init__.py,sha256=Vgcn88rEfiLwJ3-YkZKWsvMurr-vsV4_47b9_Mv-vlo,265
159
+ vllm/distributed/kv_transfer/kv_connector/v1/base.py,sha256=92ACWysP74FT4jc-m5A0WKrMj016UbzN2rK7RHiK6DI,10012
160
+ vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py,sha256=RGY8QM17-GTg46L2KlZwRsX0qDn-55gFGuVD0uZeV5Q,5185
161
+ vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py,sha256=s9MS6JeW94duB9dw0OKkiUODHprffi6VtZz622s0Xv8,8243
162
+ vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py,sha256=oiGMxVMnH2SQ81HO97I6mtKRpqz2BKDTx84Fy6W19yk,46090
163
+ vllm/distributed/kv_transfer/kv_connector/v1/shared_storage_connector.py,sha256=zJmIJum57R6TRjXojfz1SSO8nLnGt9OXOVj3SR_70ZQ,15780
164
+ vllm/distributed/kv_transfer/kv_lookup_buffer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
165
+ vllm/distributed/kv_transfer/kv_lookup_buffer/base.py,sha256=ZZYJZBDDny_StDcmXUFwOtDslRrTCL9iSUcr8XWe08g,6280
166
+ vllm/distributed/kv_transfer/kv_lookup_buffer/mooncake_store.py,sha256=atXfrR3n4MZAB88TtNmVAC5Gn9FhyVFcVRO1VjjL4uA,5679
167
+ vllm/distributed/kv_transfer/kv_lookup_buffer/simple_buffer.py,sha256=m8XNP5UiFwq2g33f3fiJeLMu87OhKwSvjWcI2jppztk,9156
168
+ vllm/distributed/kv_transfer/kv_pipe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
169
+ vllm/distributed/kv_transfer/kv_pipe/base.py,sha256=FHfg3C53oZjBBZjEWHmxMOPKTvJitfBOXXFzh8j70cU,2156
170
+ vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py,sha256=1agXvCvhn94CCK_0QbIcmOz-omp69UNCc8kCptsjlBc,12111
171
+ vllm/distributed/kv_transfer/kv_pipe/pynccl_pipe.py,sha256=NjIOnIl2Fv7H7S-SZn2_xroAcZ39Vg2CCi90Q4iSRzw,9720
172
+ vllm/engine/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
173
+ vllm/engine/arg_utils.py,sha256=DjwVO_y-9b2Ke6u507Fm9n7S5iS1aMoNY5PwyMwCvsE,78328
174
+ vllm/engine/async_llm_engine.py,sha256=8iiLRsYuNDmaqjYMLo--IgRvfDOP3MhWcuzJZvWgI90,49066
175
+ vllm/engine/async_timeout.py,sha256=uxUlhUevs6Ae1VvJ0GTkj23bAkg_w_CWteeXTCAFYvM,6594
176
+ vllm/engine/llm_engine.py,sha256=Rwfx18emhFb08QWllPw7agY-zajXKV-Vfyq65aT8hPA,91931
177
+ vllm/engine/metrics.py,sha256=LG_2wawnok658rpG6ZJFnMod7G8aydjteOYz4WwFMz4,26793
178
+ vllm/engine/metrics_types.py,sha256=8_E0ssV3TKZZr63dInytvsEluXgUvrPhrpnRPISUdso,3248
179
+ vllm/engine/protocol.py,sha256=Zoz-g1CBgk-9staLLGOlOPhdmGqRn8QkECGHRo3bJjo,11294
180
+ vllm/engine/multiprocessing/__init__.py,sha256=DMMFIKmNM1hn6leoQn35h2d5UaTpaHilKJfC2ar9jv4,3747
181
+ vllm/engine/multiprocessing/client.py,sha256=tWa-XGcWzDiwhnHTfnclR_eivmzxSJS7kyJDpa2c31A,28210
182
+ vllm/engine/multiprocessing/engine.py,sha256=27etqUvs7Qdkbo-aa577eZBSOXM545ZiT6B8otT7xXI,18400
183
+ vllm/engine/output_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
184
+ vllm/engine/output_processor/interfaces.py,sha256=jSj7l8R2YbHeKyDrcOamQAOZPgvarX2lERWMFFBJZd4,3063
185
+ vllm/engine/output_processor/multi_step.py,sha256=aZhtnbioId3KlW6nRc4GPJd_fHubXtaLoCx5ZabIbzA,9507
186
+ vllm/engine/output_processor/single_step.py,sha256=W11dmtv_-7wwuDafhgUdLR_Hf6GHrg56pmQI6LHITj4,6422
187
+ vllm/engine/output_processor/stop_checker.py,sha256=wPsxTqC2x2HkggUrTtnme3DL37gdwVZ37SDspMsdWTo,5137
188
+ vllm/engine/output_processor/util.py,sha256=358XXxAOG9vNO6uRTfINK0B-oCzqbidVpXfV5nrNs2k,1125
189
+ vllm/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
190
+ vllm/entrypoints/api_server.py,sha256=F1c5uZ-WFzmNtrpSH158bOzv6p623tmY7gimPZiwMGY,5818
191
+ vllm/entrypoints/chat_utils.py,sha256=DM-C7nv3WIoTXcZM-wnqd4t46aQu7RaQsFAHZ2wCQ0g,46925
192
+ vllm/entrypoints/launcher.py,sha256=l_VfSBIAVo-xE_HauO8G5GxKPcue16XdITvTxs9oyjM,5281
193
+ vllm/entrypoints/llm.py,sha256=m6bMV4v1XRiMXWtlJmlf-mc7YQzys_odzGGgbKD5JgU,69229
194
+ vllm/entrypoints/logger.py,sha256=fFcBqHM8hitDzkj-pNaFGN7EsJywkV8GXXI_ZsfL9u4,1685
195
+ vllm/entrypoints/score_utils.py,sha256=ZUxphYOqXVj14KbPAvvKgX2derLYMvir1rM2WXjsP3Y,1723
196
+ vllm/entrypoints/ssl.py,sha256=2InxP04Dt_I84ORXewngn5cUdXvRkrK_zZB6o8Ng60k,2805
197
+ vllm/entrypoints/utils.py,sha256=IcKt2N0a8Kk7wfZg0CmjlVci3LwODt1LfKGSPfHyX5I,9555
198
+ vllm/entrypoints/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
199
+ vllm/entrypoints/cli/collect_env.py,sha256=4GKIFhL_7kERjTJ0F-K18plm2wuEJolCjPKpTcdfMzk,1098
200
+ vllm/entrypoints/cli/main.py,sha256=9eUFDzgPMjvkyELCvOr4fd0CwMVZYnhkCILfr5jeL98,1795
201
+ vllm/entrypoints/cli/openai.py,sha256=8alMdXJ8Css2D5Xo76jJbmScCmONkr0gm_uKSjQTwdc,6921
202
+ vllm/entrypoints/cli/run_batch.py,sha256=_BwIdVKw9M0ERHhk37PC3C4wLSZdATOlmR9BoEjYWmY,2312
203
+ vllm/entrypoints/cli/serve.py,sha256=LPSKgYJK8CEg_gGpmDr-HtZPEIlCr8uYY7eAOnoON_w,12327
204
+ vllm/entrypoints/cli/types.py,sha256=OPisQT2_vqXvlETiUNd2xjH5Sau3qJhalpNBLrIhfVg,706
205
+ vllm/entrypoints/cli/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
206
+ vllm/entrypoints/cli/benchmark/base.py,sha256=GgBT4_2aIK2RcybeYOB5kbZEZRDpE58mbwvx4bk64-w,1174
207
+ vllm/entrypoints/cli/benchmark/latency.py,sha256=L4OBo9qy3i9d9zLbcezTGWnzygm4YmfBHJ5rbK03LJk,879
208
+ vllm/entrypoints/cli/benchmark/main.py,sha256=BaK1_IrQdE0jzEcCcREpXUfM90DFt6wXDhH8rkhrND0,1849
209
+ vllm/entrypoints/cli/benchmark/serve.py,sha256=-rV0FpX45O0NwPGNX2hKFDYIjajsyoSdQAXt6SCNcWU,861
210
+ vllm/entrypoints/cli/benchmark/throughput.py,sha256=MTK_QPQZUTECAJHcyayN-RsgSo86KYN-tQjb2fsNtGc,881
211
+ vllm/entrypoints/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
212
+ vllm/entrypoints/openai/api_server.py,sha256=MyNVvxTZ45dNvFpjdTbNy8xdyQDbeo2tEDsot8V81sI,52595
213
+ vllm/entrypoints/openai/cli_args.py,sha256=Z39HxSdJfIx_e6LCB72bGCjpEcsJdYWCBa2RUKsynYU,12362
214
+ vllm/entrypoints/openai/logits_processors.py,sha256=QAJn3DMAfFySExoA5aaSNVjXwtlOXGCv1DX6Fltj2ZY,3230
215
+ vllm/entrypoints/openai/protocol.py,sha256=nGzaaNrFvrlz-Qo9YN8T9aZ-XwIFduZBUgpqXuc4ziA,70885
216
+ vllm/entrypoints/openai/run_batch.py,sha256=1cj9gUT-tM9gIvivuur5eRvIun8MgLlkM8tdIyGl0es,17740
217
+ vllm/entrypoints/openai/serving_chat.py,sha256=6aIGheQ77SVWQzgThno0zPnh0dedDbwsQv42fJACtcY,57820
218
+ vllm/entrypoints/openai/serving_classification.py,sha256=8bcKvF1Nnp2MjcphEGke8ar6srj-Ef1gkFF5CzgUduA,5335
219
+ vllm/entrypoints/openai/serving_completion.py,sha256=oVirjXifsLsQzf6Mke1f6dfebFVrMRhVL-8PloCOm8s,25584
220
+ vllm/entrypoints/openai/serving_embedding.py,sha256=ejT3CsStaQQFYO3Vms9tsitHkaumFMHr6VxkGq4CWKc,7384
221
+ vllm/entrypoints/openai/serving_engine.py,sha256=inA_0HSBExYAN7XnZqVNE-oAkg__o-EfIkaUEQf6pDo,38711
222
+ vllm/entrypoints/openai/serving_models.py,sha256=GYqPJtMw350_fpm8b0kxjHU1z_omJQTPgptTq0lJgPM,12797
223
+ vllm/entrypoints/openai/serving_pooling.py,sha256=yKTe7YqS_ynztPjYRiUXhkmi1bq5KatLotNzUjkZIPE,8719
224
+ vllm/entrypoints/openai/serving_score.py,sha256=cpZSoRdaIUajrKo7rNWlY-jKODHKHY0Tf9srOMry4Sk,15976
225
+ vllm/entrypoints/openai/serving_tokenization.py,sha256=9SMgGGjM3uLkmXRfzRno57BEX6xOMgSm9pJtET1XCTM,6090
226
+ vllm/entrypoints/openai/serving_transcription.py,sha256=i8zWUiQJklZyD5D2S9EfxvhZWkIHzXBasZn7zAoO-1U,15843
227
+ vllm/entrypoints/openai/tool_parsers/__init__.py,sha256=xbsgwoDLlW3vqIer2iscCEZnb0YWkz1UeAf4DN1lE9w,1104
228
+ vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py,sha256=UyO8engPSTh8Ej8EuQay-mDz9YPzHAsmzJA43-4vmq8,6095
229
+ vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py,sha256=gKlW7rFyEx6lZJmkKdEuyqz5F7MGJPkk0zwtd9aZ8rY,16678
230
+ vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py,sha256=YqhxroDlNDpZOwbrAHrdpBLKRnahVNQahd2N2z9Lpl4,11502
231
+ vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py,sha256=1LKw06AkMy82c4zXRu41Edthks6Z4xazB4nyhLiRy9g,10723
232
+ vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py,sha256=BTNW2L9BHkqNEmiTkqTZlYrCecr5KNIIvbAB4SIquTY,16891
233
+ vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py,sha256=t5AjAiwJsYdLMurJJROlGPHXiEpMgO9NLGQAT_du2RA,9461
234
+ vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py,sha256=R1uMMM4Vnji28xyHVqB1N2O0CpHaW6NISeoc1NsxjJQ,13947
235
+ vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py,sha256=k6f1xFWCjG1KIyHOQ-bN2nYaPbgW2bNvkMpn8GV2FsM,13258
236
+ vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py,sha256=yY9n9uxy0q5SQ5yI9VGf6dSpOcde-3gY-o-wZjSncPU,12313
237
+ vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py,sha256=ipV3kFRxjoOywvxXfo4bVV9Fyc38QEeQLEZ_vYlv82g,16553
238
+ vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py,sha256=o7RbhWFthf2VKwtuxZfZyVdJAQ_5iOuSLHdsoz2Hf4E,4346
239
+ vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py,sha256=xJELNmJ0_6tHl6OZh_EmY57b5QjGO1yQ3rLI46XoozU,12636
240
+ vllm/entrypoints/openai/tool_parsers/utils.py,sha256=RrvMsSpaYYMPefvM1ktVKzo9Gs5KsHpi21QaP1h4EKU,3874
241
+ vllm/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
242
+ vllm/executor/executor_base.py,sha256=L9dpD4kIXgn2KJ5XTHvtZ2OpMXM1JEJfZ1-QVIbCcRI,15732
243
+ vllm/executor/mp_distributed_executor.py,sha256=zhyIz7Smu33879dkoQ4FHO0r47BSWjBnFfpwWjqsf-w,9923
244
+ vllm/executor/msgspec_utils.py,sha256=0tCYOinMJxzOI-U8fAY8C9A9seN0f4MdaE9H8PJuuAE,978
245
+ vllm/executor/multiproc_worker_utils.py,sha256=1Ddh8AEFrh6GIWbRqm7I-Kupluw40FPIVijgak_5434,10794
246
+ vllm/executor/ray_distributed_executor.py,sha256=dkxCh1bmW39r0RGgLYqI7eRz7vsqt0owBZrF9AxgIDY,30775
247
+ vllm/executor/ray_utils.py,sha256=rsav_z9cZnKIGz37knp2YUtx3ThtYAdP85smG2y21d4,16883
248
+ vllm/executor/uniproc_executor.py,sha256=23FH6Z3EqfCQEf7IOFFkhKIaycByazM5EkQrUNoZkbM,5548
249
+ vllm/inputs/__init__.py,sha256=8cagQSlaOP05DjPdGL1EPpOOYaYkWlNHzQ25JigzVu4,1329
250
+ vllm/inputs/data.py,sha256=_KoSfLXeSwi4981LvUyMJU22Sjs2oO4KfkRUKcth2RA,10829
251
+ vllm/inputs/parse.py,sha256=_Lbf4PQyczm3OPsLgvyPf3CiKGNP1Ctd4bW_AnuORpk,4499
252
+ vllm/inputs/preprocess.py,sha256=bBfQA5MfOD9UJix2qhuWnCUYCcib847zsEEgP3qjZe4,34416
253
+ vllm/inputs/registry.py,sha256=8aAcHouGBJ2KVvnTzUhwsCbSc2MztfZNOMmbyBIXoL4,7482
254
+ vllm/logging_utils/__init__.py,sha256=lGnFUwOOIQYnuCu-Pf3LKtxPRmx6DgkxlxknU9fO3-Q,205
255
+ vllm/logging_utils/dump_input.py,sha256=mCQrk4AI6K7w0lBbYWVfRf-44Z7WBbZLbnuRbIk9Ioc,3110
256
+ vllm/logging_utils/formatter.py,sha256=kIZNddJTgIcB1RRoJc0fFmDtsVv3X0GGUgblS2PdW48,594
257
+ vllm/lora/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
258
+ vllm/lora/fully_sharded_layers.py,sha256=6o8xjkxFJd1A6ESz1KeEDPYWrs0RNjHAJZ8d6IUntc8,12694
259
+ vllm/lora/layers.py,sha256=-iEPCZ9e5oYUQBDdS6RFqyIHh2P7_0GXvFe2aXs49ss,46740
260
+ vllm/lora/lora.py,sha256=AuAxQNXr-6RLCn7Kc28qqbgA715IDb7yfuReHchu10I,6294
261
+ vllm/lora/models.py,sha256=Gb75UF3VRAdwXBRISlCiyfDkywACBvlA0oGTK4SFqV0,36053
262
+ vllm/lora/peft_helper.py,sha256=VBwUFq0hnhQZK8qL90VEjazb4ClFDO2lro_-UVwOYcg,5334
263
+ vllm/lora/request.py,sha256=spYbJvjKyPlu1a1otY3sxI4JBpAzUvLLjhyxFJP2Cg0,3178
264
+ vllm/lora/resolver.py,sha256=6iZpDMfxWGcy8MWlj3nCCcVmgr6nKs8EzzOhaN4TiCs,2881
265
+ vllm/lora/utils.py,sha256=yi5k_4gUwrJv4g_rNrEgUMUBci2xASELsROgOUISC8g,9318
266
+ vllm/lora/worker_manager.py,sha256=-kKrfkTgU16e5QggBmDfUB1DEkvMronp9K4fiUruzMo,11107
267
+ vllm/lora/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
268
+ vllm/lora/ops/torch_ops/__init__.py,sha256=ywu1d5miStgp7A9BRAo6kUvuE3AcgOAOJHxYHD4cKvQ,535
269
+ vllm/lora/ops/torch_ops/lora_ops.py,sha256=LOSdfKpQM365cIWVkyElcLpfgn8cF44IVw-KUN2NMKs,4517
270
+ vllm/lora/ops/triton_ops/__init__.py,sha256=arDrNt_kvrDaC4mN3RNbu6-0YACXQRp9Md2cJxXI1ao,384
271
+ vllm/lora/ops/triton_ops/kernel_utils.py,sha256=3P-CWMjNQR2vp3QUNynjXAIaWQy06mEFDTRLRQljDME,8509
272
+ vllm/lora/ops/triton_ops/lora_expand_op.py,sha256=740f2SjW7lP2rtbFIGFl2e-FnCHxGv_tVs9WmbqftBE,8965
273
+ vllm/lora/ops/triton_ops/lora_kernel_metadata.py,sha256=YgL-K9fVeFZebx68DJNvrqsfZI5o0thpa---d4NYlnA,5967
274
+ vllm/lora/ops/triton_ops/lora_shrink_op.py,sha256=WfuQ1-7dtJAQ_Pc7sN8yl7jRkcsYX0MXZ08uwatdEfg,7996
275
+ vllm/lora/ops/triton_ops/utils.py,sha256=zsQcSl0sWdYUefY5IWqVOpjdXySLSSsHgTwNPwKkWJY,4916
276
+ vllm/lora/ops/xla_ops/__init__.py,sha256=2Yh5hqiUt7hGz9SwznSZlj-_G7-SknTauNyHGDcUjBY,304
277
+ vllm/lora/ops/xla_ops/lora_ops.py,sha256=hLERTyHuXcH5J5ePn_3cak2Mi5S8_i0BcXiJmNZHBEk,4400
278
+ vllm/lora/punica_wrapper/__init__.py,sha256=A5cDJmdCPRBN23kwLfemRlWI4YA-t_7qIxeoeimCkT8,313
279
+ vllm/lora/punica_wrapper/punica_base.py,sha256=oR-f11G-ga_JOazy2M3iBoBaRb1D6ifWn7xbkCB3gGI,18500
280
+ vllm/lora/punica_wrapper/punica_cpu.py,sha256=sFBBu_qBEL8LGALamK7hE1ympyipYNT4qpav-V1nllU,12527
281
+ vllm/lora/punica_wrapper/punica_gpu.py,sha256=Wg8n1z8_pdxolBakhIPV8rNi059HnmzK2l1kPS_2LV4,10885
282
+ vllm/lora/punica_wrapper/punica_hpu.py,sha256=qljiMC-THj_SIhaFeHygMQuB13wBKyz7dr-ba_aqpaI,5848
283
+ vllm/lora/punica_wrapper/punica_selector.py,sha256=Of6p5uYMMLnA6g46VK7M3xJT8Xq4q1VFoeOpTIsPf3s,799
284
+ vllm/lora/punica_wrapper/punica_tpu.py,sha256=utenbLEam1-CCYjnXFDC-zKfzp2H8ndBop3XRlCp5YU,16290
285
+ vllm/lora/punica_wrapper/utils.py,sha256=vOSc9EvIlgLbdJq5iZ7LOLdkadvPX18JbZZei2OO2eY,7050
286
+ vllm/model_executor/__init__.py,sha256=vE7mcT8ZyKWc-wlc4pq2-ofRwEDVYT67HSgD-hLut0Y,574
287
+ vllm/model_executor/custom_op.py,sha256=Ugjx_52-NSL7SWVL5Kg0_I5X-Yzpz3Agr_qSictAx9U,5669
288
+ vllm/model_executor/parameter.py,sha256=H_qq9zP0q0QbDaPbDkH9coanlZZVWsTZ_kbvlt7KYEc,16758
289
+ vllm/model_executor/pooling_metadata.py,sha256=bbOjrSCh_8aX9M3oML5oyUzijmJhy9Gb5_7bURoCi6w,2127
290
+ vllm/model_executor/sampling_metadata.py,sha256=6uc7pahkFyei2c-4Mn0jX10MQFQKKKhm7VJUCxRbQ8Q,23013
291
+ vllm/model_executor/utils.py,sha256=yt3QtDHwHpiQJA-vPrVpsG3dTMEAzACaxc0gZ7ie888,2921
292
+ vllm/model_executor/guided_decoding/__init__.py,sha256=XEvEfOGGBuBsKrwc_mOi9XjbzCdr0SK6N0PmyWyhyVs,8332
293
+ vllm/model_executor/guided_decoding/guidance_decoding.py,sha256=-goDVm60krE6RH482nbUTXSeDQ0jnxnWVCW3ep8qoRo,2600
294
+ vllm/model_executor/guided_decoding/guidance_logits_processors.py,sha256=mcy35zX1YHfKoxJZADvLxkP5F8OYiEHO_P-Xg_lQZrA,3344
295
+ vllm/model_executor/guided_decoding/guided_fields.py,sha256=KSAGqffFpedIz4LfgwAodX2eJkyOZ4RTD49ynzSbrTU,1566
296
+ vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py,sha256=jl5gxFhJkIL2pSOzKhgyHL_1qRszIakyNN6LbrFJXfs,2747
297
+ vllm/model_executor/guided_decoding/outlines_decoding.py,sha256=-MKzmEGn1QuulI2JDU1cAejfo03nHxlQluadMJe27h0,5571
298
+ vllm/model_executor/guided_decoding/outlines_logits_processors.py,sha256=UfCgpaI1KwSFNjtQ-2lmBTYkLszSXQrVZhtvKpeKras,10889
299
+ vllm/model_executor/guided_decoding/utils.py,sha256=n6prXaBa-qiTes5wPhIX6CJg4Chrje_wQNsWxYuiYf8,7966
300
+ vllm/model_executor/guided_decoding/xgrammar_decoding.py,sha256=plEHV0pFHJF06jgMg2w1Tlok2h31D3M6C2GA-Zpx7lU,16833
301
+ vllm/model_executor/layers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
302
+ vllm/model_executor/layers/activation.py,sha256=JkTpc5D05uhREoAs_zk4t64GP-xxefz3VdBBpg0cviE,12584
303
+ vllm/model_executor/layers/layernorm.py,sha256=_tQTYGrfHXNqPc-WXdhqtK1XQzJ-yIpIRwOhDazPqKE,9230
304
+ vllm/model_executor/layers/lightning_attn.py,sha256=KqXTG_XlaAu2K6kT1wSNU5fGAKWEcrZR1CR0RmdpxCM,20962
305
+ vllm/model_executor/layers/linear.py,sha256=GrK_DBoCoMoNCTfj4oNo2aMPdfptz0OqUfo6H3_YFTU,65470
306
+ vllm/model_executor/layers/logits_processor.py,sha256=ehI9fTIQE3eOUxZTl9tzjYSWtq6NtPvPPW_HlzqYBq4,7825
307
+ vllm/model_executor/layers/pooler.py,sha256=T8aG7N0dCDM_ZyIRHdIBbFHjXJMYu_2HGyLVs7Ge4o8,12158
308
+ vllm/model_executor/layers/rejection_sampler.py,sha256=CEugEvE9tmLJqaCGx033TFMQ-_xGAeNKFByYrQFKt-g,16384
309
+ vllm/model_executor/layers/resampler.py,sha256=jDG2clcusNHfxLptLZUbbwxmxC3f_I-KJ8tJIdjyuLM,10506
310
+ vllm/model_executor/layers/rotary_embedding.py,sha256=nGMjpuERRXO1OtuucAQ9E2Ok0CepunDnGhIifdwAyKU,78083
311
+ vllm/model_executor/layers/sampler.py,sha256=gOsxvuWntPAVabVMCKSma_pS6XnmOr80QMAE1iAxdbw,49689
312
+ vllm/model_executor/layers/spec_decode_base_sampler.py,sha256=ZtvhMhd0DYJlYpqkLyIoiUec0gvqVT8IqnotqN60EAE,10253
313
+ vllm/model_executor/layers/typical_acceptance_sampler.py,sha256=AZ-q02vuqxGMQLgBL07uLl67T3Zrw_YtwglJwmyu5UA,7026
314
+ vllm/model_executor/layers/utils.py,sha256=RYUsT7GZniK2seIQGxARh2Uph90KIceza9SmpWCoNaQ,3851
315
+ vllm/model_executor/layers/vocab_parallel_embedding.py,sha256=DWzKb224fam0ctWb769LRHXzh7Nq0YmLyuGeNXvD7AA,22765
316
+ vllm/model_executor/layers/fused_moe/__init__.py,sha256=Yf9Pz1_mAfW6V1Fi6NLGSiqGz00lk0FJaCNnuOMzHUM,1434
317
+ vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py,sha256=zzl3sQxTCF7C40De8xM-nZiBVPEIyLGsddyvvrVbrRE,4637
318
+ vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py,sha256=-uBk2KEGa6zf1xXuzKyzLdash630CE7nLogqZBlAHrQ,4876
319
+ vllm/model_executor/layers/fused_moe/cutlass_moe.py,sha256=prI3_SnvLsDx8mM74LLFw8uEn5ZkG4KC3X_Bl8yKFJE,18969
320
+ vllm/model_executor/layers/fused_moe/deep_gemm_moe.py,sha256=3YREsVaktZLc_VAxlaI_BH2IZPtsPgayejLhuUsHLN4,8408
321
+ vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py,sha256=oFlutUNnQxAtZEr8ehyJQ5NeioDtC5-6KOwHp28qV8Q,10076
322
+ vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py,sha256=BDCcEym3y79KsQA84SL81zNIazr0XKzZXPVzoTcL46w,7391
323
+ vllm/model_executor/layers/fused_moe/fused_batched_moe.py,sha256=LRHXdmPPiVm27VaIHbhD-G3fS3_als2IDOO1boBXO8o,26614
324
+ vllm/model_executor/layers/fused_moe/fused_marlin_moe.py,sha256=Goxq_TjD7ls01emorNZsGDhGZJmtOYAl-b_Q0UdjDA0,8911
325
+ vllm/model_executor/layers/fused_moe/fused_moe.py,sha256=KGxqU8k3wJPcfTmlYgn-oTgkqFzG8ABklGi3AhIObSw,69329
326
+ vllm/model_executor/layers/fused_moe/layer.py,sha256=YPu4_JQrIV6CRxHdqgz1eI59woX3ni9gcDDmMVNedfg,63828
327
+ vllm/model_executor/layers/fused_moe/modular_kernel.py,sha256=lw0mpzbEMYsip0kZit6ERAispBdELtUfmoNo1lZm-OI,18014
328
+ vllm/model_executor/layers/fused_moe/moe_align_block_size.py,sha256=CHcWjXIHHztUit7b67lvQAFO_IfqO1W5XuhB-LepXf4,8340
329
+ vllm/model_executor/layers/fused_moe/moe_pallas.py,sha256=AynMAg-Va0T9qvSeofHp4ZuRYWN6pQdZPTL5EKHRcps,3158
330
+ vllm/model_executor/layers/fused_moe/moe_permute_unpermute.py,sha256=A8JBstS-8q_zQG_QCcSGrQUpFgajfE-rOxIkrkVRYCA,7973
331
+ vllm/model_executor/layers/fused_moe/moe_torch_iterative.py,sha256=ZSvVVU8rUzENLnHycTypPg2UhO9eF_eu6wm1HEtpl-s,2156
332
+ vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py,sha256=GVhqzJdlFoRlNSamOcs79OStkDaGMxQaXw7YN26GFD8,5780
333
+ vllm/model_executor/layers/fused_moe/prepare_finalize.py,sha256=6Ymie6pXbphT9cH3_NO6DuRJB3QkGBlN8e2l6hjTbDE,2462
334
+ vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py,sha256=V31muuurjEsD8vI8dfxX59HOUITvZ0bOQjsIFE5VcpQ,14558
335
+ vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py,sha256=4jfw4S_iB8koAGkdtYxeb8d720DI7HTDy7tqlx64cUc,4298
336
+ vllm/model_executor/layers/fused_moe/utils.py,sha256=SI2vXr4Q_EAWfMODbSRqpE4OgdYn9M8MZwy5kYiAGA4,3281
337
+ "vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=iNGsE2ZeVnQEnN4A8UJ9Jv0d3hbRF2MJ9oBgjup5Szk,2737
338
+ "vllm/model_executor/layers/fused_moe/configs/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=hH5rRN9Wtyv35azxMzyUMHWtiKgOHev5tNjIG8j6dsE,2751
339
+ "vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=qPumkNxaHMvVBnEjPe_Xiuz9ICb6Hqc-9I1DAR8s3gA,4130
340
+ "vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=s47lb8VLnyxMgWlqcIR4BdPBsjKWL4olXF49uZvygzQ,4140
341
+ "vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=gzfjrYDcS0vsACq7ONGVkNA3FqVjr3e89q9fO9kokkg,4133
342
+ "vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=Np7yRX9Z7Y7Z5Nutbl02wpKdZRltbt4WqlPlleiYs2E,4146
343
+ "vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=XsNfNXY8v0eatazkLCDiDclI0FnTudUGLYO01e1_4aA,4149
344
+ "vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=H0K4_O1CMbNLi-srcycT3lSl4JaBl3EGF89GY5Rj9MU,4130
345
+ "vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=arPqstZMzZjz8BNpY3alKT4vGCJyUj5I2hEeK02aq98,4152
346
+ "vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=wjnQ4v-dflJMR3iFDHBuZI_1R0xXjsNoWc2kHu6C8JI,4135
347
+ "vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=7WHPz_0fxeI3Ed0D9VIpZVoeN9RtJVVARvptfcmQu40,4146
348
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=2kWS9Qvy5Q3mvUFmbPVures5iZAriAXsy8WrtE5wu00,3727
349
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=AMD_Instinct_MI300X.json",sha256=D2dn9vXyN4FCKsZCf7VYgAWLedCx8XpPjbkQVVAvwAA,4737
350
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=5QqFljwwA8OaPlFnXy1zogl5oi6aE0OqN39xk2IUC64,3245
351
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=I3k416HbXU_rYb8scD8gAI4fuBlElHl06PM347Qa11w,3253
352
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H20-3e.json",sha256=CoC3pMKx0vkjI9T6rqRLTIwbDskxljTj31fCHI34B5w,3232
353
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H20.json",sha256=RgV8C4F1LO09h01YsgF_eqX6GNoBtC7ulPfJRUUbg_g,3241
354
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=192,device_name=NVIDIA_H200.json",sha256=nsNEuDNks0tVLfQfIm7xxFwEeptTfQcoa9fJy0NS8xQ,3247
355
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=PvRpT_brUV3Y3zMfWEcsXMmdrYKjiq2qI9iHejPhhsU,3743
356
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=qbqjisJ4oKmcYzumHPRk5UyOzsdi8J6xas82UWHMeAI,3263
357
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20-3e.json",sha256=gkimxy2r78McckKxx4U4R3xahTI1KMH2pMOdUFOUdu8,3234
358
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H20.json",sha256=vS2DRIDOqWyiBvbG6H746ownfkD1F8Aj2YZ0ET9xll8,3232
359
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=MlpzcrkZo78kFYr6cqmh4lBdpxKcEvlzqvRf0bmeduQ,3264
360
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=384,device_name=NVIDIA_H200.json",sha256=xqhl748it8GV2KXX0XixitE_ywnsKksqK8AGL7tAgT8,3254
361
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=FsWbV4Q6AzAtgegVuENBDz2ZcSJsqNiwUIVfQbpP7hQ,3244
362
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=rN55MyeJ8U6VGNRg7lwC3aa8BgjxdzVg-CofcZ7LTyk,3743
363
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=IuvyC8TNhCVAmUZfLSoETsyCKsmejKXrs_0zuwFLPAU,3265
364
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H20.json",sha256=10Ntu2aVD5vGLonx-jW0qNw-tgZWdZmzMGx7utDVeng,3237
365
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RFH5FcN2ZCPk6DsxviTti1Q8JU5jzBRFXvUQNgOvnmI,3265
366
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=768,device_name=NVIDIA_H200.json",sha256=JraM-Nvbg5V_TJkSl6UPFYZN1zHHoIbr2pAcksenoTY,3248
367
+ "vllm/model_executor/layers/fused_moe/configs/E=128,N=96,device_name=NVIDIA_H20.json",sha256=JtcHRlPz8xQEAqJ9EWI63oYvdmjQFG6VTHqtt85VOSA,3221
368
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=AMD_Instinct_MI300X.json",sha256=f3iM3xm8hGUirJ4ilAIPO6Pe9bs4sm3qaRKMswN9SKE,4731
369
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=1024,device_name=NVIDIA_H100.json",sha256=Bq57MPQXuSib06u6OwiEmSzOr3XvPYoD6ohYDJaBnII,3244
370
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=pCCKkdUzzuBVtljyk7AEIAbeDf12DUiieXaODZXzm5E,3254
371
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=trX2-c4N6hTTD6zFNi6A2bT3FkhxKjkM2rPl-o1K9ss,3250
372
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=I4d56uD7E1JMXD9RAxq3FebdPquDsnNEkVaIY9Ctm9w,3246
373
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=ypuAxMQ7JESPXLBltt68wly2wTrJzlnobhUMip6xAmc,2751
374
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=tUptlureu5QgyAEedtx5sm7CFudXAE6fIXepOb9gfas,2745
375
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=h57svdmDlZC_D8w9XWjPRS8ciYVkJiPEYfhrD2NRVVY,4127
376
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=JmXhUnhX6YOy8RsmT0zFLGyNCpRBPV2q2Db9Y9ctZeE,4144
377
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=G4PKqWxh0MlBhg7QHKj0m--_fP3Ll0gs7VJaeg-NIDM,3254
378
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=bKX9AvcxN6k-i3RUmHSchZZ3rjoYRYb4iBqhCI4L3MY,3257
379
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=bWR6XBZ4nJ_ROg8rEgrQGc04I3BDbwILDHMZxATO-H4,2740
380
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=Gu1wROuky-xS0dsFgbXS2QD_hOVV8yol9a5iqiYyq3s,2749
381
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=_9HO7SaR6aQeh6vqCDpo3kjHnGJ9BVKLiMwYYgd3SmQ,2913
382
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=2ONiQSa9odzdPe1dIgBpP24l5z-5wB1eos06xOj0V_Q,2738
383
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=Twkm9DVNxijpowfvioJ_4cKwIIlAWdyNWO9TA3gxAHs,4149
384
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=THQWP1o2bWhnJh0rq3ZIVvs_sagIJgoK4x3pJbiFbHk,2910
385
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json",sha256=o1pR3rNpO1eW4BHOKpPIQLjviw4P2X5Fr4HQBcdHA-I,2747
386
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=iySqae0zI_PRBLqV-vfSCwDS4Jxcl5QjWa2NnhndL0U,2752
387
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json",sha256=Uhq0SrWiCrldkWbb0ZZZhWaCZ0SsvpiNL4z30KZUN5g,2747
388
+ "vllm/model_executor/layers/fused_moe/configs/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=ydsFUdXdVE_ZSScVhUxvxOFwKG-nkTraNeN69wqzxIM,2903
389
+ "vllm/model_executor/layers/fused_moe/configs/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=TtDngG7ljrU5RtWZ7g-xxdBT3uEuawiKhP8EwPr97XM,3254
390
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325X,block_shape=[128,128].json",sha256=fT7fwjuit4HbbyREYV3ECJ9Rm88FW-V54e27nG9nA_Q,4741
391
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fT7fwjuit4HbbyREYV3ECJ9Rm88FW-V54e27nG9nA_Q,4741
392
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=HNvrgcXxV-eVMLwb7zY_R5KgJ7uBz-YIyQsKq1lWnWA,3263
393
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json",sha256=bHJEVy-CeImiY9JBRCMlHfHPAUi5xO7ENxgVVboN2Yo,3258
394
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=PnNmKSRFznCIUzZ4ZfaYTrMHeF2_kCQr4_bsEy_9Zu8,3259
395
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json",sha256=0Vlxxzp4wrvkFj-NF4OAsJAaPkm-hhisJg0tgNl-W9g,3254
396
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0aSYzpv_cBAlpWCPrfGgNTCfae1KdKQnT56E8XFQl7A,3262
397
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Lqom_VMIPduSZTZQdeL2Wl_x3r9q6RmI9bojJrYwQZ4,3255
398
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fd2p65T9OboKIgw7MQc4IdKaJsoO73Nu3VQiKjV6Ffk,3261
399
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FUGuYbs_QhqKfErofvbTUplhAVN465A7NR_-ryXvebE,3741
400
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bpDPbTyrXLyCSy-o0diveVVeVUF_xj-fdSzCzWmEcKA,4733
401
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bpDPbTyrXLyCSy-o0diveVVeVUF_xj-fdSzCzWmEcKA,4733
402
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=skSJdv0Pr4rba5ODxp-fHZ6dpxn8KkvACGzNf74j81I,3257
403
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=wMt0NyoRSdACdmS1Qi3qFiu6GiFX-4lVvbGEno1W4zE,3252
404
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=mtm7RgEBEJJkHsOis9BtAFo1OCk3vBbt7l7eumDzd7k,3263
405
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=kfEjBrcwt21QRzimrgY_SQ0syJVJzx0AcWQcKQ3j5sQ,3254
406
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=R4B2n2vGt4pPo6jS4Bmnx8AYtcfF9qQJE5bD7OhmXHs,3265
407
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=JnqtO0t2HBcQECdYavi18mu9_MwblGr4zfRcW4zU7_c,3265
408
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bpDPbTyrXLyCSy-o0diveVVeVUF_xj-fdSzCzWmEcKA,4733
409
+ "vllm/model_executor/layers/fused_moe/configs/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=rVORXxNsxy4WmO5SJR8Sd4k7vozKqhYf50wZNCMeQzs,3239
410
+ "vllm/model_executor/layers/fused_moe/configs/E=60,N=1408,device_name=AMD_Instinct_MI300X.json",sha256=4UXbsSNHmrSWnD85SdRMLp4cFGRufndzJjB6hoQPclU,4736
411
+ "vllm/model_executor/layers/fused_moe/configs/E=60,N=176,device_name=AMD_Instinct_MI300X.json",sha256=p6TKUp-KDeLB9E9LqThR1e7J2-ogSXPJojISdHgCxaY,4727
412
+ "vllm/model_executor/layers/fused_moe/configs/E=60,N=352,device_name=AMD_Instinct_MI300X.json",sha256=gHxtmO_uvpueLVlsJgXBVE3_pS1S9EeRxNmHG_ZQszg,4729
413
+ "vllm/model_executor/layers/fused_moe/configs/E=60,N=704,device_name=AMD_Instinct_MI300X.json",sha256=tVdpbIU1scsylx6oz3IADhkcwvZaNqw-_QVb7a6oVX8,4732
414
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=6QPLhZurIqcMVdy3w0Dd7gLViKxsyJRBz-qd8agpi6Q,3248
415
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=WPu80-OWyEJBy1hdnewLN1H1neFW8UVJrqyeDGegXc0,3250
416
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=PaFLbT5ftJiiVSOVkq_DH01EcbIs0sBVkCd9PdYYmw4,3253
417
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=ozS2ECxk-Dsd4Y9DgCGGwDwJlCf5T20ANf5gnTUMuSc,3252
418
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=KEN6xt8pgPH_FbLT2fsAD4s03_V-Z9GXuEC4IKe3cPg,3262
419
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=1280,device_name=NVIDIA_H200.json",sha256=w18R3eHB4oUhfbcCXjHyDvp0RiDSeCrfM-VFESim2hQ,3253
420
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=iz4W1UAV1fcz1ZFh4hNQSLJ_F1MdXW-V3msy7t0WrRM,3262
421
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=dYpKgvuG7Jji0W0zg_E9NfIojStBAdBcKd4B3nhimqk,3263
422
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=2560,device_name=NVIDIA_H200.json",sha256=CXiHlGpea5cEGmFi28Jec34uxEZITF2XldVFcJteZX0,3251
423
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=W1q4PfievvgJ_SiPsDhOsR0Q0eJKb4o8JZhMcVhC-_4,3264
424
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=tku4-yTbIr0H5TNrm1Pq3tJJFYTXqHpdzJDSEF3bk9A,3238
425
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=HJcV-Tzt-yojzNQkPCgi84B44F_RppXxOIicRyg20-U,3264
426
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=320,device_name=NVIDIA_H200.json",sha256=bM9g-XpirsThO3Q2x8ChSx3PPtHuHRXLvVMnTWt8jLI,3243
427
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=oxOKFDrgmw1YmgxTtRa1uoe3p09ylTLrkj_jOTqNh1Q,3249
428
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json",sha256=-B6gZAEYLwMJZOnpO81pTxqs-YVKs_144Nn9BSLaMh0,3247
429
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json",sha256=GPjPHicomrS7ntHu7nnvgNXcHCoUw9vhyTUewkXpppo,3252
430
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=ObHUCUAgHTnld8Cq9Dy1n3ilmbBzyNC4jZcz6YYhMXA,3264
431
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=WegYsHl39QVlHu_4EZJSrgA4LQ5fYxSVNWFhoL6W2Rc,3251
432
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=Hrlas0Nt7d3JMr1vTpI3OVgkzxqcRziSMfFf_U5pQ58,3267
433
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=640,device_name=NVIDIA_H200.json",sha256=J59rmqF8NQWkqmay__ahA3t3IwaPXNu5AVNLnTaDfYA,3252
434
+ "vllm/model_executor/layers/fused_moe/configs/E=64,N=896,device_name=NVIDIA_H20.json",sha256=GNbp4W4MBoHHN4-0sXJovY0lX6rHfZzGyKicrumupGQ,3225
435
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=X8FVPE7rLblDs_Dw_Iu-KDw9H7PaC417EHyVclYjfv8,3733
436
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json",sha256=FsIv5bqSpkWbxK2dBfg1N6tX9epZ55ZhgkJCD7hENlY,4733
437
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=CnjQX3SlQn6fIGsX6P_dbNO0TYgAd-sVUb1FfDcDFUo,3732
438
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI325X.json",sha256=fnO-v4YqBz0vUo0UtOTTD0n7VDG_ivczeQ1tR6Qm9f0,4734
439
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=V_sgDtEtGEuBsGVa0maYJHhhGqe1NE7l-1ek2ed9WP8,3082
440
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=QaITFIJU4UsrOBXaGdPYJwTmYJ0nT9kiiqeUiZzvd1k,3270
441
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=NVIDIA_H200.json",sha256=CC_jsMhXzrYne7eIOroDa0fCBKNnffiaVW2TKd4P-ek,3260
442
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=LgHbxG1kQV36zZPkJcnurHYzwAjMh04lvEHEsfzS1t0,3732
443
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI300X.json",sha256=_fcdkmWvdMqHiH8ZAGke-zXhH7qVPQx5CmKELW5hRCA,4735
444
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=mVH8Rl4sLATinf7_0A9lTS83kv1E7Cm9oC0BL-pc9n4,3732
445
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=16384,device_name=AMD_Instinct_MI325X.json",sha256=JKYW21c0CzR0fgE5ZnYp6C1sY_tVRlm8L_lgak5V5zE,4736
446
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=yTf2R9cngSf4OafucAYlDDn4-bftaMFKaY7qhaBZPqQ,3739
447
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json",sha256=_1eVE7ok935L2V43-3D3bVNWSVaoViia19sh0VrXmXM,4735
448
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=5exlPUKvZxGDR0UT4_Dn5fp-_ZETJ6_Dbw_Vk1u8bbE,3735
449
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI325X.json",sha256=18v6YruKbQ95pXPV8ocV4VdM1zNw3aZFp3WByeUkNSM,4736
450
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=AffDc0_51ML8HiA3757zbD10TZJdUsUDIYIqO4g0yUw,3250
451
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=IEYBNjt9HGnzoOVSWvL0A0jUqq926QD0_BvVYR4RA1Y,3252
452
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=Ns9Y12aZbJnFhcG3nwb67bDqqiQAo9tdTAIe8K2Ajz4,3255
453
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=uGSLFPZXK_JQ3GTDUAEiIecDor1yjbC3bJvMolF0Xl8,3267
454
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H200.json",sha256=8q6ol5JQBWj6yVfzFOn7Gz5MSXTaW9javL7qQmYVOwg,3245
455
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=6jRC0oOpVpq5c1xePFKNRy-Xtmb038i4LE9N2zao2W4,3730
456
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI300X.json",sha256=cFWeyNJtEbs-Bfohgzclxo1rcYGU863oV0BzJyQ4T0w,4734
457
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=SMtsqtQeqcyy8aNwl9hPxRvx_XQdT7I3SBDNJ3OIvwY,3728
458
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=AMD_Instinct_MI325X.json",sha256=ZyOFJB6GUgGZsAjjT43XJwG8P-QrZ5yTvmgzQP7ThQY,4734
459
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=HOxWmCI2ifHmWc0or2y8nEen86jDeLDov1-tuMzuhxo,3256
460
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=csHezh0HGWaNwrblGzMgcE95hqbqjWS8HImLRJYr_ts,3266
461
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=_5weLBinQCDzyV75hHKIT95Y0ce94KWft2_5BC6EkbQ,3254
462
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=DlatRLPaSr8HJuO50gRZ2lzXoelx55EP3SDUdgIT2v4,3269
463
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H200.json",sha256=TXSOoqvi-x8H13xPqrB9qz2T3opEGA-2D0v_4n5BEG4,3259
464
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=ro3drDpWAdeXH7IjMvx8wYGhIuDPOl0bpbJaIB5Msns,3732
465
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json",sha256=w_R2LL8k5jNVUARcqvSgGLvNoQiQC0Mh73ciqSIAz54,4734
466
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=hjDoTXRmEFLKhhmBFEjPowQus_z23ISonxFljql3c9k,3732
467
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI325X.json",sha256=AdOTy7ASetdAXUhNM8buoU8_rLLjcUYF0m8RGFrLWRo,4733
468
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json",sha256=Ru460ZgnUP4U8OsJfwF8n-AI-gfcolNR3_qzoxG6DtY,3254
469
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=K6BGrKw_oHTAtHjsZldcjp-BUM1dIecKXrrRn9OpRGs,3254
470
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json",sha256=4aK_plqztXcJ-hs5_PsAvM0jclMzcO3hd3zTo0FhDro,3251
471
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=qqFoMaObuO8pFWcSb9q0wYsdC4eSCO7B-_ruQhR1N9M,3264
472
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=-5nkLIunjG1ghPoUEtt2AXEQw9oGiilP7K3UvQv9CqE,3252
473
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=WKzddrIXo-KavpuXuouW3aLLAptu5Q4XJUb5K2PLgDM,3262
474
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H200.json",sha256=ad1ZkkSyLJwRGb4Kf24qg5hW_DPmt0BXrKR85oAiV34,3257
475
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_L40S.json",sha256=qX5_yErBEwDRzhv2FvxrS3pEMa8zn0GHzLp5TUMX90g,3872
476
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=ysRCWmxV20K2BYD9XEUtxwREFGtA3QHI191vHRA0k_Q,3733
477
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI300X.json",sha256=L8VA1sfygHoyLJ-Ybfs8DP5c0YWFmMkwxHT8yJ9PEFM,4732
478
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=FJWpDLr13XF3hHiHfJykpjbLiP7Ccu2en3U6BL-QwXw,3732
479
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=AMD_Instinct_MI325X.json",sha256=FnVcfzf5gXkQRt0XgsRzIQVbDPaUDOwWJX_9qOlyvRc,4731
480
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=DxYu8regZOSFu8ugFGA_QbwWK4g8xwQUZF9a_nNY4Cs,3255
481
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=obzfE_9XgsbFNfC9biYOHxR-V_Bgc7PKT8qZZJaiJJc,3262
482
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=qwKy8oaMsd3QrXgQbM_x9xcfYiHK_Ou1CEwDPL5Gbgo,3259
483
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=qUifbWbE4cOKZbIHWmmLx68VRaslQX69eZHwRIQx-7I,3269
484
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H200.json",sha256=JT-ZMLhAqqzSkqivOW5ATTKRlyyaFQkqQDnaPS4DE10,3262
485
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=QsR-Xr9vyuiArMTSo-dX-1DFgATfqwIGOzFuQJAuE_Y,3734
486
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json",sha256=EtVorGY4khTEuimlqZu0AAlPz84PH3ZkDZmVpxLtgQw,4735
487
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=D3wX0_s_ylo3nLIUfaWZmGYtMvX7oiieOLMdQ9k7mng,3734
488
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI325X.json",sha256=JPdO0azlh4yUvbpC9dEHYpRT11ELEr5LXBSb5XP4E_4,4735
489
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json",sha256=BAJnXTZoewwCtzJLUPJ0oYuALv640MvDuLseGcsYaaw,3252
490
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=-Tj7ImS6ZFDof_0VTyq7kVm8XD9B54RD6CUOPSf3Jjg,3265
491
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json",sha256=tme0ydWzIxdABZLk4tU8G_X2dJUYGGZNkQzNGcmcvUc,3261
492
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=g6Ivy4wvadaCAMJ4ZElbUU-CwyTMdbaa49M7IVQhVjk,3273
493
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H200.json",sha256=GstQosPPHUn_I2DV3eMGtn3xXOw6kl1hb8L0EvRsbEU,3261
494
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json",sha256=kF4Fx0yHUmiMSLFNXT6xqAEA4AgCaHOoy_3irv4dNss,3732
495
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI300X.json",sha256=uOlVzTdJl_4VrRK4wmxIb8JKfveFZRjO9syjw_oEeL0,4732
496
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json",sha256=plnx7r9jkcYXkhvapbeeNvUg3NMGdGsIgIPSrfVy2qU,3733
497
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=AMD_Instinct_MI325X.json",sha256=UC-iTgh8_dUSXRaYHOIhDH31KOiJmcfqM_Bv_UBf3ks,4733
498
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json",sha256=sY2nWMPh9lsIkhPCjkHO245wpnfFbrHmzdcZDVFPVww,3265
499
+ "vllm/model_executor/layers/fused_moe/configs/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json",sha256=WQLKugnKzlQ0avf1N-41lRHtG6wJ56DfVPv_nip6NBc,3273
500
+ vllm/model_executor/layers/fused_moe/configs/README,sha256=W2yIZkP9O8GGlg97We9BJfTtWUtPbuz5ZH3esrrjBX0,572
501
+ vllm/model_executor/layers/mamba/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
502
+ vllm/model_executor/layers/mamba/mamba2_metadata.py,sha256=nkO73Gy2bXldUCJsNMptrCwyHvIEL0cdYzNf4zCQn88,5093
503
+ vllm/model_executor/layers/mamba/mamba_mixer.py,sha256=8Ee4TAKRrTICZKJ83XHP56S5-WjFvwN9BNgiFBXUdpc,10210
504
+ vllm/model_executor/layers/mamba/mamba_mixer2.py,sha256=iE78icB4I2GCY6WpLTYYIXoaEQVp_1OhC2TfdpKWtjU,24796
505
+ vllm/model_executor/layers/mamba/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
506
+ vllm/model_executor/layers/mamba/ops/causal_conv1d.py,sha256=BF7OI_1NkflshfWnoYGD1hJ-4sjfEpm30od2EpG_HK8,4539
507
+ vllm/model_executor/layers/mamba/ops/mamba_ssm.py,sha256=cqVVlHBLoeVIo_iB4pLveRkBfXS_g6wlI67oUXx13Hw,14234
508
+ vllm/model_executor/layers/mamba/ops/ssd_bmm.py,sha256=_k43ejVu7CTTVnxNNQPQ8_ByedPi9kUDirJpcD0jzhw,8640
509
+ vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py,sha256=HjpasgV0w53ypVrSbOhI7BbuyUhe7_wYxrW46Z0YYtg,20905
510
+ vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py,sha256=J_HGERmSVwkf4Djb8h8RSd6dC2jH5SKlVJewJlsH0GA,25681
511
+ vllm/model_executor/layers/mamba/ops/ssd_combined.py,sha256=vNqBweQEvwdIEl_woyMenhdItwEAXjG1L0B8LmY89sg,9401
512
+ vllm/model_executor/layers/mamba/ops/ssd_state_passing.py,sha256=4jeOPs4utX_CfzkjIgZv6XUjX-wCnYtJNembsov12-o,7438
513
+ vllm/model_executor/layers/quantization/__init__.py,sha256=Fz9aeiFDXM-4BK5b_-xHf6Nz8vzYiF21XRDADHF_P9c,5220
514
+ vllm/model_executor/layers/quantization/aqlm.py,sha256=4djL6YcziiKOPdzOy2ozHN7Bd0Sbro6H-RCFEvHPtJA,13777
515
+ vllm/model_executor/layers/quantization/auto_round.py,sha256=pE1IiSCk4ekyjAlyN4z5L5_ZSvuBAJZ4mNkZK-ODn_M,13421
516
+ vllm/model_executor/layers/quantization/awq.py,sha256=wLndIMx0ZVe-Kc4hArACCc1jvxskz8l3RVcUUrjYTfw,7356
517
+ vllm/model_executor/layers/quantization/awq_marlin.py,sha256=Dbm1laSvoaTMnwqMgo--iiZq2GSGnd2IHevdhHqb5BY,21383
518
+ vllm/model_executor/layers/quantization/awq_triton.py,sha256=SAYgtOiFmM43fsuLICbGpeJzF9Of1dc3iXIQKhczJ80,12483
519
+ vllm/model_executor/layers/quantization/base_config.py,sha256=f3TzbJi69WdFjKOrNSFja_HKr03J_kevFOPRzDW7cJI,5227
520
+ vllm/model_executor/layers/quantization/bitblas.py,sha256=hhp1TW5SQ2lUHhjM8yshaY268Z7aN09SYPwt3YNZfEk,17560
521
+ vllm/model_executor/layers/quantization/bitsandbytes.py,sha256=8kAoJ32nJ91B-9xm8l2JpkjpvflXjsvXjvH6kja34L0,15321
522
+ vllm/model_executor/layers/quantization/deepspeedfp.py,sha256=Q_QmEjovzhMyJjWoF79-myHwOQGzaC9EMXxZm4Ljf3I,7286
523
+ vllm/model_executor/layers/quantization/experts_int8.py,sha256=hDwPyTqtWh6Naj1clCww9DMvZ-KhHun6sE-gzJmTCKs,7689
524
+ vllm/model_executor/layers/quantization/fbgemm_fp8.py,sha256=rGi__Usa7F08TeGFt6i_ezuBC46UrXSNa4dQNmjeygA,6964
525
+ vllm/model_executor/layers/quantization/fp8.py,sha256=pof2FuOi723wtbF3bSErTM81BR2612IZJdKI4VUT-3o,42243
526
+ vllm/model_executor/layers/quantization/gguf.py,sha256=XPW-xo12mCGQir7hAcxv63-1QnsRT9MzgIcd9N5gAr4,21170
527
+ vllm/model_executor/layers/quantization/gptq.py,sha256=J0tyDms8oZj-MIcaOHMNzKpYqZWSe7i9uZQlRPTewQA,10838
528
+ vllm/model_executor/layers/quantization/gptq_bitblas.py,sha256=iZMUYR0ajVvCC1yz-xBMKoJMBHSZ4NxzWBOnVOWq-uE,17018
529
+ vllm/model_executor/layers/quantization/gptq_marlin.py,sha256=Nr86TGx3r_s1sUoA4Mq9K5uuRasW2hGCxtqoRSDDtKY,26125
530
+ vllm/model_executor/layers/quantization/gptq_marlin_24.py,sha256=f72YUgTT0ijIZoALWx4wn6M0L8aYpZxxKrb-cRWmZKU,11018
531
+ vllm/model_executor/layers/quantization/hqq_marlin.py,sha256=aXhrjhWHL8HdRffMzd2ttSLrvMZAthWXbPPXgk_Mh2Q,12912
532
+ vllm/model_executor/layers/quantization/ipex_quant.py,sha256=ef0PGhe-bCWzpKA33HmmUmcjQtSX5xvMVCcwaJHiGd4,9842
533
+ vllm/model_executor/layers/quantization/kv_cache.py,sha256=8e7uCDHW4rPO27FvzNeN5yk46jNSXgRse8lCzYQkTgM,6226
534
+ vllm/model_executor/layers/quantization/marlin.py,sha256=HyohF1Sv0qJhvn7bmVCYfpvOMoCuyYpBd202-_L3FXs,9727
535
+ vllm/model_executor/layers/quantization/modelopt.py,sha256=yiueoSErDDUr0cIqL_yiniGOUT2QZGcV3YikPSWxplY,31132
536
+ vllm/model_executor/layers/quantization/moe_wna16.py,sha256=jshzyWYVZ7M_uQiAa6ms12d550OQp51JDRFxQ9YGpRY,19842
537
+ vllm/model_executor/layers/quantization/neuron_quant.py,sha256=j7px2wDFac330ezDhUxvu6reYcQPRHTHkqHHUwCOdOk,2730
538
+ vllm/model_executor/layers/quantization/ptpc_fp8.py,sha256=UZ7eQtNb0YiXwuVFUSSemq1Xpp0e-Qc3Q_abIW-BOMI,5351
539
+ vllm/model_executor/layers/quantization/qqq.py,sha256=b_Ar5gW6hUXaugMmj4X3Ajzi9k4QchpVeZELNtSaWXU,10083
540
+ vllm/model_executor/layers/quantization/schema.py,sha256=x7y16hNaValmG7etgyK0RwpeBCPapT1_GznqWBQ5kGg,3749
541
+ vllm/model_executor/layers/quantization/torchao.py,sha256=NhnWRnkK6XrZAdXC7lkSyH7Dcc5Mte3BUDWsogx-sCc,5807
542
+ vllm/model_executor/layers/quantization/tpu_int8.py,sha256=NJ4oRWVCxebWqG8vztU22_ntye7te_Nk6YK1z4gh0ms,4581
543
+ vllm/model_executor/layers/quantization/compressed_tensors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
544
+ vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py,sha256=HfRvTUBaP3ht_hAY3GRicKWtxZpQOXW9cjCzCtb93CI,29225
545
+ vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py,sha256=cPvhgHFH3wX2x4HoTqRWAafoHE3TAoeivaYFhVNjSVI,55262
546
+ vllm/model_executor/layers/quantization/compressed_tensors/triton_scaled_mm.py,sha256=lvGKHzRLtDHkkNAAvoD-PyE8AUt26Rf9YNE29P32iOo,7793
547
+ vllm/model_executor/layers/quantization/compressed_tensors/utils.py,sha256=QIonPI7CuMCJwRYgajuT1ohv6t4zk9-Q165TRe6b53w,7859
548
+ vllm/model_executor/layers/quantization/compressed_tensors/schemes/__init__.py,sha256=sqmMJiVttpUjs-SmJrk3jO4VKsNe5dpmf_aKxWBZeU4,1199
549
+ vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py,sha256=R1W0Q4pEe_WUjR31fTU1bfhPwaA6eiJzpoJBULBQod4,14115
550
+ vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_scheme.py,sha256=phymTDNBBnHfLXhq9vmwklinzKDZKdYrfp59en2VOok,1596
551
+ vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_24.py,sha256=002voQj76mVNI3xF3d3IDmI5LM3-_eGXs7VJOarOWuU,6270
552
+ vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_nvfp4.py,sha256=h1boI5tsFuddBAjvdEPUXRbvmcv_0gdju5lBSjdHU6s,4042
553
+ vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a4_nvfp4.py,sha256=Q1MTj9lFTbOMHIKSIde08HIKTvNp1z9NUDA9IqVMLAA,7615
554
+ vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py,sha256=Z1-7Bh_1qTwnKkbbj2xO1zn19O76iU-uLWRZwdH08QA,5491
555
+ vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py,sha256=pgtg07yokeUP3Cu_CwDrn8fyqPsq1FXWfv79-ADAWR4,6506
556
+ vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py,sha256=TpqBbRB2j5Jbvo4Gczvjy4vqlIEP-j8fljl_nQ66UtI,4930
557
+ vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py,sha256=tmixOaeebRl-bVTUwRwZMlqr-vhBXE6BeixzhzAqCdg,8541
558
+ vllm/model_executor/layers/quantization/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
559
+ vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py,sha256=n6clH1JvR3UZ6TPUKIj4xmnRRliIMaOgM4PM50BrRPs,2941
560
+ vllm/model_executor/layers/quantization/kernels/mixed_precision/__init__.py,sha256=i847I8VXbTiXeV2YD_QpkUjQFJLP20UlSDNeErWg6ng,3205
561
+ vllm/model_executor/layers/quantization/kernels/mixed_precision/allspark.py,sha256=feWdTXJ4hDpJjQEIQhabjfyZs56JVY-uza8UD81i5MA,4444
562
+ vllm/model_executor/layers/quantization/kernels/mixed_precision/bitblas.py,sha256=RUeSb5-gb5wsEUy0-1WfgD3g6qCOjRVBONNXKOrC9dw,12036
563
+ vllm/model_executor/layers/quantization/kernels/mixed_precision/exllama.py,sha256=rKbWF_TyxXjbkm_a28tge9GhOowg121HECmWJIIAm-o,6213
564
+ vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py,sha256=JTAvDrLuO_Qnw8RUASGTCZuSt3R2C7XCSKyqK1v-lzc,5043
565
+ vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py,sha256=L4TpdojuurExORWS-aaPv2gcGFN4yNR0c9XXc0N8WGw,5760
566
+ vllm/model_executor/layers/quantization/kernels/scaled_mm/ScaledMMLinearKernel.py,sha256=0cL2WbqxsGFMJMlg4AgAt5_EDOncrIB2z3vlg6FoOk8,2108
567
+ vllm/model_executor/layers/quantization/kernels/scaled_mm/__init__.py,sha256=4n01vds_ytbnGQ5jFtEJekgSbum5aZzyT39uwE2bGc4,3507
568
+ vllm/model_executor/layers/quantization/kernels/scaled_mm/aiter.py,sha256=EJiwelJCPuryD3f4wYJed52ebxWsGTdGcfTJkD-9O1w,4884
569
+ vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py,sha256=wc9MAgmQFFcg0_wqGRIeXrdAO-_78MoaKfEvqT6odFw,6064
570
+ vllm/model_executor/layers/quantization/kernels/scaled_mm/triton.py,sha256=CgQydI5ERFdaOzOJNR5SPTYIkAUDI-pWr8BqydEB46A,1345
571
+ vllm/model_executor/layers/quantization/kernels/scaled_mm/xla.py,sha256=KxcEOM3PQnLXqvSRIxb6UnGNZlbuE5qrkGY_cdARgfU,4365
572
+ vllm/model_executor/layers/quantization/quark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
573
+ vllm/model_executor/layers/quantization/quark/quark.py,sha256=sKM8z7rZWvizP3jsi7ocsCf8JRqy8JFxhEqEt4xr1Uw,19009
574
+ vllm/model_executor/layers/quantization/quark/quark_moe.py,sha256=T44d54IWP7roR8dC0V6iLaRZi37kjc06kcUilX6LhIQ,10845
575
+ vllm/model_executor/layers/quantization/quark/utils.py,sha256=Y1MHt_RTfPOCSb7_kQHK2CQZCaQvG1A6mMA9s70vbDQ,3658
576
+ vllm/model_executor/layers/quantization/quark/schemes/__init__.py,sha256=TvlHrwGTaJp9nBDtUl4n5xtuuPCR18XQVyYGa11AdMM,353
577
+ vllm/model_executor/layers/quantization/quark/schemes/quark_scheme.py,sha256=YkvgTw1sECoubOhGXMixFd47StURg3bzGiYsur_izzg,1560
578
+ vllm/model_executor/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py,sha256=LXtP0CTKNOMD8v_7r7VxN-m9W71Ngrqtptwl2bh7Egg,4729
579
+ vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py,sha256=v253YQ-a9q7iQXoCm6pxBWT11RFS2ekaCK0Z4j30L48,6302
580
+ vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_int8.py,sha256=TAZaMWM1lydY9bxKj-mdzRbMcUhSVTIPXvUmt8vll0I,5462
581
+ vllm/model_executor/layers/quantization/utils/__init__.py,sha256=k9dh5aEvZi-6ECfjG_Jq2iijwEfjmdRA9fcHjG9uKd8,235
582
+ vllm/model_executor/layers/quantization/utils/allspark_utils.py,sha256=ejjOMJ4V0UhgYiSvYJf5_x6zJA5iQkAYtzLqaw2AuXE,2260
583
+ vllm/model_executor/layers/quantization/utils/bitblas_utils.py,sha256=BGT0qmMJi66O1PX2HS2nd7bApVpLNm5cJIYqUqsr_5g,8213
584
+ vllm/model_executor/layers/quantization/utils/fp8_utils.py,sha256=eHOjXcSKaRt5GkLhuqnqbkeIl1jN1J988eDGNuB1EEk,20075
585
+ vllm/model_executor/layers/quantization/utils/gptq_utils.py,sha256=e49tTqau8qyu95QN4vLXH-UGA-JU6dtpTY6IYPWa1TU,3874
586
+ vllm/model_executor/layers/quantization/utils/int8_utils.py,sha256=CGlycvdrW_w68qGC0fSOkyWaOvKfIEB4VJiJdNXknyM,15114
587
+ vllm/model_executor/layers/quantization/utils/layer_utils.py,sha256=KwNOkW1XYBIOjb8UJgyEKd6T_chNqE_YVskWMtrOmyo,1631
588
+ vllm/model_executor/layers/quantization/utils/machete_utils.py,sha256=hQYHZiCMJFCeKEbk2E3EQmUpiOgQ0b73AO_PjdxeSco,1130
589
+ vllm/model_executor/layers/quantization/utils/marlin_utils.py,sha256=KCrE0m7TRSFKYvAx_qdq_k9ToNCy8oTuUvf0JOtF3RE,19007
590
+ vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py,sha256=mlHZqTXRV0aO5TYqywOnYZUh26FOM01buE3s6oKHh6U,11235
591
+ vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py,sha256=UBXQyCa52ixKbH_kMJZnktbARcmsVy86O8lXuByKkLE,13152
592
+ vllm/model_executor/layers/quantization/utils/marlin_utils_test.py,sha256=GtmzVpQhQCQ8-ajEADNfzUhZW_Ph0GWs0VoSx8QJJ-o,5374
593
+ vllm/model_executor/layers/quantization/utils/marlin_utils_test_24.py,sha256=3_204vZjqNj1Em7VxymB0CfK3TpoHpaS0xY0p29WyDY,17603
594
+ vllm/model_executor/layers/quantization/utils/marlin_utils_test_qqq.py,sha256=zbQaHDmhnWSrZUBLPsT4ZQgIwytQUTwD23DWaSYH_RQ,4145
595
+ vllm/model_executor/layers/quantization/utils/mxfp4_utils.py,sha256=xC1klknQQztk1wVla1y2MNHqU-0V0p8GnfPyyD4d1jo,1645
596
+ vllm/model_executor/layers/quantization/utils/nvfp4_emulation_utils.py,sha256=FKYBY3y9pM3O-3h2iom7hkcS7YaB2y0_lMMwwGKpU5U,3788
597
+ vllm/model_executor/layers/quantization/utils/quant_utils.py,sha256=sn5_s-X8ijBsqrT4n-yXK8PPr_OqBkNPFIb3APHVBJU,19541
598
+ vllm/model_executor/layers/quantization/utils/w8a8_utils.py,sha256=TaE_0zKzrIQZT-r72ZrH5qyo-7yLuL9zHp1j3zKXEWY,16766
599
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=t8TaODfMF2Nq0qg6KOc8NSTs7m90Jcu6Ih3BXUvFb04,3799
600
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=CNI-I9ncqHJ7ukpzgyxdJtz0bd29vsgC38tvMM6TV1U,3803
601
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=CNI-I9ncqHJ7ukpzgyxdJtz0bd29vsgC38tvMM6TV1U,3803
602
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=-j7Xyk4xFaiAD90FeH4AqRSnS82f4owKRGMHbObrrHQ,3250
603
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=sW_T-BdLbjJoFqlr-B5f9emF8E0IdKfy_1wUSIEi55g,3253
604
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=tkLjwLC_aVXhzuvo-2QHkojXZauPJsf3jNHFn1S7uRA,3244
605
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=akDNAjUZ3EXBznF9w6qUcpXxaLWq7oXnX5jy-R9cleI,3246
606
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=JAycl7EaUZtmCoXMjq4JwKXCeXxZ6S4Ts_DricRUw_o,549
607
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=q5KZyi9T-l07P3r1u9i6-Dpw89Upjw1gpTp3f1CluEo,3799
608
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RTnTPFQNg5JULbPLWJDTRNRZHI7FsrTxqSDkZfSbmzw,3806
609
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RTnTPFQNg5JULbPLWJDTRNRZHI7FsrTxqSDkZfSbmzw,3806
610
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=DLCfW5tQ9k74AGZ2yER1etP-HgUGglPp_woJiaPuxgQ,3249
611
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=8v9mdWPs1eXczo3iwFrNnRo2LF9wPU4Scm-r9bL7Fz8,3251
612
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Qoj9rLLRDbKM4IKBCXvN8RcxzSmNPd0TQUiM7CXDqHI,3241
613
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=7OFCbBqqEA7vQ1oiygfW-7Tqqx8OJATaLujtcQIgyTU,3247
614
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4D3Ku4y7BCVEJzueKvQC_KvOR026w3ONWsxfsA_YrEc,3249
615
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=iJZ_tAzoYGUmg9ltil4e8vzKlKi980yTmswEMWqV1Jw,546
616
+ "vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fDomA7uBQKX8kbO_4MFcoBwHhIR_7sOkngQPv6cQq4Y,548
617
+ "vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ucrZBIN_ivmmfMAvkT40xQpH87LdQK38lZbeLWMyV4M,3806
618
+ "vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=zDnVqBqgT-nLkz_Cou-KTPsNIVh-YbTBno9L2MgdRTM,3803
619
+ "vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=zDnVqBqgT-nLkz_Cou-KTPsNIVh-YbTBno9L2MgdRTM,3803
620
+ "vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=zd5cMYrxQ6PD0jKpd3YF6ThT9RGdqgEQnCW6F4W-r4E,3249
621
+ "vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=CjO6dh_qt1iTu5kYRs98tTLL-W6FOzLO4AESMUFHz5s,3254
622
+ "vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=7v4tp0RaT4vxF4urSBrkK5FR_5ikeFQ1htF3DwDl1lk,3249
623
+ "vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=M5F5wzSmFokEm0X8__ogLvdE1QVC6EW8atqq-kp3rVA,3253
624
+ "vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0J2MFgaLkv-mfVE5x363lgVKYU6miLG_xRO3tJUga_M,3249
625
+ "vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=983yfFeeo-BClL_H1g-owXwbA6t0l-kREiy7kLURUMw,550
626
+ "vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=C2eM8RudmP-qXEf_Apg-qcB5n2Ugxf8-7uG8hQDSt1g,3801
627
+ "vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=llI6PWlSDgQf-ouTDXkFYOoSz9u3bzklwBtZYY_fWVM,3807
628
+ "vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=llI6PWlSDgQf-ouTDXkFYOoSz9u3bzklwBtZYY_fWVM,3807
629
+ "vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=q9HUcoL0cdZCOWZ8MKbcpR8NSy5iNEBq6NPTaHLgRB0,3242
630
+ "vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=uJu6Gv4e80vxVrDyBo8_y47tOV03RmWVsMIWQ-bbW6Q,3251
631
+ "vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4ubbhwSFX_XbefRLEkLoWxJkcetFWPzsszPu0X3_Wrw,3242
632
+ "vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=euiKvhb3DXkvPPQJLqNE_xN2evsTOoZnVIiquyN2Cm4,3246
633
+ "vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FhyniGTx5QeCuVrBSVTQys6q05Pr5lPEcPykpAX7Iyo,3247
634
+ "vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=pLQvMaVvlet_JenEz25mxxplAaHNisl6SFTSZ7lYP2w,548
635
+ "vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=uAa-ZQmASwlqZbr1l1CM6FyJI9irNdLBzc1U5Hdyw1E,3802
636
+ "vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RnN7lfu15CE-4ywMjAbEz8wWV743AP-1Fq5U_j8EQeI,3812
637
+ "vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RnN7lfu15CE-4ywMjAbEz8wWV743AP-1Fq5U_j8EQeI,3812
638
+ "vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=cE3BscS_zEtF_m_jr51IPfpaZZgIEojmhTHsrb9jABM,3260
639
+ "vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=SScyo-oYCBxJR9C7ZIKu_pJJNiXdpT13kYe26rddvPQ,3261
640
+ "vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0v17v78pETXv6S2ZoibekxOVhiTmCm807DYG4DONUck,3259
641
+ "vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=I44PvJj758-sw_fCOVROLTpG0NQ5_5PCYyQcpZC1YSY,3259
642
+ "vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ulvOEAFO8c-UOa34FEZrjOkCR6ovhJlfFFDhmaKIBiU,3245
643
+ "vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=BiZowqExbvXftuE37SYcheOdtYX7Z5BEXyykJ6GbYSk,3254
644
+ "vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-CVHqClROli9FWe_FnlnuAG2LiFivDFK_nghH6t-BWc,3261
645
+ "vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=7ok0uooTihvRSckZMNd6jInRvht_xkC5posHO66ejqc,552
646
+ "vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=h_Z6wBKdSGBEo5BfQKaxuFlxztrnbbZR0pkcYKv92sk,551
647
+ "vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=k63VgniyN3Rl_-h1hYmT_q9QZtSFqQmXBqhEXJQkxqE,3800
648
+ "vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=icswqRYUsUdoQMrv4YIqO46GG9BzepmBJmnTre9-VjU,3800
649
+ "vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=icswqRYUsUdoQMrv4YIqO46GG9BzepmBJmnTre9-VjU,3800
650
+ "vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=sL0E4zZzb01g6GHaTCXltg20uSbthXHSJFQ0SaxZ7PU,3245
651
+ "vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=MZcJz7BjwVOHHHxvYqGrWw77WnxslYhwW80bZw-jSKQ,3249
652
+ "vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=GsLoYkaZ2p4Qu0Coj-X90s7JWyfZBOloIHPlyNKSIes,3246
653
+ "vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4--7YWnJYUK4XmQ2zZ4M1ZYdKvUkET0VkNgIBn6xaOA,3247
654
+ "vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=NjEA2QjOVXyOaVSMPch5qa1Dq3igbW7MmE986-7taW0,547
655
+ "vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=21Buh2aiGTHjpW45Rm-TwZD8MSaAy8NMUrK5l_hGT5k,3803
656
+ "vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=P8p-dZZt_D61G6k3PgUetF01xzTRmCDJAnqCIsSDW8I,3805
657
+ "vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=P8p-dZZt_D61G6k3PgUetF01xzTRmCDJAnqCIsSDW8I,3805
658
+ "vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=8zuJhFdd6aXREpiqPFhIKEFWA5lgLVGrG0-a9UXcBqk,3262
659
+ "vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=_42bDZX4VODErI6OL-NrWja36iNHC4DzgF1l5Mk67-c,3248
660
+ "vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Zn1TvhAoPOv0zQBYHOZhwdDw3oqyxm0zIa7IJkTCHpo,3247
661
+ "vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=H9wONEU0XXSxOJfkx5UkS8Ss3A2QCp9G0XNoJEqE9nQ,548
662
+ "vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=2T2TYZhXgC97slH92HQ8GvZS3KuUt1ZiC3RtudPVEPA,3802
663
+ "vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=b6_bhUuQrI9HYvvwmAvUYh4v1GZ8w0sjApOmwuj_t8Y,3806
664
+ "vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=b6_bhUuQrI9HYvvwmAvUYh4v1GZ8w0sjApOmwuj_t8Y,3806
665
+ "vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=yqjO7zML7EseBJw6Bn5MTyHeAitkPsl1dndXeL6Rn6A,3257
666
+ "vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-nQIhKAOVCQrxLV6HDlcD0V8HMWvqrv-vyiORVU7qls,3244
667
+ "vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=KKmCvNh5T_qfD8v7JijMqXxQ5L6-gRX7oc6c5re6EF0,3248
668
+ "vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=M3nwpZd2-0w263ywZt9gaw53z7MN673T5tl4tc43Ntk,3249
669
+ "vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=H9wONEU0XXSxOJfkx5UkS8Ss3A2QCp9G0XNoJEqE9nQ,548
670
+ "vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=KmEgJ7zP2Sr_7GsAfL-12_g2S2a2wVpnxgCiF5dFiLI,3802
671
+ "vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=J4SXwpsioBRdTXOaj2OjrdNrEuW1NF43cLds65UWzCY,3808
672
+ "vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=J4SXwpsioBRdTXOaj2OjrdNrEuW1NF43cLds65UWzCY,3808
673
+ "vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=UjBOmVqYynBH3dJVuMJXjKnuZ6LssohzzEBpLBG4_G4,3256
674
+ "vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=8BJsjc2UUYdotrIqwyzisjrq0wcyW4jnTo_M8J3qYwA,3263
675
+ "vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=vLoV3JMtvHOKpR5D1BeCQPMuYlWUAlrXu54gByNkwKY,3266
676
+ "vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Mtw7a9BSspj2TzC-aPxE82o1LEvwzgbUuIofwRxUNA0,3263
677
+ "vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=N0kCPHvybNK-HvMO2EqNDLkj7m7WrHTl-3AD32LBD4k,3248
678
+ "vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=mjh-AgJN_IoWAc1uwhUiB1lE3ufAPDf-KPP6vUTrDKw,3251
679
+ "vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=NHdx3tZnfLF7NplswMzcTRbQEQFLtChg4rd7GU9lMbM,3262
680
+ "vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=mcF12eQTtGxocrVIA3I98NHd1NLd0-8EyfXtqDgv0PM,549
681
+ "vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=AThoa7FUcGdNXYB_v9iMpBh2X8C0iLfc7y-C0xy2cRY,548
682
+ "vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=MJgIvZHf01ju8IWEVO6vyMedy5OTZxDpzv6A7_8W-Tg,3813
683
+ "vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=AT2yrMoTvmoizi4sxwLtiULZ57P1CBhKGg9-6Gxnuc4,3819
684
+ "vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=AT2yrMoTvmoizi4sxwLtiULZ57P1CBhKGg9-6Gxnuc4,3819
685
+ "vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=laYeH4w0iZOj2Yg3vDgtKoroNQnwBEX4GUGLrO9095I,3260
686
+ "vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=TWcPDZ2miQMD6OWDC1FteRs80ND9RC-oJL3PLVmJbtI,3257
687
+ "vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=cPtr1UJq_B-dTqgMrVm8ptiYXA6qOy_F8rs2f7ljuEI,3811
688
+ "vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=cobt_ZhR3dt2CySr12bGPVwn1oS98YvGLdIh9H8BDQ0,3801
689
+ "vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=cobt_ZhR3dt2CySr12bGPVwn1oS98YvGLdIh9H8BDQ0,3801
690
+ "vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=6Z7kIa14RjVq3ek_C15q5mUu1IrY2r0OP8S-_pm-MYU,3252
691
+ "vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=r63SZkUJJV87B00hAX074_uaC7wwQXdurlJsB1jUA0I,3254
692
+ "vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=hL3doX7zzxld3UcS8p9ACSadDaE6t3xXlYwM7X3GOeI,3252
693
+ "vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=xBhxdCFf3waTUsLxJxA54R90zODbC_DKI3XXBVKjKRw,3252
694
+ "vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=2ks7TQUULAD-Zn5i69YHo_2hpmsmxlocdYmJccSh2No,552
695
+ "vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=eiI8X2fFNknJmiT0uHbzSaEKQwwZk5bxn676gNvcyg0,3802
696
+ "vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fQQDJMlLdYsY5Cosg5HkRzvrJ4asjQmc0WGgoD4bC20,3810
697
+ "vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fQQDJMlLdYsY5Cosg5HkRzvrJ4asjQmc0WGgoD4bC20,3810
698
+ "vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=O_SV2vo_oaABfT6Mxqcmo12pnhKtfX4TnXfe02OcHJk,3254
699
+ "vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=g12Xkurat7oUS7LdS9pHLKFlur4_FaMGiGBvdq-iBCs,3242
700
+ "vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=EWLxbWncwGJyL-dV6EO-s8kk25wfYrESa0STjCnzD64,3244
701
+ "vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=tFdrY5nADmXUlShdN8w8Jzkxuj_RPLXCRceX9FhQ35E,3251
702
+ "vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=M-ewEHbgHLBLYLi1Hgz5Pp4kypnUiCRo0ut2scNnvDw,550
703
+ "vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=zTzLbdff09HwMuWlWpoAIgQZ6NEjsFXSF0Y5z4Be7Ig,3802
704
+ "vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=dcPHbYEbz8T9SM5-a5sP_K_npDkhH7u0KM9aiLn9esE,3806
705
+ "vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=dcPHbYEbz8T9SM5-a5sP_K_npDkhH7u0KM9aiLn9esE,3806
706
+ "vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=TO2qRGmp37v53Zqu8Joeq_BSbtwM_mpVoozGyoNg0-o,3254
707
+ "vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=QqijmgLqIoBUxRPnuUQGsoQASRFRMsCVQKTjEjGecVo,3247
708
+ "vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=0xquf00fgfrDODpaxyre0VDcjqfzqExj939rzeJ8pMo,3244
709
+ "vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ipg8iK8w2ySRe1Z08YJUWAHX43rvkrXpR6svxRhSnFE,548
710
+ "vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-wuzdNXf3K0jfFQGB8nFSyoSZ4BfAvIkY10k6FdjnLY,3800
711
+ "vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-o9QqqQQ-9kRVCuDOUGBuKXHRTd0asGTzrDcHGGYJLQ,3799
712
+ "vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-o9QqqQQ-9kRVCuDOUGBuKXHRTd0asGTzrDcHGGYJLQ,3799
713
+ "vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=DbemSQdo2h5vGjSNB6Fovnn-aAGfjti04Bp-5KxLALk,3246
714
+ "vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=6glWpljtfiuspJv_Esg_LWCDDQ57d2HETsOIv0zr2Ec,3249
715
+ "vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=qG6v3n3qF6LE2DdGT-mDIXecZ1a7vg7p3QqXYCMX85k,3254
716
+ "vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=EgFTGyW_YuDwyEDUCoGglyI1ETdj9J7AR0UfJ86jMoI,3249
717
+ "vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4--7YWnJYUK4XmQ2zZ4M1ZYdKvUkET0VkNgIBn6xaOA,3247
718
+ "vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=ZfPPlx0qcuR4WjaFAE-W1QZgSPAMf3NyGcpvQIvyFMs,3245
719
+ "vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=NiorJgOotxkQcP49ID3z5al1UA4QQDrT8MvbCwAWL5Y,3248
720
+ "vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=QgSlDAhlB2W4bzTd2O98UL-C_IKfJm_cVmQz8FqsLF0,361
721
+ "vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=i3wy_CBO7BQQVhKReRC2F0PaRIQDdN9F5lJ7kD0xe1I,548
722
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=QpkqpJnyjuHH8Zo4U4QZgehUF2F2uQDZFb8fdhixXWI,3794
723
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=wv5GjGAA-NyJ41SYdYG3tPAgwf6JK7Zf6SaWALQ5c3Y,3806
724
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=wv5GjGAA-NyJ41SYdYG3tPAgwf6JK7Zf6SaWALQ5c3Y,3806
725
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=RRMNeM_qiHvlUTOAeqwgs7ukSoAZSlK8XN4z8hgWl0k,3258
726
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=brB0-FFr-Sv2bdrz4DQJ_NaFhETctf1g4Yzwj_Fcczc,3251
727
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bPQWtvaJrzOOIgI-R-MIxs_f4yC_FobkDydu3OkOFtg,3252
728
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=RYLh-Uim9U2_djLkFwwpV0rNQHik0tZHzecuj1_hPLw,3248
729
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ZRgiuHZ2SFC6u-WV5DGwau4k1RiPLI67eENO0e-5Ylg,3253
730
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4EzbnLWHVwrjyKYPMcDxbxM2o-krjlT0YXvM8oPH5Cg,549
731
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=OFgOtRkUHwyOT7Hk_BQft_WzuZOwbhMSLP65Fbr4goA,3799
732
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=AOu05da2LZbCzD9SKsrgnzH-ih3CdXsRIdJc_4J1lps,3807
733
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=AOu05da2LZbCzD9SKsrgnzH-ih3CdXsRIdJc_4J1lps,3807
734
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=qzmFm2pqxphir1LBrycDZp5JA4It8OdQeQ5iTrTwLNE,3253
735
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=2UyOMRMdbvHt6WlZdOKALm3Or0eMCx7vvwgLiCYyoOs,3259
736
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=-hP_P8NM0K04mGzTmpGBNibQ5xxh5gPz5WtoMXhoz1E,3253
737
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=vEU4_YOMnLdYFf1BkBEdFbGRMG8KLhsO_t0gv7vaO4Y,3244
738
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FB5Le4obvPoCgFSnC_3-Uh59n-Mt4Rol8saXVcK3RPw,3252
739
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=k1rzpgm9m19AHf_HPQcNCuSBtAwFgMePUYB1jZeFyYY,549
740
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=9IbzTwLRgTCfFLSvjEWKiajCjG81R-wTljIV2zUYUA8,3809
741
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=B4uEwuftvaj9gHGdoDBnVhxbNRmzUtzu4LH0u-O7voA,3804
742
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=B4uEwuftvaj9gHGdoDBnVhxbNRmzUtzu4LH0u-O7voA,3804
743
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=ZTPWtJA3JBL2jhy7C60RdsntKCN8oQ-DDIL17ok7OB4,3257
744
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=mokCWoXdKi8p4mLYqgljjwDRJWK5I2oF6_MJuObi5sU,3254
745
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=kLviGvVngpgOuelfKtvv9Is7MWQ89rGxlomMRP6t0Ic,3250
746
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=bIVRtaaHThozH54VIte0Nk0sOGV67K4s2YZUE6QWx2s,3252
747
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=_YXzQ6N3QpF3Ou1Fy-51YyL-J3i5gOBVCgSM42vOT9I,549
748
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=csaz7AaVDTvCuzaptN-e8K1PNuIwZm9OwnPSJydHI90,3803
749
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=scfO3_ncCtyrqcYSnIoAZTMfvBzjB4o_0_bdiiVSNh4,3803
750
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=scfO3_ncCtyrqcYSnIoAZTMfvBzjB4o_0_bdiiVSNh4,3803
751
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=CE1wRLyFONo4_icKO8fcTTX-5giKNJ9_1F-2mr-lGQU,3257
752
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=JdUaLiMmf8oEbwuhPHMIncvWzXS2SxOEgfM80ZjM7l0,3259
753
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=vlys0Zi_CaaU41OHGbWSBtbVglFi98bgqEySBMc9Sdg,3258
754
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=YWyByOlKSqp5lbcUa8eu6N2dHRKJqJDbCDSjdDQJngg,3249
755
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=GY9VBPi21K6vJlF1NOEzCyqMS7LX3xq5dRxrK0jvIHk,3244
756
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=8LWF55ZPjrOY_sEdRGqf1eLcTNySgUiiWNWsN4EGxLY,3247
757
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=j5PTW0IC4Z2yQIygcdICaOsvb639u6Mv-ZpJYkrBQ2k,3254
758
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=_Cc0EqUzl6d93OxWJRWYbYpEaTIp0glJhdfV-GSAi5M,552
759
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=ZSHvdnC2vOXI2HPW1iNI9HdihoLcNYlRLMF85pqjWZE,551
760
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=SkyMLsoxGoHdO4kgTerihone7eEi0nmHlrvZUI1I_V4,3804
761
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=6Jo2hw2gQpyiNoCRZpGItu4MBkYytzdW-VggWUC4fPE,3804
762
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=6Jo2hw2gQpyiNoCRZpGItu4MBkYytzdW-VggWUC4fPE,3804
763
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=xbDfUYLphVtZWJojZWODlxGMCoiIgxn4LsnD9ge3r9A,3257
764
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128,128].json",sha256=hqh8TQw3t5hPM9u7rmHPuaMjwgxmQ-Zt35fSTgOS0HQ,3261
765
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Ggy4hejkcWjiw5Bi-wGzSP5JLVuvOjip_rbjXFBJZbs,3257
766
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=Xy4mgZx5iiEvuv2ydO4dFNIT8s0jgBhNHE1vu93fGJM,3250
767
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=qKG9hmaxN_7tCB_06L1dh0csxs3TGeya9B-X6W-tNhg,3245
768
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=jb7vGi1RJefImkT3BZU_9iOkiCulcd5oDjxpVSt7big,3246
769
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=x476nFeltB_2iO9_6y-z2P_unAbh7ghLPFi5z2LOTOo,3253
770
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=_Cc0EqUzl6d93OxWJRWYbYpEaTIp0glJhdfV-GSAi5M,552
771
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=TWpzs48j0QwApAsBWt3iIlu6cqR46Meslyp96MOANcc,551
772
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=i5b52A1Oe8kCdPrPLBGud7OMHm8779JD0rBocYO_lo4,3797
773
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=U20Q4JwG63kU-6cc241VHGdpettCWbBXRJ9EZ-fbkqA,3803
774
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=U20Q4JwG63kU-6cc241VHGdpettCWbBXRJ9EZ-fbkqA,3803
775
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4uWiQMh3cZY_EtLA0a3PU8Z1VCunF2PpolTPYeP9Rjo,3256
776
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=D0moiKqS73oril32iNj5gRJUWpT2SZ5jf-ZesUZnNv4,3254
777
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=toHzCprq0KetQI0-9IrLYCIm1bQ0nSeP1gXArU0GogI,3245
778
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=N37dUL_J2JVpgLFlnlz__Ck7Z4njROnNAO8V2oiDqr8,3253
779
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=pGZZj_gZms1T9Zgjs4tbIm90LhbEy1UUkkgrto9jPts,551
780
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=fqnjZCn0gbY7fO9JwZOHMYJJHe8gceWhWCZOFPRUlYM,3802
781
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=OTZt3ell0OZ7Cg5L17K2NPU4UwayAkTihV5HjUmUiAw,3810
782
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=OTZt3ell0OZ7Cg5L17K2NPU4UwayAkTihV5HjUmUiAw,3810
783
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=LdtOyXsA9r18GiFkmDOkiRinsDSZBZ8NYapL59EZ4iM,3264
784
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=07GarBHmiiYkyqn-qxEtrAcgCETuUbqm6HqlbH9yJi8,3252
785
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=xMNxtLL_8tyg4TWSt_llz_IJ2qlxc2NEwhUzhV1VsG8,3252
786
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=kEuvCsW3YNByF-DALYqPZpW3TL8ZbtQ5gUNq7-8YvZ4,3252
787
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=4uNqB71a6ctZ-c4tF3r66vOsHFrqcR28g_UWy0N8iBo,550
788
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=QkrfZ69jxW_mweigtHL5R0Sv_WcSBp7wjFX75G9kbHw,3805
789
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=xMGmoN2ZTjKQBZS-k75mFTPpAEbPR3kyMwqZVtgbEiM,3802
790
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=xMGmoN2ZTjKQBZS-k75mFTPpAEbPR3kyMwqZVtgbEiM,3802
791
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=PD4AJYCkHfy2ivv9baMouFXzBTy0eKMumbAfxfm91HI,3256
792
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128,128].json",sha256=iu8M35YR-RDpKWbjXSRzk02sW9nr_dtbhalfLSNtxNs,3251
793
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=FFBjSWlpKXMxfAUUYUqXbOK_Hd7qBeBsfbcaa9uB4qY,3249
794
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=NVIDIA_L20,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=41m0bvskFUzVtlr_yppBr4PZ0cVkqHvy9Hrc5pUCUyY,552
795
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=2VxMGfWtxTzXcF0bP3d5s7rc1cKb5TNBAn-WiCKAngw,3804
796
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=VtQGO3fEiyhbKG4sl07cuVc6id2EtKeV05ozLmN_ENQ,3807
797
+ "vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=VtQGO3fEiyhbKG4sl07cuVc6id2EtKeV05ozLmN_ENQ,3807
798
+ "vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=W3cYFteFIZLu5c1K41cOh4_-WZzFU6-jGnZocDzmKaA,3796
799
+ "vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=HIoWSUgAOcNaK2kj2YwDjDa23PzQVTT2C2ePW985Ovw,3805
800
+ "vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI325_OAM,dtype=fp8_w8a8,block_shape=[128,128].json",sha256=HIoWSUgAOcNaK2kj2YwDjDa23PzQVTT2C2ePW985Ovw,3805
801
+ vllm/model_executor/model_loader/__init__.py,sha256=fDu4hGQAHb614QbKbbpsW8yycr5DYetA8gUwaWeoyTo,2756
802
+ vllm/model_executor/model_loader/base_loader.py,sha256=I6Q-W48VimC49Chd6FJSm4kIWoLe7bGDoF5wy37SuBI,1743
803
+ vllm/model_executor/model_loader/bitsandbytes_loader.py,sha256=K0VLTr3VIS5DQxVdBzvnH_7j2iUawmtGtqbZlhaCbYE,25588
804
+ vllm/model_executor/model_loader/default_loader.py,sha256=heCRhupa4ANn7ssMBOY6unTVIF_HCA-FdWcfDVp0kcU,11409
805
+ vllm/model_executor/model_loader/dummy_loader.py,sha256=QHjsazcUyw6aMTI4wocgtFzDylK6rDT0paw733iOE5A,1098
806
+ vllm/model_executor/model_loader/gguf_loader.py,sha256=uApRTt0Mb9WSaLQp3_kTJmlEUbSIzJMUbCGV9zeqkv4,5490
807
+ vllm/model_executor/model_loader/neuron.py,sha256=S7924o11-QrnIixME0vuyq-yTqqeReKNVkJNIQpDBUk,19869
808
+ vllm/model_executor/model_loader/neuronx_distributed.py,sha256=FaWp1IGJas7zTsh8U-3842Ylmth_DU51QEPXjJrWPGs,31285
809
+ vllm/model_executor/model_loader/runai_streamer_loader.py,sha256=gQg_bRxKOZFOLf7eG24wTp21M_jMLJlzH9hsmZeL5Pw,4415
810
+ vllm/model_executor/model_loader/sharded_state_loader.py,sha256=N4uH8NgaKX7TDatrOLnx3CK5hCmTSiofeZRhY7xn72A,8064
811
+ vllm/model_executor/model_loader/tensorizer.py,sha256=Z6HrzLErYsQpVmQpmRyF-OhTggzGr2oFQgwNN7jUVYE,24293
812
+ vllm/model_executor/model_loader/tensorizer_loader.py,sha256=CtkZz8sOfK8h5CWt0HDZAl_I5AIvYx7I3m9UMdbq7zE,5189
813
+ vllm/model_executor/model_loader/tpu.py,sha256=WxhmPnJhYVKBPL-Z59agvQ7JOVDJIjDBjO-yQhRqoeg,4720
814
+ vllm/model_executor/model_loader/utils.py,sha256=VoCUn_EmYwhisbyvg3-MXozARlPe8JOv1nEc3GC6sPA,13132
815
+ vllm/model_executor/model_loader/weight_utils.py,sha256=VsZgTiLHzh_tIZnQUe6r8orJd8WUnNdvH6yiz6AsIsU,29950
816
+ vllm/model_executor/models/__init__.py,sha256=RazsVxlJOcl7TJEpnm9pkfhXQYz6CaKo5a1OKe-oCzE,932
817
+ vllm/model_executor/models/adapters.py,sha256=ULy-8VzK8Cco0gwYD3PsfaeHb_lYEAnoSA31KYW7uEQ,8326
818
+ vllm/model_executor/models/aimv2.py,sha256=fzq5a8XtmAmoEHnQT0kEqJRb5XX0gXR1E8bANLy3YOc,8661
819
+ vllm/model_executor/models/arctic.py,sha256=PMmXq8JXcLb8gNuwH9I7X4EwC4CvTYY5k5bCIqw2KUo,24472
820
+ vllm/model_executor/models/aria.py,sha256=iU4QaBrH7KAi0wS2N5u0zmb1-BkDaofEhbl6AYrTGTE,25634
821
+ vllm/model_executor/models/aya_vision.py,sha256=Vc3G146-2AERXd36wiljRGasfacvZscOxmFQQDWPe1U,18952
822
+ vllm/model_executor/models/baichuan.py,sha256=JA0soAwsSEjWM88MtkE8ymKKwY6ykS2vF5vzaEgySQw,18947
823
+ vllm/model_executor/models/bamba.py,sha256=jj3pV57ECvgeTXnWdooSbJn-Nv5FKsNJ_3VWMgs2UaA,21376
824
+ vllm/model_executor/models/bart.py,sha256=YBWWU4aL0nOKA0tVHqItdGi3z2piEDg9FYoOZ0std6U,33889
825
+ vllm/model_executor/models/bert.py,sha256=D7JtXlyyQ48r_DlD0EQaC7O5L5uO3E-Ul5oZ7yZXiTw,20664
826
+ vllm/model_executor/models/bert_with_rope.py,sha256=yeypk-MmIGAYNitKyTjBVmEpz8x3svyU4KmosbdBbco,30562
827
+ vllm/model_executor/models/blip.py,sha256=E0Qw2SGxfBPBttkLIqhuIZTmwkS6tAy2-9R48mc3NYM,12402
828
+ vllm/model_executor/models/blip2.py,sha256=-NOhxMXu62S1KMSnHZD7vdnEwRQE3OWo8ZTHU5Bi25E,25926
829
+ vllm/model_executor/models/bloom.py,sha256=JpU-c7z6HdjADppHa57wNyNcRG33Q7HEOo8yYnTaIaI,14523
830
+ vllm/model_executor/models/chameleon.py,sha256=BaJPqY5E2VdMXXWDphCVzhevj9ZWjylkDN-3kBBCHdw,45423
831
+ vllm/model_executor/models/chatglm.py,sha256=yqtyEDROvPTfBJHoIFjO5ytOzujyeZnJtfR_NiC6s-U,18460
832
+ vllm/model_executor/models/clip.py,sha256=rrt1JfGp1KSVWV9ioWgIks6wZEw_WT2HKvGRuf0T0lY,14924
833
+ vllm/model_executor/models/commandr.py,sha256=psHUZXWl-KccoJMZOam0pgBdL1JHVyiWNJRvUbc2110,19380
834
+ vllm/model_executor/models/constant_size_cache.py,sha256=O7tfU9CrkKNIHVc8FIPP4IGOn_naP5Q8r-IagakUYzE,5908
835
+ vllm/model_executor/models/dbrx.py,sha256=u_ITrs1XlLjd0q_p-ohcEBrKRdvq_i6HMfq6wEHyeHg,18433
836
+ vllm/model_executor/models/deepseek.py,sha256=s9WL-LoXexUURKjvwOB9FRYKgoNBmetF378CJu9cV6c,19869
837
+ vllm/model_executor/models/deepseek_mtp.py,sha256=x_dcLh5A2iF0lV95adYsWEiVS3ROfwf0KSrZky3EYjg,11032
838
+ vllm/model_executor/models/deepseek_v2.py,sha256=hiyRDfgg1_2Ybgo_ASMH53rYnFyUZuW36AZZJ4UQcDM,35306
839
+ vllm/model_executor/models/deepseek_vl2.py,sha256=qC4vxp3w3AzZRnP14V1gBh_1qGhrCjiXY-ofameXE6g,25216
840
+ vllm/model_executor/models/eagle.py,sha256=qZDWZbOQ8jQVP_HcQiLAtZSYeZlIZ_wKcftv_g6mFw0,11765
841
+ vllm/model_executor/models/exaone.py,sha256=eW-1d3aj-VNOPFQARNxzq3IYCAobAHgwlourzeUiSFc,21157
842
+ vllm/model_executor/models/fairseq2_llama.py,sha256=62IX9r3OrOKXD-TiNJRx_mI_TMLyqUOvmQxk8aoUZ-g,6555
843
+ vllm/model_executor/models/falcon.py,sha256=rjS9x_rMHNfwdXj-_xZf4FGPc7XorREripli3MRdra8,21383
844
+ vllm/model_executor/models/falcon_h1.py,sha256=hVNRgtOT0lcOKk9OXkt0UhGTP2r3Jg8BwvwkrLy3kd8,26901
845
+ vllm/model_executor/models/florence2.py,sha256=20pcLt4cLfq91a-GQhfPyNQ3U4qsckmk0lb-0hEnerY,39262
846
+ vllm/model_executor/models/fuyu.py,sha256=pUtD9A4L0QQKbwAi91QEYu_QSBPn5OXH-S4aNA_gw5A,14297
847
+ vllm/model_executor/models/gemma.py,sha256=q4BnHMW2qhijDZgIEJF3peJLX82DOqdqddiQbtNl2_s,16308
848
+ vllm/model_executor/models/gemma2.py,sha256=1T2xz68BiLaLvaUlFSnwvQo51OCP0LaoAYjVYxoSedM,17539
849
+ vllm/model_executor/models/gemma3.py,sha256=c9pkT5htdCqaC97SLtfgcd2IG-dEz8gnvD_2_cIRj8I,21750
850
+ vllm/model_executor/models/gemma3_mm.py,sha256=laaXS6x1avCiQ-4yr1YAVHY8VDyR4AqF4BnMRbh7ad4,25673
851
+ vllm/model_executor/models/glm.py,sha256=d2mDle-FA4NVjt8bVZwYQm1ddtp75b8Ji-1aVlYKb28,1059
852
+ vllm/model_executor/models/glm4.py,sha256=J_NOzM6avf9dNJjJkA6XnKaXblh-xEaynmLvPrcShNg,11864
853
+ vllm/model_executor/models/glm4v.py,sha256=P-vNsmV0cJZX1l0V0s3QW-S4lYNcg6YDWgpsDudETco,22175
854
+ vllm/model_executor/models/gpt2.py,sha256=gL4tgfMClO6oFKvO26KnwoDQAbJ-QIPUC20cOcjNnr8,12907
855
+ vllm/model_executor/models/gpt_bigcode.py,sha256=Z8KWyYGY0to0iizR05TsVSXezYAezcJzT1wHj-9MNDk,13179
856
+ vllm/model_executor/models/gpt_j.py,sha256=DXwkPWHvcyfYANh_q9cD4hsF6oDDrB41Qbznupbw2ew,13248
857
+ vllm/model_executor/models/gpt_neox.py,sha256=U4Qta239p3CHLT4haOgzh-ZijHTf1XBS5gfYHcWHi18,13387
858
+ vllm/model_executor/models/granite.py,sha256=0LHRQCxu_4SaKhqb4CRZ1h7saWbjj9MVpoOoLXzQ9zY,20125
859
+ vllm/model_executor/models/granite_speech.py,sha256=R4vq7mkynuzrFyiIr28nCtDdn5LC2hoaLcF2e9ZMKYg,31318
860
+ vllm/model_executor/models/granitemoe.py,sha256=xEo_FLUnplEBWQzwzOSlmXeB-7aKrjZsqkgQP-JHzmA,17976
861
+ vllm/model_executor/models/granitemoehybrid.py,sha256=dTH6y6dJP7cvNzA1zdprOMX1SZE7ZMzS91ZiuDY1JHM,24511
862
+ vllm/model_executor/models/granitemoeshared.py,sha256=aOMNA3QROPISeXBLYNLp9zxVTLcDHeI9mzgA3Wj-Mqg,13763
863
+ vllm/model_executor/models/gritlm.py,sha256=JeCdwTJLg_VHJX0YDDA1msgjDZGA7Ow6O1i6YTyIoek,8165
864
+ vllm/model_executor/models/grok1.py,sha256=htPR-OsKJVdor9c3iwsWcLwZM8wyt9TfZTB2yQeyzRQ,22507
865
+ vllm/model_executor/models/h2ovl.py,sha256=tEJy9W5kFfD-y40mESIZy1jygbYtOfDUUc3IIJI5Nn8,18345
866
+ vllm/model_executor/models/idefics2_vision_model.py,sha256=kIJ7fIH6EqRDz9zyb8aybH28CCntgRODy0Sc-VNi3io,15032
867
+ vllm/model_executor/models/idefics3.py,sha256=wmnVrKsyvSq1xbBAEBLFM4NrUVD3y_GI4OIBI6ofNqY,28073
868
+ vllm/model_executor/models/interfaces.py,sha256=bW_OcBvljObG6Qk8quTDGDvZ-O2YbjFKv6QeMcHD6Vo,16149
869
+ vllm/model_executor/models/interfaces_base.py,sha256=JzOV3tbmnDd7T9ioD-0QhhPfwsTpJYGthSUUov-hfDg,4432
870
+ vllm/model_executor/models/intern_vit.py,sha256=moNji6HTfcM01A74d4g9m5ZUumuMnvxgWnhpse7bh5k,17312
871
+ vllm/model_executor/models/internlm2.py,sha256=zqG49lANNdrA8QdVl6ufUA0ibCQJMDt9UBK4Y7GCycM,17258
872
+ vllm/model_executor/models/internlm2_ve.py,sha256=PBc0lISli_zQWI1KUPFaYGiU0MMqjjB0i48DbjayVrQ,5801
873
+ vllm/model_executor/models/internvl.py,sha256=RJ8GqtGbDCW4ie3AYVQiWkqsC6LsKlo9DA97rBYffRY,51523
874
+ vllm/model_executor/models/jais.py,sha256=g5_UOoxXLe77ynXWP9yiqa40Nvz_bhK6buqdfEubhP0,14615
875
+ vllm/model_executor/models/jamba.py,sha256=jf-HS6Qd2UUZu61dG450kub4Xi3pUMGA4X-EzbrVPEU,24339
876
+ vllm/model_executor/models/kimi_vl.py,sha256=R8fwJ5RNx_6T_lAWkICoXSRMdIKuo96oMbZ9sGjldnU,24433
877
+ vllm/model_executor/models/llama.py,sha256=9F2-D-X6iV4E0Zqly9e4gfi4bf8awz0IKY-9E4-xU7g,25953
878
+ vllm/model_executor/models/llama4.py,sha256=XpEUtu5E_AFzSL1oy48zColZ3sOUwfUbRr-kqVL0xk4,21891
879
+ vllm/model_executor/models/llama_eagle.py,sha256=iwQuxwCvJmkwvgTm0Vda-m5tsMDO7QB92O_rXrEIPT0,5889
880
+ vllm/model_executor/models/llama_eagle3.py,sha256=HDljQQQMno0w4fQrMRuCP2DvQGmjDaF1Yj6udggDtj0,9499
881
+ vllm/model_executor/models/llava.py,sha256=kWIs5WUX3Uc5Dz2ClP6FxiQ8qxcGKpkA-irsOBQsdUM,32306
882
+ vllm/model_executor/models/llava_next.py,sha256=XSfiHc9yXfWeFv4m1wACjOq81923JcLsEbQMGG-8v-g,23806
883
+ vllm/model_executor/models/llava_next_video.py,sha256=QC3uOeIXXrQ89wGRwBhSsp2AQg7wmePwo-bCpJdTCS0,17670
884
+ vllm/model_executor/models/llava_onevision.py,sha256=R3e9BSqTQfn4pkPd4bUnrtgrvXFOrCiu96BC4WOs8CY,37008
885
+ vllm/model_executor/models/mamba.py,sha256=w_gqekW_61yAvd4pTPkQk8ULH1XmgF7uuZWkm2Uh61A,11648
886
+ vllm/model_executor/models/mamba2.py,sha256=mzdfo6ZMC2KaHNYpL-Yg9lg7fus3O8Jwp-Icg2sUdLE,12683
887
+ vllm/model_executor/models/mamba_cache.py,sha256=jeDF4fTtFfhHEFiLqa2nrbOjg11_KS7qVqz5Jp8cMMM,2967
888
+ vllm/model_executor/models/medusa.py,sha256=iR59lwoGI-KtA_rh0O7ABUJiDOWocdbdrAbND6qmqVY,9003
889
+ vllm/model_executor/models/mimo.py,sha256=HhJVShVhvEuyxCTuqQUPUX4tmfbNB2X97CrLLKVUSuY,7916
890
+ vllm/model_executor/models/mimo_mtp.py,sha256=0l_36yBG6UgRiCsQrmV6P3McyeYS6W0XwOkTJFIaPtE,11445
891
+ vllm/model_executor/models/minicpm.py,sha256=HPi3QK4F96zXHH8bwysx83Tq7agKz8-2NcffeTd6S3c,23905
892
+ vllm/model_executor/models/minicpm3.py,sha256=3gR9YhnAHhgoi9hfQupHmNhhAt--aeyMDMerp7sncuw,9429
893
+ vllm/model_executor/models/minicpm_eagle.py,sha256=qOHJH4raDUbtTJYiOWpAUBfYo3dUow4ImkdZfx6K88w,15901
894
+ vllm/model_executor/models/minicpmo.py,sha256=jZMHzka9i-BRCb0viIgXPZKNWhS0MpCabW6A1Q-sxFw,28855
895
+ vllm/model_executor/models/minicpmv.py,sha256=O8fqCdwfbiQha-66zgbZS5DlsqC39BGrG0OjYCP-LhI,47298
896
+ vllm/model_executor/models/minimax_cache.py,sha256=k52vWrBlTlPPEOxeZ4Pn8scFroolWzI5DYXpBtX1fjo,1212
897
+ vllm/model_executor/models/minimax_text_01.py,sha256=4RB805VlehZ_adPiT-yGgt51p-AmKdJu7uV-r1RGX_E,51704
898
+ vllm/model_executor/models/minimax_vl_01.py,sha256=b0l_z-GQzHpW-o8zm17kLuPl7zHCutU3KwpXqXxYQZc,13973
899
+ vllm/model_executor/models/mistral3.py,sha256=lWWE5wqyoAU0E1H2dt7nR6ZCxceLgF0qEkg3EjFo5GU,22968
900
+ vllm/model_executor/models/mixtral.py,sha256=wROyHrIf3ZGywQgvY5Y-RGjo08VyWtYJgSjMKY900X8,20207
901
+ vllm/model_executor/models/mixtral_quant.py,sha256=drRwp7bdlBkSs6_sd51m72ON6KD9wupcsSya2J-KMlI,18527
902
+ vllm/model_executor/models/mllama.py,sha256=cz9LwxP4cIpNRNabdiZCatw0OqfSvrGon-xr56zlAG0,66798
903
+ vllm/model_executor/models/mllama4.py,sha256=N-OCNVQqBB5saJOjEUxg83McbtkXqOWuKH8mOMgHZgM,34539
904
+ vllm/model_executor/models/mlp_speculator.py,sha256=Y7j6_gQPcxM53DHel_hLHdT2Xm1t3V6bv2_Q2e_jgFI,7965
905
+ vllm/model_executor/models/modernbert.py,sha256=Zj8LJcQu5CSUhBVkjldfIm9gbWIJa9ifaapk7F4UeKQ,12741
906
+ vllm/model_executor/models/module_mapping.py,sha256=vNEOOezDnDR5JgMltbviAANLu8CM6tQdr3RX6tZu_i0,1844
907
+ vllm/model_executor/models/molmo.py,sha256=XkpJG8l4spBTBvkAsNgceZOvbwp72dNCbAmgVZjV-DY,54941
908
+ vllm/model_executor/models/moonvit.py,sha256=uIhdvvpCQImQCVXAKFAfM_2BbgvHycdQcGR9q1UA_7U,24120
909
+ vllm/model_executor/models/mpt.py,sha256=OlBtxlKQL-NfEElsrlZwd5G8luQ42-06N7p8ZLFgK8A,12754
910
+ vllm/model_executor/models/nemotron.py,sha256=Ir2Jq2DAypuwWLpda_dyjNeENZfiK4MwlUQsJRgn-r4,20715
911
+ vllm/model_executor/models/nemotron_h.py,sha256=UAfH86GIcUqiFWcHjHJ7cqeEEH-2HdJ8Bm9yLyj0ClU,21198
912
+ vllm/model_executor/models/nemotron_nas.py,sha256=t_uY2L4iPWCun6h6ZGRYqreEucld_1sH87u5Os6QETI,19207
913
+ vllm/model_executor/models/nvlm_d.py,sha256=7AeV--WpvjDOe5sotcAx-8sxjCO-lgz4xZkmduQmvaM,8066
914
+ vllm/model_executor/models/olmo.py,sha256=FklGAz1Q3vGrLQIfTuRUuIRtKMEnMx20tWVgBPp4W8I,14924
915
+ vllm/model_executor/models/olmo2.py,sha256=24LqnlygqCbumLWlCMtF1g2qPaVmZzqxAE39DFkeTag,15984
916
+ vllm/model_executor/models/olmoe.py,sha256=Ha9Kmux5e_EVmyPIywkw0cg74kpXQhEOFGEz8zgOtkI,19416
917
+ vllm/model_executor/models/opt.py,sha256=SRHn5LMjOnO5ECwJxfXjBYq_MINaRD0ynTUwzsIozFo,16572
918
+ vllm/model_executor/models/orion.py,sha256=DeAgZLf8sOiQcJ5dVPjtc1EJ74G4CkjxReF2_5_SpOA,13862
919
+ vllm/model_executor/models/ovis.py,sha256=gKZCZOo97EcugDzUIcbdQ9D2X2GSa1N-APbwb6usuaE,21865
920
+ vllm/model_executor/models/paligemma.py,sha256=dA01kAm5xVlrCKqxI24PoHrmtYd3_OLkQpoLBn7Ho-g,14648
921
+ vllm/model_executor/models/persimmon.py,sha256=zcVATPUTd1A3Jl1TrQXd5KulD1B_AhcPob_N9PvWMUs,14328
922
+ vllm/model_executor/models/phi.py,sha256=1dUL1DRw8swSgChiBgdMB9bgIdMwWefBN746VasstVo,14172
923
+ vllm/model_executor/models/phi3.py,sha256=OdDcrMZ2IYHzg_R_gu89ae3lijNSFlxUXKTw4mmcR8U,457
924
+ vllm/model_executor/models/phi3_small.py,sha256=sr2jvhCsi1Hv2mqSZi3kPUZuDVA23JUr5SwY3s16fyE,18282
925
+ vllm/model_executor/models/phi3v.py,sha256=-X-k0OC3RTV0eiGxRmFqFgwzsCrXVbVARTm_I-0P7Mw,28483
926
+ vllm/model_executor/models/phi4mm.py,sha256=azMxQERDZpYGGUA8YYDQIJFEGcHO406T4mNkp9uG3ow,49383
927
+ vllm/model_executor/models/phi4mm_audio.py,sha256=qOVU4zt58LKSFij9yz2N5_wFM-MBREsQHNzzGqTL2ig,49132
928
+ vllm/model_executor/models/phi4mm_utils.py,sha256=dg5ivyxJCdlyWmKxX4pZOF6I-BTgpUMqY4nmHdZFCEk,66739
929
+ vllm/model_executor/models/phimoe.py,sha256=dpJo_zaxYY57O8yPpF4F0MWc6KbiAYCVX4LxvS91zd4,24738
930
+ vllm/model_executor/models/pixtral.py,sha256=Eb0i_EDik3BLr-9vdann0_EfQHf0bvYrbg6rUl7yIW0,48235
931
+ vllm/model_executor/models/plamo2.py,sha256=_RujxGJdXQnep1ws4g-7cW-1dUYKhuPiUn8YPvpehS4,30125
932
+ vllm/model_executor/models/prithvi_geospatial_mae.py,sha256=aYNorj3NlBmgh3wK3GFdkCmPNArhdY2am0ODPqE17g8,9256
933
+ vllm/model_executor/models/qwen.py,sha256=aRjSXkeI2dwoOhf5yvYu0Nj_-rLezwQQR1UnKpNghJM,13916
934
+ vllm/model_executor/models/qwen2.py,sha256=uEC_blysusUjq0OuC45jqeQEMpD2RWTF-a83xSvBJAY,19770
935
+ vllm/model_executor/models/qwen2_5_omni_thinker.py,sha256=l_fhjc78TyRx8gqSYe2z4550-a4DhJpnUD1sAS_TcmU,37186
936
+ vllm/model_executor/models/qwen2_5_vl.py,sha256=nPSgUgBnUrh7oCW5AcHzQDv8KGmn-66jbATia3ktEnI,47525
937
+ vllm/model_executor/models/qwen2_audio.py,sha256=Jvl8apcyxYQ7jM-XJFboJ2NXbFVi2BNY1ZJDfHr_EZw,16581
938
+ vllm/model_executor/models/qwen2_moe.py,sha256=SYlWCTr5dTpVA0DyTfg7G3TVbpZfwRNY7lRL7bysFh8,23070
939
+ vllm/model_executor/models/qwen2_rm.py,sha256=Rbe_5YLaxELesX7hpxlY9u97tM3MfCLBEatoEIz-YiY,4620
940
+ vllm/model_executor/models/qwen2_vl.py,sha256=OSKWS51pP8scwZdwZKbIXh8wbRU1-CNnCMO4UFPUeS8,53664
941
+ vllm/model_executor/models/qwen3.py,sha256=3aWxRi2BAGRyQE9tTYeWmXsuui52IQ811-FVNOupyzo,12474
942
+ vllm/model_executor/models/qwen3_moe.py,sha256=k8FsAIRdm__9cDv07C9-zfDb2iCZh-XMblDIIhaZSdI,23026
943
+ vllm/model_executor/models/qwen_vl.py,sha256=Rk62-xRwtEVS69pf05ujaQvw_rXHYMIMlsOmJnYe52A,26838
944
+ vllm/model_executor/models/registry.py,sha256=VoCUkg9sz3OA96EVI4WaKjVqazuPdofB5bG0pszMLkc,25796
945
+ vllm/model_executor/models/roberta.py,sha256=CzDGg0QF5eTJViqGfZX3HR5sH5CMsrT1jLOqC7a464U,11511
946
+ vllm/model_executor/models/siglip.py,sha256=zfwSZK8jlQL8B6_C1WvF_sUlacyRNMIDXk_Gp2NPrfM,18720
947
+ vllm/model_executor/models/skyworkr1v.py,sha256=5H3rjeqVXy_rBD22Ee-nQLomSYC8lLax-8JvhSV-3dQ,34039
948
+ vllm/model_executor/models/smolvlm.py,sha256=5Lopc0syQDwQQZ6agYHWi2Ew8c5PdzOLfcLfBFqv27A,1799
949
+ vllm/model_executor/models/solar.py,sha256=HLDduE5re-mJnZEEq6djQTugObzWM6HBYFMs3yhJaXU,19962
950
+ vllm/model_executor/models/stablelm.py,sha256=HOSTK482W_yh2yCoMxH_200DsyTGIRGsd7Ymga0AA5M,15007
951
+ vllm/model_executor/models/starcoder2.py,sha256=fC4Vg6oFZow9GBS1QeyuumChhfv8AuBRZOLkTBhdjMs,14610
952
+ vllm/model_executor/models/tarsier.py,sha256=v9oZCqegdGNiuWAAyeePIcjewEX1t26yRkJIGVxd220,26291
953
+ vllm/model_executor/models/telechat2.py,sha256=s6Xfzd3cBY06n3kT3w8RjID5M5imOYVLWkXSDPeWrkQ,6063
954
+ vllm/model_executor/models/teleflm.py,sha256=8BsSo5Ox4ENo4Y1UKicd2nq41lksPZhW-ippU-498NU,3212
955
+ vllm/model_executor/models/transformers.py,sha256=fK5wtvHTUQR7r_Uhn_9_1PLcYcHjAv_Sq39gKZ6HBHA,20464
956
+ vllm/model_executor/models/ultravox.py,sha256=jxyLoOMhmCeIpFarIPKKFnYS2J6RAzGMg13K1u4mlH8,26960
957
+ vllm/model_executor/models/utils.py,sha256=VutntdonPxq2QEQ_DzlXS47bOzxzll6lTLoDxjam1gk,24970
958
+ vllm/model_executor/models/vision.py,sha256=ocDWi7KE4XXlz9iOZpYxkYg8dD9-z_jHU1hh1pdPZuc,5603
959
+ vllm/model_executor/models/whisper.py,sha256=uxRiZU3776PKxIHv4O03SvMJU3EMSXtBkdPc2UPtm-E,27616
960
+ vllm/model_executor/models/zamba2.py,sha256=TCH1kn4HeNHjFHn3mEfhQeCELij0joKrl_5Lb2wVC00,39668
961
+ vllm/multimodal/__init__.py,sha256=itu8-L_cTTq8GzajfSi4OHsAGe4lqyOyKjOlk6GLlPk,1059
962
+ vllm/multimodal/audio.py,sha256=nXiSIMdzGBZ0T6yf76fUhK03NquqbqQQDi2xAk5de04,3148
963
+ vllm/multimodal/base.py,sha256=7D7GyxhfTkqSOsHtUuKqXWR1znFk71Mq7ByHD45KP60,7002
964
+ vllm/multimodal/hasher.py,sha256=95fhWlh7Ho_ralw56Y8hALCBwLFRfofXIK8cBsH3Oqs,3454
965
+ vllm/multimodal/image.py,sha256=5y4QojUye1SlYB5n8oOfCKyRfPMsZ6Ocjn56bIqs2YU,2995
966
+ vllm/multimodal/inputs.py,sha256=VGIthhXqBqtAy4yTH5T6J_xwz-fUxujLMLA_65BWiD0,28421
967
+ vllm/multimodal/parse.py,sha256=58RZ1XMtOouTEryKAgmGGr7WmUYnVT4dLdeM-dF26D8,14485
968
+ vllm/multimodal/processing.py,sha256=qkeIL3ONvmc87dmy7DXDf4YD8YuP5AG8pBNTwSlpAwg,60996
969
+ vllm/multimodal/profiling.py,sha256=09DUYqSsyhaobl19gMJwEERfdbXgK6c--mBl-NCMSK0,8654
970
+ vllm/multimodal/registry.py,sha256=4OS18qN4_7168jA0o44lwrO77DAeq6Vh2WXv38h6ygs,11254
971
+ vllm/multimodal/utils.py,sha256=Y3wnhgll7_ms_aJUdp9ip-DoR8X_2isbpiEqM8gCjVQ,14061
972
+ vllm/multimodal/video.py,sha256=oc2702Ggy1GqTjLOFPyWjJWJChR-FVNIvoyUhbG9BDA,6148
973
+ vllm/platforms/__init__.py,sha256=_vGc1ySlqBeuEjIE75yobQ0jeuk2TbMP4112uogrXaE,10637
974
+ vllm/platforms/cpu.py,sha256=qg_fTQzs_91LRiNkib7KtBgXZhsDs31uvWAkU0QJKHw,10618
975
+ vllm/platforms/cuda.py,sha256=x-EjRKsEi2fMHLhRsrsE9LIIYh-uEUbF8f0M9uj5M1k,21631
976
+ vllm/platforms/hpu.py,sha256=FLff8i9N1klXT7UORGVGc_EP-6GgFXBfqKPqonJ6p1o,4206
977
+ vllm/platforms/interface.py,sha256=X-MqbRyM9vlPzGda15BN3rZJ-3XFI6VYyJnR5oTHfaM,17720
978
+ vllm/platforms/neuron.py,sha256=4kGXZ7nuMKkIXlGBqCa8UMFKe9308pbZDW41hzAVx8c,5575
979
+ vllm/platforms/rocm.py,sha256=muI-076S_ZQ0S2FRWEYLeiRlgi9BTcSIn-y0HiO8wnA,16987
980
+ vllm/platforms/tpu.py,sha256=YgmI5qIU3wQ9oOJ7-9dsiC0tvsswg2bFO06IxyOaLFU,8278
981
+ vllm/platforms/xpu.py,sha256=9IQvpu9OSqnrGdP5kmeufE5jpNnIpAc4IEIx10nLvYo,6200
982
+ vllm/plugins/__init__.py,sha256=XZWErO0pxklJEsdPQiqL7LWi_JQ1AbvbY2UrZSQ4R6o,3322
983
+ vllm/plugins/lora_resolvers/README.md,sha256=I4lYxAwarJdoR322hv-UQqsvuqjdQIxexWtBVdwyrL0,828
984
+ vllm/plugins/lora_resolvers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
985
+ vllm/plugins/lora_resolvers/filesystem_resolver.py,sha256=Ic4o6SWF7DeDhNAAE1dxzaAMb_0L03DNzM0590FC1Fk,2086
986
+ vllm/profiler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
987
+ vllm/profiler/layerwise_profile.py,sha256=ePkIStAYP58wRo3WUuy9dvOMvd4t5fRPeUN2QDALW0U,13887
988
+ vllm/profiler/utils.py,sha256=zh9V6T6baIqC_EXfG39TUF2-d0z20JVqxfVtKWFDl6Q,4714
989
+ vllm/prompt_adapter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
990
+ vllm/prompt_adapter/layers.py,sha256=qa3iB58VSjE9dZPBVrLwNnBbANgp9KwhBtAd7DLLttc,2795
991
+ vllm/prompt_adapter/models.py,sha256=s3K69Ul2W8GVfM8oJip_Mymz1T4d0FamTQO5U5nyFMQ,13791
992
+ vllm/prompt_adapter/request.py,sha256=ZUEDOsSr938VwW19gh2pv8H7d7TjJbsd97DvfaYB6WI,907
993
+ vllm/prompt_adapter/utils.py,sha256=1iulqD-AWfgrBwwa87FIz_OMb3Zy6aEqmAQQMoEeCaw,3737
994
+ vllm/prompt_adapter/worker_manager.py,sha256=emiWFvvZJew530MDzT3WwbbebvPO9Y1DyvuKQeeviI0,7605
995
+ vllm/reasoning/__init__.py,sha256=4sr5FvUGkzpACLGXKoJu075EFK5Bs4gXsDWKgRxAU8s,528
996
+ vllm/reasoning/abs_reasoning_parsers.py,sha256=jmxq1sBhGLfIQQrLSIhFma5mHOWyQtnpPZwdsvVGdaE,6645
997
+ vllm/reasoning/deepseek_r1_reasoning_parser.py,sha256=fVAV4ZsMfd_6bB3Rf6ucSGnMJLnI1NjcAMA2i2yucwA,7465
998
+ vllm/reasoning/granite_reasoning_parser.py,sha256=1D8ojEdvWm391P8xoovSgLsEk2pESZIeNv3vqwfo-Yo,15899
999
+ vllm/reasoning/qwen3_reasoning_parser.py,sha256=gzC5_aje9dv5Juw5TRXQIOOP4QRPa_BXZkcE6cmrb68,6496
1000
+ vllm/spec_decode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1001
+ vllm/spec_decode/batch_expansion.py,sha256=XXuJzD1RxHYRVA_az4ULrt7bFuWW24dy2eE8y7xvXZE,22809
1002
+ vllm/spec_decode/draft_model_runner.py,sha256=CCT401JFi-5_SmVfqteN98scAGS_eapYX_NH3YdkBKI,15222
1003
+ vllm/spec_decode/interfaces.py,sha256=9BnwWHd0SiMOOInnnjFHyyiidD6uk2HE7avvdgtNdvY,3155
1004
+ vllm/spec_decode/medusa_worker.py,sha256=EtxnCq_wFhOAxx4VYZjdZIp0KK2pSiYq0tukM6TqzdA,4969
1005
+ vllm/spec_decode/metrics.py,sha256=K-g8vi-_RoyMlEThMYcyb36Or5iXHz0Y5-jpnh7KE6o,8152
1006
+ vllm/spec_decode/mlp_speculator_worker.py,sha256=cofbNTCITpuRV8k6QeaIgm5RlDYHMvDavU6Gzsk2CNU,3806
1007
+ vllm/spec_decode/mqa_scorer.py,sha256=QttFp8DNHqVBAI4dGprnKjM_pIorE82jJZeDKk_FSBs,7577
1008
+ vllm/spec_decode/multi_step_worker.py,sha256=tPwGnoIKLuUoQeJprtOvESBvQfSgasJ2R8vbOn6biBk,19673
1009
+ vllm/spec_decode/ngram_worker.py,sha256=lCof1qiVJgLwRs5yuTUDzGca9WuVnHWgsYO5gqs6fh8,7896
1010
+ vllm/spec_decode/proposer_worker_base.py,sha256=rffkFH0N1Z3roRimkMdqTiaZqZ1GLhvn6fBg8bKgNDo,2158
1011
+ vllm/spec_decode/smaller_tp_proposer_worker.py,sha256=8w0TnEbusk49rf3GoTIZq8mklYjYTkxXu093qhmZuz0,6952
1012
+ vllm/spec_decode/spec_decode_worker.py,sha256=Q-hPtClVLfyO-cIJnLo4FWTTeS8fMMjn52Pur8k40ys,63027
1013
+ vllm/spec_decode/target_model_runner.py,sha256=evuaAOQnTwrRsXo-yMOXM-0SICI7gYNpnUrhPKRMQAk,2142
1014
+ vllm/spec_decode/top1_proposer.py,sha256=Cljv7ojLAzqVMPUdKsxISdTo2aF6io47gVsKUpq5F9c,12423
1015
+ vllm/spec_decode/util.py,sha256=H22iXEsK3HPptmui5JldppTXPLN8efrEbaKLj9NCMDU,9957
1016
+ vllm/third_party/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1017
+ vllm/third_party/pynvml.py,sha256=p0NkH1xRdvKZlP9F6Wfy5mBzdP4tbKpHc2TsFj0V_pw,234653
1018
+ vllm/transformers_utils/__init__.py,sha256=yVNs7WNM1UWvqmKYPW-R4D9NXhYWEjKYQKlfcpxL5HI,910
1019
+ vllm/transformers_utils/config.py,sha256=fBX8ym5iJ_tvH-_qoiUrB4DvG0ys096_Fsii0zxLWac,32938
1020
+ vllm/transformers_utils/detokenizer.py,sha256=UkxFpYPV-TwBGy9GhnPmSnf-SBslKcw4LQaIYgbEJsI,7299
1021
+ vllm/transformers_utils/detokenizer_utils.py,sha256=UoNBvr9gUNhgDNxMmVIT78HjY_KZZHbad8SgwSI6ltE,7321
1022
+ vllm/transformers_utils/processor.py,sha256=FNOmqYVq2Y-foTJs1xsvjqFhazb9mJh9rcS4iYse-XA,7602
1023
+ vllm/transformers_utils/s3_utils.py,sha256=3j5x2VDqiSsW4YwJRG-3bdbSONiw74cEQpf2Qz0SzLw,4954
1024
+ vllm/transformers_utils/tokenizer.py,sha256=kuzCvWjPiXCEYBlU854jBfVwsE5QyEvo6Av89g9PFXo,10721
1025
+ vllm/transformers_utils/tokenizer_base.py,sha256=_1iqKJZTSQW9ITGpZkC-A1lhYw7ND1ZDzpY3SpDhj_c,4029
1026
+ vllm/transformers_utils/tokenizer_group.py,sha256=RqfnicvJU7xQjcAGF1C6czeDf4xjKZoBcWuTlpQYPTc,5259
1027
+ vllm/transformers_utils/utils.py,sha256=C7fFp4Eyw5dGw9neyADzwExlhK5A0g0efIDVL3PHnzg,2699
1028
+ vllm/transformers_utils/chat_templates/__init__.py,sha256=U1sUyX9swSjxaULlg0B6rSroU5H8upeyInuHsF74SEE,208
1029
+ vllm/transformers_utils/chat_templates/registry.py,sha256=ZmIsEElC787e2vLsWnuyhmnB4nmlGDjGpF_tAYWno5k,1945
1030
+ vllm/transformers_utils/chat_templates/template_basic.jinja,sha256=DMH0156UMA7eoJelXKUMEDzB-SigjbyCOBxIu9OyFJE,78
1031
+ vllm/transformers_utils/chat_templates/template_blip2.jinja,sha256=ltMbjFdK7T4HUcN_OQaX4hj2r0PGlS1EJ9zhSlnTz1c,332
1032
+ vllm/transformers_utils/chat_templates/template_chatml.jinja,sha256=CKxCWf_KemM_DntV70Hf03WNkDvxznolyW-03SJJw54,370
1033
+ vllm/transformers_utils/chat_templates/template_deepseek_vl2.jinja,sha256=WX32uOZ7h8_xqrWvmsI5R-6Ns8ZcXVn74CKB7FJOifA,785
1034
+ vllm/transformers_utils/chat_templates/template_fuyu.jinja,sha256=hzdsPgeUMaZnd5L23QPiz2oC6_wMBy5WgZkXMVs3Dgo,85
1035
+ vllm/transformers_utils/configs/__init__.py,sha256=rYfqq6wGJmSaXooZ0aIED27NPs0CjSrbIjDJ8BREDFk,2638
1036
+ vllm/transformers_utils/configs/arctic.py,sha256=8WAZRegtPG1_qaFIplNemJJLlCLHBYB3p3m8ZR5co88,9053
1037
+ vllm/transformers_utils/configs/chatglm.py,sha256=6jt7jwkyzTBLCYQsNbFwww1-FCgBxfhoV8y-PBqm0-Y,2939
1038
+ vllm/transformers_utils/configs/cohere2.py,sha256=GaulobYCg5yygFpKJKiwT5EeI3VGkZBFZ36Cuv10WEc,10422
1039
+ vllm/transformers_utils/configs/dbrx.py,sha256=7zZOGYrASjYZiiXjHRtx8NcHin3efEz3X5y830qzZUk,10984
1040
+ vllm/transformers_utils/configs/deepseek_vl2.py,sha256=BwZtrbgFNLRHyOu-yMpOS8uGIFamH8_DAWxw8RtzFJA,7296
1041
+ vllm/transformers_utils/configs/eagle.py,sha256=pfj6dlz4ElIMGZykeCp1snCCBB-rapqtBrN_JuH2M84,3217
1042
+ vllm/transformers_utils/configs/exaone.py,sha256=19I7krrE3PayhGglYd0H4zzH0gaFCl0mehaGJkayomM,8927
1043
+ vllm/transformers_utils/configs/falcon.py,sha256=vKXtykJL5NGzcDFfSnE532vBV5KLrQvOKm7v5P58y-Y,2986
1044
+ vllm/transformers_utils/configs/h2ovl.py,sha256=54jsgYNkfr31YxIdkRJdkWkj5Az5Qcl0RjzV-TyryLI,558
1045
+ vllm/transformers_utils/configs/internvl.py,sha256=-Ixey6eygi6vwZ0es-4vNS49a1rjG9fg3WaezAOw8w0,1936
1046
+ vllm/transformers_utils/configs/jais.py,sha256=1jEXh11bRdFpH8ptGPKOZaTOr-Ck_BCgMbXpc959eVg,10432
1047
+ vllm/transformers_utils/configs/kimi_vl.py,sha256=xXtkLgTdOt5ZgSxva36iLzgZqqklIoiHaoJhCNgJyVw,1486
1048
+ vllm/transformers_utils/configs/medusa.py,sha256=ZZcus4c6s4A1iTOPCR2bwzJpSHKsms98dycjVpmoi2E,2012
1049
+ vllm/transformers_utils/configs/minimax_text_01.py,sha256=XcsvDZWzH1KHP9SQiD13yUQ5ox6WJsG9PMgz46ygqC0,2384
1050
+ vllm/transformers_utils/configs/minimax_vl_01.py,sha256=M9PrR1OoyRuRrA2KE2YhTnMCb88hu24QVnfiJ-Uqriw,2617
1051
+ vllm/transformers_utils/configs/mllama.py,sha256=DK3NNVpe70EH-HC8CWjtlhXRvzt95uQnDIOZ3kIOrwI,874
1052
+ vllm/transformers_utils/configs/mlp_speculator.py,sha256=2it7HgAv-ZqGDLoE7q66oxXjk8R_mBdnGw31_TVXI7w,2500
1053
+ vllm/transformers_utils/configs/moonvit.py,sha256=Egyjh8mvpzPlX-RmbfDf8ZmeBe7K9fqimYX-QgK9NrQ,1272
1054
+ vllm/transformers_utils/configs/mpt.py,sha256=8G0gnNNmTousHr6yInzh68_XGmqJkL9vptWpDPe3xUw,7652
1055
+ vllm/transformers_utils/configs/nemotron.py,sha256=8_mTkW7hUq0gLXuyrdQjsJDwQlmn_fkTFz-8xCUi7pI,9043
1056
+ vllm/transformers_utils/configs/nemotron_h.py,sha256=xD74xuTxIr2zraGF0acR5YOcPmRoUN5B57L2vf3hXao,12195
1057
+ vllm/transformers_utils/configs/nvlm_d.py,sha256=vlI3jyLRvc6Yrwy1GPd6XcQUwoBtyGoiYJWKE9LfHOo,527
1058
+ vllm/transformers_utils/configs/ovis.py,sha256=tuFDhwa1tfivFenL6Os1PImwTkfcsVy2ll7nS5aNS40,7724
1059
+ vllm/transformers_utils/configs/skyworkr1v.py,sha256=tFblvu0RGA1yVxAlegYtX_he7vBp5gb4czxDzhXGatU,1938
1060
+ vllm/transformers_utils/configs/solar.py,sha256=2BDfqhbvjutvzoNvqrJ6a1ESouK7YmIAQGJBIgfLTIU,10910
1061
+ vllm/transformers_utils/configs/telechat2.py,sha256=i-VmqZFsMjS2O7-ikd0-9cmFQHxUgM-Hz7cfnWsLuVc,2269
1062
+ vllm/transformers_utils/configs/ultravox.py,sha256=K1hw-jFVbTpOZ1mw7PsHXXNus-W8jf7cqjmS3zslkUY,4528
1063
+ vllm/transformers_utils/processors/__init__.py,sha256=w199hgueQE5uEOCjzPLiU1JKZNzB-1XAl4Asz_LdmYA,317
1064
+ vllm/transformers_utils/processors/deepseek_vl2.py,sha256=jhVoBgnvziilKiZjGoePzrIcDJM0lYluQGTkcJ-8El8,14639
1065
+ vllm/transformers_utils/processors/ovis.py,sha256=trrJofhlstXFklZKj_IhS18ct6sSXFM7IX0ldv_XqbE,18928
1066
+ vllm/transformers_utils/tokenizers/__init__.py,sha256=dN6RDCTGacE-3exN7VSZHlEcHhu__4dQM6Ry0lQ43w8,372
1067
+ vllm/transformers_utils/tokenizers/mistral.py,sha256=YxvurhrAcpEiGPs6u-_KpgTsm2_ZgIwlMEMe-3WCZNw,19107
1068
+ vllm/triton_utils/__init__.py,sha256=QuRhc0qyR8n2bM64Mv4VqhR3lIcWl77dQO8XX8d9prI,433
1069
+ vllm/triton_utils/importing.py,sha256=hZoqcRv-TyOBXB9B1kykL4cEPERLYYjBnGAkdIBwHKQ,1430
1070
+ vllm/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1071
+ vllm/usage/usage_lib.py,sha256=JmS8NnHHf0XBlt8rUJi8HD8CiV-KRU_tAGZW5oMOYyY,8903
1072
+ vllm/v1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1073
+ vllm/v1/kv_cache_interface.py,sha256=9wCva7ngubsybUZh-iVbgPBTobdsmbSnJecJx0rNepk,6400
1074
+ vllm/v1/outputs.py,sha256=bhO2rIKofypa6yMgz0zFulX1OLN8dGqmpyFdK_eTZvE,3780
1075
+ vllm/v1/request.py,sha256=kYw5aWSU0RpK6z271gp5_n4Sto6c5vvAptTaZ4PgBsE,7282
1076
+ vllm/v1/serial_utils.py,sha256=V3_YCe5ZntdHJ1_fSgeY29KbIDzGz1gyJnolpIvZ-OY,13219
1077
+ vllm/v1/utils.py,sha256=61oFrABtvMLFgbP3Y5XaHzcjU8y4eFkq1zKbvX8Eixs,27563
1078
+ vllm/v1/attention/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1079
+ vllm/v1/attention/backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1080
+ vllm/v1/attention/backends/cpu_attn.py,sha256=17GK9hhin-jdcSIy0T5XvIscyhz0fhsH2-yEjRwzwUI,6522
1081
+ vllm/v1/attention/backends/flash_attn.py,sha256=LXiZJlp7GAaK4rxEHt-Gt4k7vgBQxcU7Y9hh7XU2bD0,37547
1082
+ vllm/v1/attention/backends/flashinfer.py,sha256=Nw1U54uAYkr-zQ3AazesVGub5EeUYnazDquruXfv2Uo,27220
1083
+ vllm/v1/attention/backends/flex_attention.py,sha256=77TnZn9BycWVRGO5nhLyKuhxBxiIW3n-Wr9XHToOaiU,18266
1084
+ vllm/v1/attention/backends/pallas.py,sha256=WM1g3Cm3D0psAkkxiJ8F0bDMm3-oVVNT8CkpcbFTLF4,9028
1085
+ vllm/v1/attention/backends/triton_attn.py,sha256=vGCHZ3j4-omritdvaCpJ2j3YZJ7gp4knmLwQy9IuhU4,11559
1086
+ vllm/v1/attention/backends/utils.py,sha256=YDXJoydgj_viMMm4inrJuDg8zI75PoM0aa5OY9Sdx-U,2202
1087
+ vllm/v1/attention/backends/mla/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1088
+ vllm/v1/attention/backends/mla/common.py,sha256=xeZydrTJpbTgldI9Bf20RLrebOqs2WBbdGmzzF5xkG4,36419
1089
+ vllm/v1/attention/backends/mla/cutlass_mla.py,sha256=WohRgTRutHzfit1mjAmvivs4ESA-0dDJEPrDS403UVQ,3479
1090
+ vllm/v1/attention/backends/mla/flashmla.py,sha256=izT4ktr5VZWuM2vPTJSdFle2njAr2eWZEjSa8f4U4vw,5395
1091
+ vllm/v1/attention/backends/mla/rocm_aiter_mla.py,sha256=aR6AMwHqyrZlePsoyFtO1lxQQaIh7XNAG7CDdnilSxE,7947
1092
+ vllm/v1/attention/backends/mla/triton_mla.py,sha256=iK-qVDbWSRCtBFNzduxI4uYgkTgaDIVWaXxdhQ0WEds,4261
1093
+ vllm/v1/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1094
+ vllm/v1/core/block_pool.py,sha256=hFU1r4q220ZrAne94YPOBt4fbGB-URNjgeM7BCO0kXU,14645
1095
+ vllm/v1/core/encoder_cache_manager.py,sha256=t--JCTlsjg9nBUVdb97eGAIY3SkfnV-0RU3rweCucGI,5326
1096
+ vllm/v1/core/kv_cache_coordinator.py,sha256=BYLGMU3YawXXH4Hn6K1fU8EELqFEwSYqr1M2z27AcGM,15229
1097
+ vllm/v1/core/kv_cache_manager.py,sha256=CQG1S4qI-VLWM_loProtOKsC0xrTpwvRtSXc6C531xU,16599
1098
+ vllm/v1/core/kv_cache_utils.py,sha256=VXXxF2elU6X9kT423c7egQkPmqhiNqJuUaEkFC6rBV8,40669
1099
+ vllm/v1/core/single_type_kv_cache_manager.py,sha256=AtSo04ar6dDwM9iioeRQaUfY0ag67zAZVosDTR9lgvg,16418
1100
+ vllm/v1/core/sched/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1101
+ vllm/v1/core/sched/interface.py,sha256=mKU1I1yEhcSf5PFUt7TSkVtRd8Lliqdh8lpy3w9oB58,5860
1102
+ vllm/v1/core/sched/output.py,sha256=HizhOfXORVqvgHacIOUP7Nuiqwf8LpEnnD4NWPcTinE,5762
1103
+ vllm/v1/core/sched/scheduler.py,sha256=2qQ38RBvKDGlA8sOHxGBfv-POOl6vbbzGgTbNAl-mXY,48533
1104
+ vllm/v1/core/sched/utils.py,sha256=DwiO7yhp1odUeQhCU0u7O83JQPEVC3I8VUO5CZXx_d4,882
1105
+ vllm/v1/engine/__init__.py,sha256=TqVOiaFD3Y2GKs3psDIgbBusFe0kWdT4VK2ok0Gkja4,5281
1106
+ vllm/v1/engine/async_llm.py,sha256=pgEdf_CZy4REc37onJvY2Qp_XRDgZo4qf__RJsGI_NQ,22187
1107
+ vllm/v1/engine/coordinator.py,sha256=rawACUpcbEocwE0zQo8KbWn-avB08PL4gujBVM4SK2U,10755
1108
+ vllm/v1/engine/core.py,sha256=4lAeeq7j8yT5i4lNDESmHs-GKcGx22datY2pgdQD1kw,39625
1109
+ vllm/v1/engine/core_client.py,sha256=0MWQJo2eggvcId7lXrJ_lCnHOx6LB30N4CPx-n34570,44725
1110
+ vllm/v1/engine/detokenizer.py,sha256=19zOB6hJZrSzt1GKKsFLkABYzExse8ATr2bFKNhnflM,9618
1111
+ vllm/v1/engine/exceptions.py,sha256=OurXOSPqCuoLWzIZ2vi5ahe9NnyGESnO-HZqlvSB-Xs,731
1112
+ vllm/v1/engine/llm_engine.py,sha256=Rlg84c2Xj5zmT7P5F1JQAHgcpM_9ZkmIGVLicw1CvPA,12560
1113
+ vllm/v1/engine/logprobs.py,sha256=iaVMIsZWBRZNZSGaziWQ19qZIjGS9h8KKPwZfiWuDGo,7128
1114
+ vllm/v1/engine/mm_input_cache.py,sha256=FxnAXtNQqtrWGLSY1Qz60LGJd0kEEnuI75zNsO31c4U,3217
1115
+ vllm/v1/engine/output_processor.py,sha256=1hAqG7tzX6umOY6_8_VsGeyAfKueYXiqROkhYGzTEfo,16368
1116
+ vllm/v1/engine/parallel_sampling.py,sha256=uViFaH5NFxMQfWBz8rOdRZ5eIb_ZHMlJu3aCuhq2TIM,4834
1117
+ vllm/v1/engine/processor.py,sha256=WLOPgs1jjcVTa4Op6sWfi84ZL4TAWh6sdZ3Kwgz3qek,17940
1118
+ vllm/v1/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1119
+ vllm/v1/executor/abstract.py,sha256=PT8RwkVRgGjwuXeVZUU-h0_Vk5ywGolNU5iu-l8vbQY,4533
1120
+ vllm/v1/executor/multiproc_executor.py,sha256=73O4gsOsau0M13p30LJo47QwbsXuZlN8Uz3YIuRwtmg,20780
1121
+ vllm/v1/executor/ray_distributed_executor.py,sha256=JM3NMLxS_fzbtnK7bq-k5v-VB4h2uSCm29hG7pgpZVY,2061
1122
+ vllm/v1/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1123
+ vllm/v1/metrics/loggers.py,sha256=xWpU8YyPOXnC5OUKuI48a1uh5WitZh0YVx6ZGpFjvdA,21267
1124
+ vllm/v1/metrics/prometheus.py,sha256=RMS41lcHWzKGRrS8l92sbxfMfM0NIPiy8bSJKkKSAak,2821
1125
+ vllm/v1/metrics/ray_wrappers.py,sha256=1MzBlfON0nxqrc-QXK3SncCyvX6owJPJPtGmxwvGzW0,4475
1126
+ vllm/v1/metrics/reader.py,sha256=9rx29TV3t8P49Hx4a_F1LB2WHTwFDHkVc5v3utoEOFg,8702
1127
+ vllm/v1/metrics/stats.py,sha256=upnyyyJKJESiC71QQdjlBnFkzkiMQA0Dn70oE3sg9xQ,9455
1128
+ vllm/v1/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1129
+ vllm/v1/sample/metadata.py,sha256=T8sjcAIz2rlGznHepGGcnBg86WcxlYoshf05gG3C_GY,1163
1130
+ vllm/v1/sample/rejection_sampler.py,sha256=wHdMkjv8E__M75iD5N2B5QFNfFcFworhKqSNCQHUZes,23006
1131
+ vllm/v1/sample/sampler.py,sha256=SKLgi0VyrimJ-iDNvmDrXsG9mvIexcNnMjW2amVl7QI,10948
1132
+ vllm/v1/sample/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1133
+ vllm/v1/sample/ops/bad_words.py,sha256=JOnrJXq2RD45cA7cmePX2ktRjBg_1uJQsPqCc0gf2Vw,1191
1134
+ vllm/v1/sample/ops/penalties.py,sha256=wpBU2bBU9jUc_P6DOWolprtf9Jm37nOMUvett-wqRf0,2218
1135
+ vllm/v1/sample/ops/topk_topp_sampler.py,sha256=zF29jKbtTEZCPGJqgfWbI6zRqyxeu02YU73Uf8FvpPg,11084
1136
+ vllm/v1/sample/tpu/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1137
+ vllm/v1/sample/tpu/metadata.py,sha256=IEGdj_wnsf2oSwVlYVGwmi6gtac6lnOCf-B1GdCvRHc,4695
1138
+ vllm/v1/sample/tpu/sampler.py,sha256=fcT-k807VTSnOOgwg9fDFb_FloHZ4ghLq-GpjdKhFzw,5098
1139
+ vllm/v1/spec_decode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1140
+ vllm/v1/spec_decode/eagle.py,sha256=NIOwf_n4YS1GaT-CmHw_dD3FYieeVUZzFFNrkXgCQIw,19124
1141
+ vllm/v1/spec_decode/medusa.py,sha256=baRvI3XjeP-GXDyMwjFuTOI6nYOJSMERO1ZfsQCXxD0,2148
1142
+ vllm/v1/spec_decode/metadata.py,sha256=I3rD5wVyLs5ID9LnIN3E5hy0lu5Eetz5DGcxstQMeR0,2257
1143
+ vllm/v1/spec_decode/metrics.py,sha256=_Ql1shXH0DurEm1MRuB1SIv8j0VOZywNnBPEonKOU4I,6735
1144
+ vllm/v1/spec_decode/ngram_proposer.py,sha256=l5tmQWxkdg8brzW-KA39GX6s8RjUUQS3VpV54k-hndY,4293
1145
+ vllm/v1/spec_decode/utils.py,sha256=OTgi53oPrx4-LQssrds8uTUu1LxdAVnAg-Uw_znSHXI,1444
1146
+ vllm/v1/structured_output/__init__.py,sha256=eZ2QCnfTcnXSmjN5ouPm9RxSrpQh6zqhThVC4jjwl1Y,9761
1147
+ vllm/v1/structured_output/backend_guidance.py,sha256=X_5Ajlp2t1twy10Oit1OVU3t4pWQelu5Q-N8JXOTzpc,8784
1148
+ vllm/v1/structured_output/backend_types.py,sha256=AvBbM-6oA7KcX60ecRLTa3geFnc4Dbgwa5HPcyax6D0,3806
1149
+ vllm/v1/structured_output/backend_xgrammar.py,sha256=Nkdy9bh7vZxvuATE8T3c3GK4hyH6zCxw1V20QOuGqPE,12249
1150
+ vllm/v1/structured_output/request.py,sha256=qEeKTd0vL27BJ4cPvnlF8O9bVzfALoxekG-fNA3Sr8o,3220
1151
+ vllm/v1/structured_output/utils.py,sha256=KYQOcqpl3TGmWkYXpFFqAyZE4IwiSHa8uqjyHrmKD1g,5866
1152
+ vllm/v1/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1153
+ vllm/v1/worker/block_table.py,sha256=e51jo43IYbk1JpR5DzdXBKhL6Fd76Rs5EfmLidOGPeo,5112
1154
+ vllm/v1/worker/cpu_model_runner.py,sha256=kFQyQXKPbQLCgaTMN2gbzFnDVsW6dJG79D9JybyRxSQ,3275
1155
+ vllm/v1/worker/cpu_worker.py,sha256=0I7JIE1G34aoFCFWCVzGHrLrwV2ZnObDs7pqV_3H_3I,6204
1156
+ vllm/v1/worker/gpu_input_batch.py,sha256=hYDMcddniRJXNFjleSLwpBBUy4MWlyPx3WzihHkWKpM,29981
1157
+ vllm/v1/worker/gpu_model_runner.py,sha256=8n7UiFzmJsr0zP5SdUduKFeetXmfPuor-LCdhzol32c,107863
1158
+ vllm/v1/worker/gpu_worker.py,sha256=rDiJr3rTKa28GsRa25TMZGgHznQahFdztupBJ-6MOOw,16617
1159
+ vllm/v1/worker/lora_model_runner_mixin.py,sha256=UBdOPm4snKiiW9_vCtVGW3pq5PrGlUQ9XWMLcjuz5As,6761
1160
+ vllm/v1/worker/tpu_model_runner.py,sha256=cb_kXQrmiwcs0wOhQiiPbKOFbPuqXhoiuLg-aU3y4_g,78182
1161
+ vllm/v1/worker/tpu_worker.py,sha256=S_iQ0kZ1jdk_sobRKTpPqmn7B9Hp4tMG8mKnYkpDlEg,12787
1162
+ vllm/v1/worker/utils.py,sha256=snDvL4WCRN16DoEoRtGWycSQ1vefSLQ2JEY4PuiYXi8,4342
1163
+ vllm/v1/worker/worker_base.py,sha256=VXzA1tCiP-UfXxQ_h-WeP41KSC_8JidGW4j0-S4nKIk,2045
1164
+ vllm/vllm_flash_attn/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1165
+ vllm/worker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1166
+ vllm/worker/cache_engine.py,sha256=CsyWo93EoS2h6VKrn-LfgGMlEq_X8B5Mu4j_og6qhaY,6075
1167
+ vllm/worker/cpu_enc_dec_model_runner.py,sha256=0TvNYphZ6xL2AZ7VWR__jySDkE_jqJ68kNRvF2SOjGk,13089
1168
+ vllm/worker/cpu_model_runner.py,sha256=Y5Hp-IFdTq4OQM4RvD-tL0VcOk9EbNOX0ALvFqGqwO8,28375
1169
+ vllm/worker/cpu_pooling_model_runner.py,sha256=IDZqfWjSTwGQho4H5Mcq2gmfdwuniUz15bwv0BzcO14,4830
1170
+ vllm/worker/cpu_worker.py,sha256=QmUtzODL3lBVBLBMJbt1eaaA96W2STkQjFq-vHHgYX8,18407
1171
+ vllm/worker/enc_dec_model_runner.py,sha256=9n8QPo1z5hc7Ab08MEWyalOrbidIBBkNsz8umv4Yd58,24066
1172
+ vllm/worker/hpu_model_runner.py,sha256=B3GdV74Eni9Hd4HOxEWSdPwYw9tP5mvifMkytTPLCnk,104410
1173
+ vllm/worker/hpu_worker.py,sha256=0iWcTgXJqtrNFYeA_Wv_5TplTA2L82H99xPbUGdb_3Y,21648
1174
+ vllm/worker/model_runner.py,sha256=4Jz7uXeAlUDLGmjVK9Ho2IAOHvgRavU1mTfxdQ3QhT0,97895
1175
+ vllm/worker/model_runner_base.py,sha256=yzPjB5vhVYBMQGK0fMAWz4mwnv0pW9Y5F_mpxopwh_8,9439
1176
+ vllm/worker/multi_step_hpu_worker.py,sha256=HYUA_dkgxGDjNDt0WoIdiD6hfH4qej3YVrYGVJMgdxo,5365
1177
+ vllm/worker/multi_step_model_runner.py,sha256=GbZaqrmAm2mkM5_nQER5DmaFYC2QRvloF4cBmBbWczU,39409
1178
+ vllm/worker/multi_step_neuron_model_runner.py,sha256=8m851aQ_GEeVRXBdp5xVJSmk8jabYA3MKqpO2R-0ENY,3292
1179
+ vllm/worker/multi_step_neuronx_distributed_model_runner.py,sha256=GhGSxnwC8B5gAoby7BsunFszu1xJL9iHL_bX_3PJ8pk,2199
1180
+ vllm/worker/multi_step_tpu_worker.py,sha256=YExBBMoOldMmgqhh0H80nZm9h4Q1EwGLAjLRs5iK1DE,4519
1181
+ vllm/worker/multi_step_worker.py,sha256=-OyCJfuXibg_qsLQfNkdNt4giuySR96Vkz5TsKHik-U,9485
1182
+ vllm/worker/neuron_model_runner.py,sha256=81JUkKFU9jVEQo3GHOfDnojUOxHGNlIVh7IxVU_874Y,19651
1183
+ vllm/worker/neuron_worker.py,sha256=aA-rK2XcrcbHvo6MMXE3O3oFEMM-vfg_yaMVBRRAVpY,7486
1184
+ vllm/worker/neuronx_distributed_model_runner.py,sha256=hdEN_hzfk_opVKM7VwfHuGaJ7sn30QTJU0S8Fvz5vz8,12667
1185
+ vllm/worker/pooling_model_runner.py,sha256=24D8B5IYzMX-Mz9pUk4MeNyV6HdyD1smMH4xv87qK0g,9193
1186
+ vllm/worker/tpu_model_runner.py,sha256=Fm7XpWJqs4Eejrzy9trqht2dRvo1lFiRrty0-hNJQpo,40848
1187
+ vllm/worker/tpu_worker.py,sha256=CEmhDsd-7f0JX0uyBbmQDMap8YHGIaYounYywKloe0M,14861
1188
+ vllm/worker/utils.py,sha256=BTNX3J84abaAQBEYkYbfv474Ye36KqkZqTK61IqJ_OQ,1980
1189
+ vllm/worker/worker.py,sha256=h946LI_ca1r1g-abtAlM82S19T4K8j_7QPkIlItYZgU,25551
1190
+ vllm/worker/worker_base.py,sha256=zArLuwSeYCUERQHK8vD5ep81tNs2WH9uejXUkp05XXY,26193
1191
+ vllm/worker/xpu_model_runner.py,sha256=_4-ioHBnFw4hx0WecoqGdudAjr5lUoqeowPuTbYme3s,24547
1192
+ vllm/worker/xpu_worker.py,sha256=NQ6h-2gjTmjEHUP2gPvOSaQFk3uOkRc2KFcz08ZpEhA,7990
1193
+ vllm_cpu_amxbf16-0.9.1.dist-info/METADATA,sha256=8oJt4JhueIflnahsNT4snnQKliL0eIlKsKOmynkwRMM,14504
1194
+ vllm_cpu_amxbf16-0.9.1.dist-info/WHEEL,sha256=IoFti0xAvoDtAxuPJyI4RJkGn0ThylEbxytRcNSoLaU,113
1195
+ vllm_cpu_amxbf16-0.9.1.dist-info/entry_points.txt,sha256=ErfiCUEEMrGDD3jBwf8c54AolBCFv7qrc8Ix9iqzzfs,184
1196
+ vllm_cpu_amxbf16-0.9.1.dist-info/top_level.txt,sha256=fAgb8Pt4zQoKTUA3ZnKEIgcjh0L97_dwEjYDTL5MEEo,5
1197
+ vllm_cpu_amxbf16-0.9.1.dist-info/RECORD,,