vllm-ascend 0.12.0rc1__tar.gz → 0.13.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vllm_ascend-0.13.0/.github/Dockerfile.buildwheel +45 -0
- vllm_ascend-0.13.0/.github/Dockerfile.nightly.a2 +43 -0
- vllm_ascend-0.13.0/.github/Dockerfile.nightly.a3 +43 -0
- vllm_ascend-0.13.0/.github/actionlint.yaml +27 -0
- vllm_ascend-0.13.0/.github/workflows/_e2e_nightly_multi_node.yaml +299 -0
- vllm_ascend-0.13.0/.github/workflows/_e2e_nightly_single_node.yaml +123 -0
- vllm_ascend-0.13.0/.github/workflows/_e2e_nightly_single_node_models.yaml +232 -0
- vllm_ascend-0.13.0/.github/workflows/_e2e_test.yaml +317 -0
- vllm_ascend-0.13.0/.github/workflows/_pre_commit.yml +57 -0
- vllm_ascend-0.13.0/.github/workflows/_schedule_image_build.yaml +184 -0
- vllm_ascend-0.13.0/.github/workflows/_unit_test.yaml +93 -0
- vllm_ascend-0.13.0/.github/workflows/bot_merge_conflict.yaml +20 -0
- vllm_ascend-0.13.0/.github/workflows/bot_pr_create.yaml +113 -0
- vllm_ascend-0.13.0/.github/workflows/labled_doctest.yaml +87 -0
- vllm_ascend-0.13.0/.github/workflows/labled_download_model.yaml +80 -0
- vllm_ascend-0.13.0/.github/workflows/labled_test_310.yaml +110 -0
- vllm_ascend-0.13.0/.github/workflows/misc/model_list.json +240 -0
- vllm_ascend-0.13.0/.github/workflows/nightly_test_a2.yaml +144 -0
- vllm_ascend-0.13.0/.github/workflows/nightly_test_a3.yaml +187 -0
- vllm_ascend-0.13.0/.github/workflows/pr_close_cancel_job.yaml +46 -0
- vllm_ascend-0.13.0/.github/workflows/pr_test_full.yaml +85 -0
- vllm_ascend-0.13.0/.github/workflows/pr_test_light.yaml +104 -0
- vllm_ascend-0.13.0/.github/workflows/schedule_codecov_refresh.yaml +42 -0
- vllm_ascend-0.13.0/.github/workflows/schedule_image_build_and_push.yaml +87 -0
- vllm_ascend-0.13.0/.github/workflows/schedule_nightly_image_build.yaml +60 -0
- vllm_ascend-0.13.0/.github/workflows/schedule_release_code_and_wheel.yml +156 -0
- vllm_ascend-0.13.0/.github/workflows/schedule_test_benchmarks.yaml +203 -0
- vllm_ascend-0.13.0/.github/workflows/schedule_test_vllm_main.yaml +39 -0
- vllm_ascend-0.13.0/.pre-commit-config.yaml +137 -0
- vllm_ascend-0.13.0/CMakeLists.txt +140 -0
- vllm_ascend-0.13.0/CONTRIBUTING.md +3 -0
- vllm_ascend-0.13.0/Dockerfile +81 -0
- vllm_ascend-0.13.0/Dockerfile.310p +66 -0
- vllm_ascend-0.13.0/Dockerfile.310p.openEuler +61 -0
- vllm_ascend-0.13.0/Dockerfile.a3 +80 -0
- vllm_ascend-0.13.0/Dockerfile.a3.openEuler +79 -0
- vllm_ascend-0.13.0/Dockerfile.openEuler +79 -0
- vllm_ascend-0.13.0/PKG-INFO +150 -0
- vllm_ascend-0.13.0/README.md +93 -0
- vllm_ascend-0.13.0/README.zh.md +92 -0
- vllm_ascend-0.13.0/csrc/CMakeLists.txt +645 -0
- vllm_ascend-0.13.0/csrc/add_rms_norm_bias/op_host/CMakeLists.txt +39 -0
- vllm_ascend-0.13.0/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_def.cpp +71 -0
- vllm_ascend-0.13.0/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_infershape.cpp +84 -0
- vllm_ascend-0.13.0/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_tiling.cpp +443 -0
- vllm_ascend-0.13.0/csrc/add_rms_norm_bias/op_host/add_rms_norm_bias_tiling.h +53 -0
- vllm_ascend-0.13.0/csrc/add_rms_norm_bias/op_host/error_log.h +71 -0
- vllm_ascend-0.13.0/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias.cpp +72 -0
- vllm_ascend-0.13.0/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias.h +368 -0
- vllm_ascend-0.13.0/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_merge_n.h +471 -0
- vllm_ascend-0.13.0/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_multi_n.h +339 -0
- vllm_ascend-0.13.0/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_single_n.h +376 -0
- vllm_ascend-0.13.0/csrc/add_rms_norm_bias/op_kernel/add_rms_norm_bias_split_d.h +395 -0
- vllm_ascend-0.13.0/csrc/add_rms_norm_bias/op_kernel/reduce_common.h +179 -0
- vllm_ascend-0.13.0/csrc/add_rms_norm_bias/op_kernel/rms_norm_base.h +316 -0
- vllm_ascend-0.13.0/csrc/build_aclnn.sh +92 -0
- vllm_ascend-0.13.0/csrc/dispatch_ffn_combine/op_host/aclnn_dispatch_ffn_combine.cpp +84 -0
- vllm_ascend-0.13.0/csrc/dispatch_ffn_combine/op_host/aclnn_dispatch_ffn_combine.h +64 -0
- vllm_ascend-0.13.0/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_def.cpp +88 -0
- vllm_ascend-0.13.0/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_tiling.cpp +315 -0
- vllm_ascend-0.13.0/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine.h +278 -0
- vllm_ascend-0.13.0/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine_kernel.hpp +903 -0
- vllm_ascend-0.13.0/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine_tiling.h +57 -0
- vllm_ascend-0.13.0/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2.cpp +133 -0
- vllm_ascend-0.13.0/csrc/dispatch_ffn_combine/op_kernel/utils/block_epilogue_pertoken_row.hpp +208 -0
- vllm_ascend-0.13.0/csrc/dispatch_ffn_combine/op_kernel/utils/block_epilogue_pertoken_swiglu.hpp +316 -0
- vllm_ascend-0.13.0/csrc/dispatch_ffn_combine/op_kernel/utils/block_mmad_preload_async_fixpipe_quant.hpp +504 -0
- vllm_ascend-0.13.0/csrc/dispatch_ffn_combine/op_kernel/utils/const_args.hpp +9 -0
- vllm_ascend-0.13.0/csrc/dispatch_ffn_combine/op_kernel/utils/get_tensor_addr.hpp +16 -0
- vllm_ascend-0.13.0/csrc/dispatch_ffn_combine/op_kernel/utils/hccl_shmem.hpp +176 -0
- vllm_ascend-0.13.0/csrc/dispatch_gmm_combine_decode/op_host/aclnn_dispatch_gmm_combine_decode.cpp +103 -0
- vllm_ascend-0.13.0/csrc/dispatch_gmm_combine_decode/op_host/aclnn_dispatch_gmm_combine_decode.h +52 -0
- vllm_ascend-0.13.0/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_def.cpp +88 -0
- vllm_ascend-0.13.0/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_proto.cpp +95 -0
- vllm_ascend-0.13.0/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_tiling.cpp +486 -0
- vllm_ascend-0.13.0/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_multistage_workspace.h +383 -0
- vllm_ascend-0.13.0/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_swiglu_quant_multistage_workspace.h +2060 -0
- vllm_ascend-0.13.0/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_combine.h +846 -0
- vllm_ascend-0.13.0/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_dispatch.h +1124 -0
- vllm_ascend-0.13.0/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.cpp +35 -0
- vllm_ascend-0.13.0/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.h +440 -0
- vllm_ascend-0.13.0/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_base.h +21 -0
- vllm_ascend-0.13.0/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_tiling.h +77 -0
- vllm_ascend-0.13.0/csrc/kernels/bgmv_expand.cpp +369 -0
- vllm_ascend-0.13.0/csrc/kernels/bgmv_shrink.cpp +252 -0
- vllm_ascend-0.13.0/csrc/kernels/sgmv_expand.cpp +389 -0
- vllm_ascend-0.13.0/csrc/kernels/sgmv_shrink.cpp +275 -0
- vllm_ascend-0.13.0/csrc/matmul_allreduce_add_rmsnorm/op_host/aclnn_matmul_allreduce_add_rmsnorm.cpp +89 -0
- vllm_ascend-0.13.0/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm.cpp +53 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_host/CMakeLists.txt +42 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_host/error_log.h +56 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_host/math_util.h +61 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_host/moe_gating_top_k_def.cpp +71 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_host/moe_gating_top_k_infershape.cpp +147 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_host/moe_gating_top_k_proto.cpp +15 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_host/moe_gating_top_k_proto.h +66 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling.cpp +573 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling.h +86 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling_arch35.cpp +521 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_host/moe_gating_top_k_tiling_base.cpp +38 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_kernel/common.h +89 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_kernel/error_log.h +55 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k.cpp +63 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_apt.cpp +46 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_e_k_fullload.h +404 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_generalized.h +669 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/op_kernel/moe_gating_top_k_without_group.h +338 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/tiling_base/data_copy_transpose_tiling.h +51 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/tiling_base/data_copy_transpose_tiling_def.h +43 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/tiling_base/error_log.h +56 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/tiling_base/tiling_base.h +256 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/tiling_base/tiling_key.h +63 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/tiling_base/tiling_templates_registry.h +351 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/tiling_base/tiling_type.h +139 -0
- vllm_ascend-0.13.0/csrc/moe_gating_top_k/tiling_base/tiling_util.h +30 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_host/CMakeLists.txt +55 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_host/aclnn_moe_init_routing_custom.cpp +143 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_host/aclnn_moe_init_routing_custom.h +47 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom.cpp +50 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom.h +25 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_def.cpp +105 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_infershape.cpp +797 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling.cpp +1267 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling.h +143 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling_base.cpp +68 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_common.h +110 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_expert_tokens_count.h +371 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load.h +280 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_base.h +512 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_dynamic_quant.h +300 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_static_quant.h +229 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_full_load_unquantized.h +224 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_droppad_static_quant.h +238 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_dynamic_quant.h +602 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_out.h +321 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_out_droppad.h +210 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_sort_multi_core.h +242 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_gather_static_quant.h +329 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort.h +207 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort_out.h +232 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort_out_performance.h +239 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_mrgsort_performance.h +206 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_row_idx_gather.h +204 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_row_idx_gather_droppad.h +306 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_row_idx_gather_droppad_dynamic.h +582 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_actual_expert.h +430 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_base.h +71 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_multi_core.h +377 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_multi_core_performance.h +171 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_custom_sort_one_core.h +167 -0
- vllm_ascend-0.13.0/csrc/moe_init_routing_custom/op_kernel/moe_init_routing_custom.cpp +412 -0
- vllm_ascend-0.13.0/csrc/torch_binding.cpp +1483 -0
- vllm_ascend-0.13.0/csrc/torch_binding_meta.cpp +478 -0
- vllm_ascend-0.13.0/docs/README.md +24 -0
- vllm_ascend-0.13.0/docs/source/_templates/sections/header.html +58 -0
- vllm_ascend-0.13.0/docs/source/assets/cp/blocktable.png +0 -0
- vllm_ascend-0.13.0/docs/source/assets/cp/chunkedprefill.png +0 -0
- vllm_ascend-0.13.0/docs/source/assets/cp/dcp-decode.png +0 -0
- vllm_ascend-0.13.0/docs/source/assets/cp/dcp-prefill.png +0 -0
- vllm_ascend-0.13.0/docs/source/assets/cp/head-tail-style.png +0 -0
- vllm_ascend-0.13.0/docs/source/assets/cp/overview.png +0 -0
- vllm_ascend-0.13.0/docs/source/assets/cp/pcp-decode.png +0 -0
- vllm_ascend-0.13.0/docs/source/assets/cp/pcp-prefill.png +0 -0
- vllm_ascend-0.13.0/docs/source/assets/quantization/get_quant_method.png +0 -0
- vllm_ascend-0.13.0/docs/source/assets/quantization/quant_algorithm_overview.png +0 -0
- vllm_ascend-0.13.0/docs/source/assets/quantization/quant_method_base_class.png +0 -0
- vllm_ascend-0.13.0/docs/source/assets/quantization/quant_method_call_flow.png +0 -0
- vllm_ascend-0.13.0/docs/source/assets/quantization/quant_methods_overview.png +0 -0
- vllm_ascend-0.13.0/docs/source/community/contributors.md +291 -0
- vllm_ascend-0.13.0/docs/source/community/versioning_policy.md +162 -0
- vllm_ascend-0.13.0/docs/source/conf.py +145 -0
- vllm_ascend-0.13.0/docs/source/developer_guide/contribution/multi_node_test.md +349 -0
- vllm_ascend-0.13.0/docs/source/developer_guide/contribution/testing.md +288 -0
- vllm_ascend-0.13.0/docs/source/developer_guide/evaluation/index.md +10 -0
- vllm_ascend-0.13.0/docs/source/developer_guide/evaluation/using_evalscope.md +176 -0
- vllm_ascend-0.13.0/docs/source/developer_guide/feature_guide/ACL_Graph.md +102 -0
- vllm_ascend-0.13.0/docs/source/developer_guide/feature_guide/context_parallel.md +119 -0
- vllm_ascend-0.13.0/docs/source/developer_guide/feature_guide/disaggregated_prefill.md +103 -0
- vllm_ascend-0.13.0/docs/source/developer_guide/feature_guide/index.md +17 -0
- vllm_ascend-0.13.0/docs/source/developer_guide/feature_guide/patch.md +75 -0
- vllm_ascend-0.13.0/docs/source/developer_guide/feature_guide/quantization.md +111 -0
- vllm_ascend-0.13.0/docs/source/developer_guide/performance_and_debug/msprobe_guide.md +516 -0
- vllm_ascend-0.13.0/docs/source/developer_guide/performance_and_debug/optimization_and_tuning.md +268 -0
- vllm_ascend-0.13.0/docs/source/developer_guide/performance_and_debug/performance_benchmark.md +239 -0
- vllm_ascend-0.13.0/docs/source/faqs.md +250 -0
- vllm_ascend-0.13.0/docs/source/index.md +71 -0
- vllm_ascend-0.13.0/docs/source/installation.md +494 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +5508 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +218 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +98 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +97 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +926 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +182 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/multi_node_test.po +219 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +225 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po +239 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +107 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +118 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +80 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/ACL_Graph.po +266 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/KV_Cache_Pool_Guide.po +300 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/ModelRunner_prepare_inputs.po +625 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/add_custom_aclnn_op.po +85 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/context_parallel.po +369 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/disaggregated_prefill.po +347 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/eplb_swift_balancer.po +457 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +224 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/quantization.po +360 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +331 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/index.po +26 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/msprobe_guide.po +588 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/optimization_and_tuning.po +349 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/performance_benchmark.po +312 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/profile_execute_duration.po +78 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/service_profiling_guide.po +612 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +654 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/index.po +75 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +453 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +160 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/310p.po +125 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/DeepSeek-R1.po +370 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/DeepSeek-V3.1.po +612 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/DeepSeek-V3.2.po +395 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/GLM4.x.po +325 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Kimi-K2-Thinking.po +65 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/PaddleOCR-VL.po +218 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen-VL-Dense.po +363 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen2.5-7B.po +279 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen2.5-Omni.po +302 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-235B-A22B.po +739 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-30B-A3B.po +67 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-32B-W4A4.po +91 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-8B-W4A8.po +73 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Coder-30B-A3B.po +216 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Dense.po +908 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Next.po +305 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-Omni-30B-A3B-Thinking.po +248 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3-VL-235B-A22B-Instruct.po +475 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3_embedding.po +164 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/Qwen3_reranker.po +171 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/long_sequence_context_parallel_multi_node.po +447 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/long_sequence_context_parallel_single_node.po +387 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/pd_colocated_mooncake_multi_instance.po +518 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/pd_disaggregation_mooncake_multi_node.po +406 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/pd_disaggregation_mooncake_single_node.po +214 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/ray.po +235 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +464 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/index.po +25 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/using_volcano_kthena.po +290 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Fine_grained_TP.po +327 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Multi_Token_Prediction.po +260 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/context_parallel.po +304 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/dynamic_batch.po +101 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/eplb_swift_balancer.po +268 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/external_dp.po +175 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +143 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po +290 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po +496 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/layer_sharding.po +181 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +106 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/netloader.po +359 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +193 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +148 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/speculative_decoding.po +176 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +93 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/ucm_deployment.po +240 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +5278 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +31 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +244 -0
- vllm_ascend-0.13.0/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +537 -0
- vllm_ascend-0.13.0/docs/source/tutorials/DeepSeek-R1.md +309 -0
- vllm_ascend-0.13.0/docs/source/tutorials/DeepSeek-V3.1.md +724 -0
- vllm_ascend-0.13.0/docs/source/tutorials/DeepSeek-V3.2.md +656 -0
- vllm_ascend-0.13.0/docs/source/tutorials/GLM4.x.md +173 -0
- vllm_ascend-0.13.0/docs/source/tutorials/PaddleOCR-VL.md +227 -0
- vllm_ascend-0.13.0/docs/source/tutorials/Qwen2.5-7B.md +178 -0
- vllm_ascend-0.13.0/docs/source/tutorials/Qwen2.5-Omni.md +209 -0
- vllm_ascend-0.13.0/docs/source/tutorials/Qwen3-235B-A22B.md +622 -0
- vllm_ascend-0.13.0/docs/source/tutorials/Qwen3-30B-A3B.md +113 -0
- vllm_ascend-0.13.0/docs/source/tutorials/Qwen3-32B-W4A4.md +143 -0
- vllm_ascend-0.13.0/docs/source/tutorials/Qwen3-8B-W4A8.md +138 -0
- vllm_ascend-0.13.0/docs/source/tutorials/Qwen3-Dense.md +378 -0
- vllm_ascend-0.13.0/docs/source/tutorials/Qwen3-Next.md +181 -0
- vllm_ascend-0.13.0/docs/source/tutorials/Qwen3-Omni-30B-A3B-Thinking.md +311 -0
- vllm_ascend-0.13.0/docs/source/tutorials/Qwen3-VL-235B-A22B-Instruct.md +273 -0
- vllm_ascend-0.13.0/docs/source/tutorials/Qwen3_embedding.md +117 -0
- vllm_ascend-0.13.0/docs/source/tutorials/Qwen3_reranker.md +188 -0
- vllm_ascend-0.13.0/docs/source/tutorials/index.md +33 -0
- vllm_ascend-0.13.0/docs/source/tutorials/long_sequence_context_parallel_multi_node.md +372 -0
- vllm_ascend-0.13.0/docs/source/tutorials/long_sequence_context_parallel_single_node.md +174 -0
- vllm_ascend-0.13.0/docs/source/tutorials/pd_colocated_mooncake_multi_instance.md +343 -0
- vllm_ascend-0.13.0/docs/source/tutorials/pd_disaggregation_mooncake_multi_node.md +947 -0
- vllm_ascend-0.13.0/docs/source/tutorials/pd_disaggregation_mooncake_single_node.md +278 -0
- vllm_ascend-0.13.0/docs/source/user_guide/configuration/additional_config.md +119 -0
- vllm_ascend-0.13.0/docs/source/user_guide/deployment_guide/index.md +7 -0
- vllm_ascend-0.13.0/docs/source/user_guide/deployment_guide/using_volcano_kthena.md +433 -0
- vllm_ascend-0.13.0/docs/source/user_guide/feature_guide/Fine_grained_TP.md +103 -0
- vllm_ascend-0.13.0/docs/source/user_guide/feature_guide/context_parallel.md +88 -0
- vllm_ascend-0.13.0/docs/source/user_guide/feature_guide/graph_mode.md +82 -0
- vllm_ascend-0.13.0/docs/source/user_guide/feature_guide/images/layer_sharding.png +0 -0
- vllm_ascend-0.13.0/docs/source/user_guide/feature_guide/index.md +25 -0
- vllm_ascend-0.13.0/docs/source/user_guide/feature_guide/kv_pool.md +362 -0
- vllm_ascend-0.13.0/docs/source/user_guide/feature_guide/large_scale_ep.md +504 -0
- vllm_ascend-0.13.0/docs/source/user_guide/feature_guide/layer_sharding.md +71 -0
- vllm_ascend-0.13.0/docs/source/user_guide/feature_guide/quantization.md +148 -0
- vllm_ascend-0.13.0/docs/source/user_guide/feature_guide/sleep_mode.md +116 -0
- vllm_ascend-0.13.0/docs/source/user_guide/feature_guide/speculative_decoding.md +146 -0
- vllm_ascend-0.13.0/docs/source/user_guide/feature_guide/ucm_deployment.md +141 -0
- vllm_ascend-0.13.0/docs/source/user_guide/release_notes.md +1056 -0
- vllm_ascend-0.13.0/docs/source/user_guide/support_matrix/supported_features.md +48 -0
- vllm_ascend-0.13.0/docs/source/user_guide/support_matrix/supported_models.md +86 -0
- vllm_ascend-0.13.0/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py +585 -0
- vllm_ascend-0.13.0/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +897 -0
- vllm_ascend-0.13.0/examples/eplb/eplb_strategy.py +183 -0
- vllm_ascend-0.13.0/examples/external_online_dp/dp_load_balance_proxy_server.py +404 -0
- vllm_ascend-0.13.0/examples/external_online_dp/launch_online_dp.py +98 -0
- vllm_ascend-0.13.0/examples/offline_disaggregated_prefill_npu.py +168 -0
- vllm_ascend-0.13.0/examples/offline_embed.py +58 -0
- vllm_ascend-0.13.0/examples/offline_external_launcher.py +331 -0
- vllm_ascend-0.13.0/examples/offline_inference_audio_language.py +106 -0
- vllm_ascend-0.13.0/examples/offline_inference_npu_long_seq.py +59 -0
- vllm_ascend-0.13.0/examples/offline_weight_load.py +334 -0
- vllm_ascend-0.13.0/examples/prompt_embed_inference.py +97 -0
- vllm_ascend-0.13.0/examples/quantization/llm-compressor/w8a8_int8.py +162 -0
- vllm_ascend-0.13.0/examples/quantization/llm-compressor/w8a8_int8_dynamic.py +82 -0
- vllm_ascend-0.13.0/format.sh +44 -0
- vllm_ascend-0.13.0/mypy.ini +34 -0
- vllm_ascend-0.13.0/pyproject.toml +45 -0
- vllm_ascend-0.13.0/requirements-dev.txt +25 -0
- vllm_ascend-0.13.0/requirements.txt +38 -0
- vllm_ascend-0.13.0/setup.py +536 -0
- vllm_ascend-0.13.0/tests/e2e/conftest.py +794 -0
- vllm_ascend-0.13.0/tests/e2e/model_utils.py +76 -0
- vllm_ascend-0.13.0/tests/e2e/models/configs/InternVL3_5-8B-hf.yaml +10 -0
- vllm_ascend-0.13.0/tests/e2e/models/configs/Llama-3.2-3B-Instruct.yaml +10 -0
- vllm_ascend-0.13.0/tests/e2e/models/configs/Qwen3-Omni-30B-A3B-Instruct.yaml +11 -0
- vllm_ascend-0.13.0/tests/e2e/models/configs/accuracy.txt +15 -0
- vllm_ascend-0.13.0/tests/e2e/models/configs/gemma-3-4b-it.yaml +14 -0
- vllm_ascend-0.13.0/tests/e2e/models/configs/llava-onevision-qwen2-0.5b-ov-hf.yaml +10 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py +153 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py +240 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/2-cards/test_data_parallel.py +79 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/2-cards/test_expert_parallel.py +34 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/2-cards/test_external_launcher.py +240 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/2-cards/test_full_graph_mode.py +116 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/2-cards/test_ilama_lora_tp2.py +26 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/2-cards/test_offline_inference_distributed.py +237 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/2-cards/test_offline_weight_load.py +75 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/2-cards/test_pipeline_parallel.py +48 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/2-cards/test_prefix_caching.py +85 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/2-cards/test_quantization.py +44 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/2-cards/test_qwen3_moe.py +128 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/2-cards/test_shared_expert_dp.py +92 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py +281 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/4-cards/long_sequence/test_basic.py +248 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill.py +122 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/4-cards/long_sequence/test_mtp.py +153 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py +155 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/4-cards/test_data_parallel_tp2.py +49 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/4-cards/test_kimi_k2.py +44 -0
- vllm_ascend-0.13.0/tests/e2e/multicard/4-cards/test_qwen3_next.py +77 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-A2.yaml +62 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-EPLB.yaml +195 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml +111 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8.yaml +209 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/DeepSeek-V3.1-BF16.yaml +82 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/DeepSeek-V3.yaml +112 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-A3-dual-nodes.yaml +86 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/Kimi-K2-Instruct-W8A8.yaml +79 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/Qwen3-235B-A22B-A2.yaml +72 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/Qwen3-235B-A22B.yaml +73 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/Qwen3-235B-W8A8-EPLB.yaml +93 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/Qwen3-235B-W8A8-longseq.yaml +100 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/Qwen3-235B-W8A8.yaml +89 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/Qwen3-235B-disagg-pd.yaml +121 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/config/Qwen3-VL-235B-disagg-pd.yaml +108 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 +140 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/scripts/multi_node_config.py +352 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/scripts/run.sh +170 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/scripts/test_multi_node.py +46 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/scripts/utils.py +149 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_deepseek_r1_0528_w8a8.py +117 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_deepseek_r1_0528_w8a8_eplb.py +115 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_deepseek_r1_w8a8_hbm.py +123 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_deepseek_v3_2_w8a8.py +108 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_glm4_5.py +115 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_kimi_k2_thinking.py +110 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_mtpx_deepseek_r1_0528_w8a8.py +140 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_prefix_cache_deepseek_r1_0528_w8a8.py +107 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_prefix_cache_qwen3_32b_int8.py +99 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_qwen2_5_vl_32b.py +110 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_qwen2_5_vl_7b.py +102 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_qwen3_235b_a22b_w8a8_eplb.py +105 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_qwen3_235b_w8a8.py +101 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_qwen3_30b_w8a8.py +92 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_qwen3_32b_int8.py +129 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_qwen3_32b_int8_a3_feature_stack3.py +98 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_qwen3_8b.py +99 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_qwen3_next.py +113 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_qwen3_next_w8a8.py +104 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models/test_qwq_32b.py +116 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py +135 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py +234 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py +469 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py +148 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py +338 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py +117 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py +101 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py +118 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py +349 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py +210 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py +361 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py +100 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py +65 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py +34 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py +80 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py +229 -0
- vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py +214 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/compile/test_norm_quant_fusion.py +357 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/model_runner_v2/test_basic.py +51 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/pooling/test_classification.py +34 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/pooling/test_embedding.py +99 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/pooling/test_scoring.py +187 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/spec_decode/test_mtp_eagle_correctness.py +209 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py +382 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/test_aclgraph_accuracy.py +115 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/test_aclgraph_mem.py +99 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/test_async_scheduling.py +240 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/test_batch_invariant.py +672 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/test_camem.py +60 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/test_completion_with_prompt_embeds.py +76 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/test_cpu_offloading.py +178 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/test_guided_decoding.py +155 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/test_ilama_lora.py +65 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/test_models.py +75 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +108 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/test_quantization.py +79 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/test_sampler.py +70 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/test_vlm.py +98 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/test_xlite.py +107 -0
- vllm_ascend-0.13.0/tests/e2e/singlecard/utils.py +76 -0
- vllm_ascend-0.13.0/tests/e2e/vllm_interface/singlecard/test_sampler.py +37 -0
- vllm_ascend-0.13.0/tests/ut/attention/test_attention_cp.py +672 -0
- vllm_ascend-0.13.0/tests/ut/attention/test_attention_v1.py +322 -0
- vllm_ascend-0.13.0/tests/ut/attention/test_mla_cp.py +1003 -0
- vllm_ascend-0.13.0/tests/ut/attention/test_mla_v1.py +1051 -0
- vllm_ascend-0.13.0/tests/ut/attention/test_sfa_v1.py +207 -0
- vllm_ascend-0.13.0/tests/ut/attention/utils.py +64 -0
- vllm_ascend-0.13.0/tests/ut/compilation/test_acl_graph.py +857 -0
- vllm_ascend-0.13.0/tests/ut/compilation/test_add_rms_norm_quant.py +148 -0
- vllm_ascend-0.13.0/tests/ut/conftest.py +39 -0
- vllm_ascend-0.13.0/tests/ut/core/test_scheduler_dynamic_batch.py +750 -0
- vllm_ascend-0.13.0/tests/ut/device_allocator/test_camem.py +195 -0
- vllm_ascend-0.13.0/tests/ut/distributed/mooncake/test_config_data.py +78 -0
- vllm_ascend-0.13.0/tests/ut/distributed/test_parallel_state.py +77 -0
- vllm_ascend-0.13.0/tests/ut/eplb/adaptor/test_abstract_adaptor.py +61 -0
- vllm_ascend-0.13.0/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +116 -0
- vllm_ascend-0.13.0/tests/ut/eplb/core/test_eplb_utils.py +211 -0
- vllm_ascend-0.13.0/tests/ut/kv_connector/test_mooncake_connector.py +1345 -0
- vllm_ascend-0.13.0/tests/ut/kv_connector/test_mooncake_layerwise_connector.py +930 -0
- vllm_ascend-0.13.0/tests/ut/kv_connector/utils.py +210 -0
- vllm_ascend-0.13.0/tests/ut/ops/test_activation.py +76 -0
- vllm_ascend-0.13.0/tests/ut/ops/test_fused_moe.py +593 -0
- vllm_ascend-0.13.0/tests/ut/ops/test_layernorm.py +77 -0
- vllm_ascend-0.13.0/tests/ut/ops/test_linear.py +160 -0
- vllm_ascend-0.13.0/tests/ut/ops/test_mla.py +155 -0
- vllm_ascend-0.13.0/tests/ut/ops/test_moe_comm_method.py +228 -0
- vllm_ascend-0.13.0/tests/ut/ops/test_moe_mlp.py +51 -0
- vllm_ascend-0.13.0/tests/ut/ops/test_prepare_finalize.py +223 -0
- vllm_ascend-0.13.0/tests/ut/ops/test_rotary_embedding.py +453 -0
- vllm_ascend-0.13.0/tests/ut/ops/test_token_dispatcher.py +479 -0
- vllm_ascend-0.13.0/tests/ut/quantization/test_quant_config.py +158 -0
- vllm_ascend-0.13.0/tests/ut/quantization/test_utils.py +50 -0
- vllm_ascend-0.13.0/tests/ut/quantization/test_w4a4_flatquant_dynamic.py +216 -0
- vllm_ascend-0.13.0/tests/ut/quantization/test_w4a8_dynamic.py +306 -0
- vllm_ascend-0.13.0/tests/ut/quantization/test_w8a16.py +91 -0
- vllm_ascend-0.13.0/tests/ut/quantization/test_w8a8.py +218 -0
- vllm_ascend-0.13.0/tests/ut/quantization/test_w8a8_dynamic.py +106 -0
- vllm_ascend-0.13.0/tests/ut/sample/test_rejection_sampler.py +250 -0
- vllm_ascend-0.13.0/tests/ut/sample/test_sampler.py +11 -0
- vllm_ascend-0.13.0/tests/ut/spec_decode/test_eagle_proposer.py +440 -0
- vllm_ascend-0.13.0/tests/ut/spec_decode/test_mtp_proposer.py +344 -0
- vllm_ascend-0.13.0/tests/ut/test_ascend_config.py +109 -0
- vllm_ascend-0.13.0/tests/ut/test_platform.py +489 -0
- vllm_ascend-0.13.0/tests/ut/test_utils.py +304 -0
- vllm_ascend-0.13.0/tests/ut/worker/test_block_table.py +258 -0
- vllm_ascend-0.13.0/tests/ut/worker/test_pcp_manager.py +512 -0
- vllm_ascend-0.13.0/tests/ut/worker/test_worker_v1.py +1166 -0
- vllm_ascend-0.13.0/tools/aisbench.py +335 -0
- vllm_ascend-0.13.0/tools/collect_user_first_contribution.sh +83 -0
- vllm_ascend-0.13.0/tools/format_contributors.py +98 -0
- vllm_ascend-0.13.0/tools/install_flash_infer_attention_score_ops_a2.sh +37 -0
- vllm_ascend-0.13.0/tools/install_flash_infer_attention_score_ops_a3.sh +36 -0
- vllm_ascend-0.13.0/tools/mypy.sh +40 -0
- vllm_ascend-0.13.0/tools/send_request.py +37 -0
- vllm_ascend-0.13.0/tools/vllm_bench.py +154 -0
- vllm_ascend-0.13.0/vllm_ascend/_version.py +34 -0
- vllm_ascend-0.13.0/vllm_ascend/ascend_config.py +321 -0
- vllm_ascend-0.13.0/vllm_ascend/ascend_forward_context.py +279 -0
- vllm_ascend-0.13.0/vllm_ascend/attention/attention_mask.py +102 -0
- vllm_ascend-0.13.0/vllm_ascend/attention/attention_v1.py +776 -0
- vllm_ascend-0.13.0/vllm_ascend/attention/context_parallel/attention_cp.py +908 -0
- vllm_ascend-0.13.0/vllm_ascend/attention/context_parallel/common_cp.py +132 -0
- vllm_ascend-0.13.0/vllm_ascend/attention/context_parallel/mla_cp.py +779 -0
- vllm_ascend-0.13.0/vllm_ascend/attention/mla_v1.py +1544 -0
- vllm_ascend-0.13.0/vllm_ascend/attention/sfa_v1.py +1110 -0
- vllm_ascend-0.13.0/vllm_ascend/attention/utils.py +271 -0
- vllm_ascend-0.13.0/vllm_ascend/batch_invariant.py +82 -0
- vllm_ascend-0.13.0/vllm_ascend/compilation/acl_graph.py +648 -0
- vllm_ascend-0.13.0/vllm_ascend/compilation/compiler_interface.py +139 -0
- vllm_ascend-0.13.0/vllm_ascend/compilation/graph_fusion_pass_manager.py +60 -0
- vllm_ascend-0.13.0/vllm_ascend/compilation/npugraph_ex_passes/add_rms_norm_quant.py +301 -0
- vllm_ascend-0.13.0/vllm_ascend/compilation/passes/norm_quant_fusion_pass.py +317 -0
- vllm_ascend-0.13.0/vllm_ascend/compilation/passes/qknorm_rope_fusion_pass.py +291 -0
- vllm_ascend-0.13.0/vllm_ascend/core/multi_block_pool.py +184 -0
- vllm_ascend-0.13.0/vllm_ascend/core/recompute_scheduler.py +883 -0
- vllm_ascend-0.13.0/vllm_ascend/core/scheduler_dynamic_batch.py +597 -0
- vllm_ascend-0.13.0/vllm_ascend/device_allocator/camem.py +275 -0
- vllm_ascend-0.13.0/vllm_ascend/distributed/__init__.py +44 -0
- vllm_ascend-0.13.0/vllm_ascend/distributed/cpu_offload_connector.py +528 -0
- vllm_ascend-0.13.0/vllm_ascend/distributed/cpu_offload_manager/metadata.py +272 -0
- vllm_ascend-0.13.0/vllm_ascend/distributed/kvpool/ascend_store_connector.py +183 -0
- vllm_ascend-0.13.0/vllm_ascend/distributed/kvpool/backend/memcache_backend.py +95 -0
- vllm_ascend-0.13.0/vllm_ascend/distributed/kvpool/backend/mooncake_backend.py +190 -0
- vllm_ascend-0.13.0/vllm_ascend/distributed/kvpool/config_data.py +405 -0
- vllm_ascend-0.13.0/vllm_ascend/distributed/kvpool/kv_transfer.py +366 -0
- vllm_ascend-0.13.0/vllm_ascend/distributed/kvpool/pool_scheduler.py +392 -0
- vllm_ascend-0.13.0/vllm_ascend/distributed/kvpool/pool_worker.py +626 -0
- vllm_ascend-0.13.0/vllm_ascend/distributed/mooncake_connector.py +1849 -0
- vllm_ascend-0.13.0/vllm_ascend/distributed/mooncake_layerwise_connector.py +1370 -0
- vllm_ascend-0.13.0/vllm_ascend/distributed/parallel_state.py +368 -0
- vllm_ascend-0.13.0/vllm_ascend/distributed/ucm_connector.py +254 -0
- vllm_ascend-0.13.0/vllm_ascend/distributed/utils.py +111 -0
- vllm_ascend-0.13.0/vllm_ascend/envs.py +155 -0
- vllm_ascend-0.13.0/vllm_ascend/eplb/adaptor/abstract_adaptor.py +40 -0
- vllm_ascend-0.13.0/vllm_ascend/eplb/adaptor/vllm_adaptor.py +184 -0
- vllm_ascend-0.13.0/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +133 -0
- vllm_ascend-0.13.0/vllm_ascend/eplb/core/eplb_utils.py +191 -0
- vllm_ascend-0.13.0/vllm_ascend/eplb/core/eplb_worker.py +443 -0
- vllm_ascend-0.13.0/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +768 -0
- vllm_ascend-0.13.0/vllm_ascend/eplb/core/policy/policy_flashlb.py +648 -0
- vllm_ascend-0.13.0/vllm_ascend/eplb/eplb_updator.py +233 -0
- vllm_ascend-0.13.0/vllm_ascend/eplb/utils.py +77 -0
- vllm_ascend-0.13.0/vllm_ascend/flash_common3_context.py +42 -0
- vllm_ascend-0.13.0/vllm_ascend/kv_offload/npu.py +64 -0
- vllm_ascend-0.13.0/vllm_ascend/lora/punica_npu.py +362 -0
- vllm_ascend-0.13.0/vllm_ascend/model_loader/netloader/executor/elastic_load.py +170 -0
- vllm_ascend-0.13.0/vllm_ascend/model_loader/netloader/executor/netloader_pg.py +188 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/__init__.py +62 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/flashcomm2_oshard_manager.py +100 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/fused_moe/experts_selector.py +354 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/fused_moe/fused_moe.py +604 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/fused_moe/moe_comm_method.py +345 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/fused_moe/moe_mlp.py +354 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/fused_moe/prepare_finalize.py +489 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/fused_moe/token_dispatcher.py +627 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/layer_shard_linear.py +280 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/layernorm.py +107 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/linear.py +470 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/linear_op.py +814 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/mla.py +185 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/mm_encoder_attention.py +146 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/register_custom_ops.py +379 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/rotary_embedding.py +655 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/activation/__init__.py +0 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/activation/swiglu_quant.py +117 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/batch_invariant/__init__.py +0 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/batch_invariant/matmul.py +403 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/batch_invariant/mean.py +177 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/batch_invariant/rmsnorm.py +153 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/batch_invariant/softmax.py +29 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/fla/__init__.py +0 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/fla/chunk.py +226 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/fla/fused_qkvzba_split_reshape.py +115 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/fla/l2norm.py +70 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/fla/sigmoid_gating.py +395 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/fused_gdn_gating.py +118 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/linearnorm/__init__.py +0 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/linearnorm/split_qkv_rmsnorm_rope.py +305 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/mamba/__init__.py +0 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/mamba/causal_conv1d.py +721 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/reject_sample.py +461 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/rope.py +207 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/spec_decode/__init__.py +0 -0
- vllm_ascend-0.13.0/vllm_ascend/ops/triton/spec_decode/utils.py +68 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/__init__.py +333 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/platform/__init__.py +48 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/platform/patch_balance_schedule.py +622 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/platform/patch_core.py +78 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/platform/patch_ec_connector.py +31 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/platform/patch_kv_cache_coordinator.py +142 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/platform/patch_kv_cache_utils.py +192 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/platform/patch_lora_model_manager.py +83 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/platform/patch_message_queue.py +78 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/platform/patch_multiproc_executor.py +182 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/platform/patch_set_cudagraph_sizes.py +146 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/platform/patch_vllm_config.py +436 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/worker/__init__.py +41 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/worker/patch_bert.py +44 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/worker/patch_deepseekv3.py +484 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/worker/patch_model_runner.py +298 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/worker/patch_module.py +36 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/worker/patch_multimodal_merge.py +59 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/worker/patch_qwen3_next.py +343 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/worker/patch_qwen3vl.py +41 -0
- vllm_ascend-0.13.0/vllm_ascend/patch/worker/patch_rejection_sampler.py +11 -0
- vllm_ascend-0.13.0/vllm_ascend/platform.py +581 -0
- vllm_ascend-0.13.0/vllm_ascend/quantization/__init__.py +0 -0
- vllm_ascend-0.13.0/vllm_ascend/quantization/compressed_tensors/__init__.py +0 -0
- vllm_ascend-0.13.0/vllm_ascend/quantization/quant_config.py +600 -0
- vllm_ascend-0.13.0/vllm_ascend/quantization/utils.py +125 -0
- vllm_ascend-0.13.0/vllm_ascend/quantization/w4a16.py +278 -0
- vllm_ascend-0.13.0/vllm_ascend/quantization/w4a4_flatquant_dynamic.py +191 -0
- vllm_ascend-0.13.0/vllm_ascend/quantization/w4a8_dynamic.py +482 -0
- vllm_ascend-0.13.0/vllm_ascend/quantization/w8a16.py +89 -0
- vllm_ascend-0.13.0/vllm_ascend/quantization/w8a8.py +193 -0
- vllm_ascend-0.13.0/vllm_ascend/quantization/w8a8_dynamic.py +353 -0
- vllm_ascend-0.13.0/vllm_ascend/quantization/w8a8mxfp8.py +98 -0
- vllm_ascend-0.13.0/vllm_ascend/sample/__init__.py +0 -0
- vllm_ascend-0.13.0/vllm_ascend/sample/rejection_sampler.py +798 -0
- vllm_ascend-0.13.0/vllm_ascend/sample/sampler.py +126 -0
- vllm_ascend-0.13.0/vllm_ascend/spec_decode/eagle_proposer.py +1140 -0
- vllm_ascend-0.13.0/vllm_ascend/spec_decode/interface.py +53 -0
- vllm_ascend-0.13.0/vllm_ascend/spec_decode/mtp_proposer.py +567 -0
- vllm_ascend-0.13.0/vllm_ascend/spec_decode/ngram_proposer.py +72 -0
- vllm_ascend-0.13.0/vllm_ascend/spec_decode/suffix_proposer.py +45 -0
- vllm_ascend-0.13.0/vllm_ascend/utils.py +1184 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/__init__.py +0 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/block_table.py +351 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/model_runner_v1.py +3157 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/npu_input_batch.py +252 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/pcp_utils.py +840 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/v2/README.md +6 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/v2/__init__.py +0 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/v2/aclgraph_utils.py +87 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/v2/attn_utils.py +171 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/v2/input_batch.py +56 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/v2/model_runner.py +369 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/v2/sample/__init__.py +0 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/v2/sample/gumbel.py +128 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/v2/sample/penalties.py +137 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/v2/sample/sampler.py +58 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/v2/states.py +105 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/v2/utils.py +20 -0
- vllm_ascend-0.13.0/vllm_ascend/worker/worker.py +544 -0
- vllm_ascend-0.13.0/vllm_ascend/xlite/__init__.py +0 -0
- vllm_ascend-0.13.0/vllm_ascend/xlite/xlite.py +298 -0
- vllm_ascend-0.13.0/vllm_ascend/xlite/xlite_worker.py +30 -0
- vllm_ascend-0.13.0/vllm_ascend.egg-info/PKG-INFO +150 -0
- vllm_ascend-0.13.0/vllm_ascend.egg-info/SOURCES.txt +1118 -0
- vllm_ascend-0.13.0/vllm_ascend.egg-info/requires.txt +26 -0
- vllm_ascend-0.12.0rc1/.github/Dockerfile.buildwheel +0 -45
- vllm_ascend-0.12.0rc1/.github/Dockerfile.nightly.a2 +0 -43
- vllm_ascend-0.12.0rc1/.github/Dockerfile.nightly.a3 +0 -43
- vllm_ascend-0.12.0rc1/.github/actionlint.yaml +0 -21
- vllm_ascend-0.12.0rc1/.github/workflows/_e2e_nightly_multi_node.yaml +0 -267
- vllm_ascend-0.12.0rc1/.github/workflows/_e2e_nightly_single_node.yaml +0 -130
- vllm_ascend-0.12.0rc1/.github/workflows/_e2e_nightly_single_node_models.yaml +0 -232
- vllm_ascend-0.12.0rc1/.github/workflows/_e2e_test.yaml +0 -286
- vllm_ascend-0.12.0rc1/.github/workflows/_nightly_image_build.yaml +0 -59
- vllm_ascend-0.12.0rc1/.github/workflows/_pre_commit.yml +0 -42
- vllm_ascend-0.12.0rc1/.github/workflows/image_build_and_push.yaml +0 -528
- vllm_ascend-0.12.0rc1/.github/workflows/label_merge_conflict.yml +0 -21
- vllm_ascend-0.12.0rc1/.github/workflows/nightly_benchmarks.yaml +0 -203
- vllm_ascend-0.12.0rc1/.github/workflows/pr_create.yaml +0 -121
- vllm_ascend-0.12.0rc1/.github/workflows/release_code_and_wheel.yml +0 -167
- vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_doctest.yaml +0 -87
- vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_test_310p.yaml +0 -116
- vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_test_full_vllm_main.yaml +0 -39
- vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_test_nightly_a2.yaml +0 -136
- vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_test_nightly_a3.yaml +0 -158
- vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_test_pr_full.yaml +0 -85
- vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_test_pr_light.yaml +0 -167
- vllm_ascend-0.12.0rc1/.github/workflows/vllm_ascend_test_report.yaml +0 -172
- vllm_ascend-0.12.0rc1/.pre-commit-config.yaml +0 -137
- vllm_ascend-0.12.0rc1/CMakeLists.txt +0 -136
- vllm_ascend-0.12.0rc1/CONTRIBUTING.md +0 -3
- vllm_ascend-0.12.0rc1/Dockerfile +0 -69
- vllm_ascend-0.12.0rc1/Dockerfile.310p +0 -61
- vllm_ascend-0.12.0rc1/Dockerfile.310p.openEuler +0 -58
- vllm_ascend-0.12.0rc1/Dockerfile.a3 +0 -68
- vllm_ascend-0.12.0rc1/Dockerfile.a3.openEuler +0 -71
- vllm_ascend-0.12.0rc1/Dockerfile.openEuler +0 -71
- vllm_ascend-0.12.0rc1/PKG-INFO +0 -146
- vllm_ascend-0.12.0rc1/README.md +0 -92
- vllm_ascend-0.12.0rc1/README.zh.md +0 -91
- vllm_ascend-0.12.0rc1/csrc/CMakeLists.txt +0 -642
- vllm_ascend-0.12.0rc1/csrc/build_aclnn.sh +0 -89
- vllm_ascend-0.12.0rc1/csrc/dispatch_ffn_combine/op_host/aclnn_dispatch_ffn_combine.cpp +0 -84
- vllm_ascend-0.12.0rc1/csrc/dispatch_ffn_combine/op_host/aclnn_dispatch_ffn_combine.h +0 -64
- vllm_ascend-0.12.0rc1/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_def.cpp +0 -88
- vllm_ascend-0.12.0rc1/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_tiling.cpp +0 -265
- vllm_ascend-0.12.0rc1/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine.h +0 -276
- vllm_ascend-0.12.0rc1/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine_kernel.hpp +0 -814
- vllm_ascend-0.12.0rc1/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine_tiling.h +0 -56
- vllm_ascend-0.12.0rc1/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2.cpp +0 -134
- vllm_ascend-0.12.0rc1/csrc/dispatch_ffn_combine/op_kernel/utils/block_epilogue_pertoken_row.hpp +0 -207
- vllm_ascend-0.12.0rc1/csrc/dispatch_ffn_combine/op_kernel/utils/block_epilogue_pertoken_swiglu.hpp +0 -316
- vllm_ascend-0.12.0rc1/csrc/dispatch_ffn_combine/op_kernel/utils/block_mmad_preload_async_fixpipe_quant.hpp +0 -502
- vllm_ascend-0.12.0rc1/csrc/dispatch_ffn_combine/op_kernel/utils/const_args.hpp +0 -6
- vllm_ascend-0.12.0rc1/csrc/dispatch_ffn_combine/op_kernel/utils/hccl_shmem.hpp +0 -162
- vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_host/aclnn_dispatch_gmm_combine_decode.cpp +0 -101
- vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_host/aclnn_dispatch_gmm_combine_decode.h +0 -51
- vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_def.cpp +0 -83
- vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_proto.cpp +0 -95
- vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_tiling.cpp +0 -339
- vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_multistage_workspace.h +0 -355
- vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_swiglu_quant_multistage_workspace.h +0 -1990
- vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_combine.h +0 -814
- vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_dispatch.h +0 -1072
- vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.cpp +0 -33
- vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.h +0 -436
- vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_base.h +0 -18
- vllm_ascend-0.12.0rc1/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_tiling.h +0 -74
- vllm_ascend-0.12.0rc1/csrc/kernels/bgmv_expand.cpp +0 -369
- vllm_ascend-0.12.0rc1/csrc/kernels/bgmv_shrink.cpp +0 -252
- vllm_ascend-0.12.0rc1/csrc/kernels/sgmv_expand.cpp +0 -389
- vllm_ascend-0.12.0rc1/csrc/kernels/sgmv_shrink.cpp +0 -275
- vllm_ascend-0.12.0rc1/csrc/matmul_allreduce_add_rmsnorm/op_host/aclnn_matmul_allreduce_add_rmsnorm.cpp +0 -89
- vllm_ascend-0.12.0rc1/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm.cpp +0 -50
- vllm_ascend-0.12.0rc1/csrc/torch_binding.cpp +0 -1257
- vllm_ascend-0.12.0rc1/csrc/torch_binding_meta.cpp +0 -319
- vllm_ascend-0.12.0rc1/docs/README.md +0 -24
- vllm_ascend-0.12.0rc1/docs/source/_templates/sections/header.html +0 -58
- vllm_ascend-0.12.0rc1/docs/source/community/contributors.md +0 -171
- vllm_ascend-0.12.0rc1/docs/source/community/versioning_policy.md +0 -149
- vllm_ascend-0.12.0rc1/docs/source/conf.py +0 -145
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/contribution/multi_node_test.md +0 -159
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/contribution/testing.md +0 -288
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/evaluation/accuracy_report/DeepSeek-V2-Lite.md +0 -20
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-VL-7B-Instruct.md +0 -19
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B.md +0 -21
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B-Base.md +0 -21
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/evaluation/accuracy_report/index.md +0 -10
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/evaluation/index.md +0 -11
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/evaluation/using_evalscope.md +0 -176
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/feature_guide/ACL_Graph.md +0 -102
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/feature_guide/disaggregated_prefill.md +0 -103
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/feature_guide/index.md +0 -16
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/feature_guide/patch.md +0 -75
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/performance_and_debug/msprobe_guide.md +0 -516
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/performance_and_debug/optimization_and_tuning.md +0 -184
- vllm_ascend-0.12.0rc1/docs/source/developer_guide/performance_and_debug/performance_benchmark.md +0 -195
- vllm_ascend-0.12.0rc1/docs/source/faqs.md +0 -242
- vllm_ascend-0.12.0rc1/docs/source/index.md +0 -70
- vllm_ascend-0.12.0rc1/docs/source/installation.md +0 -485
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po +0 -1647
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po +0 -204
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po +0 -103
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po +0 -87
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po +0 -624
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po +0 -187
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po +0 -237
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po +0 -112
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po +0 -65
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po +0 -83
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/patch.po +0 -248
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po +0 -333
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/index.po +0 -26
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/msprobe_guide.po +0 -646
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/performance_benchmark.po +0 -88
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/profile_execute_duration.po +0 -81
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/service_profiling_guide.po +0 -575
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/faqs.po +0 -479
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/index.po +0 -79
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/installation.po +0 -293
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po +0 -149
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po +0 -286
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po +0 -121
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po +0 -58
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po +0 -183
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po +0 -156
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po +0 -220
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po +0 -1660
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po +0 -30
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po +0 -264
- vllm_ascend-0.12.0rc1/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po +0 -190
- vllm_ascend-0.12.0rc1/docs/source/tutorials/DeepSeek-R1.md +0 -290
- vllm_ascend-0.12.0rc1/docs/source/tutorials/DeepSeek-V3.1.md +0 -807
- vllm_ascend-0.12.0rc1/docs/source/tutorials/DeepSeek-V3.2.md +0 -652
- vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen2.5-7B.md +0 -177
- vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen2.5-Omni.md +0 -206
- vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen3-235B-A22B.md +0 -313
- vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen3-30B-A3B.md +0 -110
- vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen3-32B-W4A4.md +0 -141
- vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen3-8B-W4A8.md +0 -134
- vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen3-Dense.md +0 -372
- vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen3-Next.md +0 -154
- vllm_ascend-0.12.0rc1/docs/source/tutorials/Qwen3_embedding.md +0 -100
- vllm_ascend-0.12.0rc1/docs/source/tutorials/index.md +0 -25
- vllm_ascend-0.12.0rc1/docs/source/tutorials/pd_disaggregation_mooncake_multi_node.md +0 -922
- vllm_ascend-0.12.0rc1/docs/source/tutorials/pd_disaggregation_mooncake_single_node.md +0 -252
- vllm_ascend-0.12.0rc1/docs/source/user_guide/configuration/additional_config.md +0 -98
- vllm_ascend-0.12.0rc1/docs/source/user_guide/feature_guide/graph_mode.md +0 -82
- vllm_ascend-0.12.0rc1/docs/source/user_guide/feature_guide/index.md +0 -20
- vllm_ascend-0.12.0rc1/docs/source/user_guide/feature_guide/kv_pool.md +0 -293
- vllm_ascend-0.12.0rc1/docs/source/user_guide/feature_guide/large_scale_ep.md +0 -504
- vllm_ascend-0.12.0rc1/docs/source/user_guide/feature_guide/quantization-llm-compressor.md +0 -65
- vllm_ascend-0.12.0rc1/docs/source/user_guide/feature_guide/quantization.md +0 -106
- vllm_ascend-0.12.0rc1/docs/source/user_guide/feature_guide/sleep_mode.md +0 -116
- vllm_ascend-0.12.0rc1/docs/source/user_guide/release_notes.md +0 -810
- vllm_ascend-0.12.0rc1/docs/source/user_guide/support_matrix/supported_features.md +0 -47
- vllm_ascend-0.12.0rc1/docs/source/user_guide/support_matrix/supported_models.md +0 -81
- vllm_ascend-0.12.0rc1/examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py +0 -586
- vllm_ascend-0.12.0rc1/examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py +0 -666
- vllm_ascend-0.12.0rc1/examples/eplb/eplb_strategy.py +0 -186
- vllm_ascend-0.12.0rc1/examples/external_online_dp/dp_load_balance_proxy_server.py +0 -405
- vllm_ascend-0.12.0rc1/examples/external_online_dp/launch_online_dp.py +0 -97
- vllm_ascend-0.12.0rc1/examples/offline_disaggregated_prefill_npu.py +0 -168
- vllm_ascend-0.12.0rc1/examples/offline_embed.py +0 -58
- vllm_ascend-0.12.0rc1/examples/offline_external_launcher.py +0 -331
- vllm_ascend-0.12.0rc1/examples/offline_inference_audio_language.py +0 -105
- vllm_ascend-0.12.0rc1/examples/offline_inference_npu_long_seq.py +0 -59
- vllm_ascend-0.12.0rc1/examples/offline_weight_load.py +0 -335
- vllm_ascend-0.12.0rc1/examples/prompt_embed_inference.py +0 -97
- vllm_ascend-0.12.0rc1/examples/quantization/llm-compressor/w8a8_int8.py +0 -160
- vllm_ascend-0.12.0rc1/examples/quantization/llm-compressor/w8a8_int8_dynamic.py +0 -83
- vllm_ascend-0.12.0rc1/format.sh +0 -44
- vllm_ascend-0.12.0rc1/mypy.ini +0 -32
- vllm_ascend-0.12.0rc1/pyproject.toml +0 -42
- vllm_ascend-0.12.0rc1/requirements-dev.txt +0 -24
- vllm_ascend-0.12.0rc1/requirements.txt +0 -33
- vllm_ascend-0.12.0rc1/setup.py +0 -533
- vllm_ascend-0.12.0rc1/tests/e2e/conftest.py +0 -773
- vllm_ascend-0.12.0rc1/tests/e2e/model_utils.py +0 -74
- vllm_ascend-0.12.0rc1/tests/e2e/models/configs/InternVL2-8B.yaml +0 -11
- vllm_ascend-0.12.0rc1/tests/e2e/models/configs/InternVL2_5-8B.yaml +0 -11
- vllm_ascend-0.12.0rc1/tests/e2e/models/configs/InternVL3-8B.yaml +0 -11
- vllm_ascend-0.12.0rc1/tests/e2e/models/configs/InternVL3_5-8B-hf.yaml +0 -11
- vllm_ascend-0.12.0rc1/tests/e2e/models/configs/Meta-Llama-3.1-8B-Instruct.yaml +0 -11
- vllm_ascend-0.12.0rc1/tests/e2e/models/configs/accuracy.txt +0 -17
- vllm_ascend-0.12.0rc1/tests/e2e/models/configs/gemma-3-4b-it.yaml +0 -13
- vllm_ascend-0.12.0rc1/tests/e2e/models/configs/llava-1.5-7b-hf.yaml +0 -11
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_aclgraph_capture_replay.py +0 -237
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_data_parallel.py +0 -83
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_data_parallel_tp2.py +0 -52
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_expert_parallel.py +0 -33
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_external_launcher.py +0 -239
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_full_graph_mode.py +0 -121
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_fused_moe_allgather_ep.py +0 -74
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_ilama_lora_tp2.py +0 -23
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_offline_inference_distributed.py +0 -223
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_offline_weight_load.py +0 -74
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_pipeline_parallel.py +0 -47
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_prefix_caching.py +0 -83
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_quantization.py +0 -47
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_qwen3_moe.py +0 -89
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_qwen3_next.py +0 -125
- vllm_ascend-0.12.0rc1/tests/e2e/multicard/test_shared_expert_dp.py +0 -93
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/features/test_mtpx_deepseek_r1_0528_w8a8.py +0 -140
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/features/test_prefix_cache_deepseek_r1_0528_w8a8.py +0 -110
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/features/test_prefix_cache_qwen3_32b_int8.py +0 -99
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/features/test_qwen3_32b_int8_a3_feature_stack3.py +0 -99
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py +0 -120
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8_eplb.py +0 -115
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_deepseek_v3_2_exp_w8a8.py +0 -105
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_glm4_5.py +0 -111
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_qwen2_5_vl_32b.py +0 -110
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_qwen2_5_vl_7b.py +0 -102
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_qwen3_235b_a22b_w8a8_eplb.py +0 -105
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_qwen3_235b_w8a8.py +0 -101
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_qwen3_30b_w8a8.py +0 -92
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_qwen3_32b_int8.py +0 -130
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/models/test_qwq_32b.py +0 -116
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml +0 -57
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-EPLB.yaml +0 -195
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml +0 -194
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml +0 -111
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3_2-Exp-bf16.yaml +0 -51
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A22B.yaml +0 -70
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8-EPLB.yaml +0 -91
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8.yaml +0 -87
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/multi_node_config.py +0 -285
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config/utils.py +0 -129
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 +0 -140
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/scripts/run.sh +0 -164
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/test_multi_node.py +0 -130
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/multicard_ops/test_dispatch_gmm_combine_decode.py +0 -411
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops/test_dispatch_ffn_combine.py +0 -168
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops/test_fused_moe.py +0 -352
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops/test_matmul_allreduce_add_rmsnorm.py +0 -135
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops/test_mla_preprocess.py +0 -115
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops/test_mla_preprocess_nq.py +0 -99
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops/test_mla_preprocess_qdown.py +0 -116
- vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops/triton/test_causal_conv1d.py +0 -230
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/compile/test_norm_quant_fusion.py +0 -113
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/pooling/test_classification.py +0 -34
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/pooling/test_embedding.py +0 -100
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/pooling/test_scoring.py +0 -187
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py +0 -176
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py +0 -241
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_aclgraph_accuracy.py +0 -213
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_aclgraph_mem.py +0 -99
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_async_scheduling.py +0 -239
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_camem.py +0 -99
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_completion_with_prompt_embeds.py +0 -75
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_guided_decoding.py +0 -153
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_ilama_lora.py +0 -62
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_multistream_overlap_shared_expert.py +0 -103
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_quantization.py +0 -35
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_sampler.py +0 -49
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_vlm.py +0 -89
- vllm_ascend-0.12.0rc1/tests/e2e/singlecard/test_xlite.py +0 -132
- vllm_ascend-0.12.0rc1/tests/e2e/vllm_interface/singlecard/test_sampler.py +0 -36
- vllm_ascend-0.12.0rc1/tests/ut/attention/test_attention_v1.py +0 -392
- vllm_ascend-0.12.0rc1/tests/ut/attention/test_mla_v1.py +0 -1221
- vllm_ascend-0.12.0rc1/tests/ut/attention/test_sfa_v1.py +0 -187
- vllm_ascend-0.12.0rc1/tests/ut/compilation/test_acl_graph.py +0 -847
- vllm_ascend-0.12.0rc1/tests/ut/conftest.py +0 -26
- vllm_ascend-0.12.0rc1/tests/ut/core/test_scheduler_dynamic_batch.py +0 -750
- vllm_ascend-0.12.0rc1/tests/ut/device_allocator/test_camem.py +0 -188
- vllm_ascend-0.12.0rc1/tests/ut/distributed/mooncake/test_config_data.py +0 -75
- vllm_ascend-0.12.0rc1/tests/ut/distributed/test_parallel_state.py +0 -81
- vllm_ascend-0.12.0rc1/tests/ut/eplb/adaptor/test_abstract_adaptor.py +0 -73
- vllm_ascend-0.12.0rc1/tests/ut/eplb/core/test_eplb_device_transfer_loader.py +0 -116
- vllm_ascend-0.12.0rc1/tests/ut/eplb/core/test_eplb_utils.py +0 -194
- vllm_ascend-0.12.0rc1/tests/ut/kv_connector/test_mooncake_connector.py +0 -1262
- vllm_ascend-0.12.0rc1/tests/ut/kv_connector/test_mooncake_layerwise_connector.py +0 -895
- vllm_ascend-0.12.0rc1/tests/ut/kv_connector/utils.py +0 -210
- vllm_ascend-0.12.0rc1/tests/ut/ops/test_activation.py +0 -76
- vllm_ascend-0.12.0rc1/tests/ut/ops/test_expert_load_balancer.py +0 -140
- vllm_ascend-0.12.0rc1/tests/ut/ops/test_fused_moe.py +0 -596
- vllm_ascend-0.12.0rc1/tests/ut/ops/test_layernorm.py +0 -57
- vllm_ascend-0.12.0rc1/tests/ut/ops/test_linear.py +0 -157
- vllm_ascend-0.12.0rc1/tests/ut/ops/test_mla.py +0 -155
- vllm_ascend-0.12.0rc1/tests/ut/ops/test_moe_comm_method.py +0 -245
- vllm_ascend-0.12.0rc1/tests/ut/ops/test_prepare_finalize.py +0 -224
- vllm_ascend-0.12.0rc1/tests/ut/ops/test_rotary_embedding.py +0 -453
- vllm_ascend-0.12.0rc1/tests/ut/ops/test_token_dispatcher.py +0 -535
- vllm_ascend-0.12.0rc1/tests/ut/quantization/test_quant_config.py +0 -241
- vllm_ascend-0.12.0rc1/tests/ut/quantization/test_utils.py +0 -62
- vllm_ascend-0.12.0rc1/tests/ut/quantization/test_w4a4_flatquant_dynamic.py +0 -246
- vllm_ascend-0.12.0rc1/tests/ut/quantization/test_w4a8_dynamic.py +0 -301
- vllm_ascend-0.12.0rc1/tests/ut/quantization/test_w8a8.py +0 -985
- vllm_ascend-0.12.0rc1/tests/ut/quantization/test_w8a8_dynamic.py +0 -61
- vllm_ascend-0.12.0rc1/tests/ut/sample/logits_processor/test_builtin.py +0 -40
- vllm_ascend-0.12.0rc1/tests/ut/sample/test_rejection_sampler.py +0 -204
- vllm_ascend-0.12.0rc1/tests/ut/sample/test_sampler.py +0 -35
- vllm_ascend-0.12.0rc1/tests/ut/spec_decode/test_eagle_proposer.py +0 -312
- vllm_ascend-0.12.0rc1/tests/ut/spec_decode/test_mtp_proposer.py +0 -445
- vllm_ascend-0.12.0rc1/tests/ut/test_ascend_config.py +0 -93
- vllm_ascend-0.12.0rc1/tests/ut/test_platform.py +0 -580
- vllm_ascend-0.12.0rc1/tests/ut/test_utils.py +0 -309
- vllm_ascend-0.12.0rc1/tests/ut/worker/test_input_batch.py +0 -375
- vllm_ascend-0.12.0rc1/tests/ut/worker/test_worker_v1.py +0 -1184
- vllm_ascend-0.12.0rc1/tools/aisbench.py +0 -326
- vllm_ascend-0.12.0rc1/tools/mypy.sh +0 -40
- vllm_ascend-0.12.0rc1/tools/send_request.py +0 -23
- vllm_ascend-0.12.0rc1/vllm_ascend/_version.py +0 -34
- vllm_ascend-0.12.0rc1/vllm_ascend/ascend_config.py +0 -321
- vllm_ascend-0.12.0rc1/vllm_ascend/ascend_forward_context.py +0 -233
- vllm_ascend-0.12.0rc1/vllm_ascend/attention/attention_cp.py +0 -915
- vllm_ascend-0.12.0rc1/vllm_ascend/attention/attention_mask.py +0 -75
- vllm_ascend-0.12.0rc1/vllm_ascend/attention/attention_v1.py +0 -761
- vllm_ascend-0.12.0rc1/vllm_ascend/attention/mla_v1.py +0 -2119
- vllm_ascend-0.12.0rc1/vllm_ascend/attention/sfa_v1.py +0 -1002
- vllm_ascend-0.12.0rc1/vllm_ascend/attention/utils.py +0 -235
- vllm_ascend-0.12.0rc1/vllm_ascend/compilation/acl_graph.py +0 -485
- vllm_ascend-0.12.0rc1/vllm_ascend/compilation/compiler_interface.py +0 -137
- vllm_ascend-0.12.0rc1/vllm_ascend/compilation/graph_fusion_pass_manager.py +0 -53
- vllm_ascend-0.12.0rc1/vllm_ascend/compilation/npugraph_ex_passes/add_rms_norm_quant.py +0 -123
- vllm_ascend-0.12.0rc1/vllm_ascend/compilation/passes/norm_quant_fusion_pass.py +0 -113
- vllm_ascend-0.12.0rc1/vllm_ascend/core/recompute_scheduler.py +0 -841
- vllm_ascend-0.12.0rc1/vllm_ascend/core/scheduler_dynamic_batch.py +0 -595
- vllm_ascend-0.12.0rc1/vllm_ascend/device_allocator/camem.py +0 -278
- vllm_ascend-0.12.0rc1/vllm_ascend/distributed/__init__.py +0 -40
- vllm_ascend-0.12.0rc1/vllm_ascend/distributed/cpu_offload_connector.py +0 -475
- vllm_ascend-0.12.0rc1/vllm_ascend/distributed/cpu_offload_manager/metadata.py +0 -271
- vllm_ascend-0.12.0rc1/vllm_ascend/distributed/kvpool/ascend_store_connector.py +0 -192
- vllm_ascend-0.12.0rc1/vllm_ascend/distributed/kvpool/backend/memcache_backend.py +0 -74
- vllm_ascend-0.12.0rc1/vllm_ascend/distributed/kvpool/backend/mooncake_backend.py +0 -186
- vllm_ascend-0.12.0rc1/vllm_ascend/distributed/kvpool/config_data.py +0 -377
- vllm_ascend-0.12.0rc1/vllm_ascend/distributed/kvpool/kv_transfer.py +0 -261
- vllm_ascend-0.12.0rc1/vllm_ascend/distributed/kvpool/pool_scheduler.py +0 -328
- vllm_ascend-0.12.0rc1/vllm_ascend/distributed/kvpool/pool_worker.py +0 -604
- vllm_ascend-0.12.0rc1/vllm_ascend/distributed/mooncake_connector.py +0 -1473
- vllm_ascend-0.12.0rc1/vllm_ascend/distributed/mooncake_layerwise_connector.py +0 -1157
- vllm_ascend-0.12.0rc1/vllm_ascend/distributed/parallel_state.py +0 -345
- vllm_ascend-0.12.0rc1/vllm_ascend/distributed/utils.py +0 -61
- vllm_ascend-0.12.0rc1/vllm_ascend/envs.py +0 -148
- vllm_ascend-0.12.0rc1/vllm_ascend/eplb/adaptor/abstract_adaptor.py +0 -44
- vllm_ascend-0.12.0rc1/vllm_ascend/eplb/adaptor/vllm_adaptor.py +0 -316
- vllm_ascend-0.12.0rc1/vllm_ascend/eplb/core/eplb_device_transfer_loader.py +0 -138
- vllm_ascend-0.12.0rc1/vllm_ascend/eplb/core/eplb_utils.py +0 -166
- vllm_ascend-0.12.0rc1/vllm_ascend/eplb/core/eplb_worker.py +0 -440
- vllm_ascend-0.12.0rc1/vllm_ascend/eplb/core/policy/policy_dynamic_ep_v2.py +0 -772
- vllm_ascend-0.12.0rc1/vllm_ascend/eplb/core/policy/policy_flashlb.py +0 -651
- vllm_ascend-0.12.0rc1/vllm_ascend/eplb/eplb_updator.py +0 -254
- vllm_ascend-0.12.0rc1/vllm_ascend/eplb/utils.py +0 -89
- vllm_ascend-0.12.0rc1/vllm_ascend/kv_offload/npu.py +0 -71
- vllm_ascend-0.12.0rc1/vllm_ascend/lora/punica_npu.py +0 -351
- vllm_ascend-0.12.0rc1/vllm_ascend/model_loader/netloader/executor/elastic_load.py +0 -170
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/__init__.py +0 -57
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/expert_load_balancer.py +0 -118
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/fused_moe/experts_selector.py +0 -305
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/fused_moe/fused_moe.py +0 -472
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/fused_moe/moe_comm_method.py +0 -317
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/fused_moe/moe_mlp.py +0 -344
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/fused_moe/prepare_finalize.py +0 -453
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/fused_moe/token_dispatcher.py +0 -749
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/layernorm.py +0 -122
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/linear.py +0 -468
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/linear_op.py +0 -683
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/mla.py +0 -187
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/register_custom_ops.py +0 -343
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/rotary_embedding.py +0 -437
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/shared_weight_layer.py +0 -252
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/triton/fla/chunk.py +0 -226
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/triton/fla/sigmoid_gating.py +0 -171
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/triton/mamba/causal_conv1d.py +0 -1348
- vllm_ascend-0.12.0rc1/vllm_ascend/ops/triton/rope.py +0 -210
- vllm_ascend-0.12.0rc1/vllm_ascend/patch/__init__.py +0 -255
- vllm_ascend-0.12.0rc1/vllm_ascend/patch/platform/__init__.py +0 -26
- vllm_ascend-0.12.0rc1/vllm_ascend/patch/platform/patch_ec_connector.py +0 -32
- vllm_ascend-0.12.0rc1/vllm_ascend/patch/platform/patch_multiproc_executor.py +0 -180
- vllm_ascend-0.12.0rc1/vllm_ascend/patch/worker/__init__.py +0 -35
- vllm_ascend-0.12.0rc1/vllm_ascend/patch/worker/patch_bert.py +0 -45
- vllm_ascend-0.12.0rc1/vllm_ascend/patch/worker/patch_module.py +0 -34
- vllm_ascend-0.12.0rc1/vllm_ascend/patch/worker/patch_multimodal_merge.py +0 -58
- vllm_ascend-0.12.0rc1/vllm_ascend/patch/worker/patch_qwen2_5_omni.py +0 -72
- vllm_ascend-0.12.0rc1/vllm_ascend/patch/worker/patch_qwen2_5_vl.py +0 -175
- vllm_ascend-0.12.0rc1/vllm_ascend/patch/worker/patch_qwen3_vl.py +0 -85
- vllm_ascend-0.12.0rc1/vllm_ascend/patch/worker/patch_weight_loader.py +0 -41
- vllm_ascend-0.12.0rc1/vllm_ascend/platform.py +0 -415
- vllm_ascend-0.12.0rc1/vllm_ascend/quantization/quant_config.py +0 -511
- vllm_ascend-0.12.0rc1/vllm_ascend/quantization/utils.py +0 -120
- vllm_ascend-0.12.0rc1/vllm_ascend/quantization/w4a16.py +0 -284
- vllm_ascend-0.12.0rc1/vllm_ascend/quantization/w4a4_flatquant_dynamic.py +0 -193
- vllm_ascend-0.12.0rc1/vllm_ascend/quantization/w4a8_dynamic.py +0 -496
- vllm_ascend-0.12.0rc1/vllm_ascend/quantization/w8a8.py +0 -711
- vllm_ascend-0.12.0rc1/vllm_ascend/quantization/w8a8_dynamic.py +0 -314
- vllm_ascend-0.12.0rc1/vllm_ascend/sample/logits_processor/__init__.py +0 -50
- vllm_ascend-0.12.0rc1/vllm_ascend/sample/logits_processor/builtin.py +0 -52
- vllm_ascend-0.12.0rc1/vllm_ascend/sample/rejection_sampler.py +0 -849
- vllm_ascend-0.12.0rc1/vllm_ascend/sample/sampler.py +0 -102
- vllm_ascend-0.12.0rc1/vllm_ascend/spec_decode/eagle_proposer.py +0 -684
- vllm_ascend-0.12.0rc1/vllm_ascend/spec_decode/interface.py +0 -54
- vllm_ascend-0.12.0rc1/vllm_ascend/spec_decode/mtp_proposer.py +0 -1210
- vllm_ascend-0.12.0rc1/vllm_ascend/spec_decode/ngram_proposer.py +0 -72
- vllm_ascend-0.12.0rc1/vllm_ascend/spec_decode/suffix_proposer.py +0 -45
- vllm_ascend-0.12.0rc1/vllm_ascend/utils.py +0 -1064
- vllm_ascend-0.12.0rc1/vllm_ascend/worker/block_table.py +0 -329
- vllm_ascend-0.12.0rc1/vllm_ascend/worker/model_runner_v1.py +0 -3522
- vllm_ascend-0.12.0rc1/vllm_ascend/worker/npu_input_batch.py +0 -982
- vllm_ascend-0.12.0rc1/vllm_ascend/worker/worker_v1.py +0 -478
- vllm_ascend-0.12.0rc1/vllm_ascend/xlite/xlite.py +0 -275
- vllm_ascend-0.12.0rc1/vllm_ascend/xlite/xlite_worker.py +0 -26
- vllm_ascend-0.12.0rc1/vllm_ascend.egg-info/PKG-INFO +0 -146
- vllm_ascend-0.12.0rc1/vllm_ascend.egg-info/SOURCES.txt +0 -866
- vllm_ascend-0.12.0rc1/vllm_ascend.egg-info/requires.txt +0 -23
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.gemini/config.yaml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/ISSUE_TEMPLATE/100-documentation.yml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/ISSUE_TEMPLATE/110-user-story.yml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/ISSUE_TEMPLATE/200-installation.yml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/ISSUE_TEMPLATE/300-usage.yml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/ISSUE_TEMPLATE/400-bug-report.yml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/ISSUE_TEMPLATE/500-feature-request.yml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/ISSUE_TEMPLATE/600-new-model.yml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/ISSUE_TEMPLATE/700-performance-discussion.yml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/ISSUE_TEMPLATE/750-RFC.yml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/ISSUE_TEMPLATE/800-others.yml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/ISSUE_TEMPLATE/900-release-checklist.yml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/dependabot.yml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/labeler.yml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/workflows/matchers/actionlint.json +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.github/workflows/matchers/mypy.json +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.gitignore +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.gitmodules +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/.readthedocs.yaml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/CODE_OF_CONDUCT.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/DCO +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/LICENSE +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/benchmarks/README.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/benchmarks/ops/ben_vocabparallelembedding.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/benchmarks/requirements-bench.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/benchmarks/scripts/convert_json_to_markdown.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/benchmarks/scripts/perf_result_template.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/benchmarks/scripts/run-performance-benchmarks.sh +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/benchmarks/tests/latency-tests.json +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/benchmarks/tests/serving-tests.json +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/benchmarks/tests/throughput-tests.json +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/cmake/utils.cmake +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/codecov.yml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/collect_env.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/aclnn_torch_adapter/NPUBridge.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/aclnn_torch_adapter/NPUBridge.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/aclnn_torch_adapter/NPUStorageImpl.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/aclnn_torch_adapter/NPUStorageImpl.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/aclnn_torch_adapter/op_api_common.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/batch_matmul_transpose/op_host/batch_matmul_transpose.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/batch_matmul_transpose/op_host/common.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/batch_matmul_transpose/op_host/common_tiling.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/batch_matmul_transpose/op_kernel/batch_matmul_transpose_kernel.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/build.sh +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/camem_allocator.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/cmake/config.cmake +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/cmake/func.cmake +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/cmake/intf.cmake +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/cmake/intf_pub.cmake +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/cmake/modules/Findalog.cmake +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/cmake/scripts/prepare.sh +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_host/dispatch_ffn_combine_proto.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_host/error_log.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_host/hcom_topo_info.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_host/tiling_args.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2_tiling.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_v2_tiling.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_common.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_expert_token_out.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_dynamic_quant.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_quant.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_quant_base.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_dynamic_quant.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_out.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_quant.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_init_routing_fullload.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_mrgsort.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_mrgsort_out.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_base.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_multi_core.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_one_core.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_and_gather.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_op.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_with_capacity.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/tiling_base.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/unpermute/moe_token_unpermute.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/unpermute/moe_token_unpermute_tiling.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/utils/copy_gm_to_l1_custom.hpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/utils/copy_l0c_to_gm_custom.hpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/utils/dispatch_policy_custom.hpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/utils/layout3d.hpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_ffn_combine/op_kernel/utils/select_helper.hpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_gmm_combine_decode/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_per_token_dequant.hpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_per_token_dequant_swiglu.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/dispatch_policy.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/tile/tile_stride_binary.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/tile/tile_stride_muls.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/block/block_mmad.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/block/block_mmad_preload_async_with_callback_resident_a.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/dispatch_policy.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_layout/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_layout/op_host/aclnn_dispatch_layout.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_layout/op_host/aclnn_dispatch_layout.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_layout/op_host/dispatch_layout.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_layout/op_host/dispatch_layout_tiling.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_layout/op_kernel/dispatch_layout.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_layout/op_kernel/dispatch_layout.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/dispatch_layout/op_kernel/dispatch_layout_tiling.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/aclnn_grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/aclnn_grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_def.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_proto.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_tiling.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_tiling.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list_split_ws.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list_utils.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/kernels/get_masked_input_and_mask_kernel.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/kernels/math_utils.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/kernels/pos_encoding_kernels.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/kernels/types.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/kernels/utils.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/lightning_indexer/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/lightning_indexer/op_host/lightning_indexer_def.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/lightning_indexer/op_host/lightning_indexer_proto.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/lightning_indexer/op_host/lightning_indexer_tiling.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/lightning_indexer/op_host/lightning_indexer_tiling.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/lightning_indexer/op_kernel/lightning_indexer.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/lightning_indexer/op_kernel/lightning_indexer_common.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/lightning_indexer/op_kernel/lightning_indexer_kernel.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/lightning_indexer/op_kernel/lightning_indexer_service_cube.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/lightning_indexer/op_kernel/lightning_indexer_service_vector.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/lightning_indexer/op_kernel/lightning_indexer_template_tiling_key.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/lightning_indexer/op_kernel/lightning_indexer_vector.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/matmul_allreduce_add_rmsnorm/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/matmul_allreduce_add_rmsnorm/op_host/aclnn_matmul_allreduce_add_rmsnorm.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_def.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_proto.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_tiling.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_workspace.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_aic_kernel.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_aiv_kernel.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_tiling.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_utils.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_host/mla_preprocess.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_host/tiling/mla_preprocess_tiling.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/common.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/common_func.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/hardware.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/iterator.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_l1_iterator.inc +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_ub_iterator.inc +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_gm_iterator.inc +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_l1_iterator.inc +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_ub_iterator.inc +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_bt_iterator.inc +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_fb_iterator.inc +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_l0_iterator.inc +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_ub_iterator.inc +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/kernel_utils.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/layout.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/mem.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/mma.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/set_fpc.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/simd.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/kernel/utils.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/mla_preprocess.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16.hpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16_nq.hpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16_qdown.hpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/mla_preprocess/op_kernel/mla_preprocess_mix_fp16.hpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_combine_normal/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_combine_normal/op_host/aclnn_moe_combine_normal.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_combine_normal/op_host/aclnn_moe_combine_normal.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_combine_normal/op_host/moe_combine_normal.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_combine_normal/op_host/moe_combine_normal_tiling.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_combine_normal/op_kernel/moe_combine_normal.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_combine_normal/op_kernel/moe_combine_normal.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_combine_normal/op_kernel/moe_combine_normal_tiling.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_dispatch_normal/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_dispatch_normal/op_host/aclnn_moe_dispatch_normal.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_dispatch_normal/op_host/aclnn_moe_dispatch_normal.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_dispatch_normal/op_host/moe_dispatch_normal.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_dispatch_normal/op_host/moe_dispatch_normal_tiling.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_dispatch_normal/op_kernel/moe_dispatch_normal.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_dispatch_normal/op_kernel/moe_dispatch_normal.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/moe_dispatch_normal/op_kernel/moe_dispatch_normal_tiling.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/notify_dispatch/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/notify_dispatch/op_host/aclnn_notify_dispatch.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/notify_dispatch/op_host/aclnn_notify_dispatch.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/notify_dispatch/op_host/notify_dispatch.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/notify_dispatch/op_host/notify_dispatch_tiling.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/notify_dispatch/op_kernel/notify_dispatch.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/notify_dispatch/op_kernel/notify_dispatch.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/notify_dispatch/op_kernel/notify_dispatch_tiling.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/ops.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/sparse_flash_attention/op_host/CMakeLists.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_def.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_proto.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/sparse_flash_attention/op_host/sparse_flash_attention_tiling.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_common.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_kernel_mla.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_service_cube_mla.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_service_vector_mla.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/sparse_flash_attention/op_kernel/sparse_flash_attention_template_tiling_key.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/CMakeLists.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/aclnn_util.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/error/ops_error.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/fallback.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/fallback_comm.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/kernel/comm_args.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/kernel/data_copy.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/kernel/dropmask.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/kernel/pse.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/kernel/sync_collectives.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/kernel/util.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/log/inner/dfx_base.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/log/ops_log.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/tiling/data_copy_transpose_tiling.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/tiling/data_copy_transpose_tiling_def.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/tiling/tiling_base.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/tiling/tiling_templates_registry.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/inc/tiling/tiling_type.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils/src/fallback_comm.cpp +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/csrc/utils.h +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/Makefile +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/requirements-docs.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/requirements-test.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/assets/deployment.png +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/assets/disaggregated_prefill_pull.png +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/assets/disaggregated_prefill_push.png +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/assets/eplb.png +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/assets/multi_node_dp_deepseek.png +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/assets/multi_node_dp_kimi.png +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/assets/workflow.png +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/community/governance.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/community/user_stories/index.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/community/user_stories/llamafactory.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/developer_guide/contribution/index.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/developer_guide/evaluation/using_ais_bench.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/developer_guide/evaluation/using_lm_eval.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/developer_guide/evaluation/using_opencompass.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/developer_guide/feature_guide/KV_Cache_Pool_Guide.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/developer_guide/feature_guide/ModelRunner_prepare_inputs.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/developer_guide/feature_guide/add_custom_aclnn_op.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/developer_guide/feature_guide/eplb_swift_balancer.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/developer_guide/performance_and_debug/index.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/developer_guide/performance_and_debug/profile_execute_duration.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/developer_guide/performance_and_debug/service_profiling_guide.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/feature_guide/index.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/index.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_node.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_moge.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_quantization.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/multi_npu_qwen3_moe.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_node_300i.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_audio.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_multimodal.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/tutorials/single_npu_qwen3_embedding.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/logos/vllm-ascend-logo-text-dark.png +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/logos/vllm-ascend-logo-text-light.png +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/quick_start.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/tutorials/310p.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/tutorials/Kimi-K2-Thinking.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/tutorials/Qwen-VL-Dense.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/tutorials/Qwen3-Coder-30B-A3B.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/tutorials/ray.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/user_guide/configuration/env_vars.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/user_guide/configuration/index.md +0 -0
- {vllm_ascend-0.12.0rc1/docs/source/developer_guide → vllm_ascend-0.13.0/docs/source/user_guide}/feature_guide/Multi_Token_Prediction.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/user_guide/feature_guide/dynamic_batch.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/user_guide/feature_guide/eplb_swift_balancer.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/user_guide/feature_guide/external_dp.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/user_guide/feature_guide/images/eplb_img.png +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/user_guide/feature_guide/images/netloader_flowchart.png +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/user_guide/feature_guide/images/netloader_timing_diagram.png +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/user_guide/feature_guide/images/structured_output_1.png +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/user_guide/feature_guide/lora.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/user_guide/feature_guide/netloader.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/user_guide/feature_guide/structured_output.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/docs/source/user_guide/support_matrix/index.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/examples/chat_templates/template_qwen2_audio.jinja +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/examples/eplb/eplb_deepseek.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/examples/external_online_dp/README.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/examples/external_online_dp/run_dp_template.sh +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/examples/offline_data_parallel.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/examples/offline_inference_npu.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/examples/offline_inference_npu_tp2.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/examples/offline_inference_sleep_mode_npu.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/examples/prompt_embedding_inference.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/examples/run_dp_server.sh +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/packages.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/requirements-lint.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/setup.cfg +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/310p/test_offline_inference_310p.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/common.sh +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/doctests/001-quickstart-test.sh +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/doctests/002-pip-binary-installation-test.sh +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/models/configs/ERNIE-4.5-21B-A3B-PT.yaml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/models/configs/Molmo-7B-D-0924.yaml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/models/configs/Qwen2.5-Omni-7B.yaml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/models/configs/Qwen3-30B-A3B-W8A8.yaml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/models/configs/Qwen3-30B-A3B.yaml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/models/configs/Qwen3-8B-W8A8.yaml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/models/configs/Qwen3-8B.yaml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/models/configs/Qwen3-Next-80B-A3B-Instruct.yaml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/models/configs/internlm3-8b-instruct.yaml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/models/conftest.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/models/report_template.md +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/models/test_lm_eval_correctness.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/multicard → vllm_ascend-0.13.0/tests/e2e/multicard/2-cards}/test_single_request_aclgraph.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/nightly/multi_node/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/nightly/multi_node/config → vllm_ascend-0.13.0/tests/e2e/nightly/multi_node/scripts}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops → vllm_ascend-0.13.0/tests/e2e/nightly/single_node/models}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/nightly → vllm_ascend-0.13.0/tests/e2e/nightly/single_node}/models/test_qwen3_32b.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops/triton → vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/singlecard → vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/multicard_ops_a3}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/singlecard/pooling → vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops → vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops}/test_batch_matmul_transpose.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops → vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops}/test_bgmv_expand.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops → vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops}/test_bgmv_shrink.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops → vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops}/test_gating_top_k_softmax.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops → vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops}/test_gmm_swiglu_quant_weight_nz_tensor_list.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops → vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops}/test_grouped_matmul_swiglu_quant.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops → vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops}/test_rotary_embedding.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops → vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops}/test_vocabparallelembedding.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/ut → vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/triton}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/multicard → vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops/triton}/test_chunk_gated_delta_rule.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/e2e/nightly/ops → vllm_ascend-0.13.0/tests/e2e/nightly/single_node/ops/singlecard_ops}/triton/test_rope.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/prompts/example.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/run_doctests.sh +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/attention → vllm_ascend-0.13.0/tests/e2e/singlecard}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/compilation → vllm_ascend-0.13.0/tests/e2e/singlecard/compile}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/singlecard/compile/backend.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/compilation/npugraph_ex_passes → vllm_ascend-0.13.0/tests/e2e/singlecard/model_runner_v2}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/compilation/passes → vllm_ascend-0.13.0/tests/e2e/singlecard/pooling}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/core → vllm_ascend-0.13.0/tests/e2e/singlecard/spec_decode}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/singlecard/test_profile_execute_duration.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/utils.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/e2e/vllm_interface/vllm_test.cfg +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/device_allocator → vllm_ascend-0.13.0/tests/ut}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/attention/test_attention_mask.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/base.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/distributed/device_communicators/test_pyhccl.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/distributed/test_communicator.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/distributed/test_determin_expert_map_all.py +0 -0
- {vllm_ascend-0.12.0rc1/tests/ut/ops → vllm_ascend-0.13.0/tests/ut/eplb/core}/expert_map.json +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/eplb/core/policy/test_policy_abstract.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/eplb/core/policy/test_policy_dynamic_ep.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/eplb/core/policy/test_policy_dynamic_ep_v2.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/eplb/core/policy/test_policy_factor.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/fake_weight/config.json +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/kv_connector/test_remote_decode_lifecycle.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/kv_connector/test_remote_prefill_lifecycle.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/model_loader/netloader/test_netloader.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/model_loader/netloader/test_netloader_elastic.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/model_loader/netloader/test_netloader_load.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/model_loader/netloader/test_netloader_utils.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/ops/test_comm_utils.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/ops/test_vocab_parallel_embedding.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/patch/worker/patch_common/test_patch_distributed.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/patch/worker/patch_common/test_patch_minicpm.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/quantization/test_w4a16.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tests/ut/test_envs.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tools/actionlint.sh +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tools/check_python_src_init.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tools/check_repo.sh +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tools/enforce_regex_import.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tools/mooncake_installer.sh +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tools/png-lint.sh +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tools/send_mm_request.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tools/shellcheck.sh +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/tools/sphinx-lint.sh +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/typos.toml +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/_cann_ops_custom/.gitkeep +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/distributed/cpu_offload_manager → vllm_ascend-0.13.0/vllm_ascend/attention}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/distributed/device_communicators → vllm_ascend-0.13.0/vllm_ascend/attention/context_parallel}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/eplb → vllm_ascend-0.13.0/vllm_ascend/compilation}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/eplb/adaptor → vllm_ascend-0.13.0/vllm_ascend/compilation/npugraph_ex_passes}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/eplb/core → vllm_ascend-0.13.0/vllm_ascend/compilation/passes}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/eplb/core/policy → vllm_ascend-0.13.0/vllm_ascend/core}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/cpu_binding.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/kv_offload → vllm_ascend-0.13.0/vllm_ascend/device_allocator}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/distributed/communicator.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/lora → vllm_ascend-0.13.0/vllm_ascend/distributed/cpu_offload_manager}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/distributed/cpu_offload_manager/cpu_kv_cache_manager.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/model_loader → vllm_ascend-0.13.0/vllm_ascend/distributed/device_communicators}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/distributed/device_communicators/pyhccl.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/distributed/kvpool/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/distributed/kvpool/backend/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/distributed/kvpool/backend/backend.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/distributed/mooncake_transfer_engine.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/model_loader/netloader/executor → vllm_ascend-0.13.0/vllm_ascend/eplb}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/model_loader/netloader/interaction → vllm_ascend-0.13.0/vllm_ascend/eplb/adaptor}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/ops/fused_moe → vllm_ascend-0.13.0/vllm_ascend/eplb/core}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/ops/triton → vllm_ascend-0.13.0/vllm_ascend/eplb/core/policy}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/eplb/core/policy/policy_abstract.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/eplb/core/policy/policy_dynamic_ep.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/eplb/core/policy/policy_factory.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/eplb/core/policy/policy_random.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/ops/triton/fla → vllm_ascend-0.13.0/vllm_ascend/kv_offload}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/kv_offload/cpu_npu.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/ops/triton/mamba → vllm_ascend-0.13.0/vllm_ascend/lora}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/lora/lora_ops.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/lora/utils.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/meta_registration.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/quantization → vllm_ascend-0.13.0/vllm_ascend/model_loader}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/model_loader/netloader/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/quantization/compressed_tensors → vllm_ascend-0.13.0/vllm_ascend/model_loader/netloader/executor}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/sample → vllm_ascend-0.13.0/vllm_ascend/model_loader/netloader/interaction}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/model_loader/netloader/interaction/elastic.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/model_loader/netloader/load.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/model_loader/netloader/netloader.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/model_loader/netloader/utils.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/ops/activation.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/worker → vllm_ascend-0.13.0/vllm_ascend/ops/fused_moe}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/ops/fused_moe/comm_utils.py +0 -0
- {vllm_ascend-0.12.0rc1/vllm_ascend/xlite → vllm_ascend-0.13.0/vllm_ascend/ops/triton}/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/ops/triton/fla/chunk_delta_h.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/ops/triton/fla/chunk_o.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/ops/triton/fla/chunk_scaled_dot_kkt.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/ops/triton/fla/cumsum.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/ops/triton/fla/layernorm_guard.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/ops/triton/fla/solve_tril.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/ops/triton/fla/utils.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/ops/triton/fla/wy_fast.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/ops/triton/triton_utils.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/ops/vocab_parallel_embedding.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/ops/weight_prefetch.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/patch/platform/patch_distributed.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/patch/platform/patch_mamba_config.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/patch/platform/patch_sched_yield.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/patch/worker/patch_deepseek.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/patch/worker/patch_distributed.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/patch/worker/patch_minicpm.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/patch/worker/patch_qwen3_next_mtp.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/patch/worker/patch_rope.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/patch/worker/patch_triton.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/profiling_config.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/quantization/compressed_tensors/compressed_tensors.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/quantization/w8a8_pdmix.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/spec_decode/__init__.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend/xlite/xlite_model_runner.py +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend.egg-info/dependency_links.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend.egg-info/entry_points.txt +0 -0
- {vllm_ascend-0.12.0rc1 → vllm_ascend-0.13.0}/vllm_ascend.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# This file is a part of the vllm-ascend project.
|
|
16
|
+
#
|
|
17
|
+
ARG PY_VERSION=3.11
|
|
18
|
+
FROM quay.io/ascend/manylinux:8.5.0-910b-manylinux_2_28-py${PY_VERSION}
|
|
19
|
+
|
|
20
|
+
ARG SOC_VERSION="ascend910b1"
|
|
21
|
+
|
|
22
|
+
# Define environments
|
|
23
|
+
ENV DEBIAN_FRONTEND=noninteractive
|
|
24
|
+
ENV SOC_VERSION=$SOC_VERSION
|
|
25
|
+
RUN yum update -y && \
|
|
26
|
+
yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
|
|
27
|
+
rm -rf /var/cache/yum
|
|
28
|
+
|
|
29
|
+
WORKDIR /workspace
|
|
30
|
+
|
|
31
|
+
COPY . /workspace/vllm-ascend/
|
|
32
|
+
|
|
33
|
+
# Install req
|
|
34
|
+
RUN python3 -m pip install -r vllm-ascend/requirements.txt --extra-index https://download.pytorch.org/whl/cpu/ && \
|
|
35
|
+
python3 -m pip install twine attrs psutil
|
|
36
|
+
|
|
37
|
+
# Install vllm-ascend
|
|
38
|
+
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
|
|
39
|
+
source /usr/local/Ascend/nnal/atb/set_env.sh && \
|
|
40
|
+
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
|
|
41
|
+
cd vllm-ascend && \
|
|
42
|
+
python3 setup.py bdist_wheel && \
|
|
43
|
+
ls -l dist
|
|
44
|
+
|
|
45
|
+
CMD ["/bin/bash"]
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# This file is a part of the vllm-ascend project.
|
|
16
|
+
#
|
|
17
|
+
|
|
18
|
+
FROM quay.io/ascend/vllm-ascend:releases-v0.13.0
|
|
19
|
+
|
|
20
|
+
ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
|
|
21
|
+
ARG AIS_BENCH_TAG="v3.0-20250930-master"
|
|
22
|
+
ARG AIS_BENCH_URL="https://gitee.com/aisbench/benchmark.git"
|
|
23
|
+
|
|
24
|
+
# Define environments
|
|
25
|
+
ENV DEBIAN_FRONTEND=noninteractive
|
|
26
|
+
|
|
27
|
+
WORKDIR /workspace
|
|
28
|
+
|
|
29
|
+
RUN pip config set global.index-url ${PIP_INDEX_URL}
|
|
30
|
+
|
|
31
|
+
# Install requirements-dev.txt for tests
|
|
32
|
+
RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
|
|
33
|
+
cd /vllm-workspace/vllm-ascend && \
|
|
34
|
+
python3 -m pip install -r requirements-dev.txt && \
|
|
35
|
+
python3 -m pip cache purge
|
|
36
|
+
|
|
37
|
+
# Install benchmark tools
|
|
38
|
+
RUN git clone -b ${AIS_BENCH_TAG} --depth 1 ${AIS_BENCH_URL} /vllm-workspace/vllm-ascend/benchmark && \
|
|
39
|
+
cd /vllm-workspace/vllm-ascend/benchmark && \
|
|
40
|
+
pip install -e . -r requirements/api.txt -r requirements/extra.txt && \
|
|
41
|
+
python3 -m pip cache purge
|
|
42
|
+
|
|
43
|
+
CMD ["/bin/bash"]
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# This file is a part of the vllm-ascend project.
|
|
16
|
+
#
|
|
17
|
+
|
|
18
|
+
FROM quay.io/ascend/vllm-ascend:releases-v0.13.0-a3
|
|
19
|
+
|
|
20
|
+
ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
|
|
21
|
+
ARG AIS_BENCH_TAG="v3.0-20250930-master"
|
|
22
|
+
ARG AIS_BENCH_URL="https://gitee.com/aisbench/benchmark.git"
|
|
23
|
+
|
|
24
|
+
# Define environments
|
|
25
|
+
ENV DEBIAN_FRONTEND=noninteractive
|
|
26
|
+
|
|
27
|
+
WORKDIR /workspace
|
|
28
|
+
|
|
29
|
+
RUN pip config set global.index-url ${PIP_INDEX_URL}
|
|
30
|
+
|
|
31
|
+
# Install requirements-dev.txt for tests
|
|
32
|
+
RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
|
|
33
|
+
cd /vllm-workspace/vllm-ascend && \
|
|
34
|
+
python3 -m pip install -r requirements-dev.txt && \
|
|
35
|
+
python3 -m pip cache purge
|
|
36
|
+
|
|
37
|
+
# Install benchmark tools
|
|
38
|
+
RUN git clone -b ${AIS_BENCH_TAG} --depth 1 ${AIS_BENCH_URL} /vllm-workspace/vllm-ascend/benchmark && \
|
|
39
|
+
cd /vllm-workspace/vllm-ascend/benchmark && \
|
|
40
|
+
pip install -e . -r requirements/api.txt -r requirements/extra.txt && \
|
|
41
|
+
python3 -m pip cache purge
|
|
42
|
+
|
|
43
|
+
CMD ["/bin/bash"]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
self-hosted-runner:
|
|
2
|
+
# Labels of self-hosted runner in array of strings.
|
|
3
|
+
labels:
|
|
4
|
+
- linux-aarch64-a2-0
|
|
5
|
+
- linux-aarch64-a2-1
|
|
6
|
+
- linux-aarch64-a2-2
|
|
7
|
+
- linux-aarch64-a2-4
|
|
8
|
+
- linux-aarch64-a2-8
|
|
9
|
+
- linux-arm64-npu-static-8
|
|
10
|
+
- linux-aarch64-310p-1
|
|
11
|
+
- linux-aarch64-310p-2
|
|
12
|
+
- linux-aarch64-310p-4
|
|
13
|
+
- ubuntu-24.04-arm
|
|
14
|
+
- linux-aarch64-a3-1
|
|
15
|
+
- linux-aarch64-a3-2
|
|
16
|
+
- linux-aarch64-a3-4
|
|
17
|
+
- linux-aarch64-a3-8
|
|
18
|
+
- linux-amd64-cpu-0
|
|
19
|
+
- linux-amd64-cpu-8
|
|
20
|
+
- linux-amd64-cpu-16
|
|
21
|
+
- linux-aarch64-a3-0
|
|
22
|
+
- linux-amd64-cpu-8-hk
|
|
23
|
+
- linux-amd64-cpu-16-hk
|
|
24
|
+
- linux-aarch64-a2b3-0
|
|
25
|
+
- linux-aarch64-a2b3-1
|
|
26
|
+
- linux-aarch64-a2b3-2
|
|
27
|
+
- linux-aarch64-a2b3-4
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
name: 'e2e nightly test multi_node'
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_call:
|
|
5
|
+
inputs:
|
|
6
|
+
soc_version:
|
|
7
|
+
required: true
|
|
8
|
+
type: string
|
|
9
|
+
description: use a2 or a3
|
|
10
|
+
runner:
|
|
11
|
+
required: false
|
|
12
|
+
type: string
|
|
13
|
+
default: linux-aarch64-a3-0
|
|
14
|
+
image:
|
|
15
|
+
required: false
|
|
16
|
+
type: string
|
|
17
|
+
description: base image for pods
|
|
18
|
+
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11"
|
|
19
|
+
config_file_path:
|
|
20
|
+
required: true
|
|
21
|
+
type: string
|
|
22
|
+
description: the model config for multi_node test
|
|
23
|
+
replicas:
|
|
24
|
+
required: false
|
|
25
|
+
default: "1"
|
|
26
|
+
type: string
|
|
27
|
+
description: replicas of the k8s cluster
|
|
28
|
+
size:
|
|
29
|
+
required: false
|
|
30
|
+
default: "2"
|
|
31
|
+
type: string
|
|
32
|
+
description: how many pods will be pulled up via lws.yaml, indicates number of nodes we need
|
|
33
|
+
vllm_version:
|
|
34
|
+
required: false
|
|
35
|
+
default: "v0.13.0"
|
|
36
|
+
type: string
|
|
37
|
+
description: vllm version to use
|
|
38
|
+
vllm_ascend_remote_url:
|
|
39
|
+
required: false
|
|
40
|
+
default: https://github.com/vllm-project/vllm-ascend.git
|
|
41
|
+
type: string
|
|
42
|
+
description: used for pr level tests
|
|
43
|
+
vllm_ascend_ref:
|
|
44
|
+
required: false
|
|
45
|
+
default: main
|
|
46
|
+
type: string
|
|
47
|
+
description: used for pr level tests
|
|
48
|
+
secrets:
|
|
49
|
+
KUBECONFIG_B64:
|
|
50
|
+
required: true
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
|
|
54
|
+
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
|
|
55
|
+
# It's used to activate ascend-toolkit environment variables.
|
|
56
|
+
defaults:
|
|
57
|
+
run:
|
|
58
|
+
shell: bash -el {0}
|
|
59
|
+
|
|
60
|
+
# only cancel in-progress runs of the same workflow
|
|
61
|
+
# and ignore the lint / 8 cards test type
|
|
62
|
+
concurrency:
|
|
63
|
+
group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.soc_version }}
|
|
64
|
+
cancel-in-progress: true
|
|
65
|
+
|
|
66
|
+
jobs:
|
|
67
|
+
e2e:
|
|
68
|
+
name: ${{ inputs.config_file_path }}
|
|
69
|
+
# This is the runner with no NPU for k8s controller
|
|
70
|
+
runs-on: ${{ inputs.runner }}
|
|
71
|
+
container:
|
|
72
|
+
image: m.daocloud.io/quay.io/ascend/cann:8.5.0-a3-ubuntu22.04-py3.11
|
|
73
|
+
env:
|
|
74
|
+
KUBECONFIG: /tmp/kubeconfig
|
|
75
|
+
KUBECTL: /root/.cache/.kube/kubectl
|
|
76
|
+
NAMESPACE: vllm-project
|
|
77
|
+
LEADER_POD: vllm-0
|
|
78
|
+
RESULT_FILE: /root/.cache/tests/ret_${{ inputs.soc_version }}
|
|
79
|
+
steps:
|
|
80
|
+
- name: Install system denpendencies
|
|
81
|
+
run: |
|
|
82
|
+
# configure apt and pip source
|
|
83
|
+
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
|
|
84
|
+
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
|
|
85
|
+
pip install jinja2-cli
|
|
86
|
+
|
|
87
|
+
- name: Install kubectl
|
|
88
|
+
run: |
|
|
89
|
+
# Install kubectl
|
|
90
|
+
arch=$(uname -m)
|
|
91
|
+
|
|
92
|
+
if echo "$arch" | grep -qiE "arm|aarch64"; then
|
|
93
|
+
echo "Detected ARM architecture: $arch"
|
|
94
|
+
KUBECTL="$KUBECTL"_arm
|
|
95
|
+
fi
|
|
96
|
+
install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl
|
|
97
|
+
|
|
98
|
+
# Verify kubectl installation
|
|
99
|
+
kubectl version --client=true
|
|
100
|
+
|
|
101
|
+
- name: Decode kubeconfig from secrets
|
|
102
|
+
run: |
|
|
103
|
+
# Decode and save kubeconfig
|
|
104
|
+
echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > $KUBECONFIG
|
|
105
|
+
|
|
106
|
+
- name: Checkout code
|
|
107
|
+
uses: actions/checkout@v6
|
|
108
|
+
|
|
109
|
+
- name: Prepare scripts
|
|
110
|
+
run: |
|
|
111
|
+
# prepare for lws entrypoint scripts
|
|
112
|
+
install -D tests/e2e/nightly/multi_node/scripts/run.sh /root/.cache/tests/run.sh
|
|
113
|
+
# clear log directory
|
|
114
|
+
rm -fr $RESULT_FILE
|
|
115
|
+
|
|
116
|
+
- name: Clear resources
|
|
117
|
+
run: |
|
|
118
|
+
set -euo pipefail
|
|
119
|
+
|
|
120
|
+
CRD_NAME="${CRD_NAME:-vllm}"
|
|
121
|
+
TIMEOUT=${TIMEOUT:-120}
|
|
122
|
+
SLEEP_INTERVAL=2
|
|
123
|
+
|
|
124
|
+
echo "Deleting leaderworkerset [$CRD_NAME] in namespace [$NAMESPACE]..."
|
|
125
|
+
kubectl delete leaderworkerset "$CRD_NAME" -n "$NAMESPACE" --ignore-not-found
|
|
126
|
+
|
|
127
|
+
echo "Waiting for all pods starting with 'vllm' to be deleted..."
|
|
128
|
+
START_TIME=$(date +%s)
|
|
129
|
+
|
|
130
|
+
while true; do
|
|
131
|
+
NOW=$(date +%s)
|
|
132
|
+
ELAPSED=$((NOW - START_TIME))
|
|
133
|
+
|
|
134
|
+
if [[ $ELAPSED -ge $TIMEOUT ]]; then
|
|
135
|
+
echo "Timeout reached ($TIMEOUT seconds), some pods still exist:"
|
|
136
|
+
kubectl get pods -n "$NAMESPACE" | grep '^vllm' || true
|
|
137
|
+
exit 1
|
|
138
|
+
fi
|
|
139
|
+
|
|
140
|
+
PODS_EXIST=$(kubectl get pods -n "$NAMESPACE" -o jsonpath='{.items[*].metadata.name}' 2>/dev/null | tr ' ' '\n' | grep '^vllm' || true)
|
|
141
|
+
|
|
142
|
+
if [[ -z "$PODS_EXIST" ]]; then
|
|
143
|
+
echo "All vllm pods deleted."
|
|
144
|
+
break
|
|
145
|
+
else
|
|
146
|
+
echo "Waiting for pods to be deleted: $PODS_EXIST"
|
|
147
|
+
sleep $SLEEP_INTERVAL
|
|
148
|
+
fi
|
|
149
|
+
done
|
|
150
|
+
|
|
151
|
+
- name: Launch cluster
|
|
152
|
+
id: launcher
|
|
153
|
+
run: |
|
|
154
|
+
set -e
|
|
155
|
+
|
|
156
|
+
size="${{ inputs.size }}"
|
|
157
|
+
replicas="${{ inputs.replicas }}"
|
|
158
|
+
image="${{ inputs.image }}"
|
|
159
|
+
config_file_path="${{ inputs.config_file_path }}"
|
|
160
|
+
vllm_version="${{ inputs.vllm_version }}"
|
|
161
|
+
vllm_ascend_ref="${{ inputs.vllm_ascend_ref }}"
|
|
162
|
+
vllm_ascend_remote_url="${{ inputs.vllm_ascend_remote_url }}"
|
|
163
|
+
result_file_path="$RESULT_FILE"
|
|
164
|
+
fail_tag=FAIL_TAG_"${{ inputs.config_file_path }}"
|
|
165
|
+
echo "FAIL_TAG=${fail_tag}" >> $GITHUB_ENV
|
|
166
|
+
|
|
167
|
+
required_params=("size" "replicas" "image" "config_file_path")
|
|
168
|
+
for param in "${required_params[@]}"; do
|
|
169
|
+
if [ -z "${!param}" ]; then
|
|
170
|
+
echo "Error: Parameter '$param' is required but empty"
|
|
171
|
+
exit 1
|
|
172
|
+
fi
|
|
173
|
+
done
|
|
174
|
+
|
|
175
|
+
if [ "${{ inputs.soc_version }}" = "a3" ]; then
|
|
176
|
+
npu_per_node=16
|
|
177
|
+
else
|
|
178
|
+
npu_per_node=8
|
|
179
|
+
fi
|
|
180
|
+
|
|
181
|
+
jinja2 tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 \
|
|
182
|
+
-D size="$size" \
|
|
183
|
+
-D replicas="$replicas" \
|
|
184
|
+
-D image="$image" \
|
|
185
|
+
-D config_file_path="$config_file_path" \
|
|
186
|
+
-D vllm_version="$vllm_version" \
|
|
187
|
+
-D vllm_ascend_remote_url="$vllm_ascend_remote_url" \
|
|
188
|
+
-D vllm_ascend_ref="$vllm_ascend_ref" \
|
|
189
|
+
-D result_file_path="$result_file_path" \
|
|
190
|
+
-D npu_per_node="$npu_per_node" \
|
|
191
|
+
-D fail_tag="$fail_tag" \
|
|
192
|
+
--outfile lws.yaml
|
|
193
|
+
|
|
194
|
+
kubectl apply -f ./lws.yaml
|
|
195
|
+
|
|
196
|
+
- name: Waiting for pod ready
|
|
197
|
+
run: |
|
|
198
|
+
POD_PREFIX="${POD_PREFIX:-vllm-0}"
|
|
199
|
+
SIZE="${{ inputs.size }}"
|
|
200
|
+
TIMEOUT=1200 # default timeout 20 minutes
|
|
201
|
+
|
|
202
|
+
echo "Waiting for Pods in namespace [$NAMESPACE] to become Running and Ready (timeout ${TIMEOUT}s)..."
|
|
203
|
+
|
|
204
|
+
START_TIME=$(date +%s)
|
|
205
|
+
|
|
206
|
+
while true; do
|
|
207
|
+
NOW=$(date +%s)
|
|
208
|
+
ELAPSED=$((NOW - START_TIME))
|
|
209
|
+
if [[ $ELAPSED -ge $TIMEOUT ]]; then
|
|
210
|
+
echo "Timeout reached after ${ELAPSED}s"
|
|
211
|
+
echo "Dumping pod status for debugging:"
|
|
212
|
+
kubectl get pods -n "$NAMESPACE"
|
|
213
|
+
kubectl describe pod "$LEADER_POD" -n "$NAMESPACE"
|
|
214
|
+
exit 1
|
|
215
|
+
fi
|
|
216
|
+
|
|
217
|
+
# 1) check follower pods
|
|
218
|
+
ALL_FOLLOWERS_READY=true
|
|
219
|
+
for ((i=1; i<SIZE; i++)); do
|
|
220
|
+
POD="${POD_PREFIX}-${i}"
|
|
221
|
+
PHASE=$(kubectl get pod "$POD" -n "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound")
|
|
222
|
+
READY=$(kubectl get pod "$POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}' 2>/dev/null)
|
|
223
|
+
|
|
224
|
+
echo "Follower [$POD] phase=$PHASE ready=$READY"
|
|
225
|
+
|
|
226
|
+
if [[ "$PHASE" != "Running" || "$READY" != "true" ]]; then
|
|
227
|
+
echo "Follower [$POD] not Ready yet..."
|
|
228
|
+
ALL_FOLLOWERS_READY=false
|
|
229
|
+
break
|
|
230
|
+
fi
|
|
231
|
+
done
|
|
232
|
+
|
|
233
|
+
# 2) check leader pod
|
|
234
|
+
LEADER_PHASE=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound")
|
|
235
|
+
LEADER_READY=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}' 2>/dev/null)
|
|
236
|
+
|
|
237
|
+
echo "Leader [$LEADER_POD] phase=$LEADER_PHASE ready=$LEADER_READY"
|
|
238
|
+
|
|
239
|
+
if [[ "$LEADER_PHASE" != "Running" || "$LEADER_READY" != "true" ]]; then
|
|
240
|
+
echo "Leader not Ready yet..."
|
|
241
|
+
ALL_FOLLOWERS_READY=false
|
|
242
|
+
fi
|
|
243
|
+
|
|
244
|
+
if [[ "$ALL_FOLLOWERS_READY" == "true" ]]; then
|
|
245
|
+
echo "All follower pods and leader pod are Running and Ready — continuing."
|
|
246
|
+
break
|
|
247
|
+
fi
|
|
248
|
+
|
|
249
|
+
sleep 2
|
|
250
|
+
done
|
|
251
|
+
|
|
252
|
+
- name: Stream logs
|
|
253
|
+
run: |
|
|
254
|
+
set -euo pipefail
|
|
255
|
+
|
|
256
|
+
size="${{ inputs.size }}"
|
|
257
|
+
pids=()
|
|
258
|
+
|
|
259
|
+
cleanup() {
|
|
260
|
+
echo "Cleaning up background log streams..."
|
|
261
|
+
for pid in "${pids[@]}"; do
|
|
262
|
+
kill "$pid" 2>/dev/null || true
|
|
263
|
+
done
|
|
264
|
+
}
|
|
265
|
+
trap cleanup EXIT
|
|
266
|
+
|
|
267
|
+
for i in $(seq 1 $((size - 1))); do
|
|
268
|
+
POD="vllm-0-${i}"
|
|
269
|
+
|
|
270
|
+
echo "==== Collecting logs from worker pod: $POD ===="
|
|
271
|
+
kubectl logs -f "$POD" -n "$NAMESPACE" \
|
|
272
|
+
> "/tmp/${POD}_logs.txt" 2>&1 &
|
|
273
|
+
|
|
274
|
+
pids+=($!)
|
|
275
|
+
done
|
|
276
|
+
|
|
277
|
+
echo "==== Streaming logs from leader pod: $LEADER_POD ===="
|
|
278
|
+
echo "Looking for logs containing: $FAIL_TAG"
|
|
279
|
+
|
|
280
|
+
kubectl logs -f "$LEADER_POD" -n "$NAMESPACE" | while IFS= read -r line; do
|
|
281
|
+
echo "$line"
|
|
282
|
+
if echo "$line" | grep -q "$FAIL_TAG"; then
|
|
283
|
+
exit 1
|
|
284
|
+
fi
|
|
285
|
+
done
|
|
286
|
+
|
|
287
|
+
- name: Upload logs
|
|
288
|
+
if: always()
|
|
289
|
+
uses: actions/upload-artifact@v6
|
|
290
|
+
with:
|
|
291
|
+
name: ${{ inputs.config_file_path }}-pod-logs
|
|
292
|
+
path: /tmp/vllm*_logs.txt
|
|
293
|
+
retention-days: 7
|
|
294
|
+
|
|
295
|
+
- name: Post process
|
|
296
|
+
if: always()
|
|
297
|
+
run: |
|
|
298
|
+
kubectl get pods -n $NAMESPACE --ignore-not-found=true
|
|
299
|
+
kubectl delete -f ./lws.yaml --ignore-not-found=true || true
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# This file is a part of the vllm-ascend project.
|
|
16
|
+
#
|
|
17
|
+
|
|
18
|
+
name: 'e2e nightly test'
|
|
19
|
+
|
|
20
|
+
on:
|
|
21
|
+
workflow_call:
|
|
22
|
+
inputs:
|
|
23
|
+
vllm:
|
|
24
|
+
required: true
|
|
25
|
+
type: string
|
|
26
|
+
runner:
|
|
27
|
+
required: true
|
|
28
|
+
type: string
|
|
29
|
+
image:
|
|
30
|
+
required: false
|
|
31
|
+
type: string
|
|
32
|
+
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-910b-ubuntu22.04-py3.11"
|
|
33
|
+
tests:
|
|
34
|
+
required: true
|
|
35
|
+
type: string
|
|
36
|
+
name:
|
|
37
|
+
required: false
|
|
38
|
+
type: string
|
|
39
|
+
|
|
40
|
+
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
|
|
41
|
+
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
|
|
42
|
+
# It's used to activate ascend-toolkit environment variables.
|
|
43
|
+
defaults:
|
|
44
|
+
run:
|
|
45
|
+
shell: bash -el {0}
|
|
46
|
+
|
|
47
|
+
# only cancel in-progress runs of the same workflow
|
|
48
|
+
# and ignore the lint / 1 card / 4 cards test type
|
|
49
|
+
concurrency:
|
|
50
|
+
group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.tests }}
|
|
51
|
+
cancel-in-progress: true
|
|
52
|
+
|
|
53
|
+
jobs:
|
|
54
|
+
e2e-nightly:
|
|
55
|
+
name: ${{ inputs.tests }}
|
|
56
|
+
runs-on: ${{ inputs.runner }}
|
|
57
|
+
timeout-minutes: 600
|
|
58
|
+
container:
|
|
59
|
+
image: ${{ inputs.image }}
|
|
60
|
+
env:
|
|
61
|
+
TRANSFORMERS_OFFLINE: 1
|
|
62
|
+
VLLM_USE_MODELSCOPE: True
|
|
63
|
+
steps:
|
|
64
|
+
- name: Check npu and CANN info
|
|
65
|
+
run: |
|
|
66
|
+
npu-smi info
|
|
67
|
+
cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
|
|
68
|
+
|
|
69
|
+
- name: Show vLLM and vLLM-Ascend version
|
|
70
|
+
working-directory: /vllm-workspace
|
|
71
|
+
run: |
|
|
72
|
+
echo "Installed vLLM-related Python packages:"
|
|
73
|
+
pip list | grep vllm || echo "No vllm packages found."
|
|
74
|
+
|
|
75
|
+
echo ""
|
|
76
|
+
echo "============================"
|
|
77
|
+
echo "vLLM Git information"
|
|
78
|
+
echo "============================"
|
|
79
|
+
cd vllm
|
|
80
|
+
if [ -d .git ]; then
|
|
81
|
+
echo "Branch: $(git rev-parse --abbrev-ref HEAD)"
|
|
82
|
+
echo "Commit hash: $(git rev-parse HEAD)"
|
|
83
|
+
echo "Author: $(git log -1 --pretty=format:'%an <%ae>')"
|
|
84
|
+
echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)"
|
|
85
|
+
echo "Message: $(git log -1 --pretty=format:'%s')"
|
|
86
|
+
echo "Tags: $(git tag --points-at HEAD || echo 'None')"
|
|
87
|
+
echo "Remote: $(git remote -v | head -n1)"
|
|
88
|
+
echo ""
|
|
89
|
+
else
|
|
90
|
+
echo "No .git directory found in vllm"
|
|
91
|
+
fi
|
|
92
|
+
cd ..
|
|
93
|
+
|
|
94
|
+
echo ""
|
|
95
|
+
echo "============================"
|
|
96
|
+
echo "vLLM-Ascend Git information"
|
|
97
|
+
echo "============================"
|
|
98
|
+
cd vllm-ascend
|
|
99
|
+
if [ -d .git ]; then
|
|
100
|
+
echo "Branch: $(git rev-parse --abbrev-ref HEAD)"
|
|
101
|
+
echo "Commit hash: $(git rev-parse HEAD)"
|
|
102
|
+
echo "Author: $(git log -1 --pretty=format:'%an <%ae>')"
|
|
103
|
+
echo "Date: $(git log -1 --pretty=format:'%ad' --date=iso)"
|
|
104
|
+
echo "Message: $(git log -1 --pretty=format:'%s')"
|
|
105
|
+
echo "Tags: $(git tag --points-at HEAD || echo 'None')"
|
|
106
|
+
echo "Remote: $(git remote -v | head -n1)"
|
|
107
|
+
echo ""
|
|
108
|
+
else
|
|
109
|
+
echo "No .git directory found in vllm-ascend"
|
|
110
|
+
fi
|
|
111
|
+
cd ..
|
|
112
|
+
|
|
113
|
+
- name: Run vllm-project/vllm-ascend test
|
|
114
|
+
env:
|
|
115
|
+
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
|
116
|
+
VLLM_USE_MODELSCOPE: True
|
|
117
|
+
VLLM_CI_RUNNER: ${{ inputs.runner }}
|
|
118
|
+
BENCHMARK_HOME: /vllm-workspace/vllm-ascend/benchmark
|
|
119
|
+
working-directory: /vllm-workspace/vllm-ascend
|
|
120
|
+
run: |
|
|
121
|
+
# ignore test_dispatch_ffn_combine until the test is fixed
|
|
122
|
+
pytest -sv ${{ inputs.tests }} \
|
|
123
|
+
--ignore=tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py
|