vllm-npu 0.4.2__tar.gz → 0.4.2.post2__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (310) hide show
  1. {vllm_npu-0.4.2/vllm_npu.egg-info → vllm_npu-0.4.2.post2}/PKG-INFO +4 -4
  2. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/setup.py +2 -2
  3. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2/vllm_npu.egg-info}/PKG-INFO +4 -4
  4. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm_npu.egg-info/requires.txt +2 -2
  5. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/CMakeLists.txt +0 -0
  6. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/LICENSE +0 -0
  7. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/MANIFEST.in +0 -0
  8. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/README.md +0 -0
  9. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/cmake/cpu_extension.cmake +0 -0
  10. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/cmake/hipify.py +0 -0
  11. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/cmake/utils.cmake +0 -0
  12. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/activation_kernels.cu +0 -0
  13. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/attention/attention_dtypes.h +0 -0
  14. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/attention/attention_generic.cuh +0 -0
  15. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/attention/attention_kernels.cu +0 -0
  16. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/attention/attention_utils.cuh +0 -0
  17. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/attention/dtype_bfloat16.cuh +0 -0
  18. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/attention/dtype_float16.cuh +0 -0
  19. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/attention/dtype_float32.cuh +0 -0
  20. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/attention/dtype_fp8.cuh +0 -0
  21. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/cache.h +0 -0
  22. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/cache_kernels.cu +0 -0
  23. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/cpu/activation.cpp +0 -0
  24. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/cpu/attention.cpp +0 -0
  25. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/cpu/cache.cpp +0 -0
  26. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/cpu/cpu_types.hpp +0 -0
  27. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/cpu/layernorm.cpp +0 -0
  28. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/cpu/pos_encoding.cpp +0 -0
  29. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/cpu/pybind.cpp +0 -0
  30. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/cuda_compat.h +0 -0
  31. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/cuda_utils.h +0 -0
  32. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/cuda_utils_kernels.cu +0 -0
  33. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/custom_all_reduce.cu +0 -0
  34. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/custom_all_reduce.cuh +0 -0
  35. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/custom_all_reduce_test.cu +0 -0
  36. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/dispatch_utils.h +0 -0
  37. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/layernorm_kernels.cu +0 -0
  38. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/moe/moe_ops.cpp +0 -0
  39. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/moe/moe_ops.h +0 -0
  40. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/moe/topk_softmax_kernels.cu +0 -0
  41. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/moe_align_block_size_kernels.cu +0 -0
  42. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/ops.h +0 -0
  43. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/pos_encoding_kernels.cu +0 -0
  44. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/punica/LICENSE +0 -0
  45. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu +0 -0
  46. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/punica/bgmv/bgmv_bf16_fp32_bf16.cu +0 -0
  47. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/punica/bgmv/bgmv_config.h +0 -0
  48. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/punica/bgmv/bgmv_fp16_fp16_fp16.cu +0 -0
  49. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/punica/bgmv/bgmv_fp16_fp32_fp16.cu +0 -0
  50. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/punica/bgmv/bgmv_fp32_bf16_bf16.cu +0 -0
  51. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/punica/bgmv/bgmv_fp32_fp16_fp16.cu +0 -0
  52. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/punica/bgmv/bgmv_impl.cuh +0 -0
  53. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/punica/bgmv/generator.py +0 -0
  54. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/punica/bgmv/vec_dtypes.cuh +0 -0
  55. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/punica/punica_ops.cc +0 -0
  56. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/pybind.cpp +0 -0
  57. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/aqlm/gemm_kernels.cu +0 -0
  58. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/awq/dequantize.cuh +0 -0
  59. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/awq/gemm_kernels.cu +0 -0
  60. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/fp8/amd_detail/hip_float8.h +0 -0
  61. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/fp8/amd_detail/hip_float8_impl.h +0 -0
  62. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/fp8/amd_detail/quant_utils.cuh +0 -0
  63. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/fp8/fp8_cuda_kernels.cu +0 -0
  64. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/fp8_e5m2_kvcache/quant_utils.cuh +0 -0
  65. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/gptq/compat.cuh +0 -0
  66. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/gptq/matrix_view.cuh +0 -0
  67. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/gptq/q_gemm.cu +0 -0
  68. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/gptq/qdq_2.cuh +0 -0
  69. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/gptq/qdq_3.cuh +0 -0
  70. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/gptq/qdq_4.cuh +0 -0
  71. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/gptq/qdq_8.cuh +0 -0
  72. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/gptq/qdq_util.cuh +0 -0
  73. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/gptq_marlin/gptq_marlin.cu +0 -0
  74. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/gptq_marlin/gptq_marlin.cuh +0 -0
  75. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/gptq_marlin/gptq_marlin_repack.cu +0 -0
  76. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/marlin/LICENSE +0 -0
  77. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/marlin/marlin_cuda_kernel.cu +0 -0
  78. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/quantization/squeezellm/quant_cuda_kernel.cu +0 -0
  79. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/csrc/reduction_utils.cuh +0 -0
  80. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/pyproject.toml +0 -0
  81. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/requirements-common.txt +0 -0
  82. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/requirements-cpu.txt +0 -0
  83. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/requirements-cuda.txt +0 -0
  84. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/requirements-neuron.txt +0 -0
  85. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/requirements-rocm.txt +0 -0
  86. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/setup.cfg +0 -0
  87. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/tests/test_cache_block_hashing.py +0 -0
  88. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/tests/test_config.py +0 -0
  89. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/tests/test_logger.py +0 -0
  90. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/tests/test_logits_processor.py +0 -0
  91. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/tests/test_regression.py +0 -0
  92. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/tests/test_sampling_params.py +0 -0
  93. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/tests/test_sequence.py +0 -0
  94. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/__init__.py +0 -0
  95. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/_custom_ops.py +0 -0
  96. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/attention/__init__.py +0 -0
  97. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/attention/backends/__init__.py +0 -0
  98. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/attention/backends/abstract.py +0 -0
  99. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/attention/backends/flash_attn.py +0 -0
  100. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/attention/backends/flashinfer.py +0 -0
  101. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/attention/backends/rocm_flash_attn.py +0 -0
  102. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/attention/backends/torch_sdpa.py +0 -0
  103. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/attention/backends/xformers.py +0 -0
  104. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/attention/layer.py +0 -0
  105. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/attention/ops/__init__.py +0 -0
  106. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/attention/ops/paged_attn.py +0 -0
  107. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/attention/ops/prefix_prefill.py +0 -0
  108. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/attention/ops/triton_flash_attention.py +0 -0
  109. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/attention/selector.py +0 -0
  110. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/block.py +0 -0
  111. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/config.py +0 -0
  112. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/core/__init__.py +0 -0
  113. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/core/block/__init__.py +0 -0
  114. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/core/block/block_table.py +0 -0
  115. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/core/block/common.py +0 -0
  116. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/core/block/cpu_gpu_block_allocator.py +0 -0
  117. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/core/block/interfaces.py +0 -0
  118. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/core/block/naive_block.py +0 -0
  119. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/core/block/prefix_caching_block.py +0 -0
  120. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/core/block_manager_v1.py +0 -0
  121. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/core/block_manager_v2.py +0 -0
  122. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/core/evictor_v1.py +0 -0
  123. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/core/evictor_v2.py +0 -0
  124. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/core/interfaces.py +0 -0
  125. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/core/policy.py +0 -0
  126. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/core/scheduler.py +0 -0
  127. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/distributed/__init__.py +0 -0
  128. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/distributed/communication_op.py +0 -0
  129. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/distributed/device_communicators/__init__.py +0 -0
  130. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/distributed/device_communicators/custom_all_reduce.py +0 -0
  131. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/distributed/device_communicators/pynccl.py +0 -0
  132. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/distributed/device_communicators/pynccl_utils.py +0 -0
  133. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/distributed/parallel_state.py +0 -0
  134. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/distributed/utils.py +0 -0
  135. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/engine/__init__.py +0 -0
  136. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/engine/arg_utils.py +0 -0
  137. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/engine/async_llm_engine.py +0 -0
  138. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/engine/llm_engine.py +0 -0
  139. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/engine/metrics.py +0 -0
  140. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/engine/output_processor/__init__.py +0 -0
  141. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/engine/output_processor/interfaces.py +0 -0
  142. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/engine/output_processor/multi_step.py +0 -0
  143. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/engine/output_processor/single_step.py +0 -0
  144. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/engine/output_processor/stop_checker.py +0 -0
  145. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/engine/output_processor/util.py +0 -0
  146. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/entrypoints/__init__.py +0 -0
  147. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/entrypoints/api_server.py +0 -0
  148. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/entrypoints/llm.py +0 -0
  149. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/entrypoints/openai/__init__.py +0 -0
  150. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/entrypoints/openai/api_server.py +0 -0
  151. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/entrypoints/openai/cli_args.py +0 -0
  152. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/entrypoints/openai/protocol.py +0 -0
  153. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/entrypoints/openai/serving_chat.py +0 -0
  154. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/entrypoints/openai/serving_completion.py +0 -0
  155. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/entrypoints/openai/serving_engine.py +0 -0
  156. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/envs.py +0 -0
  157. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/executor/__init__.py +0 -0
  158. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/executor/cpu_executor.py +0 -0
  159. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/executor/distributed_gpu_executor.py +0 -0
  160. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/executor/executor_base.py +0 -0
  161. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/executor/gpu_executor.py +0 -0
  162. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/executor/multiproc_worker_utils.py +0 -0
  163. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/executor/neuron_executor.py +0 -0
  164. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/executor/ray_gpu_executor.py +0 -0
  165. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/executor/ray_utils.py +0 -0
  166. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/logger.py +0 -0
  167. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/logging/__init__.py +0 -0
  168. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/logging/formatter.py +0 -0
  169. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/lora/__init__.py +0 -0
  170. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/lora/fully_sharded_layers.py +0 -0
  171. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/lora/layers.py +0 -0
  172. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/lora/lora.py +0 -0
  173. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/lora/models.py +0 -0
  174. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/lora/punica.py +0 -0
  175. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/lora/request.py +0 -0
  176. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/lora/utils.py +0 -0
  177. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/lora/worker_manager.py +0 -0
  178. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/__init__.py +0 -0
  179. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/guided_decoding/__init__.py +0 -0
  180. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/guided_decoding/lm_format_enforcer_decoding.py +0 -0
  181. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/guided_decoding/outlines_decoding.py +0 -0
  182. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/guided_decoding/outlines_logits_processors.py +0 -0
  183. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/__init__.py +0 -0
  184. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/activation.py +0 -0
  185. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/__init__.py +0 -0
  186. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json +0 -0
  187. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  188. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  189. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  190. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  191. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json +0 -0
  192. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  193. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  194. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  195. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  196. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json +0 -0
  197. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  198. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json +0 -0
  199. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  200. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  201. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  202. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json +0 -0
  203. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json +0 -0
  204. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +0 -0
  205. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/fused_moe/fused_moe.py +0 -0
  206. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/layernorm.py +0 -0
  207. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/linear.py +0 -0
  208. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/logits_processor.py +0 -0
  209. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/ops/__init__.py +0 -0
  210. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/ops/rand.py +0 -0
  211. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/ops/sample.py +0 -0
  212. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/quantization/__init__.py +0 -0
  213. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/quantization/aqlm.py +0 -0
  214. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/quantization/awq.py +0 -0
  215. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/quantization/base_config.py +0 -0
  216. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/quantization/fp8.py +0 -0
  217. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/quantization/gptq.py +0 -0
  218. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/quantization/gptq_marlin.py +0 -0
  219. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/quantization/marlin.py +0 -0
  220. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/quantization/schema.py +0 -0
  221. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/quantization/squeezellm.py +0 -0
  222. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/rejection_sampler.py +0 -0
  223. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/rotary_embedding.py +0 -0
  224. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/sampler.py +0 -0
  225. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/layers/vocab_parallel_embedding.py +0 -0
  226. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/model_loader/__init__.py +0 -0
  227. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/model_loader/loader.py +0 -0
  228. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/model_loader/neuron.py +0 -0
  229. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/model_loader/tensorizer.py +0 -0
  230. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/model_loader/utils.py +0 -0
  231. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/model_loader/weight_utils.py +0 -0
  232. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/__init__.py +0 -0
  233. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/baichuan.py +0 -0
  234. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/bloom.py +0 -0
  235. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/chatglm.py +0 -0
  236. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/commandr.py +0 -0
  237. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/dbrx.py +0 -0
  238. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/decilm.py +0 -0
  239. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/deepseek.py +0 -0
  240. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/falcon.py +0 -0
  241. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/gemma.py +0 -0
  242. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/gpt2.py +0 -0
  243. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/gpt_bigcode.py +0 -0
  244. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/gpt_j.py +0 -0
  245. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/gpt_neox.py +0 -0
  246. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/internlm2.py +0 -0
  247. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/jais.py +0 -0
  248. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/llama.py +0 -0
  249. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/llava.py +0 -0
  250. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/minicpm.py +0 -0
  251. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/mixtral.py +0 -0
  252. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/mixtral_quant.py +0 -0
  253. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/mpt.py +0 -0
  254. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/olmo.py +0 -0
  255. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/opt.py +0 -0
  256. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/orion.py +0 -0
  257. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/phi.py +0 -0
  258. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/qwen.py +0 -0
  259. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/qwen2.py +0 -0
  260. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/qwen2_moe.py +0 -0
  261. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/stablelm.py +0 -0
  262. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/starcoder2.py +0 -0
  263. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/models/xverse.py +0 -0
  264. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/sampling_metadata.py +0 -0
  265. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/model_executor/utils.py +0 -0
  266. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/outputs.py +0 -0
  267. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/py.typed +0 -0
  268. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/sampling_params.py +0 -0
  269. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/sequence.py +0 -0
  270. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/spec_decode/__init__.py +0 -0
  271. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/spec_decode/batch_expansion.py +0 -0
  272. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/spec_decode/interfaces.py +0 -0
  273. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/spec_decode/metrics.py +0 -0
  274. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/spec_decode/multi_step_worker.py +0 -0
  275. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/spec_decode/ngram_worker.py +0 -0
  276. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/spec_decode/spec_decode_worker.py +0 -0
  277. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/spec_decode/top1_proposer.py +0 -0
  278. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/spec_decode/util.py +0 -0
  279. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/test_utils.py +0 -0
  280. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/__init__.py +0 -0
  281. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/config.py +0 -0
  282. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/configs/__init__.py +0 -0
  283. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/configs/chatglm.py +0 -0
  284. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/configs/dbrx.py +0 -0
  285. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/configs/falcon.py +0 -0
  286. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/configs/jais.py +0 -0
  287. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/configs/mpt.py +0 -0
  288. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/detokenizer.py +0 -0
  289. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/tokenizer.py +0 -0
  290. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/tokenizer_group/__init__.py +0 -0
  291. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py +0 -0
  292. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py +0 -0
  293. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/tokenizer_group/tokenizer_group.py +0 -0
  294. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/tokenizers/__init__.py +0 -0
  295. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/transformers_utils/tokenizers/baichuan.py +0 -0
  296. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/usage/__init__.py +0 -0
  297. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/usage/usage_lib.py +0 -0
  298. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/utils.py +0 -0
  299. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/worker/__init__.py +0 -0
  300. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/worker/cache_engine.py +0 -0
  301. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/worker/cpu_model_runner.py +0 -0
  302. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/worker/cpu_worker.py +0 -0
  303. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/worker/model_runner.py +0 -0
  304. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/worker/neuron_model_runner.py +0 -0
  305. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/worker/neuron_worker.py +0 -0
  306. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/worker/worker.py +0 -0
  307. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm/worker/worker_base.py +0 -0
  308. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm_npu.egg-info/SOURCES.txt +0 -0
  309. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm_npu.egg-info/dependency_links.txt +0 -0
  310. {vllm_npu-0.4.2 → vllm_npu-0.4.2.post2}/vllm_npu.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
- Name: vllm_npu
3
- Version: 0.4.2
2
+ Name: vllm-npu
3
+ Version: 0.4.2.post2
4
4
  Summary: A high-throughput and memory-efficient inference and serving engine for LLMs
5
5
  Home-page: https://github.com/vllm-project/vllm
6
6
  Author: vLLM Team
@@ -20,7 +20,7 @@ Requires-Dist: cmake>=3.21
20
20
  Requires-Dist: ninja
21
21
  Requires-Dist: psutil
22
22
  Requires-Dist: sentencepiece
23
- Requires-Dist: numpy
23
+ Requires-Dist: numpy==1.26.4
24
24
  Requires-Dist: requests
25
25
  Requires-Dist: py-cpuinfo
26
26
  Requires-Dist: transformers>=4.40.0
@@ -38,7 +38,7 @@ Requires-Dist: filelock>=3.10.4
38
38
  Requires-Dist: ray==2.9.3
39
39
  Requires-Dist: pynvml==11.5.0
40
40
  Requires-Dist: outlines==0.0.34
41
- Requires-Dist: npu-vllm==0.4.2
41
+ Requires-Dist: npu-vllm==0.4.2.post3
42
42
  Provides-Extra: tensorizer
43
43
  Requires-Dist: tensorizer==2.9.0; extra == "tensorizer"
44
44
  Dynamic: author
@@ -262,8 +262,8 @@ if envs.VLLM_USE_PRECOMPILED:
262
262
  package_data["vllm"].append("*.so")
263
263
 
264
264
  setup(
265
- name="vllm_npu",
266
- version=get_vllm_version(),
265
+ name="vllm-npu",
266
+ version=get_vllm_version() + '.post2',
267
267
  author="vLLM Team",
268
268
  license="Apache 2.0",
269
269
  description=(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
- Name: vllm_npu
3
- Version: 0.4.2
2
+ Name: vllm-npu
3
+ Version: 0.4.2.post2
4
4
  Summary: A high-throughput and memory-efficient inference and serving engine for LLMs
5
5
  Home-page: https://github.com/vllm-project/vllm
6
6
  Author: vLLM Team
@@ -20,7 +20,7 @@ Requires-Dist: cmake>=3.21
20
20
  Requires-Dist: ninja
21
21
  Requires-Dist: psutil
22
22
  Requires-Dist: sentencepiece
23
- Requires-Dist: numpy
23
+ Requires-Dist: numpy==1.26.4
24
24
  Requires-Dist: requests
25
25
  Requires-Dist: py-cpuinfo
26
26
  Requires-Dist: transformers>=4.40.0
@@ -38,7 +38,7 @@ Requires-Dist: filelock>=3.10.4
38
38
  Requires-Dist: ray==2.9.3
39
39
  Requires-Dist: pynvml==11.5.0
40
40
  Requires-Dist: outlines==0.0.34
41
- Requires-Dist: npu-vllm==0.4.2
41
+ Requires-Dist: npu-vllm==0.4.2.post3
42
42
  Provides-Extra: tensorizer
43
43
  Requires-Dist: tensorizer==2.9.0; extra == "tensorizer"
44
44
  Dynamic: author
@@ -2,7 +2,7 @@ cmake>=3.21
2
2
  ninja
3
3
  psutil
4
4
  sentencepiece
5
- numpy
5
+ numpy==1.26.4
6
6
  requests
7
7
  py-cpuinfo
8
8
  transformers>=4.40.0
@@ -20,7 +20,7 @@ filelock>=3.10.4
20
20
  ray==2.9.3
21
21
  pynvml==11.5.0
22
22
  outlines==0.0.34
23
- npu-vllm==0.4.2
23
+ npu-vllm==0.4.2.post3
24
24
 
25
25
  [tensorizer]
26
26
  tensorizer==2.9.0
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes