tpu-inference 0.13.2rc1__tar.gz → 0.13.2rc3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tpu-inference might be problematic.
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/MANIFEST.in +0 -1
- {tpu_inference-0.13.2rc1/tpu_inference.egg-info → tpu_inference-0.13.2rc3}/PKG-INFO +1 -1
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/setup.py +5 -19
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/test_qwix.py +1 -1
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/utils/qwix/qwix_utils.py +3 -3
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/kv_cache_manager.py +1 -2
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/tpu_runner.py +3 -1
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3/tpu_inference.egg-info}/PKG-INFO +1 -1
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference.egg-info/SOURCES.txt +0 -1
- tpu_inference-0.13.2rc1/requirements_v7x.txt +0 -25
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/LICENSE +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/README.md +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/pyproject.toml +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/requirements.txt +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/setup.cfg +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/core/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/core/test_core_tpu.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/core/test_disagg_executor.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/core/test_disagg_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/core/test_dp_scheduler.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/core/test_init.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/distributed/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/distributed/test_distributed_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/distributed/test_tpu_connector.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_async_scheduler.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_data_parallel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_hybrid_kvcache.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_local_disagg.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_model_loader.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_multi_modal_inference.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_pipeline_parallel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_runai_model_streamer_loader.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_sampling_params.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_speculative_decoding.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_structured_decoding.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/executors/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/executors/test_ray_distributed_executor.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/experimental/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/experimental/test_llama3_jax_stashed.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/collectives/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/collectives/all_gather_matmul_kernel_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/fused_moe_v1_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/gmm_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/mla_v1_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/quantized_matmul_kernel_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/ragged_kv_cache_update_v2_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/ragged_paged_attention_kernel_v2_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/ragged_paged_attention_kernel_v3_hd64_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/ragged_paged_attention_kernel_v3_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/common/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/common/test_attention_interface.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/common/test_quantization.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/attention/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/attention/test_common_attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/attention/test_deepseek_v3_attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/attention/test_llama4_attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/moe/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/moe/test_deepseek_moe.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/sample/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/sample/test_rejection_sampler.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/sample/test_sampling.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/sample/test_sampling_metadata.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/test_layers.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/test_rope.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/test_sharding.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/test_transformer_block.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_awq.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_compressed_tensors_moe.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_compressed_tensors_w8a8_fp8.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_compressed_tensors_w8a8_int8.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_fp8.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_mxfp4.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_unquantized.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/lora/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/lora/conftest.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/lora/test_bgmv.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/lora/test_layers.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/lora/test_lora.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/lora/test_lora_perf.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/lora/utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/common/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/common/test_model_loader.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_deepseek_v3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_llama3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_llama4.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_llama_eagle3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_llama_guard_4.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_qwen2.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_qwen2_5_vl.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_qwen3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_weight_loading.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/utils/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/utils/test_multi_modal_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/platforms/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/platforms/test_tpu_platform.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_block_table.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_input_batch.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_kv_cache.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_kv_cache_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_multimodal_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_persistent_batch_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_speculative_decoding_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_structured_decoding_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_tpu_runner.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_tpu_runner_dp.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_tpu_runner_mesh.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/spec_decode/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/spec_decode/test_eagle3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/test_base.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/test_envs.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/test_tpu_info.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/test_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/worker/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/worker/tpu_worker_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/core/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/core/core_tpu.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/core/disagg_executor.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/core/disagg_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/core/sched/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/core/sched/dp_scheduler.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/distributed/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/distributed/jax_parallel_state.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/distributed/tpu_connector.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/distributed/utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/env_override.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/envs.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/executors/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/executors/ray_distributed_executor.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/experimental/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/experimental/llama3_jax_stashed.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/collectives/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/collectives/all_gather_matmul.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/collectives/all_gather_matmul_tuned_block_sizes.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/collectives/util.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/flash_attention/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/flash_attention/kernel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/fused_moe/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/fused_moe/v1/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/fused_moe/v1/kernel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/megablox/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/megablox/common.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/megablox/gmm.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/mla/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/mla/v1/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/mla/v1/kernel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/quantized_matmul/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/quantized_matmul/kernel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/quantized_matmul/tuned_block_sizes.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/quantized_matmul/util.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v2/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v2/kernel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v2/ragged_kv_cache_update.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v2/tuned_block_sizes.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v3/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v3/kernel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes_hd64.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v3/util.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/common/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/common/attention_interface.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/common/attention_metadata.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/common/binary_search.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/common/quant_methods.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/common/quantization.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/common/sharding.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/attention/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/attention/attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/attention/deepseek_v3_attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/attention/gpt_oss_attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/attention/llama4_attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/base.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/constants.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/layers.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/misc.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/moe/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/moe/deepseek_v3_moe.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/moe/gpt_oss_moe.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/moe/moe.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/pp_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/rope.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/rope_interface.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/sample/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/sample/rejection_sampler.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/sample/sampling.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/sample/sampling_metadata.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/transformer_block.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/fused_moe.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/linear_common.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/awq.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/common.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/compressed_tensors/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors_moe.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/fp8.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/mxfp4.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/unquantized.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/sharding.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/logger.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/lora/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/lora/torch_lora_ops.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/lora/torch_punica_tpu.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/common/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/common/model_loader.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/deepseek_v3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/gpt_oss.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/jax_intermediate_tensor.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/llama3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/llama4.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/llama_eagle3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/llama_guard_4.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/qwen2.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/qwen2_5_vl.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/qwen3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/utils/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/utils/file_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/utils/multi_modal_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/utils/qwix/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/utils/weight_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/vllm/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/vllm/vllm_model_wrapper.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/vllm/vllm_model_wrapper_context.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/platforms/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/platforms/tpu_platform.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/block_table.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/compilation_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/input_batch.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/kv_cache.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/lora_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/multimodal_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/persistent_batch_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/speculative_decoding_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/structured_decoding_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/spec_decode/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/spec_decode/jax/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/spec_decode/jax/eagle3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/tpu_info.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/worker/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/worker/tpu_worker.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference.egg-info/dependency_links.txt +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference.egg-info/requires.txt +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference.egg-info/top_level.txt +0 -0
{tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/setup.py
RENAMED
@@ -20,40 +20,26 @@ def get_requirements() -> List[str]:
             requirements = f.read().strip().split("\n")
         resolved_requirements = []
         for line in requirements:
-            if not line or line.startswith("#"):
-                continue
             if line.startswith("-r "):
                 resolved_requirements += _read_requirements(line.split()[1])
-            elif line.startswith(
+            elif line.startswith("--"):
                 continue
             else:
                 resolved_requirements.append(line)
         return resolved_requirements
 
     try:
-
-
-        # For TPU v7x build
-        if os.getenv("IS_FOR_V7X", "false").lower() == "true":
-            print("Using requirements_v7x.txt")
-            requirements = _read_requirements("requirements_v7x.txt")
-            #requirements.extend(v7x_requirements)
-        else:
-            #For TPU v6e build
-            print("Using requirements.txt")
-            requirements = _read_requirements("requirements.txt")
-
+        requirements = _read_requirements("requirements.txt")
     except ValueError:
         print("Failed to read requirements.txt in vllm_tpu.")
     return requirements
 
 
 def get_version():
-
-
-
+    if env_version := os.getenv("VLLM_VERSION_OVERRIDE"):
+        return env_version
+    return "0.0.0"
 
-    return version
 
 setup(
     name="tpu_inference",
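For orientation, here is a minimal, self-contained sketch of how the two setup.py helpers read after this change, pieced together from the hunk's context and added lines. The nesting of _read_requirements, the open() call, and the pre-initialized empty requirements list are assumptions added to keep the sketch runnable; they are not shown in the diff.

# Sketch of the post-rc3 setup.py helpers (reconstruction, not the exact file).
import os
from typing import List


def get_requirements() -> List[str]:

    def _read_requirements(filename: str) -> List[str]:
        # Assumption: the diff's context lines sit inside a nested helper that
        # reads the file; the open() call itself is not part of the hunk.
        with open(filename) as f:
            requirements = f.read().strip().split("\n")
        resolved_requirements = []
        for line in requirements:
            if line.startswith("-r "):
                resolved_requirements += _read_requirements(line.split()[1])
            elif line.startswith("--"):
                continue  # skip pip options such as --pre or --extra-index-url
            else:
                resolved_requirements.append(line)
        return resolved_requirements

    requirements: List[str] = []  # added here so the sketch survives a failed read
    try:
        # rc3 always reads requirements.txt; the IS_FOR_V7X branch is gone.
        requirements = _read_requirements("requirements.txt")
    except ValueError:
        print("Failed to read requirements.txt in vllm_tpu.")
    return requirements


def get_version():
    # rc3 takes the version from an environment variable instead of the old
    # hard-coded `version` value.
    if env_version := os.getenv("VLLM_VERSION_OVERRIDE"):
        return env_version
    return "0.0.0"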
{tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/test_qwix.py
RENAMED
@@ -832,7 +832,7 @@ class TestGetDefaultQwixQuantizationConfig(unittest.TestCase):
         # Patch the constants in the module where the function resides
         self.patchers = [
             patch(
-                "tpu_inference.models.jax.utils.qwix.qwix_utils.
+                "tpu_inference.models.jax.utils.qwix.qwix_utils.DEFAULT_DEEPSEEK_FP4_MLP_MOE_FP8_ATTN_CONFIG",
                 self.mock_deepseek_config),
             patch(
                 "tpu_inference.models.jax.utils.qwix.qwix_utils.DEFAULT_LLAMA4_FP8_CONFIG",
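The test change only updates the patch target string to the constant's new name. As a reminder of the mechanics, here is a generic, self-contained illustration (the module and constant below are made up for the example; only the unittest.mock.patch behavior mirrors what the test relies on): patch() replaces the attribute named by the dotted string in the module object itself, so the string must spell out the constant exactly as the module under test defines it.

# Generic illustration of patching a module-level constant with unittest.mock.
import sys
import types
from unittest.mock import patch

# Stand-in for a module such as tpu_inference.models.jax.utils.qwix.qwix_utils.
fake_utils = types.ModuleType("fake_utils")
fake_utils.DEFAULT_CONFIG = {"bits": 8}
sys.modules["fake_utils"] = fake_utils

with patch("fake_utils.DEFAULT_CONFIG", {"bits": 4}):
    # Inside the context the constant is swapped on the module object, so any
    # code reading fake_utils.DEFAULT_CONFIG sees the mock value.
    assert fake_utils.DEFAULT_CONFIG == {"bits": 4}
assert fake_utils.DEFAULT_CONFIG == {"bits": 8}  # restored on exit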
{tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/utils/qwix/qwix_utils.py
RENAMED
@@ -35,7 +35,7 @@ DEFAULT_NUM_TOKENS_FOR_MODEL_INPUTS = 512
 DEFAULT_MAX_NUM_SEQS_FOR_MODEL_INPUTS = 256
 DEFAULT_MAX_NUM_BLOCKS_PER_REQ = 16
 
-
+DEFAULT_DEEPSEEK_FP4_MLP_MOE_FP8_ATTN_CONFIG = {
     "qwix": {
         "use_abstract_model":
         True,
@@ -452,7 +452,7 @@ def get_default_qwix_quantization_config(
     # NOTE (jacobplatin): we'll default to mixed FP8 (attention) + FP4 (MoE experts)
     # for DeepSeek
     if model_type == "deepseek_v3" and quant_method == "fp8":
-        config = copy.deepcopy(
+        config = copy.deepcopy(DEFAULT_DEEPSEEK_FP4_MLP_MOE_FP8_ATTN_CONFIG)
 
         # Dynamically fetch block size from HF config if available
         # Config fmt: 'weight_block_size': [1, 512] -> we want the 2nd dim for tile_size
@@ -462,7 +462,7 @@ def get_default_qwix_quantization_config(
         block_size = hf_quant_config["weight_block_size"]
         if isinstance(block_size, (list, tuple)) and len(block_size) == 2:
             assert block_size[
-                0] == 1, f"Expected first dimension to be 1 (unchanneled), but got {block_size[0]}!"
+                0] == 1, f"Expected first dimension to be 1 (unchanneled), but got {block_size[0]}! If you are trying to run quantized DeepSeek, we currently only support 1D-subchannel quantization and those models can be found here: https://huggingface.co/collections/jrplatin/deepseek-r1-1d-subchannel"
             tile_size = block_size[1]
             assert tile_size > 1, f"Expected tile_size > 1 for DeepSeek, but got {tile_size}"
             logger.info(
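The check whose error message is expanded above boils down to a small rule on the HF quantization config: weight_block_size must be [1, N], and N becomes the tile size. A standalone sketch of that logic (the helper name and the sample config dict are illustrative, not from the package; only the [1, N] rule and the tile_size extraction mirror the diffed code):

# Standalone sketch of the 1D-subchannel block-size check described above.
def derive_tile_size(hf_quant_config: dict) -> int:
    block_size = hf_quant_config["weight_block_size"]
    assert isinstance(block_size, (list, tuple)) and len(block_size) == 2
    # Only 1D-subchannel quantization is supported: the first dimension must be 1.
    assert block_size[0] == 1, (
        f"Expected first dimension to be 1 (unchanneled), but got {block_size[0]}!")
    tile_size = block_size[1]  # e.g. 'weight_block_size': [1, 512] -> 512
    assert tile_size > 1, f"Expected tile_size > 1, but got {tile_size}"
    return tile_size


print(derive_tile_size({"weight_block_size": [1, 512]}))  # -> 512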
{tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/kv_cache_manager.py
RENAMED
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import functools
-from typing import TYPE_CHECKING,
+from typing import TYPE_CHECKING, List
 
 import jax
 import jax.numpy as jnp
@@ -212,7 +212,6 @@ class KVCacheManager:
         # uniform page size.
         representative_spec = kv_cache_config.kv_cache_groups[0].kv_cache_spec
         page_size_bytes = representative_spec.page_size_bytes
-        self.runner.layer_name_to_kvcache_index: Dict[str, int] = {}
         kv_caches = self.runner.kv_caches
         num_blocks_list = []
         for i, kv_cache_tensor in enumerate(kv_cache_config.kv_cache_tensors):
{tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/tpu_runner.py
RENAMED
@@ -282,6 +282,9 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
         self._substitute_placeholder_token_fn = _substitute_placeholder_token
         self.execute_model_state: ExecuteModelState | None = None
 
+        self.kv_caches: list[jax.Array] = []
+        self.layer_name_to_kvcache_index: dict[str, int] = {}
+
     def _init_random(self):
         if self.model_config.seed is None:
             self.model_config.seed = 0
@@ -545,7 +548,6 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
         self.topology_order_id = topology_order_id
         self.kv_cache_config = kv_cache_config
         self.use_hybrid_kvcache = len(kv_cache_config.kv_cache_groups) > 1
-        self.kv_caches = []
         self.kv_cache_manager.initialize_kv_cache(kv_cache_config)
         if has_kv_transfer_group():
             get_kv_transfer_group().register_runner(self)
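Taken together, the kv_cache_manager.py and tpu_runner.py hunks move ownership of the KV-cache containers into the runner's constructor: the runner now declares kv_caches and layer_name_to_kvcache_index up front, and the manager only fills containers that already exist. A heavily simplified sketch of the resulting shape (the classes, arguments, and the placeholder allocation below are stand-ins, not the real signatures):

# Simplified sketch of the post-rc3 ownership split between runner and manager.
import jax
import jax.numpy as jnp


class KVCacheManager:
    def __init__(self, runner: "TPUModelRunner"):
        self.runner = runner

    def initialize_kv_cache(self, layer_names) -> None:
        # rc3: the manager no longer creates runner.layer_name_to_kvcache_index;
        # it only populates containers the runner already owns.
        kv_caches = self.runner.kv_caches
        for index, layer_name in enumerate(layer_names):
            self.runner.layer_name_to_kvcache_index[layer_name] = index
            kv_caches.append(jnp.zeros((1,)))  # placeholder allocation


class TPUModelRunner:
    def __init__(self):
        # rc3 declares both containers in __init__, so they exist (and are typed)
        # before initialize_kv_cache() runs.
        self.kv_caches: list[jax.Array] = []
        self.layer_name_to_kvcache_index: dict[str, int] = {}
        self.kv_cache_manager = KVCacheManager(self)

    def initialize_kv_cache(self, layer_names) -> None:
        # The old `self.kv_caches = []` reset at this point is gone.
        self.kv_cache_manager.initialize_kv_cache(layer_names)


runner = TPUModelRunner()
runner.initialize_kv_cache(["layers.0.attn", "layers.1.attn"])
print(runner.layer_name_to_kvcache_index)  # {'layers.0.attn': 0, 'layers.1.attn': 1}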
tpu_inference-0.13.2rc1/requirements_v7x.txt
DELETED
@@ -1,25 +0,0 @@
-# This file contains additional dependencies needed for TPU v7x support.
-# It is expected to be used in conjunction with the main requirements.txt file.
---pre
--i https://us-python.pkg.dev/ml-oss-artifacts-published/jax/simple/
--f https://storage.googleapis.com/jax-releases/libtpu_releases.html
-jax==0.8.1
-jaxlib==0.8.1
-jaxtyping==0.3.2
-libtpu==0.0.31
-
-tpu-info==0.7.1
-yapf==0.43.0
-pytest
-pytest-mock
-absl-py
-numpy
-google-cloud-storage
-flax==0.11.1
-torchax==0.0.10
-qwix==0.1.1
-torchvision==0.24.0
-pathwaysutils
-parameterized
-numba==0.62.1
-runai-model-streamer[s3,gcs]==0.15.0