tpu-inference 0.13.2rc1__tar.gz → 0.13.2rc3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tpu-inference might be problematic.
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/MANIFEST.in +0 -1
- {tpu_inference-0.13.2rc1/tpu_inference.egg-info → tpu_inference-0.13.2rc3}/PKG-INFO +1 -1
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/setup.py +5 -19
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/test_qwix.py +1 -1
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/utils/qwix/qwix_utils.py +3 -3
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/kv_cache_manager.py +1 -2
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/tpu_runner.py +3 -1
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3/tpu_inference.egg-info}/PKG-INFO +1 -1
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference.egg-info/SOURCES.txt +0 -1
- tpu_inference-0.13.2rc1/requirements_v7x.txt +0 -25
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/LICENSE +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/README.md +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/pyproject.toml +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/requirements.txt +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/setup.cfg +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/core/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/core/test_core_tpu.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/core/test_disagg_executor.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/core/test_disagg_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/core/test_dp_scheduler.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/core/test_init.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/distributed/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/distributed/test_distributed_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/distributed/test_tpu_connector.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_async_scheduler.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_data_parallel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_hybrid_kvcache.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_local_disagg.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_model_loader.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_multi_modal_inference.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_pipeline_parallel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_runai_model_streamer_loader.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_sampling_params.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_speculative_decoding.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/e2e/test_structured_decoding.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/executors/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/executors/test_ray_distributed_executor.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/experimental/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/experimental/test_llama3_jax_stashed.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/collectives/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/collectives/all_gather_matmul_kernel_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/fused_moe_v1_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/gmm_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/mla_v1_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/quantized_matmul_kernel_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/ragged_kv_cache_update_v2_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/ragged_paged_attention_kernel_v2_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/ragged_paged_attention_kernel_v3_hd64_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/kernels/ragged_paged_attention_kernel_v3_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/common/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/common/test_attention_interface.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/common/test_quantization.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/attention/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/attention/test_common_attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/attention/test_deepseek_v3_attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/attention/test_llama4_attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/moe/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/moe/test_deepseek_moe.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/sample/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/sample/test_rejection_sampler.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/sample/test_sampling.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/sample/test_sampling_metadata.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/test_layers.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/test_rope.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/test_sharding.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/test_transformer_block.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_awq.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_compressed_tensors_moe.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_compressed_tensors_w8a8_fp8.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_compressed_tensors_w8a8_int8.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_fp8.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_mxfp4.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/test_unquantized.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/vllm/utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/lora/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/lora/conftest.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/lora/test_bgmv.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/lora/test_layers.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/lora/test_lora.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/lora/test_lora_perf.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/lora/utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/common/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/common/test_model_loader.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_deepseek_v3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_llama3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_llama4.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_llama_eagle3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_llama_guard_4.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_qwen2.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_qwen2_5_vl.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_qwen3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/test_weight_loading.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/utils/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/models/jax/utils/test_multi_modal_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/platforms/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/platforms/test_tpu_platform.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_block_table.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_input_batch.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_kv_cache.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_kv_cache_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_multimodal_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_persistent_batch_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_speculative_decoding_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_structured_decoding_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_tpu_runner.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_tpu_runner_dp.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_tpu_runner_mesh.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/runner/test_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/spec_decode/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/spec_decode/test_eagle3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/test_base.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/test_envs.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/test_tpu_info.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/test_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/worker/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/worker/tpu_worker_test.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/core/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/core/core_tpu.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/core/disagg_executor.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/core/disagg_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/core/sched/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/core/sched/dp_scheduler.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/distributed/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/distributed/jax_parallel_state.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/distributed/tpu_connector.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/distributed/utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/env_override.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/envs.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/executors/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/executors/ray_distributed_executor.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/experimental/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/experimental/llama3_jax_stashed.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/collectives/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/collectives/all_gather_matmul.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/collectives/all_gather_matmul_tuned_block_sizes.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/collectives/util.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/flash_attention/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/flash_attention/kernel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/fused_moe/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/fused_moe/v1/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/fused_moe/v1/kernel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/megablox/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/megablox/common.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/megablox/gmm.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/mla/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/mla/v1/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/mla/v1/kernel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/quantized_matmul/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/quantized_matmul/kernel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/quantized_matmul/tuned_block_sizes.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/quantized_matmul/util.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v2/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v2/kernel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v2/ragged_kv_cache_update.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v2/tuned_block_sizes.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v3/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v3/kernel.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v3/kernel_hd64.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v3/tuned_block_sizes_hd64.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/kernels/ragged_paged_attention/v3/util.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/common/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/common/attention_interface.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/common/attention_metadata.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/common/binary_search.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/common/quant_methods.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/common/quantization.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/common/sharding.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/attention/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/attention/attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/attention/deepseek_v3_attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/attention/gpt_oss_attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/attention/llama4_attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/base.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/constants.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/layers.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/misc.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/moe/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/moe/deepseek_v3_moe.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/moe/gpt_oss_moe.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/moe/moe.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/pp_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/rope.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/rope_interface.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/sample/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/sample/rejection_sampler.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/sample/sampling.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/sample/sampling_metadata.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/jax/transformer_block.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/attention.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/fused_moe.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/linear_common.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/awq.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/common.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/compressed_tensors/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/compressed_tensors/compressed_tensors_moe.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/fp8.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/mxfp4.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/quantization/unquantized.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/layers/vllm/sharding.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/logger.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/lora/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/lora/torch_lora_ops.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/lora/torch_punica_tpu.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/common/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/common/model_loader.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/deepseek_v3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/gpt_oss.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/jax_intermediate_tensor.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/llama3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/llama4.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/llama_eagle3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/llama_guard_4.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/qwen2.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/qwen2_5_vl.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/qwen3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/utils/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/utils/file_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/utils/multi_modal_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/utils/qwix/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/utils/weight_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/vllm/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/vllm/vllm_model_wrapper.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/vllm/vllm_model_wrapper_context.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/platforms/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/platforms/tpu_platform.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/block_table.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/compilation_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/input_batch.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/kv_cache.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/lora_utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/multimodal_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/persistent_batch_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/speculative_decoding_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/structured_decoding_manager.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/spec_decode/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/spec_decode/jax/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/spec_decode/jax/eagle3.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/tpu_info.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/utils.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/worker/__init__.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/worker/tpu_worker.py +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference.egg-info/dependency_links.txt +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference.egg-info/requires.txt +0 -0
- {tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference.egg-info/top_level.txt +0 -0
{tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/setup.py
RENAMED
@@ -20,40 +20,26 @@ def get_requirements() -> List[str]:
             requirements = f.read().strip().split("\n")
         resolved_requirements = []
         for line in requirements:
-            if not line or line.startswith("#"):
-                continue
             if line.startswith("-r "):
                 resolved_requirements += _read_requirements(line.split()[1])
-            elif line.startswith(
+            elif line.startswith("--"):
                 continue
             else:
                 resolved_requirements.append(line)
         return resolved_requirements
 
     try:
-
-
-        # For TPU v7x build
-        if os.getenv("IS_FOR_V7X", "false").lower() == "true":
-            print("Using requirements_v7x.txt")
-            requirements = _read_requirements("requirements_v7x.txt")
-            #requirements.extend(v7x_requirements)
-        else:
-            #For TPU v6e build
-            print("Using requirements.txt")
-            requirements = _read_requirements("requirements.txt")
-
+        requirements = _read_requirements("requirements.txt")
     except ValueError:
         print("Failed to read requirements.txt in vllm_tpu.")
     return requirements
 
 
 def get_version():
-
-
-
+    if env_version := os.getenv("VLLM_VERSION_OVERRIDE"):
+        return env_version
+    return "0.0.0"
 
-    return version
 
 setup(
     name="tpu_inference",
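For orientation, here is a minimal, self-contained sketch of how the two setup.py helpers read after this change, pieced together from the hunk's context and added lines. The nesting of _read_requirements, the open() call, and the pre-initialized empty requirements list are assumptions added to keep the sketch runnable; they are not shown in the diff.

# Sketch of the post-rc3 setup.py helpers (reconstruction, not the exact file).
import os
from typing import List


def get_requirements() -> List[str]:

    def _read_requirements(filename: str) -> List[str]:
        # Assumption: the diff's context lines sit inside a nested helper that
        # reads the file; the open() call itself is not part of the hunk.
        with open(filename) as f:
            requirements = f.read().strip().split("\n")
        resolved_requirements = []
        for line in requirements:
            if line.startswith("-r "):
                resolved_requirements += _read_requirements(line.split()[1])
            elif line.startswith("--"):
                continue  # skip pip options such as --pre or --extra-index-url
            else:
                resolved_requirements.append(line)
        return resolved_requirements

    requirements: List[str] = []  # added here so the sketch survives a failed read
    try:
        # rc3 always reads requirements.txt; the IS_FOR_V7X branch is gone.
        requirements = _read_requirements("requirements.txt")
    except ValueError:
        print("Failed to read requirements.txt in vllm_tpu.")
    return requirements


def get_version():
    # rc3 takes the version from an environment variable instead of the old
    # hard-coded `version` value.
    if env_version := os.getenv("VLLM_VERSION_OVERRIDE"):
        return env_version
    return "0.0.0"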
{tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tests/layers/jax/test_qwix.py
RENAMED
@@ -832,7 +832,7 @@ class TestGetDefaultQwixQuantizationConfig(unittest.TestCase):
         # Patch the constants in the module where the function resides
         self.patchers = [
             patch(
-                "tpu_inference.models.jax.utils.qwix.qwix_utils.
+                "tpu_inference.models.jax.utils.qwix.qwix_utils.DEFAULT_DEEPSEEK_FP4_MLP_MOE_FP8_ATTN_CONFIG",
                 self.mock_deepseek_config),
             patch(
                 "tpu_inference.models.jax.utils.qwix.qwix_utils.DEFAULT_LLAMA4_FP8_CONFIG",
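The test change only updates the patch target string to the constant's new name. As a reminder of the mechanics, here is a generic, self-contained illustration (the module and constant below are made up for the example; only the unittest.mock.patch behavior mirrors what the test relies on): patch() replaces the attribute named by the dotted string in the module object itself, so the string must spell out the constant exactly as the module under test defines it.

# Generic illustration of patching a module-level constant with unittest.mock.
import sys
import types
from unittest.mock import patch

# Stand-in for a module such as tpu_inference.models.jax.utils.qwix.qwix_utils.
fake_utils = types.ModuleType("fake_utils")
fake_utils.DEFAULT_CONFIG = {"bits": 8}
sys.modules["fake_utils"] = fake_utils

with patch("fake_utils.DEFAULT_CONFIG", {"bits": 4}):
    # Inside the context the constant is swapped on the module object, so any
    # code reading fake_utils.DEFAULT_CONFIG sees the mock value.
    assert fake_utils.DEFAULT_CONFIG == {"bits": 4}
assert fake_utils.DEFAULT_CONFIG == {"bits": 8}  # restored on exit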
{tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/models/jax/utils/qwix/qwix_utils.py
RENAMED
@@ -35,7 +35,7 @@ DEFAULT_NUM_TOKENS_FOR_MODEL_INPUTS = 512
 DEFAULT_MAX_NUM_SEQS_FOR_MODEL_INPUTS = 256
 DEFAULT_MAX_NUM_BLOCKS_PER_REQ = 16
 
-
+DEFAULT_DEEPSEEK_FP4_MLP_MOE_FP8_ATTN_CONFIG = {
     "qwix": {
         "use_abstract_model":
         True,
@@ -452,7 +452,7 @@ def get_default_qwix_quantization_config(
     # NOTE (jacobplatin): we'll default to mixed FP8 (attention) + FP4 (MoE experts)
     # for DeepSeek
     if model_type == "deepseek_v3" and quant_method == "fp8":
-        config = copy.deepcopy(
+        config = copy.deepcopy(DEFAULT_DEEPSEEK_FP4_MLP_MOE_FP8_ATTN_CONFIG)
 
         # Dynamically fetch block size from HF config if available
         # Config fmt: 'weight_block_size': [1, 512] -> we want the 2nd dim for tile_size
@@ -462,7 +462,7 @@ def get_default_qwix_quantization_config(
         block_size = hf_quant_config["weight_block_size"]
         if isinstance(block_size, (list, tuple)) and len(block_size) == 2:
             assert block_size[
-                0] == 1, f"Expected first dimension to be 1 (unchanneled), but got {block_size[0]}!"
+                0] == 1, f"Expected first dimension to be 1 (unchanneled), but got {block_size[0]}! If you are trying to run quantized DeepSeek, we currently only support 1D-subchannel quantization and those models can be found here: https://huggingface.co/collections/jrplatin/deepseek-r1-1d-subchannel"
             tile_size = block_size[1]
             assert tile_size > 1, f"Expected tile_size > 1 for DeepSeek, but got {tile_size}"
             logger.info(
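The check whose error message is expanded above boils down to a small rule on the HF quantization config: weight_block_size must be [1, N], and N becomes the tile size. A standalone sketch of that logic (the helper name and the sample config dict are illustrative, not from the package; only the [1, N] rule and the tile_size extraction mirror the diffed code):

# Standalone sketch of the 1D-subchannel block-size check described above.
def derive_tile_size(hf_quant_config: dict) -> int:
    block_size = hf_quant_config["weight_block_size"]
    assert isinstance(block_size, (list, tuple)) and len(block_size) == 2
    # Only 1D-subchannel quantization is supported: the first dimension must be 1.
    assert block_size[0] == 1, (
        f"Expected first dimension to be 1 (unchanneled), but got {block_size[0]}!")
    tile_size = block_size[1]  # e.g. 'weight_block_size': [1, 512] -> 512
    assert tile_size > 1, f"Expected tile_size > 1, but got {tile_size}"
    return tile_size


print(derive_tile_size({"weight_block_size": [1, 512]}))  # -> 512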
{tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/kv_cache_manager.py
RENAMED
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import functools
-from typing import TYPE_CHECKING,
+from typing import TYPE_CHECKING, List
 
 import jax
 import jax.numpy as jnp
@@ -212,7 +212,6 @@ class KVCacheManager:
         # uniform page size.
         representative_spec = kv_cache_config.kv_cache_groups[0].kv_cache_spec
         page_size_bytes = representative_spec.page_size_bytes
-        self.runner.layer_name_to_kvcache_index: Dict[str, int] = {}
         kv_caches = self.runner.kv_caches
         num_blocks_list = []
         for i, kv_cache_tensor in enumerate(kv_cache_config.kv_cache_tensors):
{tpu_inference-0.13.2rc1 → tpu_inference-0.13.2rc3}/tpu_inference/runner/tpu_runner.py
RENAMED
@@ -282,6 +282,9 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
         self._substitute_placeholder_token_fn = _substitute_placeholder_token
         self.execute_model_state: ExecuteModelState | None = None
 
+        self.kv_caches: list[jax.Array] = []
+        self.layer_name_to_kvcache_index: dict[str, int] = {}
+
     def _init_random(self):
         if self.model_config.seed is None:
             self.model_config.seed = 0
@@ -545,7 +548,6 @@ class TPUModelRunner(KVConnectorModelRunnerMixin, LoRAModelRunnerMixin):
         self.topology_order_id = topology_order_id
         self.kv_cache_config = kv_cache_config
         self.use_hybrid_kvcache = len(kv_cache_config.kv_cache_groups) > 1
-        self.kv_caches = []
         self.kv_cache_manager.initialize_kv_cache(kv_cache_config)
         if has_kv_transfer_group():
             get_kv_transfer_group().register_runner(self)
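Taken together, the kv_cache_manager.py and tpu_runner.py hunks move ownership of the KV-cache containers into the runner's constructor: the runner now declares kv_caches and layer_name_to_kvcache_index up front, and the manager only fills containers that already exist. A heavily simplified sketch of the resulting shape (the classes, arguments, and the placeholder allocation below are stand-ins, not the real signatures):

# Simplified sketch of the post-rc3 ownership split between runner and manager.
import jax
import jax.numpy as jnp


class KVCacheManager:
    def __init__(self, runner: "TPUModelRunner"):
        self.runner = runner

    def initialize_kv_cache(self, layer_names) -> None:
        # rc3: the manager no longer creates runner.layer_name_to_kvcache_index;
        # it only populates containers the runner already owns.
        kv_caches = self.runner.kv_caches
        for index, layer_name in enumerate(layer_names):
            self.runner.layer_name_to_kvcache_index[layer_name] = index
            kv_caches.append(jnp.zeros((1,)))  # placeholder allocation


class TPUModelRunner:
    def __init__(self):
        # rc3 declares both containers in __init__, so they exist (and are typed)
        # before initialize_kv_cache() runs.
        self.kv_caches: list[jax.Array] = []
        self.layer_name_to_kvcache_index: dict[str, int] = {}
        self.kv_cache_manager = KVCacheManager(self)

    def initialize_kv_cache(self, layer_names) -> None:
        # The old `self.kv_caches = []` reset at this point is gone.
        self.kv_cache_manager.initialize_kv_cache(layer_names)


runner = TPUModelRunner()
runner.initialize_kv_cache(["layers.0.attn", "layers.1.attn"])
print(runner.layer_name_to_kvcache_index)  # {'layers.0.attn': 0, 'layers.1.attn': 1}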
tpu_inference-0.13.2rc1/requirements_v7x.txt
DELETED
@@ -1,25 +0,0 @@
-# This file contains additional dependencies needed for TPU v7x support.
-# It is expected to be used in conjunction with the main requirements.txt file.
---pre
--i https://us-python.pkg.dev/ml-oss-artifacts-published/jax/simple/
--f https://storage.googleapis.com/jax-releases/libtpu_releases.html
-jax==0.8.1
-jaxlib==0.8.1
-jaxtyping==0.3.2
-libtpu==0.0.31
-
-tpu-info==0.7.1
-yapf==0.43.0
-pytest
-pytest-mock
-absl-py
-numpy
-google-cloud-storage
-flax==0.11.1
-torchax==0.0.10
-qwix==0.1.1
-torchvision==0.24.0
-pathwaysutils
-parameterized
-numba==0.62.1
-runai-model-streamer[s3,gcs]==0.15.0